All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
topitch.cpp File Reference
#include "stderr.h"
#include "blobbox.h"
#include "statistc.h"
#include "drawtord.h"
#include "makerow.h"
#include "pitsync1.h"
#include "pithsync.h"
#include "tovars.h"
#include "wordseg.h"
#include "topitch.h"
#include "helpers.h"

Go to the source code of this file.

Macros

#define EXTERN
 
#define FIXED_WIDTH_MULTIPLE   5
 
#define BLOCK_STATS_CLUSTERS   10
 
#define MAX_ALLOWED_PITCH   100
 

Functions

void compute_fixed_pitch (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, BOOL8 testing_on)
 
void fix_row_pitch (TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, inT32 row_target, inT32 block_target)
 
void compute_block_pitch (TO_BLOCK *block, FCOORD rotation, inT32 block_index, BOOL8 testing_on)
 
BOOL8 compute_rows_pitch (TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
 
BOOL8 try_doc_fixed (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
 
BOOL8 try_block_fixed (TO_BLOCK *block, inT32 block_index)
 
BOOL8 try_rows_fixed (TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
 
void print_block_counts (TO_BLOCK *block, inT32 block_index)
 
void count_block_votes (TO_BLOCK *block, inT32 &def_fixed, inT32 &def_prop, inT32 &maybe_fixed, inT32 &maybe_prop, inT32 &corr_fixed, inT32 &corr_prop, inT32 &dunno)
 
BOOL8 row_pitch_stats (TO_ROW *row, inT32 maxwidth, BOOL8 testing_on)
 
BOOL8 find_row_pitch (TO_ROW *row, inT32 maxwidth, inT32 dm_gap, TO_BLOCK *block, inT32 block_index, inT32 row_index, BOOL8 testing_on)
 
BOOL8 fixed_pitch_row (TO_ROW *row, BLOCK *block, inT32 block_index)
 
BOOL8 count_pitch_stats (TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, BOOL8 ignore_outsize, BOOL8 split_outsize, inT32 dm_gap)
 
float tune_row_pitch (TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
 
float tune_row_pitch2 (TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
 
float compute_pitch_sd (TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch, float &sp_sd, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
 
float compute_pitch_sd2 (TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float initial_pitch, inT16 &occupation, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
 
void print_pitch_sd (TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch)
 
void find_repeated_chars (TO_BLOCK *block, BOOL8 testing_on)
 
void plot_fp_word (TO_BLOCK *block, float pitch, float nonspace)
 

Variables

EXTERN bool textord_all_prop = FALSE
 
EXTERN bool textord_debug_pitch_test = FALSE
 
EXTERN bool textord_disable_pitch_test = FALSE
 
EXTERN bool textord_fast_pitch_test = FALSE
 
EXTERN bool textord_debug_pitch_metric = FALSE
 
EXTERN bool textord_show_row_cuts = FALSE
 
EXTERN bool textord_show_page_cuts = FALSE
 
EXTERN bool textord_pitch_cheat = FALSE
 
EXTERN bool textord_blockndoc_fixed = FALSE
 
EXTERN double textord_projection_scale = 0.200
 
EXTERN double textord_balance_factor = 1.0
 

Macro Definition Documentation

#define BLOCK_STATS_CLUSTERS   10

Definition at line 62 of file topitch.cpp.

#define EXTERN

Definition at line 40 of file topitch.cpp.

#define FIXED_WIDTH_MULTIPLE   5

Definition at line 61 of file topitch.cpp.

#define MAX_ALLOWED_PITCH   100

Definition at line 63 of file topitch.cpp.

Function Documentation

void compute_block_pitch ( TO_BLOCK *  block,
FCOORD  rotation,
inT32  block_index,
BOOL8  testing_on 
)

Definition at line 308 of file topitch.cpp.

311  { // correct orientation
312  TBOX block_box; //bounding box
313 
314  block_box = block->block->bounding_box ();
315  if (testing_on && textord_debug_pitch_test) {
316  tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
317  block_index,
318  block_box.left (), block_box.bottom (),
319  block_box.right (), block_box.top ());
320  }
321  block->min_space = (inT32) floor (block->xheight
322  * textord_words_default_minspace);
323  block->max_nonspace = (inT32) ceil (block->xheight
324  * textord_words_default_nonspace);
325  block->fixed_pitch = 0.0f;
326  block->space_size = (float) block->min_space;
327  block->kern_size = (float) block->max_nonspace;
328  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
329  block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
330  if (!block->get_rows ()->empty ()) {
331  ASSERT_HOST (block->xheight > 0);
332  find_repeated_chars(block, textord_show_initial_words && testing_on);
333 #ifndef GRAPHICS_DISABLED
334  if (textord_show_initial_words && testing_on)
335  //overlap_picture_ops(TRUE);
336  ScrollView::Update();
337 #endif
338  compute_rows_pitch(block,
339  block_index,
340  textord_debug_pitch_test &&testing_on);
341  }
342 }
EXTERN double words_default_prop_nonspace
Definition: tovars.cpp:72
static void Update()
Definition: scrollview.cpp:715
#define tprintf(...)
Definition: tprintf.h:31
float space_size
Definition: blobbox.h:787
EXTERN double textord_words_default_minspace
Definition: tovars.cpp:48
EXTERN double textord_words_default_nonspace
Definition: tovars.cpp:51
float fixed_pitch
Definition: blobbox.h:785
inT16 right() const
Definition: rect.h:75
#define ASSERT_HOST(x)
Definition: errcode.h:84
EXTERN bool textord_debug_pitch_test
Definition: topitch.cpp:44
float xheight
Definition: blobbox.h:784
EXTERN double textord_spacesize_ratioprop
Definition: tovars.cpp:80
float kern_size
Definition: blobbox.h:786
inT16 left() const
Definition: rect.h:68
float pr_nonsp
Definition: blobbox.h:793
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
inT16 bottom() const
Definition: rect.h:61
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
inT32 min_space
Definition: blobbox.h:788
Definition: rect.h:30
void find_repeated_chars(TO_BLOCK *block, BOOL8 testing_on)
Definition: topitch.cpp:1758
BOOL8 compute_rows_pitch(TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
Definition: topitch.cpp:351
float pr_space
Definition: blobbox.h:792
inT16 top() const
Definition: rect.h:54
inT32 max_nonspace
Definition: blobbox.h:789
BLOCK * block
Definition: blobbox.h:773
int inT32
Definition: host.h:102
void compute_fixed_pitch ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 73 of file topitch.cpp.

77  { // correct orientation
78  TO_BLOCK_IT block_it; //iterator
79  TO_BLOCK *block; //current block;
80  TO_ROW_IT row_it; //row iterator
81  TO_ROW *row; //current row
82  int block_index; //block number
83  int row_index; //row number
84 
85 #ifndef GRAPHICS_DISABLED
86  if (textord_show_initial_words && testing_on) {
87  if (to_win == NULL)
88  create_to_win(page_tr);
89  }
90 #endif
91 
92  block_it.set_to_list (port_blocks);
93  block_index = 1;
94  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
95  block_it.forward ()) {
96  block = block_it.data ();
97  compute_block_pitch(block, rotation, block_index, testing_on);
98  block_index++;
99  }
100 
101  if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
102  block_index = 1;
103  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
104  block_it.forward ()) {
105  block = block_it.data ();
106  if (!try_block_fixed (block, block_index))
107  try_rows_fixed(block, block_index, testing_on);
108  block_index++;
109  }
110  }
111 
112  block_index = 1;
113  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
114  block_it.forward()) {
115  block = block_it.data ();
116  POLY_BLOCK* pb = block->block->poly_block();
117  if (pb != NULL && !pb->IsText()) continue; // Non-text doesn't exist!
118  row_it.set_to_list (block->get_rows ());
119  row_index = 1;
120  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
121  row = row_it.data ();
122  fix_row_pitch(row, block, port_blocks, row_index, block_index);
123  row_index++;
124  }
125  block_index++;
126  }
127 #ifndef GRAPHICS_DISABLED
128  if (textord_show_initial_words && testing_on) {
129  ScrollView::Update();
130  }
131 #endif
132 }
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
static void Update()
Definition: scrollview.cpp:715
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, inT32 row_target, inT32 block_target)
Definition: topitch.cpp:142
BOOL8 try_rows_fixed(TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
Definition: topitch.cpp:549
bool IsText() const
Definition: polyblk.h:52
BOOL8 try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
Definition: topitch.cpp:395
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:25
BOOL8 try_block_fixed(TO_BLOCK *block, inT32 block_index)
Definition: topitch.cpp:535
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
#define NULL
Definition: host.h:144
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, inT32 block_index, BOOL8 testing_on)
Definition: topitch.cpp:308
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
BLOCK * block
Definition: blobbox.h:773
float compute_pitch_sd ( TO_ROW *  row,
STATS *  projection,
inT16  projection_left,
inT16  projection_right,
float  space_size,
float  initial_pitch,
float &  sp_sd,
inT16 &  mid_cuts,
ICOORDELT_LIST *  row_cells,
BOOL8  testing_on,
inT16  start,
inT16  end 
)

Definition at line 1375 of file topitch.cpp.

1388  {
1389  inT16 occupation; //no of cells in word.
1390  //blobs
1391  BLOBNBOX_IT blob_it = row->blob_list ();
1392  BLOBNBOX_IT start_it; //start of word
1393  BLOBNBOX_IT plot_it; //for plotting
1394  inT16 blob_count; //no of blobs
1395  TBOX blob_box; //bounding box
1396  TBOX prev_box; //of super blob
1397  inT32 prev_right; //of word sync
1398  int scale_factor; //on scores for big words
1399  inT32 sp_count; //spaces
1400  FPSEGPT_LIST seg_list; //char cells
1401  FPSEGPT_IT seg_it; //iterator
1402  inT16 segpos; //position of segment
1403  inT16 cellpos; //previous cell boundary
1404  //iterator
1405  ICOORDELT_IT cell_it = row_cells;
1406  ICOORDELT *cell; //new cell
1407  double sqsum; //sum of squares
1408  double spsum; //of spaces
1409  double sp_var; //space error
1410  double word_sync; //result for word
1411  inT32 total_count; //total blobs
1412 
1413  if ((pitsync_linear_version & 3) > 1) {
1414  word_sync = compute_pitch_sd2 (row, projection, projection_left,
1415  projection_right, initial_pitch,
1416  occupation, mid_cuts, row_cells,
1417  testing_on, start, end);
1418  sp_sd = occupation;
1419  return word_sync;
1420  }
1421  mid_cuts = 0;
1422  cellpos = 0;
1423  total_count = 0;
1424  sqsum = 0;
1425  sp_count = 0;
1426  spsum = 0;
1427  prev_right = -1;
1428  if (blob_it.empty ())
1429  return space_size * 10;
1430 #ifndef GRAPHICS_DISABLED
1431  if (testing_on && to_win != NULL) {
1432  blob_box = blob_it.data ()->bounding_box ();
1433  projection->plot (to_win, projection_left,
1434  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1435  }
1436 #endif
1437  start_it = blob_it;
1438  blob_count = 0;
1439  blob_box = box_next (&blob_it);//first blob
1440  blob_it.mark_cycle_pt ();
1441  do {
1442  for (; blob_count > 0; blob_count--)
1443  box_next(&start_it);
1444  do {
1445  prev_box = blob_box;
1446  blob_count++;
1447  blob_box = box_next (&blob_it);
1448  }
1449  while (!blob_it.cycled_list ()
1450  && blob_box.left () - prev_box.right () < space_size);
1451  plot_it = start_it;
1452  if (pitsync_linear_version & 3)
1453  word_sync =
1454  check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
1455  projection, projection_left, projection_right,
1456  textord_projection_scale,
1457  occupation, &seg_list, start, end);
1458  else
1459  word_sync =
1460  check_pitch_sync (&start_it, blob_count, (inT16) initial_pitch, 2,
1461  projection, &seg_list);
1462  if (testing_on) {
1463  tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
1464  prev_box.right (), prev_box.top (),
1465  seg_list.length () - 1, word_sync);
1466  seg_it.set_to_list (&seg_list);
1467  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1468  seg_it.forward ()) {
1469  if (seg_it.data ()->faked)
1470  tprintf ("(F)");
1471  tprintf ("%d, ", seg_it.data ()->position ());
1472  // tprintf("C=%g, s=%g, sq=%g\n",
1473  // seg_it.data()->cost_function(),
1474  // seg_it.data()->sum(),
1475  // seg_it.data()->squares());
1476  }
1477  tprintf ("\n");
1478  }
1479 #ifndef GRAPHICS_DISABLED
1480  if (textord_show_fixed_cuts && blob_count > 0 && to_win != NULL)
1481  plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1482 #endif
1483  seg_it.set_to_list (&seg_list);
1484  if (prev_right >= 0) {
1485  sp_var = seg_it.data ()->position () - prev_right;
1486  sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1487  sp_var *= sp_var;
1488  spsum += sp_var;
1489  sp_count++;
1490  }
1491  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1492  segpos = seg_it.data ()->position ();
1493  if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1494  //big gap
1495  while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1496  cell = new ICOORDELT (cellpos + (inT16) initial_pitch, 0);
1497  cell_it.add_after_then_move (cell);
1498  cellpos += (inT16) initial_pitch;
1499  }
1500  //make new one
1501  cell = new ICOORDELT (segpos, 0);
1502  cell_it.add_after_then_move (cell);
1503  cellpos = segpos;
1504  }
1505  else if (segpos > cellpos - initial_pitch / 2) {
1506  cell = cell_it.data ();
1507  //average positions
1508  cell->set_x ((cellpos + segpos) / 2);
1509  cellpos = cell->x ();
1510  }
1511  }
1512  seg_it.move_to_last ();
1513  prev_right = seg_it.data ()->position ();
1514  if (textord_pitch_scalebigwords) {
1515  scale_factor = (seg_list.length () - 2) / 2;
1516  if (scale_factor < 1)
1517  scale_factor = 1;
1518  }
1519  else
1520  scale_factor = 1;
1521  sqsum += word_sync * scale_factor;
1522  total_count += (seg_list.length () - 1) * scale_factor;
1523  seg_list.clear ();
1524  }
1525  while (!blob_it.cycled_list ());
1526  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1527  return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1528 }
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
EXTERN bool textord_pitch_scalebigwords
Definition: tovars.cpp:69
#define tprintf(...)
Definition: tprintf.h:31
float intercept() const
Definition: blobbox.h:584
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
EXTERN double textord_projection_scale
Definition: topitch.cpp:57
double check_pitch_sync(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:148
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
inT16 left() const
Definition: rect.h:68
EXTERN bool textord_show_fixed_cuts
Definition: drawtord.cpp:36
double check_pitch_sync2(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, inT16 projection_left, inT16 projection_right, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
Definition: pithsync.cpp:298
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:629
float compute_pitch_sd2(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float initial_pitch, inT16 &occupation, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
Definition: topitch.cpp:1538
inT16 x() const
access function
Definition: points.h:52
Definition: rect.h:30
#define NULL
Definition: host.h:144
float xheight
Definition: blobbox.h:653
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:589
inT16 top() const
Definition: rect.h:54
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:364
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
float compute_pitch_sd2 ( TO_ROW *  row,
STATS *  projection,
inT16  projection_left,
inT16  projection_right,
float  initial_pitch,
inT16 &  occupation,
inT16 &  mid_cuts,
ICOORDELT_LIST *  row_cells,
BOOL8  testing_on,
inT16  start,
inT16  end 
)

Definition at line 1538 of file topitch.cpp.

1550  {
1551  //blobs
1552  BLOBNBOX_IT blob_it = row->blob_list ();
1553  BLOBNBOX_IT plot_it;
1554  inT16 blob_count; //no of blobs
1555  TBOX blob_box; //bounding box
1556  FPSEGPT_LIST seg_list; //char cells
1557  FPSEGPT_IT seg_it; //iterator
1558  inT16 segpos; //position of segment
1559  //iterator
1560  ICOORDELT_IT cell_it = row_cells;
1561  ICOORDELT *cell; //new cell
1562  double word_sync; //result for word
1563 
1564  mid_cuts = 0;
1565  if (blob_it.empty ()) {
1566  occupation = 0;
1567  return initial_pitch * 10;
1568  }
1569 #ifndef GRAPHICS_DISABLED
1570  if (testing_on && to_win != NULL) {
1571  projection->plot (to_win, projection_left,
1572  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1573  }
1574 #endif
1575  blob_count = 0;
1576  blob_it.mark_cycle_pt ();
1577  do {
1578  //first blob
1579  blob_box = box_next (&blob_it);
1580  blob_count++;
1581  }
1582  while (!blob_it.cycled_list ());
1583  plot_it = blob_it;
1584  word_sync = check_pitch_sync2 (&blob_it, blob_count, (inT16) initial_pitch,
1585  2, projection, projection_left,
1586  projection_right,
1587  textord_projection_scale,
1588  occupation, &seg_list, start, end);
1589  if (testing_on) {
1590  tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
1591  blob_box.right (), blob_box.top (),
1592  seg_list.length () - 1, word_sync);
1593  seg_it.set_to_list (&seg_list);
1594  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1595  if (seg_it.data ()->faked)
1596  tprintf ("(F)");
1597  tprintf ("%d, ", seg_it.data ()->position ());
1598  // tprintf("C=%g, s=%g, sq=%g\n",
1599  // seg_it.data()->cost_function(),
1600  // seg_it.data()->sum(),
1601  // seg_it.data()->squares());
1602  }
1603  tprintf ("\n");
1604  }
1605 #ifndef GRAPHICS_DISABLED
1606  if (textord_show_fixed_cuts && blob_count > 0 && to_win != NULL)
1607  plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1608 #endif
1609  seg_it.set_to_list (&seg_list);
1610  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1611  segpos = seg_it.data ()->position ();
1612  //make new one
1613  cell = new ICOORDELT (segpos, 0);
1614  cell_it.add_after_then_move (cell);
1615  if (seg_it.at_last ())
1616  mid_cuts = seg_it.data ()->cheap_cuts ();
1617  }
1618  seg_list.clear ();
1619  return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1620 }
#define tprintf(...)
Definition: tprintf.h:31
float intercept() const
Definition: blobbox.h:584
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
EXTERN double textord_projection_scale
Definition: topitch.cpp:57
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
EXTERN bool textord_show_fixed_cuts
Definition: drawtord.cpp:36
double check_pitch_sync2(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, inT16 projection_left, inT16 projection_right, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
Definition: pithsync.cpp:298
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:629
Definition: rect.h:30
#define NULL
Definition: host.h:144
float xheight
Definition: blobbox.h:653
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:589
inT16 top() const
Definition: rect.h:54
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:364
short inT16
Definition: host.h:100
BOOL8 compute_rows_pitch ( TO_BLOCK *  block,
inT32  block_index,
BOOL8  testing_on 
)

Definition at line 351 of file topitch.cpp.

355  {
356  inT32 maxwidth; //of spaces
357  TO_ROW *row; //current row
358  inT32 row_index; //row number.
359  float lower, upper; //cluster thresholds
360  TO_ROW_IT row_it = block->get_rows ();
361 
362  row_index = 1;
363  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
364  row = row_it.data ();
365  ASSERT_HOST (row->xheight > 0);
366  row->compute_vertical_projection ();
367  maxwidth = (inT32) ceil (row->xheight * textord_words_maxspace);
368  if (row_pitch_stats (row, maxwidth, testing_on)
369  && find_row_pitch (row, maxwidth,
370  textord_dotmatrix_gap + 1, block, block_index,
371  row_index, testing_on)) {
372  if (row->fixed_pitch == 0) {
373  lower = row->pr_nonsp;
374  upper = row->pr_space;
375  row->space_size = upper;
376  row->kern_size = lower;
377  }
378  }
379  else {
380  row->fixed_pitch = 0.0f; //insufficient data
381  row->pitch_decision = PITCH_DUNNO;
382  }
383  row_index++;
384  }
385  return FALSE;
386 }
float kern_size
Definition: blobbox.h:662
BOOL8 row_pitch_stats(TO_ROW *row, inT32 maxwidth, BOOL8 testing_on)
Definition: topitch.cpp:701
#define ASSERT_HOST(x)
Definition: errcode.h:84
float fixed_pitch
Definition: blobbox.h:647
EXTERN int textord_dotmatrix_gap
Definition: tovars.cpp:35
EXTERN double textord_words_maxspace
Definition: tovars.cpp:44
float pr_nonsp
Definition: blobbox.h:651
BOOL8 find_row_pitch(TO_ROW *row, inT32 maxwidth, inT32 dm_gap, TO_BLOCK *block, inT32 block_index, inT32 row_index, BOOL8 testing_on)
Definition: topitch.cpp:834
float pr_space
Definition: blobbox.h:650
void compute_vertical_projection()
Definition: blobbox.cpp:789
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float space_size
Definition: blobbox.h:663
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
#define FALSE
Definition: capi.h:29
float xheight
Definition: blobbox.h:653
int inT32
Definition: host.h:102
void count_block_votes ( TO_BLOCK *  block,
inT32 &  def_fixed,
inT32 &  def_prop,
inT32 &  maybe_fixed,
inT32 &  maybe_prop,
inT32 &  corr_fixed,
inT32 &  corr_prop,
inT32 &  dunno 
)

Definition at line 654 of file topitch.cpp.

662  {
663  TO_ROW *row; //current row
664  TO_ROW_IT row_it = block->get_rows ();
665 
666  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
667  row = row_it.data ();
668  switch (row->pitch_decision) {
669  case PITCH_DUNNO:
670  dunno++;
671  break;
672  case PITCH_DEF_PROP:
673  def_prop++;
674  break;
675  case PITCH_MAYBE_PROP:
676  maybe_prop++;
677  break;
678  case PITCH_DEF_FIXED:
679  def_fixed++;
680  break;
681  case PITCH_MAYBE_FIXED:
682  maybe_fixed++;
683  break;
684  case PITCH_CORR_PROP:
685  corr_prop++;
686  break;
687  case PITCH_CORR_FIXED:
688  corr_fixed++;
689  break;
690  }
691  }
692 }
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
BOOL8 count_pitch_stats ( TO_ROW *  row,
STATS *  gap_stats,
STATS *  pitch_stats,
float  initial_pitch,
float  min_space,
BOOL8  ignore_outsize,
BOOL8  split_outsize,
inT32  dm_gap 
)

Definition at line 1058 of file topitch.cpp.

1067  {
1068  BOOL8 prev_valid; //not word broken
1069  BLOBNBOX *blob; //current blob
1070  //blobs
1071  BLOBNBOX_IT blob_it = row->blob_list ();
1072  inT32 prev_right; //end of prev blob
1073  inT32 prev_centre; //centre of previous blob
1074  inT32 x_centre; //centre of this blob
1075  inT32 blob_width; //width of blob
1076  inT32 width_units; //no of widths in blob
1077  float width; //blob width
1078  TBOX blob_box; //bounding box
1079  TBOX joined_box; //of super blob
1080 
1081  gap_stats->clear ();
1082  pitch_stats->clear ();
1083  if (blob_it.empty ())
1084  return FALSE;
1085  prev_valid = FALSE;
1086  prev_centre = 0;
1087  prev_right = 0; //stop compiler warning
1088  joined_box = blob_it.data ()->bounding_box ();
1089  do {
1090  blob_it.forward ();
1091  blob = blob_it.data ();
1092  if (!blob->joined_to_prev ()) {
1093  blob_box = blob->bounding_box ();
1094  if ((blob_box.left () - joined_box.right () < dm_gap
1095  && !blob_it.at_first ())
1096  || blob->cblob() == NULL)
1097  joined_box += blob_box; //merge blobs
1098  else {
1099  blob_width = joined_box.width ();
1100  if (split_outsize) {
1101  width_units =
1102  (inT32) floor ((float) blob_width / initial_pitch + 0.5);
1103  if (width_units < 1)
1104  width_units = 1;
1105  width_units--;
1106  }
1107  else if (ignore_outsize) {
1108  width = (float) blob_width / initial_pitch;
1109  width_units = width < 1 + words_default_fixed_limit
1110  && width > 1 - words_default_fixed_limit ? 0 : -1;
1111  }
1112  else
1113  width_units = 0; //everything in
1114  x_centre = (inT32) (joined_box.left ()
1115  + (blob_width -
1116  width_units * initial_pitch) / 2);
1117  if (prev_valid && width_units >= 0) {
1118  // if (width_units>0)
1119  // {
1120  // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
1121  // width_units,blob_width,x_centre,x_centre-prev_centre);
1122  // }
1123  gap_stats->add (joined_box.left () - prev_right, 1);
1124  pitch_stats->add (x_centre - prev_centre, 1);
1125  }
1126  prev_centre = (inT32) (x_centre + width_units * initial_pitch);
1127  prev_right = joined_box.right ();
1128  prev_valid = blob_box.left () - joined_box.right () < min_space;
1129  prev_valid = prev_valid && width_units >= 0;
1130  joined_box = blob_box;
1131  }
1132  }
1133  }
1134  while (!blob_it.at_first ());
1135  return gap_stats->get_total () >= 3;
1136 }
inT32 get_total() const
Definition: statistc.h:86
bool joined_to_prev() const
Definition: blobbox.h:241
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
unsigned char BOOL8
Definition: host.h:113
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 left() const
Definition: rect.h:68
C_BLOB * cblob() const
Definition: blobbox.h:253
inT16 width() const
Definition: rect.h:111
#define FALSE
Definition: capi.h:29
Definition: rect.h:30
EXTERN double words_default_fixed_limit
Definition: tovars.cpp:74
void clear()
Definition: statistc.cpp:81
#define NULL
Definition: host.h:144
const TBOX & bounding_box() const
Definition: blobbox.h:215
int inT32
Definition: host.h:102
void find_repeated_chars ( TO_BLOCK *  block,
BOOL8  testing_on 
)

Definition at line 1758 of file topitch.cpp.

1759  { // Debug mode.
1760  POLY_BLOCK* pb = block->block->poly_block();
1761  if (pb != NULL && !pb->IsText())
1762  return; // Don't find repeated chars in non-text blocks.
1763 
1764  TO_ROW *row;
1765  BLOBNBOX_IT box_it;
1766  BLOBNBOX_IT search_it; // forward search
1767  WERD_IT word_it; // new words
1768  WERD *word; // new word
1769  TBOX word_box; // for plotting
1770  int blobcount, repeated_set;
1771 
1772  TO_ROW_IT row_it = block->get_rows();
1773  if (row_it.empty()) return; // empty block
1774  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1775  row = row_it.data();
1776  box_it.set_to_list(row->blob_list());
1777  if (box_it.empty()) continue; // no blobs in this row
1778  if (!row->rep_chars_marked()) {
1779  mark_repeated_chars(row);
1780  }
1781  if (row->num_repeated_sets() == 0) continue; // nothing to do for this row
1782  word_it.set_to_list(&row->rep_words);
1783  do {
1784  if (box_it.data()->repeated_set() != 0 &&
1785  !box_it.data()->joined_to_prev()) {
1786  blobcount = 1;
1787  repeated_set = box_it.data()->repeated_set();
1788  search_it = box_it;
1789  search_it.forward();
1790  while (!search_it.at_first() &&
1791  search_it.data()->repeated_set() == repeated_set) {
1792  blobcount++;
1793  search_it.forward();
1794  }
1795  // After the call to make_real_word() all the blobs from this
1796  // repeated set will be removed from the blob list. box_it will be
1797  // set to point to the blob after the end of the extracted sequence.
1798  word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
1799  if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1800  tprintf("Bad box joined to prev at");
1801  box_it.data()->bounding_box().print();
1802  tprintf("After repeated word:");
1803  word->bounding_box().print();
1804  }
1805  ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1806  word->set_flag(W_REP_CHAR, true);
1807  word->set_flag(W_DONT_CHOP, true);
1808  word_it.add_after_then_move(word);
1809  } else {
1810  box_it.forward();
1811  }
1812  } while (!box_it.at_first());
1813  }
1814 }
WERD * make_real_word(BLOBNBOX_IT *box_it, inT32 blobcount, BOOL8 bol, uinT8 blanks)
Definition: wordseg.cpp:594
#define tprintf(...)
Definition: tprintf.h:31
void print() const
Definition: rect.h:270
int num_repeated_sets() const
Definition: blobbox.h:633
TBOX bounding_box() const
Definition: werd.cpp:160
bool IsText() const
Definition: polyblk.h:52
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
#define ASSERT_HOST(x)
Definition: errcode.h:84
WERD_LIST rep_words
Definition: blobbox.h:664
bool rep_chars_marked() const
Definition: blobbox.h:627
Definition: werd.h:60
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: rect.h:30
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2671
#define NULL
Definition: host.h:144
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
BLOCK * block
Definition: blobbox.h:773
BOOL8 find_row_pitch ( TO_ROW *  row,
inT32  maxwidth,
inT32  dm_gap,
TO_BLOCK *  block,
inT32  block_index,
inT32  row_index,
BOOL8  testing_on 
)

Definition at line 834 of file topitch.cpp.

842  {
843  BOOL8 used_dm_model; //looks like dot matrix
844  float min_space; //estimate threshold
845  float non_space; //gap size
846  float gap_iqr; //interquartile range
847  float pitch_iqr;
848  float dm_gap_iqr; //interquartile range
849  float dm_pitch_iqr;
850  float dm_pitch; //pitch with dm on
851  float pitch; //revised estimate
852  float initial_pitch; //guess at pitch
853  STATS gap_stats (0, maxwidth);
854  //centre-centre
855  STATS pitch_stats (0, maxwidth);
856 
857  row->fixed_pitch = 0.0f;
858  initial_pitch = row->fp_space;
859  if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
860  initial_pitch = row->xheight;//keep pitch decent
861  non_space = row->fp_nonsp;
862  if (non_space > initial_pitch)
863  non_space = initial_pitch;
864  min_space = (initial_pitch + non_space) / 2;
865 
866  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
867  initial_pitch, min_space, TRUE, FALSE, dm_gap)) {
868  dm_gap_iqr = 0.0001;
869  dm_pitch_iqr = maxwidth * 2.0f;
870  dm_pitch = initial_pitch;
871  }
872  else {
873  dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
874  dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
875  dm_pitch = pitch_stats.ile (0.5);
876  }
877  gap_stats.clear ();
878  pitch_stats.clear ();
879  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
880  initial_pitch, min_space, TRUE, FALSE, 0)) {
881  gap_iqr = 0.0001;
882  pitch_iqr = maxwidth * 3.0f;
883  }
884  else {
885  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
886  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
887  if (testing_on)
888  tprintf
889  ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
890  initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
891  initial_pitch = pitch_stats.ile (0.5);
892  if (min_space > initial_pitch
893  && count_pitch_stats (row, &gap_stats, &pitch_stats,
894  initial_pitch, initial_pitch, TRUE, FALSE, 0)) {
895  min_space = initial_pitch;
896  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
897  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
898  if (testing_on)
899  tprintf
900  ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
901  initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
902  initial_pitch = pitch_stats.ile (0.5);
903  }
904  }
905  if (textord_debug_pitch_metric)
906  tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
907  block_index, row_index, 'X',
908  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
909  pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
910  (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
911  if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
912  row->pitch_decision = PITCH_DUNNO;
913  if (textord_debug_pitch_metric)
914  tprintf ("\n");
915  return FALSE; //insufficient data
916  }
917  if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
918  if (testing_on)
919  tprintf
920  ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
921  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
922  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
923  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
924  pitch = pitch_stats.ile (0.5);
925  used_dm_model = FALSE;
926  }
927  else {
928  if (testing_on)
929  tprintf
930  ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
931  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
932  gap_iqr = dm_gap_iqr;
933  pitch_iqr = dm_pitch_iqr;
934  pitch = dm_pitch;
935  used_dm_model = TRUE;
936  }
937  if (textord_debug_pitch_metric) {
938  tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
939  pitch_iqr, gap_iqr, pitch);
940  tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
941  pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
942  pitch_iqr < gap_iqr * textord_fpiqr_ratio
943  && pitch_iqr < block->xheight * textord_max_pitch_iqr
944  && pitch < block->xheight * textord_words_default_maxspace
945  ? 'F' : 'P');
946  }
947  if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
948  && pitch_iqr < block->xheight * textord_max_pitch_iqr
949  && pitch < block->xheight * textord_words_default_maxspace)
950  row->pitch_decision = PITCH_MAYBE_FIXED;
951  else
952  row->pitch_decision = PITCH_MAYBE_PROP;
953  row->fixed_pitch = pitch;
954  row->kern_size = gap_stats.ile (0.5);
955  row->min_space = (inT32) (row->fixed_pitch + non_space) / 2;
956  if (row->min_space > row->fixed_pitch)
957  row->min_space = (inT32) row->fixed_pitch;
958  row->max_nonspace = row->min_space;
959  row->space_size = row->fixed_pitch;
960  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
961  row->used_dm_model = used_dm_model;
962  return TRUE;
963 }
float kern_size
Definition: blobbox.h:662
EXTERN double textord_words_default_maxspace
Definition: tovars.cpp:46
inT32 min_space
Definition: blobbox.h:659
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
unsigned char BOOL8
Definition: host.h:113
EXTERN bool textord_debug_pitch_metric
Definition: topitch.cpp:50
float fixed_pitch
Definition: blobbox.h:647
BOOL8 used_dm_model
Definition: blobbox.h:643
EXTERN double textord_fpiqr_ratio
Definition: tovars.cpp:81
float xheight
Definition: blobbox.h:784
EXTERN double textord_max_pitch_iqr
Definition: tovars.cpp:82
BOOL8 count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, BOOL8 ignore_outsize, BOOL8 split_outsize, inT32 dm_gap)
Definition: topitch.cpp:1058
float fp_space
Definition: blobbox.h:648
inT32 max_nonspace
Definition: blobbox.h:660
float space_size
Definition: blobbox.h:663
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
#define FALSE
Definition: capi.h:29
float fp_nonsp
Definition: blobbox.h:649
#define TRUE
Definition: capi.h:28
EXTERN double words_default_fixed_limit
Definition: tovars.cpp:74
float xheight
Definition: blobbox.h:653
inT32 space_threshold
Definition: blobbox.h:661
int inT32
Definition: host.h:102
void fix_row_pitch ( TO_ROW *  bad_row,
TO_BLOCK *  bad_block,
TO_BLOCK_LIST *  blocks,
inT32  row_target,
inT32  block_target 
)

Definition at line 142 of file topitch.cpp.

// Body of fix_row_pitch (gutter numbers are topitch.cpp source lines).
// Settles bad_row->pitch_decision by tallying the decisions already made for
// other rows on the page, then fills in bad_row's spacing fields to match.
// Rows whose height is within textord_pitch_rowsimilarity of bad_row vote in
// block_votes/block_stats when their block number equals block_target and in
// like_votes/like_stats otherwise; all remaining rows vote in other_votes.
// NOTE(review): the gutter skips 183, 186, 196, 213, 254 and 260, so those
// source lines are absent from this listing; consult topitch.cpp for them.
// NOTE(review): bad_block is not referenced anywhere in the visible listing.
146  { // number of block
147  inT16 mid_cuts;
148  int block_votes; //votes in block
149  int like_votes; //votes over page
150  int other_votes; //votes of unlike blocks
151  int block_index; //number of block
152  int row_index; //number of row
153  int maxwidth; //max pitch
154  TO_BLOCK_IT block_it = blocks; //block iterator
155  TO_ROW_IT row_it;
156  TO_BLOCK *block; //current block
157  TO_ROW *row; //current row
158  float sp_sd; //space deviation
159  STATS block_stats; //pitches in block
160  STATS like_stats; //pitches in page
161 
162  block_votes = like_votes = other_votes = 0;
163  maxwidth = (inT32) ceil (bad_row->xheight * textord_words_maxspace);
// Voting only happens when the row is not already a definite decision.
164  if (bad_row->pitch_decision != PITCH_DEF_FIXED
165  && bad_row->pitch_decision != PITCH_DEF_PROP) {
166  block_stats.set_range (0, maxwidth);
167  like_stats.set_range (0, maxwidth);
168  block_index = 1;
169  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
170  block_it.forward()) {
171  block = block_it.data();
172  POLY_BLOCK* pb = block->block->poly_block();
// NOTE(review): this continue also bypasses block_index++ at line 241, so
// skipped non-text blocks do not consume a block number.
173  if (pb != NULL && !pb->IsText()) continue; // Non text doesn't exist!
174  row_index = 1;
175  row_it.set_to_list (block->get_rows ());
176  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
177  row_it.forward ()) {
178  row = row_it.data ();
// Similarity test: all-caps rows compare xheight + ascrise, other rows
// compare xheight alone.
179  if ((bad_row->all_caps
180  && row->xheight + row->ascrise
181  <
182  (bad_row->xheight + bad_row->ascrise) * (1 +
184  && row->xheight + row->ascrise >
185  (bad_row->xheight + bad_row->ascrise) * (1 -
187  || (!bad_row->all_caps
188  && row->xheight <
189  bad_row->xheight * (1 + textord_pitch_rowsimilarity)
190  && row->xheight >
191  bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
// Similar row in the target block: definite decisions count
// textord_words_veto_power votes, maybe/corrected decisions count one.
192  if (block_index == block_target) {
193  if (row->pitch_decision == PITCH_DEF_FIXED) {
194  block_votes += textord_words_veto_power;
195  block_stats.add ((inT32) row->fixed_pitch,
197  }
198  else if (row->pitch_decision == PITCH_MAYBE_FIXED
199  || row->pitch_decision == PITCH_CORR_FIXED) {
200  block_votes++;
201  block_stats.add ((inT32) row->fixed_pitch, 1);
202  }
203  else if (row->pitch_decision == PITCH_DEF_PROP)
204  block_votes -= textord_words_veto_power;
205  else if (row->pitch_decision == PITCH_MAYBE_PROP
206  || row->pitch_decision == PITCH_CORR_PROP)
207  block_votes--;
208  }
// Similar row in another block: same weighting, separate tally.
209  else {
210  if (row->pitch_decision == PITCH_DEF_FIXED) {
211  like_votes += textord_words_veto_power;
212  like_stats.add ((inT32) row->fixed_pitch,
214  }
215  else if (row->pitch_decision == PITCH_MAYBE_FIXED
216  || row->pitch_decision == PITCH_CORR_FIXED) {
217  like_votes++;
218  like_stats.add ((inT32) row->fixed_pitch, 1);
219  }
220  else if (row->pitch_decision == PITCH_DEF_PROP)
221  like_votes -= textord_words_veto_power;
222  else if (row->pitch_decision == PITCH_MAYBE_PROP
223  || row->pitch_decision == PITCH_CORR_PROP)
224  like_votes--;
225  }
226  }
// Dissimilar row: votes are counted but no pitch is recorded.
227  else {
228  if (row->pitch_decision == PITCH_DEF_FIXED)
229  other_votes += textord_words_veto_power;
230  else if (row->pitch_decision == PITCH_MAYBE_FIXED
231  || row->pitch_decision == PITCH_CORR_FIXED)
232  other_votes++;
233  else if (row->pitch_decision == PITCH_DEF_PROP)
234  other_votes -= textord_words_veto_power;
235  else if (row->pitch_decision == PITCH_MAYBE_PROP
236  || row->pitch_decision == PITCH_CORR_PROP)
237  other_votes--;
238  }
// NOTE(review): row_index is maintained but never read in the visible listing.
239  row_index++;
240  }
241  block_index++;
242  }
// Verdict: a block majority above the veto power wins with the block's
// median pitch; failing that, any positive vote from similar rows elsewhere
// wins with their median pitch; otherwise the row is corrected to proportional.
243  if (block_votes > textord_words_veto_power) {
244  bad_row->fixed_pitch = block_stats.ile (0.5);
245  bad_row->pitch_decision = PITCH_CORR_FIXED;
246  }
// NOTE(review): block_votes <= textord_words_veto_power is always true here
// because the branch above already took the > case.
247  else if (block_votes <= textord_words_veto_power && like_votes > 0) {
248  bad_row->fixed_pitch = like_stats.ile (0.5);
249  bad_row->pitch_decision = PITCH_CORR_FIXED;
250  }
251  else {
252  bad_row->pitch_decision = PITCH_CORR_PROP;
253  if (block_votes == 0 && like_votes == 0 && other_votes > 0
255  tprintf
256  ("Warning:row %d of block %d set prop with no like rows against trend\n",
257  row_target, block_target);
258  }
259  }
261  tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
262  block_votes, like_votes, other_votes);
263  tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
264  }
// Corrected-fixed rows: make sure fixed_pitch is at least textord_min_xheight,
// then derive kern_size, min_space, max_nonspace, space_threshold and
// space_size from it.
265  if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
266  if (bad_row->fixed_pitch < textord_min_xheight) {
// Pitch too small: retry from the block median, then the page median, and
// finally fall back to the row's xheight.
267  if (block_votes > 0)
268  bad_row->fixed_pitch = block_stats.ile (0.5);
269  else if (block_votes == 0 && like_votes > 0)
270  bad_row->fixed_pitch = like_stats.ile (0.5);
271  else {
272  tprintf
273  ("Warning:guessing pitch as xheight on row %d, block %d\n",
274  row_target, block_target);
275  bad_row->fixed_pitch = bad_row->xheight;
276  }
277  }
278  if (bad_row->fixed_pitch < textord_min_xheight)
279  bad_row->fixed_pitch = (float) textord_min_xheight;
280  bad_row->kern_size = bad_row->fixed_pitch / 4;
281  bad_row->min_space = (inT32) (bad_row->fixed_pitch * 0.6);
282  bad_row->max_nonspace = (inT32) (bad_row->fixed_pitch * 0.4);
283  bad_row->space_threshold =
284  (bad_row->min_space + bad_row->max_nonspace) / 2;
285  bad_row->space_size = bad_row->fixed_pitch;
// Character cells are only computed when none exist yet and the row has
// blobs; sp_sd and mid_cuts receive outputs that are not used afterwards.
286  if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
287  tune_row_pitch (bad_row, &bad_row->projection,
288  bad_row->projection_left, bad_row->projection_right,
289  (bad_row->fixed_pitch +
290  bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
291  sp_sd, mid_cuts, &bad_row->char_cells, FALSE);
292  }
293  }
// Proportional rows carry no fixed pitch and no character cells.
294  else if (bad_row->pitch_decision == PITCH_CORR_PROP
295  || bad_row->pitch_decision == PITCH_DEF_PROP) {
296  bad_row->fixed_pitch = 0.0f;
297  bad_row->char_cells.clear ();
298  }
299 }
EXTERN double textord_pitch_rowsimilarity
Definition: tovars.cpp:67
float kern_size
Definition: blobbox.h:662
inT32 min_space
Definition: blobbox.h:659
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
inT16 projection_left
Definition: blobbox.h:644
bool IsText() const
Definition: polyblk.h:52
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
EXTERN bool textord_debug_pitch_metric
Definition: topitch.cpp:50
float fixed_pitch
Definition: blobbox.h:647
EXTERN bool textord_debug_pitch_test
Definition: topitch.cpp:44
double ile(double frac) const
Definition: statistc.cpp:177
EXTERN double textord_words_maxspace
Definition: tovars.cpp:44
BOOL8 all_caps
Definition: blobbox.h:642
inT16 projection_right
Definition: blobbox.h:645
ICOORDELT_LIST char_cells
Definition: blobbox.h:665
float ascrise
Definition: blobbox.h:655
int textord_min_xheight
Definition: makerow.cpp:69
inT32 max_nonspace
Definition: blobbox.h:660
float tune_row_pitch(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
Definition: topitch.cpp:1146
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float space_size
Definition: blobbox.h:663
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
#define FALSE
Definition: capi.h:29
STATS projection
Definition: blobbox.h:667
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
Definition: statistc.cpp:62
EXTERN int textord_words_veto_power
Definition: tovars.cpp:65
#define NULL
Definition: host.h:144
float xheight
Definition: blobbox.h:653
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
BLOCK * block
Definition: blobbox.h:773
inT32 space_threshold
Definition: blobbox.h:661
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
BOOL8 fixed_pitch_row ( TO_ROW *  row,
BLOCK *  block,
inT32  block_index 
)

Definition at line 974 of file topitch.cpp.

// Body of fixed_pitch_row (gutter numbers are topitch.cpp source lines).
// Measures how well row->fixed_pitch fits the row and classifies the row's
// pitch_decision from the result. Every visible path returns TRUE.
// NOTE(review): the gutter skips 991, 1007, 1009, 1016, 1018 and 1024. The
// statements on those lines (presumably the remaining pitch_decision
// assignments and the guard around the debug print) are absent from this
// listing; consult topitch.cpp for them.
977  {
978  const char *res_string; // pitch result
979  inT16 mid_cuts; // no of cheap cuts
980  float non_space; // gap size
981  float pitch_sd; // error on pitch
982  float sp_sd = 0.0f; // space sd
983 
// The non-space estimate is capped at the pitch itself.
984  non_space = row->fp_nonsp;
985  if (non_space > row->fixed_pitch)
986  non_space = row->fixed_pitch;
// A NULL block is tolerated; in that case pb stays NULL and the row is
// treated as text.
987  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
988  if (textord_all_prop || (pb != NULL && !pb->IsText())) {
989  // Set the decision to definitely proportional.
990  pitch_sd = textord_words_def_prop * row->fixed_pitch;
992  } else {
// tune_row_pitch takes row->fixed_pitch, sp_sd and mid_cuts by reference and
// fills row->char_cells; debug output is enabled only for textord_debug_block.
993  pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
994  row->projection_right,
995  (row->fixed_pitch + non_space * 3) / 4,
996  row->fixed_pitch, sp_sd, mid_cuts,
997  &row->char_cells,
998  block_index == textord_debug_block);
// Fixed-pitch candidates: pitch_sd below the threshold, with extra sp_sd /
// used_dm_model requirements when (pitsync_linear_version & 3) >= 3.
999  if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1000  && ((pitsync_linear_version & 3) < 3
1001  || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
1002  || sp_sd > 20
1003  || (pitch_sd == 0 && sp_sd > 10))))) {
1004  if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1005  && !row->all_caps
1006  && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1008  else
1010  }
// Proportional candidates.
1011  else if ((pitsync_linear_version & 3) < 3
1012  || sp_sd > 20
1013  || mid_cuts > 0
1014  || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
1015  if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1017  else
1019  }
// Neither test was conclusive.
1020  else
1021  row->pitch_decision = PITCH_DUNNO;
1022  }
1023 
// Debug summary: two-letter code for the decision plus sd/pitch and sp_sd.
1025  res_string = "??";
1026  switch (row->pitch_decision) {
1027  case PITCH_DEF_PROP:
1028  res_string = "DP";
1029  break;
1030  case PITCH_MAYBE_PROP:
1031  res_string = "MP";
1032  break;
1033  case PITCH_DEF_FIXED:
1034  res_string = "DF";
1035  break;
1036  case PITCH_MAYBE_FIXED:
1037  res_string = "MF";
1038  break;
1039  default:
1040  res_string = "??";
1041  }
1042  tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
1043  pitch_sd / row->fixed_pitch, sp_sd, res_string);
1044  }
1045  return TRUE;
1046 }
#define tprintf(...)
Definition: tprintf.h:31
inT16 projection_left
Definition: blobbox.h:644
bool IsText() const
Definition: polyblk.h:52
EXTERN bool textord_debug_pitch_metric
Definition: topitch.cpp:50
float fixed_pitch
Definition: blobbox.h:647
BOOL8 used_dm_model
Definition: blobbox.h:643
BOOL8 all_caps
Definition: blobbox.h:642
inT16 projection_right
Definition: blobbox.h:645
ICOORDELT_LIST char_cells
Definition: blobbox.h:665
EXTERN double textord_words_pitchsd_threshold
Definition: tovars.cpp:59
float tune_row_pitch(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
Definition: topitch.cpp:1146
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
STATS projection
Definition: blobbox.h:667
float fp_nonsp
Definition: blobbox.h:649
EXTERN bool textord_all_prop
Definition: topitch.cpp:42
#define TRUE
Definition: capi.h:28
#define NULL
Definition: host.h:144
EXTERN int textord_debug_block
Definition: tovars.cpp:36
POLY_BLOCK * poly_block() const
Definition: pdblock.h:59
EXTERN double textord_words_def_prop
Definition: tovars.cpp:63
short inT16
Definition: host.h:100
void plot_fp_word ( TO_BLOCK *  block,
float  pitch,
float  nonspace 
)

Definition at line 1824 of file topitch.cpp.

// Body of plot_fp_word (gutter numbers are topitch.cpp source lines).
// For every row of the block, overwrites min_space, max_nonspace and
// space_threshold with the midpoint of pitch and nonspace (truncated to
// inT32) and then plots the row's word decisions on to_win.
1828  {
1829  TO_ROW *row; //current row
1830  TO_ROW_IT row_it = block->get_rows ();
1831 
1832  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1833  row = row_it.data ();
1834  row->min_space = (inT32) ((pitch + nonspace) / 2);
1835  row->max_nonspace = row->min_space;
1836  row->space_threshold = row->min_space;
// The pitch is truncated to inT16 for plotting.
1837  plot_word_decisions (to_win, (inT16) pitch, row);
1838  }
1839 }
inT32 min_space
Definition: blobbox.h:659
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
inT32 max_nonspace
Definition: blobbox.h:660
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
Definition: drawtord.cpp:250
inT32 space_threshold
Definition: blobbox.h:661
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
void print_block_counts ( TO_BLOCK *  block,
inT32  block_index 
)

Definition at line 617 of file topitch.cpp.

// Body of print_block_counts (gutter numbers are topitch.cpp source lines).
// Gathers the block's per-decision row counts through count_block_votes and
// prints them on one line as "(def,maybe,corr) fixed, (def,maybe,corr) prop,
// N dunno".
620  {
621  inT32 def_fixed = 0; //counters
622  inT32 def_prop = 0;
623  inT32 maybe_fixed = 0;
624  inT32 maybe_prop = 0;
625  inT32 dunno = 0;
626  inT32 corr_fixed = 0;
627  inT32 corr_prop = 0;
628 
// All seven counters are passed by reference and filled in by the callee.
629  count_block_votes(block,
630  def_fixed,
631  def_prop,
632  maybe_fixed,
633  maybe_prop,
634  corr_fixed,
635  corr_prop,
636  dunno);
637  tprintf ("Block %d has (%d,%d,%d)",
638  block_index, def_fixed, maybe_fixed, corr_fixed);
// " (Wrongly)" marks counts that contradict the textord_blocksall_prop /
// textord_blocksall_fixed settings.
639  if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed))
640  tprintf (" (Wrongly)");
641  tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
642  if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop))
643  tprintf (" (Wrongly)");
644  tprintf (" prop, %d dunno\n", dunno);
645 }
#define tprintf(...)
Definition: tprintf.h:31
EXTERN bool textord_blocksall_fixed
Definition: tovars.cpp:29
void count_block_votes(TO_BLOCK *block, inT32 &def_fixed, inT32 &def_prop, inT32 &maybe_fixed, inT32 &maybe_prop, inT32 &corr_fixed, inT32 &corr_prop, inT32 &dunno)
Definition: topitch.cpp:654
EXTERN bool textord_blocksall_prop
Definition: tovars.cpp:31
int inT32
Definition: host.h:102
void print_pitch_sd ( TO_ROW *  row,
STATS *  projection,
inT16  projection_left,
inT16  projection_right,
float  space_size,
float  initial_pitch 
)

Definition at line 1630 of file topitch.cpp.

// Body of print_pitch_sd (gutter numbers are topitch.cpp source lines).
// Debug helper: prints how well initial_pitch synchronises with the row,
// first word by word (words split at gaps >= space_size) and then for the
// whole row in one call. Nothing is returned and, in the visible listing,
// no field of row is written.
// NOTE(review): the gutter skips 1688 and 1701, so those source lines (one
// argument line of the first check_pitch_sync2 call and the condition that
// opens the scale_factor block) are absent from this listing.
1637  {
1638  const char *res2; //pitch result
1639  inT16 occupation; //used cells
1640  float sp_sd; //space sd
1641  //blobs
1642  BLOBNBOX_IT blob_it = row->blob_list ();
1643  BLOBNBOX_IT start_it; //start of word
1644  BLOBNBOX_IT row_start; //start of row
1645  inT16 blob_count; //no of blobs
1646  inT16 total_blob_count; //total blobs in line
1647  TBOX blob_box; //bounding box
1648  TBOX prev_box; //of super blob
1649  inT32 prev_right; //of word sync
1650  int scale_factor; //on scores for big words
1651  inT32 sp_count; //spaces
1652  FPSEGPT_LIST seg_list; //char cells
1653  FPSEGPT_IT seg_it; //iterator
1654  double sqsum; //sum of squares
1655  double spsum; //of spaces
1656  double sp_var; //space error
1657  double word_sync; //result for word
1658  double total_count; //total cuts
1659 
// Nothing to report for a row without blobs.
1660  if (blob_it.empty ())
1661  return;
1662  row_start = blob_it;
1663  total_blob_count = 0;
1664 
1665  total_count = 0;
1666  sqsum = 0;
1667  sp_count = 0;
1668  spsum = 0;
// prev_right < 0 means "no previous word yet".
1669  prev_right = -1;
1670  blob_it = row_start;
1671  start_it = blob_it;
1672  blob_count = 0;
1673  blob_box = box_next (&blob_it);//first blob
1674  blob_it.mark_cycle_pt ();
// One iteration per word.
1675  do {
// Advance start_it past the blobs of the previous word.
1676  for (; blob_count > 0; blob_count--)
1677  box_next(&start_it);
// Extend the current word until the list cycles or the gap to the next blob
// reaches space_size.
1678  do {
1679  prev_box = blob_box;
1680  blob_count++;
1681  blob_box = box_next (&blob_it);
1682  }
1683  while (!blob_it.cycled_list ()
1684  && blob_box.left () - prev_box.right () < space_size);
// occupation is an output of check_pitch_sync2 (passed by reference).
1685  word_sync =
1686  check_pitch_sync2 (&start_it, blob_count, (inT16) initial_pitch, 2,
1687  projection, projection_left, projection_right,
1689  occupation, &seg_list, 0, 0);
1690  total_blob_count += blob_count;
// NOTE(review): seg_it.data() below assumes check_pitch_sync2 always leaves
// at least one point in seg_list - TODO confirm.
1691  seg_it.set_to_list (&seg_list);
1692  if (prev_right >= 0) {
// Distance from the previous word's last cut to this word's first cut, with
// the nearest whole multiple of initial_pitch removed, then squared.
1693  sp_var = seg_it.data ()->position () - prev_right;
1694  sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1695  sp_var *= sp_var;
1696  spsum += sp_var;
1697  sp_count++;
1698  }
1699  seg_it.move_to_last ();
1700  prev_right = seg_it.data ()->position ();
1702  scale_factor = (seg_list.length () - 2) / 2;
1703  if (scale_factor < 1)
1704  scale_factor = 1;
1705  }
1706  else
1707  scale_factor = 1;
1708  sqsum += word_sync * scale_factor;
1709  total_count += (seg_list.length () - 1) * scale_factor;
1710  seg_list.clear ();
1711  }
1712  while (!blob_it.cycled_list ());
// Both square roots are guarded against an empty denominator.
1713  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1714  word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1715  tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1716  word_sync, word_sync / initial_pitch, sp_sd,
1717  word_sync < textord_words_pitchsd_threshold * initial_pitch
1718  ? 'F' : 'P');
1719 
// Second measurement: the whole row as a single run of total_blob_count blobs.
1720  start_it = row_start;
1721  blob_it = row_start;
1722  word_sync =
1723  check_pitch_sync2 (&blob_it, total_blob_count, (inT16) initial_pitch, 2,
1724  projection, projection_left, projection_right,
1725  row->xheight * textord_projection_scale, occupation,
1726  &seg_list, 0, 0);
1727  if (occupation > 1)
1728  word_sync /= occupation;
1729  word_sync = sqrt (word_sync);
1730 
1731 #ifndef GRAPHICS_DISABLED
1732  if (textord_show_row_cuts && to_win != NULL)
1733  plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1734 #endif
1735  seg_list.clear ();
// Two-letter verdict: DF/MF below the pitch-sd threshold, MP/DP otherwise.
1736  if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1737  if (word_sync < textord_words_def_fixed * initial_pitch
1738  && !row->all_caps)
1739  res2 = "DF";
1740  else
1741  res2 = "MF";
1742  }
1743  else
1744  res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1745  tprintf
1746  ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1747  word_sync, word_sync / initial_pitch,
1748  word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
1749  occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1750 }
EXTERN double textord_words_def_fixed
Definition: tovars.cpp:61
EXTERN bool textord_pitch_scalebigwords
Definition: tovars.cpp:69
#define tprintf(...)
Definition: tprintf.h:31
EXTERN bool textord_show_row_cuts
Definition: topitch.cpp:51
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
EXTERN double textord_projection_scale
Definition: topitch.cpp:57
float fixed_pitch
Definition: blobbox.h:647
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
BOOL8 all_caps
Definition: blobbox.h:642
inT16 left() const
Definition: rect.h:68
EXTERN double textord_words_pitchsd_threshold
Definition: tovars.cpp:59
double check_pitch_sync2(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, inT16 projection_left, inT16 projection_right, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
Definition: pithsync.cpp:298
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:629
Definition: rect.h:30
#define NULL
Definition: host.h:144
float xheight
Definition: blobbox.h:653
EXTERN double textord_words_def_prop
Definition: tovars.cpp:63
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:364
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
BOOL8 row_pitch_stats ( TO_ROW *  row,
inT32  maxwidth,
BOOL8  testing_on 
)

Definition at line 701 of file topitch.cpp.

// Body of row_pitch_stats (gutter numbers are topitch.cpp source lines).
// Builds a histogram of the horizontal gaps between successive blobs of the
// row, clusters it, and from the cluster medians sets the row's initial
// estimates pr_nonsp, pr_space, fp_nonsp and fp_space.
// Returns FALSE when no gap was recorded or no cluster was found, TRUE once
// the four estimates have been set.
// NOTE(review): the gutter skips 723 and 748, so the right-hand side of the
// smooth_factor assignment and one argument line of the cluster() call are
// absent from this listing.
705  {
706  BLOBNBOX *blob; //current blob
707  int gap_index; //current gap
708  inT32 prev_x; //end of prev blob
709  inT32 cluster_count; //no of clusters
710  inT32 prev_count; //of clusters
711  inT32 smooth_factor; //for smoothing stats
712  TBOX blob_box; //bounding box
713  float lower, upper; //cluster thresholds
714  //gap sizes
715  float gaps[BLOCK_STATS_CLUSTERS];
716  //blobs
717  BLOBNBOX_IT blob_it = row->blob_list ();
718  STATS gap_stats (0, maxwidth);
719  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
720  //clusters
721 
722  smooth_factor =
// Collect gaps: walk from the second blob until the iterator wraps to the
// first, skipping blobs flagged as joined to their predecessor.
724  if (!blob_it.empty ()) {
725  prev_x = blob_it.data ()->bounding_box ().right ();
726  blob_it.forward ();
727  while (!blob_it.at_first ()) {
728  blob = blob_it.data ();
729  if (!blob->joined_to_prev ()) {
730  blob_box = blob->bounding_box ();
// NOTE(review): negative gaps (overlapping blobs) also pass this test; how
// STATS::add treats values below its range is not visible here.
731  if (blob_box.left () - prev_x < maxwidth)
732  gap_stats.add (blob_box.left () - prev_x, 1);
733  prev_x = blob_box.right ();
734  }
735  blob_it.forward ();
736  }
737  }
738  if (gap_stats.get_total () == 0) {
739  return FALSE;
740  }
741  cluster_count = 0;
742  lower = row->xheight * words_initial_lower;
743  upper = row->xheight * words_initial_upper;
744  gap_stats.smooth (smooth_factor);
// Re-run clustering until the cluster count stops growing or reaches
// BLOCK_STATS_CLUSTERS.
745  do {
746  prev_count = cluster_count;
747  cluster_count = gap_stats.cluster (lower, upper,
749  BLOCK_STATS_CLUSTERS, cluster_stats);
750  }
751  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
752  if (cluster_count < 1) {
753  return FALSE;
754  }
// Clusters are read from cluster_stats[1..cluster_count]; entry 0 is not
// used as a cluster in this function.
755  for (gap_index = 0; gap_index < cluster_count; gap_index++)
756  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
757  //get medians
758  if (testing_on) {
759  tprintf ("cluster_count=%d:", cluster_count);
760  for (gap_index = 0; gap_index < cluster_count; gap_index++)
761  tprintf (" %g(%d)", gaps[gap_index],
762  cluster_stats[gap_index + 1].get_total ());
763  tprintf ("\n");
764  }
// NOTE(review): the threshold scans below read as if gaps[] ends up in
// ascending order; the ordering is defined by sort_floats (helpers.h) - confirm.
765  qsort (gaps, cluster_count, sizeof (float), sort_floats);
766 
767  //Try to find proportional non-space and space for row.
768  lower = row->xheight * words_default_prop_nonspace;
769  upper = row->xheight * textord_words_min_minspace;
// gap_index ends at the first median that is not below lower.
770  for (gap_index = 0; gap_index < cluster_count
771  && gaps[gap_index] < lower; gap_index++);
772  if (gap_index == 0) {
773  if (testing_on)
774  tprintf ("No clusters below nonspace threshold!!\n");
775  if (cluster_count > 1) {
776  row->pr_nonsp = gaps[0];
777  row->pr_space = gaps[1];
778  }
779  else {
780  row->pr_nonsp = lower;
781  row->pr_space = gaps[0];
782  }
783  }
784  else {
785  row->pr_nonsp = gaps[gap_index - 1];
786  while (gap_index < cluster_count && gaps[gap_index] < upper)
787  gap_index++;
788  if (gap_index == cluster_count) {
789  if (testing_on)
790  tprintf ("No clusters above nonspace threshold!!\n");
791  row->pr_space = lower * textord_spacesize_ratioprop;
792  }
793  else
794  row->pr_space = gaps[gap_index];
795  }
796 
797  //Now try to find the fixed pitch space and non-space.
798  upper = row->xheight * words_default_fixed_space;
799  for (gap_index = 0; gap_index < cluster_count
800  && gaps[gap_index] < upper; gap_index++);
801  if (gap_index == 0) {
802  if (testing_on)
803  tprintf ("No clusters below space threshold!!\n");
804  row->fp_nonsp = upper;
805  row->fp_space = gaps[0];
806  }
807  else {
808  row->fp_nonsp = gaps[gap_index - 1];
809  if (gap_index == cluster_count) {
810  if (testing_on)
811  tprintf ("No clusters above space threshold!!\n");
812  row->fp_space = row->xheight;
813  }
814  else
815  row->fp_space = gaps[gap_index];
816  }
817  if (testing_on) {
818  tprintf
819  ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
820  row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
821  }
822  return TRUE; //computed some stats
823 }
EXTERN double words_initial_upper
Definition: tovars.cpp:71
EXTERN double textord_wordstats_smooth_factor
Definition: tovars.cpp:39
#define BLOCK_STATS_CLUSTERS
Definition: topitch.cpp:62
EXTERN double words_default_prop_nonspace
Definition: tovars.cpp:72
bool joined_to_prev() const
Definition: blobbox.h:241
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
inT16 right() const
Definition: rect.h:75
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
int sort_floats(const void *arg1, const void *arg2)
Definition: helpers.h:97
EXTERN double textord_spacesize_ratioprop
Definition: tovars.cpp:80
float pr_nonsp
Definition: blobbox.h:651
inT16 left() const
Definition: rect.h:68
float pr_space
Definition: blobbox.h:650
EXTERN double textord_words_min_minspace
Definition: tovars.cpp:49
float fp_space
Definition: blobbox.h:648
#define FALSE
Definition: capi.h:29
float fp_nonsp
Definition: blobbox.h:649
Definition: rect.h:30
#define TRUE
Definition: capi.h:28
EXTERN double words_default_fixed_space
Definition: tovars.cpp:73
const TBOX & bounding_box() const
Definition: blobbox.h:215
EXTERN double words_initial_lower
Definition: tovars.cpp:70
float xheight
Definition: blobbox.h:653
int inT32
Definition: host.h:102
BOOL8 try_block_fixed ( TO_BLOCK *  block,
inT32  block_index 
)

Definition at line 535 of file topitch.cpp.

// Body of try_block_fixed (gutter numbers are topitch.cpp source lines).
// Stub: ignores both parameters and always returns FALSE.
538  {
539  return FALSE;
540 }
#define FALSE
Definition: capi.h:29
BOOL8 try_doc_fixed ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient 
)

Definition at line 395 of file topitch.cpp.

// Body of try_doc_fixed (gutter numbers are topitch.cpp source lines).
// Merges the projection profiles of every row on the page into one profile
// (shifting each row to compensate for page skew), takes the median of the
// rows' fixed pitches, and runs tune_row_pitch on the merged profile.
// The result is only printed/plotted: every return in the visible listing
// yields FALSE.
// NOTE(review): the gutter skips 430, 489 and 500, so the rest of the first
// if-condition and the guards ahead of the plot and the tprintf are absent
// from this listing.
// NOTE(review): page_tr is not referenced in the visible listing.
399  {
400  inT16 master_x; //uniform shifts
401  inT16 pitch; //median pitch.
402  int x; //profile coord
403  int prop_blocks; //correct counts
404  int fixed_blocks;
405  int total_row_count; //total in page
406  //iterator
407  TO_BLOCK_IT block_it = port_blocks;
408  TO_BLOCK *block; //current block;
409  TO_ROW_IT row_it; //row iterator
410  TO_ROW *row; //current row
411  inT16 projection_left; //edges
412  inT16 projection_right;
413  inT16 row_left; //edges of row
414  inT16 row_right;
415  ICOORDELT_LIST *master_cells; //cells for page
416  float master_y; //uniform shifts
417  float shift_factor; //page skew correction
418  float row_shift; //shift for row
419  float final_pitch; //output pitch
420  float row_y; //baseline
421  STATS projection; //entire page
422  STATS pitches (0, MAX_ALLOWED_PITCH);
423  //for median
424  float sp_sd; //space sd
425  inT16 mid_cuts; //no of cheap cuts
426  float pitch_sd; //sync rating
427 
428  if (block_it.empty ()
429  // || block_it.data()==block_it.data_relative(1)
431  return FALSE;
432  shift_factor = gradient / (gradient * gradient + 1);
// The first row of the first block supplies the reference point
// (master_x, master_y) that all other rows are shifted against.
// NOTE(review): row_it.data() assumes that block has at least one row - TODO confirm.
433  row_it.set_to_list (block_it.data ()->get_rows ());
434  master_x = row_it.data ()->projection_left;
435  master_y = row_it.data ()->baseline.y (master_x);
436  projection_left = MAX_INT16;
437  projection_right = -MAX_INT16;
// prop_blocks and fixed_blocks stay 0; they are only printed below.
438  prop_blocks = 0;
439  fixed_blocks = 0;
440  total_row_count = 0;
441 
// Pass 1: count rows, histogram the positive fixed pitches, and find the
// extent of the shifted row projections.
442  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
443  block_it.forward ()) {
444  block = block_it.data ();
445  row_it.set_to_list (block->get_rows ());
446  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
447  row = row_it.data ();
448  total_row_count++;
449  if (row->fixed_pitch > 0)
450  pitches.add ((inT32) (row->fixed_pitch), 1);
451  //find median
452  row_y = row->baseline.y (master_x);
453  row_left =
454  (inT16) (row->projection_left -
455  shift_factor * (master_y - row_y));
456  row_right =
457  (inT16) (row->projection_right -
458  shift_factor * (master_y - row_y));
459  if (row_left < projection_left)
460  projection_left = row_left;
461  if (row_right > projection_right)
462  projection_right = row_right;
463  }
464  }
// No row has a positive fixed pitch: nothing to evaluate.
465  if (pitches.get_total () == 0)
466  return FALSE;
467  projection.set_range (projection_left, projection_right);
468 
// Pass 2: add every row's projection, shifted, into the page projection.
469  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
470  block_it.forward ()) {
471  block = block_it.data ();
472  row_it.set_to_list (block->get_rows ());
473  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
474  row = row_it.data ();
475  row_y = row->baseline.y (master_x);
476  row_left =
477  (inT16) (row->projection_left -
478  shift_factor * (master_y - row_y));
479  for (x = row->projection_left; x < row->projection_right;
480  x++, row_left++) {
481  projection.add (row_left, row->projection.pile_count (x));
482  }
483  }
484  }
485 
// From here on, row is the first row of the block the iterator currently
// points at; its char_cells list receives the tuned cells and is cleared
// again before returning.
486  row_it.set_to_list (block_it.data ()->get_rows ());
487  row = row_it.data ();
488 #ifndef GRAPHICS_DISABLED
490  projection.plot (to_win, projection_left,
491  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
492 #endif
493  final_pitch = pitches.ile (0.5);
494  pitch = (inT16) final_pitch;
// tune_row_pitch takes final_pitch, sp_sd and mid_cuts by reference.
495  pitch_sd =
496  tune_row_pitch (row, &projection, projection_left, projection_right,
497  pitch * 0.75, final_pitch, sp_sd, mid_cuts,
498  &row->char_cells, FALSE);
499 
// NOTE(review): pitch is the truncated median, so a median below 1 would
// make the two divisions by pitch below divide by zero - confirm whether
// that can occur.
501  tprintf
502  ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
503  prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
504  pitch_sd / total_row_count, pitch_sd / pitch,
505  pitch_sd / total_row_count / pitch);
506 
507 #ifndef GRAPHICS_DISABLED
// Optional display: draw the page-level cells on every row, shifted back by
// that row's skew offset.
508  if (textord_show_page_cuts && to_win != NULL) {
509  master_cells = &row->char_cells;
510  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
511  block_it.forward ()) {
512  block = block_it.data ();
513  row_it.set_to_list (block->get_rows ());
514  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
515  row_it.forward ()) {
516  row = row_it.data ();
517  row_y = row->baseline.y (master_x);
518  row_shift = shift_factor * (master_y - row_y);
519  plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
520  }
521  }
522  }
523 #endif
524  row->char_cells.clear ();
525  return FALSE;
526 }
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
QSPLINE baseline
Definition: blobbox.h:666
float intercept() const
Definition: blobbox.h:584
inT16 projection_left
Definition: blobbox.h:644
EXTERN bool textord_debug_pitch_metric
Definition: topitch.cpp:50
float fixed_pitch
Definition: blobbox.h:647
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
Definition: drawtord.cpp:397
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
EXTERN bool textord_blockndoc_fixed
Definition: topitch.cpp:56
#define MAX_ALLOWED_PITCH
Definition: topitch.cpp:63
inT16 projection_right
Definition: blobbox.h:645
ICOORDELT_LIST char_cells
Definition: blobbox.h:665
double y(double x) const
Definition: quspline.cpp:217
float tune_row_pitch(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
Definition: topitch.cpp:1146
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
#define FALSE
Definition: capi.h:29
STATS projection
Definition: blobbox.h:667
#define MAX_INT16
Definition: host.h:119
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
Definition: statistc.cpp:62
EXTERN bool textord_show_page_cuts
Definition: topitch.cpp:52
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
#define NULL
Definition: host.h:144
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:589
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102
BOOL8 try_rows_fixed ( TO_BLOCK *  block,
inT32  block_index,
BOOL8  testing_on 
)

Definition at line 549 of file topitch.cpp.

553  {
554  TO_ROW *row; //current row
555  inT32 row_index; //row number.
556  inT32 def_fixed = 0; //counters
557  inT32 def_prop = 0;
558  inT32 maybe_fixed = 0;
559  inT32 maybe_prop = 0;
560  inT32 dunno = 0;
561  inT32 corr_fixed = 0;
562  inT32 corr_prop = 0;
563  float lower, upper; //cluster thresholds
564  TO_ROW_IT row_it = block->get_rows ();
565 
566  row_index = 1;
567  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
568  row = row_it.data ();
569  ASSERT_HOST (row->xheight > 0);
570  if (row->fixed_pitch > 0 &&
571  fixed_pitch_row(row, block->block, block_index)) {
572  if (row->fixed_pitch == 0) {
573  lower = row->pr_nonsp;
574  upper = row->pr_space;
575  row->space_size = upper;
576  row->kern_size = lower;
577  }
578  }
579  row_index++;
580  }
581  count_block_votes(block,
582  def_fixed,
583  def_prop,
584  maybe_fixed,
585  maybe_prop,
586  corr_fixed,
587  corr_prop,
588  dunno);
589  if (testing_on
590  && (textord_debug_pitch_test
591  || textord_blocksall_prop || textord_blocksall_fixed)) {
592  tprintf ("Initially:");
593  print_block_counts(block, block_index);
594  }
595  if (def_fixed > def_prop * textord_words_veto_power)
597  else if (def_prop > def_fixed * textord_words_veto_power)
599  else if (def_fixed > 0 || def_prop > 0)
600  block->pitch_decision = PITCH_DUNNO;
601  else if (maybe_fixed > maybe_prop * textord_words_veto_power)
603  else if (maybe_prop > maybe_fixed * textord_words_veto_power)
605  else
606  block->pitch_decision = PITCH_DUNNO;
607  return FALSE;
608 }
float kern_size
Definition: blobbox.h:662
PITCH_TYPE pitch_decision
Definition: blobbox.h:774
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
float fixed_pitch
Definition: blobbox.h:647
EXTERN bool textord_debug_pitch_test
Definition: topitch.cpp:44
EXTERN bool textord_blocksall_fixed
Definition: tovars.cpp:29
float pr_nonsp
Definition: blobbox.h:651
float pr_space
Definition: blobbox.h:650
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float space_size
Definition: blobbox.h:663
#define FALSE
Definition: capi.h:29
void print_block_counts(TO_BLOCK *block, inT32 block_index)
Definition: topitch.cpp:617
void count_block_votes(TO_BLOCK *block, inT32 &def_fixed, inT32 &def_prop, inT32 &maybe_fixed, inT32 &maybe_prop, inT32 &corr_fixed, inT32 &corr_prop, inT32 &dunno)
Definition: topitch.cpp:654
EXTERN int textord_words_veto_power
Definition: tovars.cpp:65
EXTERN bool textord_blocksall_prop
Definition: tovars.cpp:31
float xheight
Definition: blobbox.h:653
BOOL8 fixed_pitch_row(TO_ROW *row, BLOCK *block, inT32 block_index)
Definition: topitch.cpp:974
BLOCK * block
Definition: blobbox.h:773
int inT32
Definition: host.h:102
float tune_row_pitch ( TO_ROW *  row,
STATS *  projection,
inT16  projection_left,
inT16  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
inT16 &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
BOOL8  testing_on 
)

Definition at line 1146 of file topitch.cpp.

1157  {
1158  int pitch_delta; //offset pitch
1159  inT16 mid_cuts; //cheap cuts
1160  float pitch_sd; //current sd
1161  float best_sd; //best result
1162  float best_pitch; //pitch for best result
1163  float initial_sd; //starting error
1164  float sp_sd; //space sd
1165  ICOORDELT_LIST test_cells; //row cells
1166  ICOORDELT_IT best_it; //start of best list
1167 
1168  if (textord_fast_pitch_test)
1169  return tune_row_pitch2 (row, projection, projection_left,
1170  projection_right, space_size, initial_pitch,
1171  best_sp_sd,
1172  //space sd
1173  best_mid_cuts, best_cells, testing_on);
1174  if (textord_disable_pitch_test) {
1175  best_sp_sd = initial_pitch;
1176  return initial_pitch;
1177  }
1178  initial_sd =
1179  compute_pitch_sd(row,
1180  projection,
1181  projection_left,
1182  projection_right,
1183  space_size,
1184  initial_pitch,
1185  best_sp_sd,
1186  best_mid_cuts,
1187  best_cells,
1188  testing_on);
1189  best_sd = initial_sd;
1190  best_pitch = initial_pitch;
1191  if (testing_on)
1192  tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1193  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1194  pitch_sd =
1195  compute_pitch_sd (row, projection, projection_left, projection_right,
1196  space_size, initial_pitch + pitch_delta, sp_sd,
1197  mid_cuts, &test_cells, testing_on);
1198  if (testing_on)
1199  tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1200  pitch_sd);
1201  if (pitch_sd < best_sd) {
1202  best_sd = pitch_sd;
1203  best_mid_cuts = mid_cuts;
1204  best_sp_sd = sp_sd;
1205  best_pitch = initial_pitch + pitch_delta;
1206  best_cells->clear ();
1207  best_it.set_to_list (best_cells);
1208  best_it.add_list_after (&test_cells);
1209  }
1210  else
1211  test_cells.clear ();
1212  if (pitch_sd > initial_sd)
1213  break; //getting worse
1214  }
1215  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1216  pitch_sd =
1217  compute_pitch_sd (row, projection, projection_left, projection_right,
1218  space_size, initial_pitch - pitch_delta, sp_sd,
1219  mid_cuts, &test_cells, testing_on);
1220  if (testing_on)
1221  tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1222  pitch_sd);
1223  if (pitch_sd < best_sd) {
1224  best_sd = pitch_sd;
1225  best_mid_cuts = mid_cuts;
1226  best_sp_sd = sp_sd;
1227  best_pitch = initial_pitch - pitch_delta;
1228  best_cells->clear ();
1229  best_it.set_to_list (best_cells);
1230  best_it.add_list_after (&test_cells);
1231  }
1232  else
1233  test_cells.clear ();
1234  if (pitch_sd > initial_sd)
1235  break;
1236  }
1237  initial_pitch = best_pitch;
1238 
1239  if (textord_debug_pitch_metric)
1240  print_pitch_sd(row,
1241  projection,
1242  projection_left,
1243  projection_right,
1244  space_size,
1245  best_pitch);
1246 
1247  return best_sd;
1248 }
float tune_row_pitch2(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
Definition: topitch.cpp:1258
#define tprintf(...)
Definition: tprintf.h:31
EXTERN int textord_pitch_range
Definition: tovars.cpp:37
EXTERN bool textord_debug_pitch_metric
Definition: topitch.cpp:50
EXTERN bool textord_fast_pitch_test
Definition: topitch.cpp:48
float compute_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch, float &sp_sd, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
Definition: topitch.cpp:1375
void print_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch)
Definition: topitch.cpp:1630
EXTERN bool textord_disable_pitch_test
Definition: topitch.cpp:46
short inT16
Definition: host.h:100
float tune_row_pitch2 ( TO_ROW *  row,
STATS *  projection,
inT16  projection_left,
inT16  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
inT16 &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
BOOL8  testing_on 
)

Definition at line 1258 of file topitch.cpp.

1269  {
1270  int pitch_delta; //offset pitch
1271  inT16 pixel; //pixel coord
1272  inT16 best_pixel; //pixel coord
1273  inT16 best_delta; //best pitch
1274  inT16 best_pitch; //best pitch
1275  inT16 start; //of good range
1276  inT16 end; //of good range
1277  inT32 best_count; //lowest sum
1278  float best_sd; //best result
1279  STATS *sum_proj; //summed projection
1280 
1281  best_sp_sd = initial_pitch;
1282 
1283  best_pitch = static_cast<int>(initial_pitch);
1284  if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
1285  return initial_pitch;
1286  }
1287  sum_proj = new STATS[textord_pitch_range * 2 + 1];
1288  if (sum_proj == NULL)
1289  return initial_pitch;
1290 
1291  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1292  pitch_delta++)
1293  sum_proj[textord_pitch_range + pitch_delta].set_range (0,
1294  best_pitch +
1295  pitch_delta + 1);
1296  for (pixel = projection_left; pixel <= projection_right; pixel++) {
1297  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1298  pitch_delta++) {
1299  sum_proj[textord_pitch_range + pitch_delta].add(
1300  (pixel - projection_left) % (best_pitch + pitch_delta),
1301  projection->pile_count(pixel));
1302  }
1303  }
1304  best_count = sum_proj[textord_pitch_range].pile_count (0);
1305  best_delta = 0;
1306  best_pixel = 0;
1307  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1308  pitch_delta++) {
1309  for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1310  if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
1311  < best_count) {
1312  best_count =
1313  sum_proj[textord_pitch_range +
1314  pitch_delta].pile_count (pixel);
1315  best_delta = pitch_delta;
1316  best_pixel = pixel;
1317  }
1318  }
1319  }
1320  if (testing_on)
1321  tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1322  initial_pitch, best_delta, best_count);
1323  best_pitch += best_delta;
1324  initial_pitch = best_pitch;
1325  best_count++;
1326  best_count += best_count;
1327  for (start = best_pixel - 2; start > best_pixel - best_pitch
1328  && sum_proj[textord_pitch_range +
1329  best_delta].pile_count (start % best_pitch) <= best_count;
1330  start--);
1331  for (end = best_pixel + 2;
1332  end < best_pixel + best_pitch
1333  && sum_proj[textord_pitch_range +
1334  best_delta].pile_count (end % best_pitch) <= best_count;
1335  end++);
1336 
1337  best_sd =
1338  compute_pitch_sd(row,
1339  projection,
1340  projection_left,
1341  projection_right,
1342  space_size,
1343  initial_pitch,
1344  best_sp_sd,
1345  best_mid_cuts,
1346  best_cells,
1347  testing_on,
1348  start,
1349  end);
1350  if (testing_on)
1351  tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1352  best_sd);
1353 
1354  if (textord_debug_pitch_metric)
1355  print_pitch_sd(row,
1356  projection,
1357  projection_left,
1358  projection_right,
1359  space_size,
1360  initial_pitch);
1361 
1362  delete[]sum_proj;
1363 
1364  return best_sd;
1365 }
#define tprintf(...)
Definition: tprintf.h:31
Definition: statistc.h:33
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
EXTERN int textord_pitch_range
Definition: tovars.cpp:37
EXTERN bool textord_debug_pitch_metric
Definition: topitch.cpp:50
float compute_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch, float &sp_sd, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
Definition: topitch.cpp:1375
void print_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch)
Definition: topitch.cpp:1630
EXTERN bool textord_disable_pitch_test
Definition: topitch.cpp:46
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
#define NULL
Definition: host.h:144
short inT16
Definition: host.h:100
int inT32
Definition: host.h:102

Variable Documentation

EXTERN bool textord_all_prop = FALSE

"All doc is proportional text"

Definition at line 42 of file topitch.cpp.

EXTERN double textord_balance_factor = 1.0

"Ding rate for unbalanced char cells"

Definition at line 59 of file topitch.cpp.

EXTERN bool textord_blockndoc_fixed = FALSE

"Attempt whole doc/block fixed pitch"

Definition at line 56 of file topitch.cpp.

EXTERN bool textord_debug_pitch_metric = FALSE

"Write full metric stuff"

Definition at line 50 of file topitch.cpp.

EXTERN bool textord_debug_pitch_test = FALSE

"Debug on fixed pitch test"

Definition at line 44 of file topitch.cpp.

EXTERN bool textord_disable_pitch_test = FALSE

"Turn off dp fixed pitch algorithm"

Definition at line 46 of file topitch.cpp.

EXTERN bool textord_fast_pitch_test = FALSE

"Do even faster pitch algorithm"

Definition at line 48 of file topitch.cpp.

EXTERN bool textord_pitch_cheat = FALSE

"Use correct answer for fixed/prop"

Definition at line 54 of file topitch.cpp.

EXTERN double textord_projection_scale = 0.200

"Ding rate for mid-cuts"

Definition at line 57 of file topitch.cpp.

EXTERN bool textord_show_page_cuts = FALSE

"Draw page-level cuts"

Definition at line 52 of file topitch.cpp.

EXTERN bool textord_show_row_cuts = FALSE

"Draw row-level cuts"

Definition at line 51 of file topitch.cpp.