tesseract  5.0.0-alpha-619-ge9db
topitch.cpp File Reference
#include "blobbox.h"
#include "statistc.h"
#include "drawtord.h"
#include "makerow.h"
#include "pitsync1.h"
#include "pithsync.h"
#include "tovars.h"
#include "wordseg.h"
#include "topitch.h"
#include <tesseract/helpers.h>
#include <memory>

Go to the source code of this file.

Macros

#define BLOCK_STATS_CLUSTERS   10
 
#define MAX_ALLOWED_PITCH   100
 

Functions

void compute_fixed_pitch (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
 
void fix_row_pitch (TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
 
void compute_block_pitch (TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
 
bool compute_rows_pitch (TO_BLOCK *block, int32_t block_index, bool testing_on)
 
bool try_doc_fixed (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
 
bool try_block_fixed (TO_BLOCK *block, int32_t block_index)
 
bool try_rows_fixed (TO_BLOCK *block, int32_t block_index, bool testing_on)
 
void print_block_counts (TO_BLOCK *block, int32_t block_index)
 
void count_block_votes (TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
 
bool row_pitch_stats (TO_ROW *row, int32_t maxwidth, bool testing_on)
 
bool find_row_pitch (TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
 
bool fixed_pitch_row (TO_ROW *row, BLOCK *block, int32_t block_index)
 
bool count_pitch_stats (TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
 
float tune_row_pitch (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
 
float tune_row_pitch2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
 
float compute_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
 
float compute_pitch_sd2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
 
void print_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
 
void find_repeated_chars (TO_BLOCK *block, bool testing_on)
 
void plot_fp_word (TO_BLOCK *block, float pitch, float nonspace)
 

Variables

bool textord_debug_pitch_test = false
 
bool textord_fast_pitch_test = false
 
bool textord_debug_pitch_metric = false
 
bool textord_show_row_cuts = false
 
bool textord_show_page_cuts = false
 
bool textord_pitch_cheat = false
 
bool textord_blockndoc_fixed = false
 
double textord_projection_scale = 0.200
 
double textord_balance_factor = 1.0
 

Macro Definition Documentation

◆ BLOCK_STATS_CLUSTERS

#define BLOCK_STATS_CLUSTERS   10

Definition at line 55 of file topitch.cpp.

◆ MAX_ALLOWED_PITCH

#define MAX_ALLOWED_PITCH   100

Definition at line 56 of file topitch.cpp.

Function Documentation

◆ compute_block_pitch()

void compute_block_pitch ( TO_BLOCK *  block,
FCOORD  rotation,
int32_t  block_index,
bool  testing_on 
)

Definition at line 310 of file topitch.cpp.

317  { // correct orientation
318  TBOX block_box; //bounding box
319 
320  block_box = block->block->pdblk.bounding_box ();
321  if (testing_on && textord_debug_pitch_test) {
322  tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
323  block_index,
324  block_box.left (), block_box.bottom (),
325  block_box.right (), block_box.top ());
326  }
327  block->min_space = static_cast<int32_t>(floor (block->xheight
329  block->max_nonspace = static_cast<int32_t>(ceil (block->xheight
331  block->fixed_pitch = 0.0f;
332  block->space_size = static_cast<float>(block->min_space);
333  block->kern_size = static_cast<float>(block->max_nonspace);
334  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
336  if (!block->get_rows ()->empty ()) {
337  ASSERT_HOST (block->xheight > 0);
338  find_repeated_chars(block, textord_show_initial_words && testing_on);
339 #ifndef GRAPHICS_DISABLED
340  if (textord_show_initial_words && testing_on)
341  //overlap_picture_ops(true);
343 #endif
344  compute_rows_pitch(block,

◆ compute_fixed_pitch()

void compute_fixed_pitch ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 78 of file topitch.cpp.

84  { // correct orientation
85  TO_BLOCK_IT block_it; //iterator
86  TO_BLOCK *block; //current block;
87  TO_ROW *row; //current row
88  int block_index; //block number
89  int row_index; //row number
90 
91 #ifndef GRAPHICS_DISABLED
92  if (textord_show_initial_words && testing_on) {
93  if (to_win == nullptr)
94  create_to_win(page_tr);
95  }
96 #endif
97 
98  block_it.set_to_list (port_blocks);
99  block_index = 1;
100  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
101  block_it.forward ()) {
102  block = block_it.data ();
103  compute_block_pitch(block, rotation, block_index, testing_on);
104  block_index++;
105  }
106 
107  if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
108  block_index = 1;
109  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
110  block_it.forward ()) {
111  block = block_it.data ();
112  if (!try_block_fixed (block, block_index))
113  try_rows_fixed(block, block_index, testing_on);
114  block_index++;
115  }
116  }
117 
118  block_index = 1;
119  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
120  block_it.forward()) {
121  block = block_it.data ();
122  POLY_BLOCK* pb = block->block->pdblk.poly_block();
123  if (pb != nullptr && !pb->IsText()) continue; // Non-text doesn't exist!
124  // row iterator
125  TO_ROW_IT row_it(block->get_rows());
126  row_index = 1;
127  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
128  row = row_it.data ();
129  fix_row_pitch(row, block, port_blocks, row_index, block_index);
130  row_index++;
131  }
132  block_index++;
133  }
134 #ifndef GRAPHICS_DISABLED
135  if (textord_show_initial_words && testing_on) {
137  }

◆ compute_pitch_sd()

float compute_pitch_sd ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float  initial_pitch,
float &  sp_sd,
int16_t &  mid_cuts,
ICOORDELT_LIST *  row_cells,
bool  testing_on,
int16_t  start,
int16_t  end 
)

Definition at line 1359 of file topitch.cpp.

1389  {
1390  int16_t occupation; //no of cells in word.
1391  //blobs
1392  BLOBNBOX_IT blob_it = row->blob_list ();
1393  BLOBNBOX_IT start_it; //start of word
1394  BLOBNBOX_IT plot_it; //for plotting
1395  int16_t blob_count; //no of blobs
1396  TBOX blob_box; //bounding box
1397  TBOX prev_box; //of super blob
1398  int32_t prev_right; //of word sync
1399  int scale_factor; //on scores for big words
1400  int32_t sp_count; //spaces
1401  FPSEGPT_LIST seg_list; //char cells
1402  FPSEGPT_IT seg_it; //iterator
1403  int16_t segpos; //position of segment
1404  int16_t cellpos; //previous cell boundary
1405  //iterator
1406  ICOORDELT_IT cell_it = row_cells;
1407  ICOORDELT *cell; //new cell
1408  double sqsum; //sum of squares
1409  double spsum; //of spaces
1410  double sp_var; //space error
1411  double word_sync; //result for word
1412  int32_t total_count; //total blobs
1413 
1414  if ((pitsync_linear_version & 3) > 1) {
1415  word_sync = compute_pitch_sd2 (row, projection, projection_left,
1416  projection_right, initial_pitch,
1417  occupation, mid_cuts, row_cells,
1418  testing_on, start, end);
1419  sp_sd = occupation;
1420  return word_sync;
1421  }
1422  mid_cuts = 0;
1423  cellpos = 0;
1424  total_count = 0;
1425  sqsum = 0;
1426  sp_count = 0;
1427  spsum = 0;
1428  prev_right = -1;
1429  if (blob_it.empty ())
1430  return space_size * 10;
1431 #ifndef GRAPHICS_DISABLED
1432  if (testing_on && to_win != nullptr) {
1433  blob_box = blob_it.data ()->bounding_box ();
1434  projection->plot (to_win, projection_left,
1435  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1436  }
1437 #endif
1438  start_it = blob_it;
1439  blob_count = 0;
1440  blob_box = box_next (&blob_it);//first blob
1441  blob_it.mark_cycle_pt ();
1442  do {
1443  for (; blob_count > 0; blob_count--)
1444  box_next(&start_it);
1445  do {
1446  prev_box = blob_box;
1447  blob_count++;
1448  blob_box = box_next (&blob_it);
1449  }
1450  while (!blob_it.cycled_list ()
1451  && blob_box.left () - prev_box.right () < space_size);
1452  plot_it = start_it;
1453  if (pitsync_linear_version & 3)
1454  word_sync =
1455  check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1456  projection, projection_left, projection_right,
1458  occupation, &seg_list, start, end);
1459  else
1460  word_sync =
1461  check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1462  projection, &seg_list);
1463  if (testing_on) {
1464  tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
1465  prev_box.right (), prev_box.top (),
1466  seg_list.length () - 1, word_sync);
1467  seg_it.set_to_list (&seg_list);
1468  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1469  seg_it.forward ()) {
1470  if (seg_it.data ()->faked)
1471  tprintf ("(F)");
1472  tprintf ("%d, ", seg_it.data ()->position ());
1473  // tprintf("C=%g, s=%g, sq=%g\n",
1474  // seg_it.data()->cost_function(),
1475  // seg_it.data()->sum(),
1476  // seg_it.data()->squares());
1477  }
1478  tprintf ("\n");
1479  }
1480 #ifndef GRAPHICS_DISABLED
1481  if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
1482  plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1483 #endif
1484  seg_it.set_to_list (&seg_list);
1485  if (prev_right >= 0) {
1486  sp_var = seg_it.data ()->position () - prev_right;
1487  sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1488  sp_var *= sp_var;
1489  spsum += sp_var;
1490  sp_count++;
1491  }
1492  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1493  segpos = seg_it.data ()->position ();
1494  if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1495  //big gap
1496  while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1497  cell = new ICOORDELT (cellpos + static_cast<int16_t>(initial_pitch), 0);
1498  cell_it.add_after_then_move (cell);
1499  cellpos += static_cast<int16_t>(initial_pitch);
1500  }
1501  //make new one
1502  cell = new ICOORDELT (segpos, 0);
1503  cell_it.add_after_then_move (cell);
1504  cellpos = segpos;
1505  }
1506  else if (segpos > cellpos - initial_pitch / 2) {
1507  cell = cell_it.data ();
1508  //average positions
1509  cell->set_x ((cellpos + segpos) / 2);
1510  cellpos = cell->x ();
1511  }
1512  }

◆ compute_pitch_sd2()

float compute_pitch_sd2 ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  initial_pitch,
int16_t &  occupation,
int16_t &  mid_cuts,
ICOORDELT_LIST *  row_cells,
bool  testing_on,
int16_t  start,
int16_t  end 
)

Definition at line 1521 of file topitch.cpp.

1527  : 0;
1528  return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1529 }
1530 
1531 
1532 /**********************************************************************
1533  * compute_pitch_sd2
1534  *
1535  * Use a dp algorithm to fit the character cells and return the sd of
1536  * the cell size over the row.
1537  **********************************************************************/
1538 
1539 float compute_pitch_sd2( //find fp cells
1540  TO_ROW* row, //row to do
1541  STATS* projection, //vertical projection
1542  int16_t projection_left, //edge
1543  int16_t projection_right, //edge
1544  float initial_pitch, //guess at pitch
1545  int16_t& occupation, //no of occupied cells
1546  int16_t& mid_cuts, //no of free cuts
1547  ICOORDELT_LIST* row_cells, //list of chop pts
1548  bool testing_on, //individual words
1549  int16_t start, //start of good range
1550  int16_t end //end of good range
1551 ) {
1552  //blobs
1553  BLOBNBOX_IT blob_it = row->blob_list ();
1554  BLOBNBOX_IT plot_it;
1555  int16_t blob_count; //no of blobs
1556  TBOX blob_box; //bounding box
1557  FPSEGPT_LIST seg_list; //char cells
1558  FPSEGPT_IT seg_it; //iterator
1559  int16_t segpos; //position of segment
1560  //iterator
1561  ICOORDELT_IT cell_it = row_cells;
1562  ICOORDELT *cell; //new cell
1563  double word_sync; //result for word
1564 
1565  mid_cuts = 0;
1566  if (blob_it.empty ()) {
1567  occupation = 0;
1568  return initial_pitch * 10;
1569  }
1570 #ifndef GRAPHICS_DISABLED
1571  if (testing_on && to_win != nullptr) {
1572  projection->plot (to_win, projection_left,
1573  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1574  }
1575 #endif
1576  blob_count = 0;
1577  blob_it.mark_cycle_pt ();
1578  do {
1579  //first blob
1580  blob_box = box_next (&blob_it);
1581  blob_count++;
1582  }
1583  while (!blob_it.cycled_list ());
1584  plot_it = blob_it;
1585  word_sync = check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch),
1586  2, projection, projection_left,
1587  projection_right,
1589  occupation, &seg_list, start, end);
1590  if (testing_on) {
1591  tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
1592  blob_box.right (), blob_box.top (),
1593  seg_list.length () - 1, word_sync);
1594  seg_it.set_to_list (&seg_list);
1595  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1596  if (seg_it.data ()->faked)
1597  tprintf ("(F)");
1598  tprintf ("%d, ", seg_it.data ()->position ());
1599  // tprintf("C=%g, s=%g, sq=%g\n",
1600  // seg_it.data()->cost_function(),
1601  // seg_it.data()->sum(),
1602  // seg_it.data()->squares());
1603  }

◆ compute_rows_pitch()

bool compute_rows_pitch ( TO_BLOCK *  block,
int32_t  block_index,
bool  testing_on 
)

Definition at line 352 of file topitch.cpp.

361  {
362  int32_t maxwidth; //of spaces
363  TO_ROW *row; //current row
364  int32_t row_index; //row number.
365  float lower, upper; //cluster thresholds
366  TO_ROW_IT row_it = block->get_rows ();
367 
368  row_index = 1;
369  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
370  row = row_it.data ();
371  ASSERT_HOST (row->xheight > 0);
373  maxwidth = static_cast<int32_t>(ceil (row->xheight * textord_words_maxspace));
374  if (row_pitch_stats (row, maxwidth, testing_on)
375  && find_row_pitch (row, maxwidth,
376  textord_dotmatrix_gap + 1, block, block_index,
377  row_index, testing_on)) {
378  if (row->fixed_pitch == 0) {
379  lower = row->pr_nonsp;
380  upper = row->pr_space;
381  row->space_size = upper;
382  row->kern_size = lower;
383  }
384  }
385  else {
386  row->fixed_pitch = 0.0f; //insufficient data

◆ count_block_votes()

void count_block_votes ( TO_BLOCK *  block,
int32_t &  def_fixed,
int32_t &  def_prop,
int32_t &  maybe_fixed,
int32_t &  maybe_prop,
int32_t &  corr_fixed,
int32_t &  corr_prop,
int32_t &  dunno 
)

Definition at line 650 of file topitch.cpp.

668  {
669  TO_ROW *row; //current row
670  TO_ROW_IT row_it = block->get_rows ();
671 
672  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
673  row = row_it.data ();
674  switch (row->pitch_decision) {
675  case PITCH_DUNNO:
676  dunno++;
677  break;
678  case PITCH_DEF_PROP:
679  def_prop++;
680  break;
681  case PITCH_MAYBE_PROP:
682  maybe_prop++;
683  break;
684  case PITCH_DEF_FIXED:
685  def_fixed++;
686  break;
687  case PITCH_MAYBE_FIXED:
688  maybe_fixed++;

◆ count_pitch_stats()

bool count_pitch_stats ( TO_ROW *  row,
STATS *  gap_stats,
STATS *  pitch_stats,
float  initial_pitch,
float  min_space,
bool  ignore_outsize,
bool  split_outsize,
int32_t  dm_gap 
)

Definition at line 1050 of file topitch.cpp.

1073  {
1074  bool prev_valid; //not word broken
1075  BLOBNBOX *blob; //current blob
1076  //blobs
1077  BLOBNBOX_IT blob_it = row->blob_list ();
1078  int32_t prev_right; //end of prev blob
1079  int32_t prev_centre; //centre of previous blob
1080  int32_t x_centre; //centre of this blob
1081  int32_t blob_width; //width of blob
1082  int32_t width_units; //no of widths in blob
1083  float width; //blob width
1084  TBOX blob_box; //bounding box
1085  TBOX joined_box; //of super blob
1086 
1087  gap_stats->clear ();
1088  pitch_stats->clear ();
1089  if (blob_it.empty ())
1090  return false;
1091  prev_valid = false;
1092  prev_centre = 0;
1093  prev_right = 0; // stop compiler warning
1094  joined_box = blob_it.data ()->bounding_box ();
1095  do {
1096  blob_it.forward ();
1097  blob = blob_it.data ();
1098  if (!blob->joined_to_prev ()) {
1099  blob_box = blob->bounding_box ();
1100  if ((blob_box.left () - joined_box.right () < dm_gap
1101  && !blob_it.at_first ())
1102  || blob->cblob() == nullptr)
1103  joined_box += blob_box; //merge blobs
1104  else {
1105  blob_width = joined_box.width ();
1106  if (split_outsize) {
1107  width_units =
1108  static_cast<int32_t>(floor (static_cast<float>(blob_width) / initial_pitch + 0.5));
1109  if (width_units < 1)
1110  width_units = 1;
1111  width_units--;
1112  }
1113  else if (ignore_outsize) {
1114  width = static_cast<float>(blob_width) / initial_pitch;
1115  width_units = width < 1 + words_default_fixed_limit
1116  && width > 1 - words_default_fixed_limit ? 0 : -1;
1117  }
1118  else
1119  width_units = 0; //everything in
1120  x_centre = static_cast<int32_t>(joined_box.left ()
1121  + (blob_width -
1122  width_units * initial_pitch) / 2);
1123  if (prev_valid && width_units >= 0) {
1124  // if (width_units>0)
1125  // {
1126  // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
1127  // width_units,blob_width,x_centre,x_centre-prev_centre);
1128  // }

◆ find_repeated_chars()

void find_repeated_chars ( TO_BLOCK *  block,
bool  testing_on 
)

Definition at line 1739 of file topitch.cpp.

1745  : "DP";
1746  tprintf
1747  ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1748  word_sync, word_sync / initial_pitch,
1749  word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
1750  occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1751 }
1752 
1753 /**********************************************************************
1754  * find_repeated_chars
1755  *
1756  * Extract marked leader blobs and put them
1757  * into words in advance of fixed pitch checking and word generation.
1758  **********************************************************************/
1759 void find_repeated_chars(TO_BLOCK* block, // Block to search.
1760  bool testing_on) { // Debug mode.
1761  POLY_BLOCK* pb = block->block->pdblk.poly_block();
1762  if (pb != nullptr && !pb->IsText())
1763  return; // Don't find repeated chars in non-text blocks.
1764 
1765  TO_ROW *row;
1766  BLOBNBOX_IT box_it;
1767  BLOBNBOX_IT search_it; // forward search
1768  WERD *word; // new word
1769  TBOX word_box; // for plotting
1770  int blobcount, repeated_set;
1771 
1772  TO_ROW_IT row_it = block->get_rows();
1773  if (row_it.empty()) return; // empty block
1774  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1775  row = row_it.data();
1776  box_it.set_to_list(row->blob_list());
1777  if (box_it.empty()) continue; // no blobs in this row
1778  if (!row->rep_chars_marked()) {
1779  mark_repeated_chars(row);
1780  }
1781  if (row->num_repeated_sets() == 0) continue; // nothing to do for this row
1782  // new words
1783  WERD_IT word_it(&row->rep_words);
1784  do {
1785  if (box_it.data()->repeated_set() != 0 &&
1786  !box_it.data()->joined_to_prev()) {
1787  blobcount = 1;
1788  repeated_set = box_it.data()->repeated_set();
1789  search_it = box_it;
1790  search_it.forward();
1791  while (!search_it.at_first() &&
1792  search_it.data()->repeated_set() == repeated_set) {
1793  blobcount++;
1794  search_it.forward();
1795  }

◆ find_row_pitch()

bool find_row_pitch ( TO_ROW *  row,
int32_t  maxwidth,
int32_t  dm_gap,
TO_BLOCK *  block,
int32_t  block_index,
int32_t  row_index,
bool  testing_on 
)

Definition at line 828 of file topitch.cpp.

848  {
849  bool used_dm_model; //looks like dot matrix
850  float min_space; //estimate threshold
851  float non_space; //gap size
852  float gap_iqr; //interquartile range
853  float pitch_iqr;
854  float dm_gap_iqr; //interquartile range
855  float dm_pitch_iqr;
856  float dm_pitch; //pitch with dm on
857  float pitch; //revised estimate
858  float initial_pitch; //guess at pitch
859  STATS gap_stats (0, maxwidth);
860  //centre-centre
861  STATS pitch_stats (0, maxwidth);
862 
863  row->fixed_pitch = 0.0f;
864  initial_pitch = row->fp_space;
865  if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
866  initial_pitch = row->xheight;//keep pitch decent
867  non_space = row->fp_nonsp;
868  if (non_space > initial_pitch)
869  non_space = initial_pitch;
870  min_space = (initial_pitch + non_space) / 2;
871 
872  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
873  initial_pitch, min_space, true, false, dm_gap)) {
874  dm_gap_iqr = 0.0001;
875  dm_pitch_iqr = maxwidth * 2.0f;
876  dm_pitch = initial_pitch;
877  }
878  else {
879  dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
880  dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
881  dm_pitch = pitch_stats.ile (0.5);
882  }
883  gap_stats.clear ();
884  pitch_stats.clear ();
885  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
886  initial_pitch, min_space, true, false, 0)) {
887  gap_iqr = 0.0001;
888  pitch_iqr = maxwidth * 3.0f;
889  }
890  else {
891  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
892  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
893  if (testing_on)
894  tprintf
895  ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
896  initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
897  initial_pitch = pitch_stats.ile (0.5);
898  if (min_space > initial_pitch
899  && count_pitch_stats (row, &gap_stats, &pitch_stats,
900  initial_pitch, initial_pitch, true, false, 0)) {
901  min_space = initial_pitch;
902  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
903  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
904  if (testing_on)
905  tprintf
906  ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
907  initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
908  initial_pitch = pitch_stats.ile (0.5);
909  }
910  }
912  tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
913  block_index, row_index, 'X',
914  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
915  pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
916  (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
917  if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
920  tprintf ("\n");
921  return false; //insufficient data
922  }
923  if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
924  if (testing_on)
925  tprintf
926  ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
927  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
928  gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
929  pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
930  pitch = pitch_stats.ile (0.5);
931  used_dm_model = false;
932  }
933  else {
934  if (testing_on)
935  tprintf
936  ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
937  pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
938  gap_iqr = dm_gap_iqr;
939  pitch_iqr = dm_pitch_iqr;
940  pitch = dm_pitch;
941  used_dm_model = true;
942  }
944  tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
945  pitch_iqr, gap_iqr, pitch);
946  tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
947  pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
948  pitch_iqr < gap_iqr * textord_fpiqr_ratio
949  && pitch_iqr < block->xheight * textord_max_pitch_iqr
950  && pitch < block->xheight * textord_words_default_maxspace
951  ? 'F' : 'P');
952  }
953  if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
954  && pitch_iqr < block->xheight * textord_max_pitch_iqr
955  && pitch < block->xheight * textord_words_default_maxspace)
957  else

◆ fix_row_pitch()

void fix_row_pitch ( TO_ROW *  bad_row,
TO_BLOCK *  bad_block,
TO_BLOCK_LIST *  blocks,
int32_t  row_target,
int32_t  block_target 
)

Definition at line 146 of file topitch.cpp.

153  { // number of block
154  int16_t mid_cuts;
155  int block_votes; //votes in block
156  int like_votes; //votes over page
157  int other_votes; //votes of unlike blocks
158  int block_index; //number of block
159  int row_index; //number of row
160  int maxwidth; //max pitch
161  TO_BLOCK_IT block_it = blocks; //block iterator
162  TO_BLOCK *block; //current block
163  TO_ROW *row; //current row
164  float sp_sd; //space deviation
165  STATS block_stats; //pitches in block
166  STATS like_stats; //pitches in page
167 
168  block_votes = like_votes = other_votes = 0;
169  maxwidth = static_cast<int32_t>(ceil (bad_row->xheight * textord_words_maxspace));
170  if (bad_row->pitch_decision != PITCH_DEF_FIXED
171  && bad_row->pitch_decision != PITCH_DEF_PROP) {
172  block_stats.set_range (0, maxwidth);
173  like_stats.set_range (0, maxwidth);
174  block_index = 1;
175  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
176  block_it.forward()) {
177  block = block_it.data();
178  POLY_BLOCK* pb = block->block->pdblk.poly_block();
179  if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist!
180  row_index = 1;
181  TO_ROW_IT row_it(block->get_rows());
182  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
183  row_it.forward ()) {
184  row = row_it.data ();
185  if ((bad_row->all_caps
186  && row->xheight + row->ascrise
187  <
188  (bad_row->xheight + bad_row->ascrise) * (1 +
190  && row->xheight + row->ascrise >
191  (bad_row->xheight + bad_row->ascrise) * (1 -
193  || (!bad_row->all_caps
194  && row->xheight <
195  bad_row->xheight * (1 + textord_pitch_rowsimilarity)
196  && row->xheight >
197  bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
198  if (block_index == block_target) {
199  if (row->pitch_decision == PITCH_DEF_FIXED) {
200  block_votes += textord_words_veto_power;
201  block_stats.add (static_cast<int32_t>(row->fixed_pitch),
203  }
204  else if (row->pitch_decision == PITCH_MAYBE_FIXED
205  || row->pitch_decision == PITCH_CORR_FIXED) {
206  block_votes++;
207  block_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
208  }
209  else if (row->pitch_decision == PITCH_DEF_PROP)
210  block_votes -= textord_words_veto_power;
211  else if (row->pitch_decision == PITCH_MAYBE_PROP
212  || row->pitch_decision == PITCH_CORR_PROP)
213  block_votes--;
214  }
215  else {
216  if (row->pitch_decision == PITCH_DEF_FIXED) {
217  like_votes += textord_words_veto_power;
218  like_stats.add (static_cast<int32_t>(row->fixed_pitch),
220  }
221  else if (row->pitch_decision == PITCH_MAYBE_FIXED
222  || row->pitch_decision == PITCH_CORR_FIXED) {
223  like_votes++;
224  like_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
225  }
226  else if (row->pitch_decision == PITCH_DEF_PROP)
227  like_votes -= textord_words_veto_power;
228  else if (row->pitch_decision == PITCH_MAYBE_PROP
229  || row->pitch_decision == PITCH_CORR_PROP)
230  like_votes--;
231  }
232  }
233  else {
234  if (row->pitch_decision == PITCH_DEF_FIXED)
235  other_votes += textord_words_veto_power;
236  else if (row->pitch_decision == PITCH_MAYBE_FIXED
237  || row->pitch_decision == PITCH_CORR_FIXED)
238  other_votes++;
239  else if (row->pitch_decision == PITCH_DEF_PROP)
240  other_votes -= textord_words_veto_power;
241  else if (row->pitch_decision == PITCH_MAYBE_PROP
242  || row->pitch_decision == PITCH_CORR_PROP)
243  other_votes--;
244  }
245  row_index++;
246  }
247  block_index++;
248  }
249  if (block_votes > textord_words_veto_power) {
250  bad_row->fixed_pitch = block_stats.ile (0.5);
251  bad_row->pitch_decision = PITCH_CORR_FIXED;
252  }
253  else if (block_votes <= textord_words_veto_power && like_votes > 0) {
254  bad_row->fixed_pitch = like_stats.ile (0.5);
255  bad_row->pitch_decision = PITCH_CORR_FIXED;
256  }
257  else {
258  bad_row->pitch_decision = PITCH_CORR_PROP;
259  if (block_votes == 0 && like_votes == 0 && other_votes > 0
261  tprintf
262  ("Warning:row %d of block %d set prop with no like rows against trend\n",
263  row_target, block_target);
264  }
265  }
267  tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
268  block_votes, like_votes, other_votes);
269  tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
270  }
271  if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
272  if (bad_row->fixed_pitch < textord_min_xheight) {
273  if (block_votes > 0)
274  bad_row->fixed_pitch = block_stats.ile (0.5);
275  else if (block_votes == 0 && like_votes > 0)
276  bad_row->fixed_pitch = like_stats.ile (0.5);
277  else {
278  tprintf
279  ("Warning:guessing pitch as xheight on row %d, block %d\n",
280  row_target, block_target);
281  bad_row->fixed_pitch = bad_row->xheight;
282  }
283  }
284  if (bad_row->fixed_pitch < textord_min_xheight)
285  bad_row->fixed_pitch = (float) textord_min_xheight;
286  bad_row->kern_size = bad_row->fixed_pitch / 4;
287  bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
288  bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
289  bad_row->space_threshold =
290  (bad_row->min_space + bad_row->max_nonspace) / 2;
291  bad_row->space_size = bad_row->fixed_pitch;
292  if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
293  tune_row_pitch (bad_row, &bad_row->projection,
294  bad_row->projection_left, bad_row->projection_right,
295  (bad_row->fixed_pitch +
296  bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
297  sp_sd, mid_cuts, &bad_row->char_cells, false);
298  }
299  }
300  else if (bad_row->pitch_decision == PITCH_CORR_PROP
301  || bad_row->pitch_decision == PITCH_DEF_PROP) {
302  bad_row->fixed_pitch = 0.0f;

◆ fixed_pitch_row()

bool fixed_pitch_row ( TO_ROW *  row,
BLOCK *  block,
int32_t  block_index 
)

Definition at line 967 of file topitch.cpp.

983  {
984  const char *res_string; // pitch result
985  int16_t mid_cuts; // no of cheap cuts
986  float non_space; // gap size
987  float pitch_sd; // error on pitch
988  float sp_sd = 0.0f; // space sd
989 
990  non_space = row->fp_nonsp;
991  if (non_space > row->fixed_pitch)
992  non_space = row->fixed_pitch;
993  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
994  if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
995  // Set the decision to definitely proportional.
996  pitch_sd = textord_words_def_prop * row->fixed_pitch;
998  } else {
999  pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
1000  row->projection_right,
1001  (row->fixed_pitch + non_space * 3) / 4,
1002  row->fixed_pitch, sp_sd, mid_cuts,
1003  &row->char_cells,
1004  block_index == textord_debug_block);
1005  if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1006  && ((pitsync_linear_version & 3) < 3
1007  || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
1008  || sp_sd > 20
1009  || (pitch_sd == 0 && sp_sd > 10))))) {
1010  if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1011  && !row->all_caps
1012  && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1014  else
1016  }
1017  else if ((pitsync_linear_version & 3) < 3
1018  || sp_sd > 20
1019  || mid_cuts > 0
1020  || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
1021  if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1023  else
1025  }
1026  else
1027  row->pitch_decision = PITCH_DUNNO;
1028  }
1029 
1031  res_string = "??";
1032  switch (row->pitch_decision) {
1033  case PITCH_DEF_PROP:
1034  res_string = "DP";
1035  break;
1036  case PITCH_MAYBE_PROP:
1037  res_string = "MP";
1038  break;
1039  case PITCH_DEF_FIXED:

◆ plot_fp_word()

void plot_fp_word ( TO_BLOCK *  block,
float  pitch,
float  nonspace 
)

Definition at line 1804 of file topitch.cpp.

1810  {
1811  box_it.forward();
1812  }
1813  } while (!box_it.at_first());
1814  }
1815 }
1816 
1817 
1818 /**********************************************************************
1819  * plot_fp_word

◆ print_block_counts()

void print_block_counts ( TO_BLOCK *  block,
int32_t  block_index 
)

Definition at line 614 of file topitch.cpp.

626  {
627  int32_t def_fixed = 0; //counters
628  int32_t def_prop = 0;
629  int32_t maybe_fixed = 0;
630  int32_t maybe_prop = 0;
631  int32_t dunno = 0;
632  int32_t corr_fixed = 0;
633  int32_t corr_prop = 0;
634 
635  count_block_votes(block,
636  def_fixed,
637  def_prop,
638  maybe_fixed,
639  maybe_prop,
640  corr_fixed,
641  corr_prop,
642  dunno);

◆ print_pitch_sd()

void print_pitch_sd ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float  initial_pitch 
)

Definition at line 1612 of file topitch.cpp.

1620  : initial_pitch * 10;
1621 }
1622 
1623 
1624 /**********************************************************************
1625  * print_pitch_sd
1626  *
1627  * Use a dp algorithm to fit the character cells and return the sd of
1628  * the cell size over the row.
1629  **********************************************************************/
1630 
1631 void print_pitch_sd( //find fp cells
1632  TO_ROW *row, //row to do
1633  STATS *projection, //vertical projection
1634  int16_t projection_left, //edges //size of blank
1635  int16_t projection_right,
1636  float space_size,
1637  float initial_pitch //guess at pitch
1638  ) {
1639  const char *res2; //pitch result
1640  int16_t occupation; //used cells
1641  float sp_sd; //space sd
1642  //blobs
1643  BLOBNBOX_IT blob_it = row->blob_list ();
1644  BLOBNBOX_IT start_it; //start of word
1645  BLOBNBOX_IT row_start; //start of row
1646  int16_t blob_count; //no of blobs
1647  int16_t total_blob_count; //total blobs in line
1648  TBOX blob_box; //bounding box
1649  TBOX prev_box; //of super blob
1650  int32_t prev_right; //of word sync
1651  int scale_factor; //on scores for big words
1652  int32_t sp_count; //spaces
1653  FPSEGPT_LIST seg_list; //char cells
1654  FPSEGPT_IT seg_it; //iterator
1655  double sqsum; //sum of squares
1656  double spsum; //of spaces
1657  double sp_var; //space error
1658  double word_sync; //result for word
1659  double total_count; //total cuts
1660 
1661  if (blob_it.empty ())
1662  return;
1663  row_start = blob_it;
1664  total_blob_count = 0;
1665 
1666  total_count = 0;
1667  sqsum = 0;
1668  sp_count = 0;
1669  spsum = 0;
1670  prev_right = -1;
1671  blob_it = row_start;
1672  start_it = blob_it;
1673  blob_count = 0;
1674  blob_box = box_next (&blob_it);//first blob
1675  blob_it.mark_cycle_pt ();
1676  do {
1677  for (; blob_count > 0; blob_count--)
1678  box_next(&start_it);
1679  do {
1680  prev_box = blob_box;
1681  blob_count++;
1682  blob_box = box_next (&blob_it);
1683  }
1684  while (!blob_it.cycled_list ()
1685  && blob_box.left () - prev_box.right () < space_size);
1686  word_sync =
1687  check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1688  projection, projection_left, projection_right,
1690  occupation, &seg_list, 0, 0);
1691  total_blob_count += blob_count;
1692  seg_it.set_to_list (&seg_list);
1693  if (prev_right >= 0) {
1694  sp_var = seg_it.data ()->position () - prev_right;
1695  sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1696  sp_var *= sp_var;
1697  spsum += sp_var;
1698  sp_count++;
1699  }
1700  seg_it.move_to_last ();
1701  prev_right = seg_it.data ()->position ();
1703  scale_factor = (seg_list.length () - 2) / 2;
1704  if (scale_factor < 1)
1705  scale_factor = 1;
1706  }
1707  else
1708  scale_factor = 1;
1709  sqsum += word_sync * scale_factor;
1710  total_count += (seg_list.length () - 1) * scale_factor;
1711  seg_list.clear ();
1712  }
1713  while (!blob_it.cycled_list ());
1714  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1715  word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1716  tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1717  word_sync, word_sync / initial_pitch, sp_sd,
1718  word_sync < textord_words_pitchsd_threshold * initial_pitch
1719  ? 'F' : 'P');
1720 
1721  start_it = row_start;
1722  blob_it = row_start;
1723  word_sync =
1724  check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
1725  projection, projection_left, projection_right,
1726  row->xheight * textord_projection_scale, occupation,
1727  &seg_list, 0, 0);
1728  if (occupation > 1)
1729  word_sync /= occupation;
1730  word_sync = sqrt (word_sync);
1731 
1732 #ifndef GRAPHICS_DISABLED

◆ row_pitch_stats()

bool row_pitch_stats ( TO_ROW *  row,
int32_t  maxwidth,
bool  testing_on 
)

Definition at line 696 of file topitch.cpp.

711  {
712  BLOBNBOX *blob; //current blob
713  int gap_index; //current gap
714  int32_t prev_x; //end of prev blob
715  int32_t cluster_count; //no of clusters
716  int32_t prev_count; //of clusters
717  int32_t smooth_factor; //for smoothing stats
718  TBOX blob_box; //bounding box
719  float lower, upper; //cluster thresholds
720  //gap sizes
721  float gaps[BLOCK_STATS_CLUSTERS];
722  //blobs
723  BLOBNBOX_IT blob_it = row->blob_list ();
724  STATS gap_stats (0, maxwidth);
725  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
726  //clusters
727 
728  smooth_factor =
729  static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
730  if (!blob_it.empty ()) {
731  prev_x = blob_it.data ()->bounding_box ().right ();
732  blob_it.forward ();
733  while (!blob_it.at_first ()) {
734  blob = blob_it.data ();
735  if (!blob->joined_to_prev ()) {
736  blob_box = blob->bounding_box ();
737  if (blob_box.left () - prev_x < maxwidth)
738  gap_stats.add (blob_box.left () - prev_x, 1);
739  prev_x = blob_box.right ();
740  }
741  blob_it.forward ();
742  }
743  }
744  if (gap_stats.get_total () == 0) {
745  return false;
746  }
747  cluster_count = 0;
748  lower = row->xheight * words_initial_lower;
749  upper = row->xheight * words_initial_upper;
750  gap_stats.smooth (smooth_factor);
751  do {
752  prev_count = cluster_count;
753  cluster_count = gap_stats.cluster (lower, upper,
755  BLOCK_STATS_CLUSTERS, cluster_stats);
756  }
757  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
758  if (cluster_count < 1) {
759  return false;
760  }
761  for (gap_index = 0; gap_index < cluster_count; gap_index++)
762  gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
763  //get medians
764  if (testing_on) {
765  tprintf ("cluster_count=%d:", cluster_count);
766  for (gap_index = 0; gap_index < cluster_count; gap_index++)
767  tprintf (" %g(%d)", gaps[gap_index],
768  cluster_stats[gap_index + 1].get_total ());
769  tprintf ("\n");
770  }
771  qsort (gaps, cluster_count, sizeof (float), sort_floats);
772 
773  //Try to find proportional non-space and space for row.
774  lower = row->xheight * words_default_prop_nonspace;
775  upper = row->xheight * textord_words_min_minspace;
776  for (gap_index = 0; gap_index < cluster_count
777  && gaps[gap_index] < lower; gap_index++);
778  if (gap_index == 0) {
779  if (testing_on)
780  tprintf ("No clusters below nonspace threshold!!\n");
781  if (cluster_count > 1) {
782  row->pr_nonsp = gaps[0];
783  row->pr_space = gaps[1];
784  }
785  else {
786  row->pr_nonsp = lower;
787  row->pr_space = gaps[0];
788  }
789  }
790  else {
791  row->pr_nonsp = gaps[gap_index - 1];
792  while (gap_index < cluster_count && gaps[gap_index] < upper)
793  gap_index++;
794  if (gap_index == cluster_count) {
795  if (testing_on)
796  tprintf ("No clusters above nonspace threshold!!\n");
797  row->pr_space = lower * textord_spacesize_ratioprop;
798  }
799  else
800  row->pr_space = gaps[gap_index];
801  }
802 
803  //Now try to find the fixed pitch space and non-space.
804  upper = row->xheight * words_default_fixed_space;
805  for (gap_index = 0; gap_index < cluster_count
806  && gaps[gap_index] < upper; gap_index++);
807  if (gap_index == 0) {
808  if (testing_on)
809  tprintf ("No clusters below space threshold!!\n");
810  row->fp_nonsp = upper;
811  row->fp_space = gaps[0];
812  }
813  else {
814  row->fp_nonsp = gaps[gap_index - 1];
815  if (gap_index == cluster_count) {
816  if (testing_on)
817  tprintf ("No clusters above space threshold!!\n");
818  row->fp_space = row->xheight;

◆ try_block_fixed()

bool try_block_fixed ( TO_BLOCK *  block,
int32_t  block_index 
)

Definition at line 534 of file topitch.cpp.

544  {

◆ try_doc_fixed()

bool try_doc_fixed ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient 
)

Definition at line 395 of file topitch.cpp.

405  {
406  int16_t master_x; //uniform shifts
407  int16_t pitch; //median pitch.
408  int x; //profile coord
409  int prop_blocks; //correct counts
410  int fixed_blocks;
411  int total_row_count; //total in page
412  //iterator
413  TO_BLOCK_IT block_it = port_blocks;
414  TO_BLOCK *block; //current block;
415  TO_ROW *row; //current row
416  int16_t projection_left; //edges
417  int16_t projection_right;
418  int16_t row_left; //edges of row
419  int16_t row_right;
420  ICOORDELT_LIST *master_cells; //cells for page
421  float master_y; //uniform shifts
422  float shift_factor; //page skew correction
423  float row_shift; //shift for row
424  float final_pitch; //output pitch
425  float row_y; //baseline
426  STATS projection; //entire page
427  STATS pitches (0, MAX_ALLOWED_PITCH);
428  //for median
429  float sp_sd; //space sd
430  int16_t mid_cuts; //no of cheap cuts
431  float pitch_sd; //sync rating
432 
433  if (block_it.empty ()
434  // || block_it.data()==block_it.data_relative(1)
436  return false;
437  shift_factor = gradient / (gradient * gradient + 1);
438  // row iterator
439  TO_ROW_IT row_it(block_it.data ()->get_rows());
440  master_x = row_it.data ()->projection_left;
441  master_y = row_it.data ()->baseline.y (master_x);
442  projection_left = INT16_MAX;
443  projection_right = -INT16_MAX;
444  prop_blocks = 0;
445  fixed_blocks = 0;
446  total_row_count = 0;
447 
448  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
449  block_it.forward ()) {
450  block = block_it.data ();
451  row_it.set_to_list (block->get_rows ());
452  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
453  row = row_it.data ();
454  total_row_count++;
455  if (row->fixed_pitch > 0)
456  pitches.add (static_cast<int32_t>(row->fixed_pitch), 1);
457  //find median
458  row_y = row->baseline.y (master_x);
459  row_left =
460  static_cast<int16_t>(row->projection_left -
461  shift_factor * (master_y - row_y));
462  row_right =
463  static_cast<int16_t>(row->projection_right -
464  shift_factor * (master_y - row_y));
465  if (row_left < projection_left)
466  projection_left = row_left;
467  if (row_right > projection_right)
468  projection_right = row_right;
469  }
470  }
471  if (pitches.get_total () == 0)
472  return false;
473  projection.set_range (projection_left, projection_right);
474 
475  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
476  block_it.forward ()) {
477  block = block_it.data ();
478  row_it.set_to_list (block->get_rows ());
479  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
480  row = row_it.data ();
481  row_y = row->baseline.y (master_x);
482  row_left =
483  static_cast<int16_t>(row->projection_left -
484  shift_factor * (master_y - row_y));
485  for (x = row->projection_left; x < row->projection_right;
486  x++, row_left++) {
487  projection.add (row_left, row->projection.pile_count (x));
488  }
489  }
490  }
491 
492  row_it.set_to_list (block_it.data ()->get_rows ());
493  row = row_it.data ();
494 #ifndef GRAPHICS_DISABLED
495  if (textord_show_page_cuts && to_win != nullptr)
496  projection.plot (to_win, projection_left,
497  row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
498 #endif
499  final_pitch = pitches.ile (0.5);
500  pitch = static_cast<int16_t>(final_pitch);
501  pitch_sd =
502  tune_row_pitch (row, &projection, projection_left, projection_right,
503  pitch * 0.75, final_pitch, sp_sd, mid_cuts,
504  &row->char_cells, false);
505 
507  tprintf
508  ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
509  prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
510  pitch_sd / total_row_count, pitch_sd / pitch,
511  pitch_sd / total_row_count / pitch);
512 
513 #ifndef GRAPHICS_DISABLED
514  if (textord_show_page_cuts && to_win != nullptr) {
515  master_cells = &row->char_cells;
516  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
517  block_it.forward ()) {
518  block = block_it.data ();
519  row_it.set_to_list (block->get_rows ());
520  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
521  row_it.forward ()) {
522  row = row_it.data ();
523  row_y = row->baseline.y (master_x);
524  row_shift = shift_factor * (master_y - row_y);
525  plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
526  }

◆ try_rows_fixed()

bool try_rows_fixed ( TO_BLOCK *  block,
int32_t  block_index,
bool  testing_on 
)

Definition at line 547 of file topitch.cpp.

559  {
560  TO_ROW *row; //current row
561  int32_t row_index; //row number.
562  int32_t def_fixed = 0; //counters
563  int32_t def_prop = 0;
564  int32_t maybe_fixed = 0;
565  int32_t maybe_prop = 0;
566  int32_t dunno = 0;
567  int32_t corr_fixed = 0;
568  int32_t corr_prop = 0;
569  float lower, upper; //cluster thresholds
570  TO_ROW_IT row_it = block->get_rows ();
571 
572  row_index = 1;
573  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
574  row = row_it.data ();
575  ASSERT_HOST (row->xheight > 0);
576  if (row->fixed_pitch > 0 &&
577  fixed_pitch_row(row, block->block, block_index)) {
578  if (row->fixed_pitch == 0) {
579  lower = row->pr_nonsp;
580  upper = row->pr_space;
581  row->space_size = upper;
582  row->kern_size = lower;
583  }
584  }
585  row_index++;
586  }
587  count_block_votes(block,
588  def_fixed,
589  def_prop,
590  maybe_fixed,
591  maybe_prop,
592  corr_fixed,
593  corr_prop,
594  dunno);
595  if (testing_on
598  tprintf ("Initially:");
599  print_block_counts(block, block_index);
600  }
601  if (def_fixed > def_prop * textord_words_veto_power)
603  else if (def_prop > def_fixed * textord_words_veto_power)
605  else if (def_fixed > 0 || def_prop > 0)
606  block->pitch_decision = PITCH_DUNNO;

◆ tune_row_pitch()

float tune_row_pitch ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
int16_t &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
bool  testing_on 
)

Definition at line 1137 of file topitch.cpp.

1163  {
1164  int pitch_delta; //offset pitch
1165  int16_t mid_cuts; //cheap cuts
1166  float pitch_sd; //current sd
1167  float best_sd; //best result
1168  float best_pitch; //pitch for best result
1169  float initial_sd; //starting error
1170  float sp_sd; //space sd
1171  ICOORDELT_LIST test_cells; //row cells
1172  ICOORDELT_IT best_it; //start of best list
1173 
1175  return tune_row_pitch2 (row, projection, projection_left,
1176  projection_right, space_size, initial_pitch,
1177  best_sp_sd,
1178  //space sd
1179  best_mid_cuts, best_cells, testing_on);
1180  if (textord_disable_pitch_test) {
1181  best_sp_sd = initial_pitch;
1182  return initial_pitch;
1183  }
1184  initial_sd =
1185  compute_pitch_sd(row,
1186  projection,
1187  projection_left,
1188  projection_right,
1189  space_size,
1190  initial_pitch,
1191  best_sp_sd,
1192  best_mid_cuts,
1193  best_cells,
1194  testing_on);
1195  best_sd = initial_sd;
1196  best_pitch = initial_pitch;
1197  if (testing_on)
1198  tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1199  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1200  pitch_sd =
1201  compute_pitch_sd (row, projection, projection_left, projection_right,
1202  space_size, initial_pitch + pitch_delta, sp_sd,
1203  mid_cuts, &test_cells, testing_on);
1204  if (testing_on)
1205  tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1206  pitch_sd);
1207  if (pitch_sd < best_sd) {
1208  best_sd = pitch_sd;
1209  best_mid_cuts = mid_cuts;
1210  best_sp_sd = sp_sd;
1211  best_pitch = initial_pitch + pitch_delta;
1212  best_cells->clear ();
1213  best_it.set_to_list (best_cells);
1214  best_it.add_list_after (&test_cells);
1215  }
1216  else
1217  test_cells.clear ();
1218  if (pitch_sd > initial_sd)
1219  break; //getting worse
1220  }
1221  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1222  pitch_sd =
1223  compute_pitch_sd (row, projection, projection_left, projection_right,
1224  space_size, initial_pitch - pitch_delta, sp_sd,
1225  mid_cuts, &test_cells, testing_on);
1226  if (testing_on)
1227  tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1228  pitch_sd);
1229  if (pitch_sd < best_sd) {
1230  best_sd = pitch_sd;
1231  best_mid_cuts = mid_cuts;
1232  best_sp_sd = sp_sd;
1233  best_pitch = initial_pitch - pitch_delta;
1234  best_cells->clear ();
1235  best_it.set_to_list (best_cells);
1236  best_it.add_list_after (&test_cells);
1237  }
1238  else
1239  test_cells.clear ();

◆ tune_row_pitch2()

float tune_row_pitch2 ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
int16_t &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
bool  testing_on 
)

Definition at line 1248 of file topitch.cpp.

1275  {
1276  int pitch_delta; //offset pitch
1277  int16_t pixel; //pixel coord
1278  int16_t best_pixel; //pixel coord
1279  int16_t best_delta; //best pitch
1280  int16_t best_pitch; //best pitch
1281  int16_t start; //of good range
1282  int16_t end; //of good range
1283  int32_t best_count; //lowest sum
1284  float best_sd; //best result
1285 
1286  best_sp_sd = initial_pitch;
1287 
1288  best_pitch = static_cast<int>(initial_pitch);
1289  if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
1290  return initial_pitch;
1291  }
1292  std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection
1293 
1294  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1295  pitch_delta++)
1296  sum_proj[textord_pitch_range + pitch_delta].set_range (0,
1297  best_pitch +
1298  pitch_delta + 1);
1299  for (pixel = projection_left; pixel <= projection_right; pixel++) {
1300  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1301  pitch_delta++) {
1302  sum_proj[textord_pitch_range + pitch_delta].add(
1303  (pixel - projection_left) % (best_pitch + pitch_delta),
1304  projection->pile_count(pixel));
1305  }
1306  }
1307  best_count = sum_proj[textord_pitch_range].pile_count (0);
1308  best_delta = 0;
1309  best_pixel = 0;
1310  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1311  pitch_delta++) {
1312  for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1313  if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
1314  < best_count) {
1315  best_count =
1316  sum_proj[textord_pitch_range +
1317  pitch_delta].pile_count (pixel);
1318  best_delta = pitch_delta;
1319  best_pixel = pixel;
1320  }
1321  }
1322  }
1323  if (testing_on)
1324  tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1325  initial_pitch, best_delta, best_count);
1326  best_pitch += best_delta;
1327  initial_pitch = best_pitch;
1328  best_count++;
1329  best_count += best_count;
1330  for (start = best_pixel - 2; start > best_pixel - best_pitch
1331  && sum_proj[textord_pitch_range +
1332  best_delta].pile_count (start % best_pitch) <= best_count;
1333  start--);
1334  for (end = best_pixel + 2;
1335  end < best_pixel + best_pitch
1336  && sum_proj[textord_pitch_range +
1337  best_delta].pile_count (end % best_pitch) <= best_count;
1338  end++);
1339 
1340  best_sd =
1341  compute_pitch_sd(row,
1342  projection,
1343  projection_left,
1344  projection_right,
1345  space_size,
1346  initial_pitch,
1347  best_sp_sd,
1348  best_mid_cuts,
1349  best_cells,
1350  testing_on,

Variable Documentation

◆ textord_balance_factor

double textord_balance_factor = 1.0

"Ding rate for unbalanced char cells"

Definition at line 53 of file topitch.cpp.

◆ textord_blockndoc_fixed

bool textord_blockndoc_fixed = false

"Attempt whole doc/block fixed pitch"

Definition at line 50 of file topitch.cpp.

◆ textord_debug_pitch_metric

bool textord_debug_pitch_metric = false

"Write full metric stuff"

Definition at line 44 of file topitch.cpp.

◆ textord_debug_pitch_test

bool textord_debug_pitch_test = false

"Debug on fixed pitch test"

Definition at line 38 of file topitch.cpp.

◆ textord_fast_pitch_test

bool textord_fast_pitch_test = false

"Do even faster pitch algorithm"

Definition at line 42 of file topitch.cpp.

◆ textord_pitch_cheat

bool textord_pitch_cheat = false

"Use correct answer for fixed/prop"

Definition at line 48 of file topitch.cpp.

◆ textord_projection_scale

double textord_projection_scale = 0.200

"Ding rate for mid-cuts"

Definition at line 51 of file topitch.cpp.

◆ textord_show_page_cuts

bool textord_show_page_cuts = false

"Draw page-level cuts"

Definition at line 46 of file topitch.cpp.

◆ textord_show_row_cuts

bool textord_show_row_cuts = false

"Draw row-level cuts"

Definition at line 45 of file topitch.cpp.

TO_ROW::min_space
int32_t min_space
Definition: blobbox.h:662
ICOORD::set_x
void set_x(int16_t xin)
rewrite function
Definition: points.h:60
textord_fpiqr_ratio
double textord_fpiqr_ratio
Definition: tovars.cpp:78
TO_BLOCK::max_nonspace
int32_t max_nonspace
Definition: blobbox.h:792
textord_debug_block
int textord_debug_block
Definition: tovars.cpp:33
check_pitch_sync2
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:286
TO_ROW::rep_words
WERD_LIST rep_words
Definition: blobbox.h:667
textord_blocksall_prop
bool textord_blocksall_prop
Definition: tovars.cpp:28
TO_ROW::space_size
float space_size
Definition: blobbox.h:666
PDBLK::bounding_box
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:58
textord_projection_scale
double textord_projection_scale
Definition: topitch.cpp:51
TO_ROW::pr_nonsp
float pr_nonsp
Definition: blobbox.h:654
TO_BLOCK::kern_size
float kern_size
Definition: blobbox.h:789
create_to_win
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:42
POLY_BLOCK::IsText
bool IsText() const
Definition: polyblk.h:62
TO_ROW::pr_space
float pr_space
Definition: blobbox.h:653
words_default_fixed_space
double words_default_fixed_space
Definition: tovars.cpp:70
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
compute_block_pitch
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
Definition: topitch.cpp:310
PITCH_DEF_PROP
Definition: blobbox.h:48
BLOCK_STATS_CLUSTERS
#define BLOCK_STATS_CLUSTERS
Definition: topitch.cpp:55
plot_fp_cells2
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:351
tune_row_pitch2
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1248
print_pitch_sd
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
Definition: topitch.cpp:1612
textord_pitch_range
int textord_pitch_range
Definition: tovars.cpp:34
textord_words_default_maxspace
double textord_words_default_maxspace
Definition: tovars.cpp:43
TO_ROW::projection_left
int16_t projection_left
Definition: blobbox.h:647
TBOX::top
int16_t top() const
Definition: rect.h:57
TO_ROW::projection_right
int16_t projection_right
Definition: blobbox.h:648
STATS::pile_count
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
TO_BLOCK
Definition: blobbox.h:691
textord_show_fixed_cuts
bool textord_show_fixed_cuts
Definition: drawtord.cpp:32
PITCH_CORR_PROP
Definition: blobbox.h:51
textord_dotmatrix_gap
int textord_dotmatrix_gap
Definition: tovars.cpp:32
PITCH_DEF_FIXED
Definition: blobbox.h:46
plot_row_cells
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
Definition: drawtord.cpp:383
textord_words_pitchsd_threshold
double textord_words_pitchsd_threshold
Definition: tovars.cpp:56
words_default_prop_nonspace
double words_default_prop_nonspace
Definition: tovars.cpp:69
TO_ROW::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:649
try_rows_fixed
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:547
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
fixed_pitch_row
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
Definition: topitch.cpp:967
BLOBNBOX
Definition: blobbox.h:142
compute_pitch_sd
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1359
textord_pitch_rowsimilarity
double textord_pitch_rowsimilarity
Definition: tovars.cpp:64
words_default_fixed_limit
double words_default_fixed_limit
Definition: tovars.cpp:71
textord_pitch_scalebigwords
bool textord_pitch_scalebigwords
Definition: tovars.cpp:66
check_pitch_sync
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:138
textord_blocksall_fixed
bool textord_blocksall_fixed
Definition: tovars.cpp:26
TO_ROW::fp_nonsp
float fp_nonsp
Definition: blobbox.h:652
textord_show_initial_words
bool textord_show_initial_words
Definition: tovars.cpp:22
textord_min_xheight
int textord_min_xheight
Definition: makerow.cpp:67
textord_wordstats_smooth_factor
double textord_wordstats_smooth_factor
Definition: tovars.cpp:36
textord_words_veto_power
int textord_words_veto_power
Definition: tovars.cpp:62
textord_debug_pitch_metric
bool textord_debug_pitch_metric
Definition: topitch.cpp:44
TO_ROW::rep_chars_marked
bool rep_chars_marked() const
Definition: blobbox.h:630
textord_fast_pitch_test
bool textord_fast_pitch_test
Definition: topitch.cpp:42
textord_show_page_cuts
bool textord_show_page_cuts
Definition: topitch.cpp:46
TO_ROW::used_dm_model
bool used_dm_model
Definition: blobbox.h:646
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
mark_repeated_chars
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2639
PITCH_DUNNO
Definition: blobbox.h:45
compute_rows_pitch
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:352
PDBLK::poly_block
POLY_BLOCK * poly_block() const
Definition: pdblock.h:54
try_doc_fixed
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
Definition: topitch.cpp:395
PITCH_CORR_FIXED
Definition: blobbox.h:50
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
tune_row_pitch
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1137
TO_ROW::projection
STATS projection
Definition: blobbox.h:670
TO_BLOCK::fixed_pitch
float fixed_pitch
Definition: blobbox.h:788
words_initial_upper
double words_initial_upper
Definition: tovars.cpp:68
TO_ROW::fp_space
float fp_space
Definition: blobbox.h:651
print_block_counts
void print_block_counts(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:614
TO_BLOCK::xheight
float xheight
Definition: blobbox.h:787
TO_ROW::num_repeated_sets
int num_repeated_sets() const
Definition: blobbox.h:636
BLOBNBOX::joined_to_prev
bool joined_to_prev() const
Definition: blobbox.h:255
textord_spacesize_ratioprop
double textord_spacesize_ratioprop
Definition: tovars.cpp:77
TBOX::width
int16_t width() const
Definition: rect.h:114
textord_blockndoc_fixed
bool textord_blockndoc_fixed
Definition: topitch.cpp:50
STATS::plot
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:558
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
TO_ROW::fixed_pitch
float fixed_pitch
Definition: blobbox.h:650
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
count_pitch_stats
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
Definition: topitch.cpp:1050
TO_BLOCK::space_size
float space_size
Definition: blobbox.h:790
TO_BLOCK::pitch_decision
PITCH_TYPE pitch_decision
Definition: blobbox.h:777
textord_words_maxspace
double textord_words_maxspace
Definition: tovars.cpp:41
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
textord_words_def_prop
double textord_words_def_prop
Definition: tovars.cpp:60
TO_BLOCK::pr_space
float pr_space
Definition: blobbox.h:795
TO_BLOCK::pr_nonsp
float pr_nonsp
Definition: blobbox.h:796
PITCH_MAYBE_FIXED
Definition: blobbox.h:47
TO_BLOCK::min_space
int32_t min_space
Definition: blobbox.h:791
TO_BLOCK::get_rows
TO_ROW_LIST * get_rows()
Definition: blobbox.h:703
STATS::ile
double ile(double frac) const
Definition: statistc.cpp:156
TO_ROW::space_threshold
int32_t space_threshold
Definition: blobbox.h:664
words_initial_lower
double words_initial_lower
Definition: tovars.cpp:67
TO_ROW::max_nonspace
int32_t max_nonspace
Definition: blobbox.h:663
QSPLINE::y
double y(double x) const
Definition: quspline.cpp:202
box_next
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:629
ScrollView::GOLDENROD
Definition: scrollview.h:123
TO_ROW::intercept
float intercept() const
Definition: blobbox.h:588
textord_words_default_minspace
double textord_words_default_minspace
Definition: tovars.cpp:45
WERD
Definition: werd.h:55
TBOX::left
int16_t left() const
Definition: rect.h:71
STATS::add
void add(int32_t value, int32_t count)
Definition: statistc.cpp:87
textord_words_min_minspace
double textord_words_min_minspace
Definition: tovars.cpp:46
fix_row_pitch
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
Definition: topitch.cpp:146
find_repeated_chars
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1739
TBOX::right
int16_t right() const
Definition: rect.h:78
compute_pitch_sd2
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1521
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
TO_ROW::char_cells
ICOORDELT_LIST char_cells
Definition: blobbox.h:668
POLY_BLOCK
Definition: polyblk.h:26
TO_ROW
Definition: blobbox.h:543
ScrollView::Update
static void Update()
Definition: scrollview.cpp:708
TO_ROW::ascrise
float ascrise
Definition: blobbox.h:658
TO_ROW::kern_size
float kern_size
Definition: blobbox.h:665
textord_words_default_nonspace
double textord_words_default_nonspace
Definition: tovars.cpp:48
TO_ROW::all_caps
bool all_caps
Definition: blobbox.h:645
ScrollView::CORAL
Definition: scrollview.h:119
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
find_row_pitch
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
Definition: topitch.cpp:828
TO_ROW::baseline
QSPLINE baseline
Definition: blobbox.h:669
PITCH_MAYBE_PROP
Definition: blobbox.h:49
textord_max_pitch_iqr
double textord_max_pitch_iqr
Definition: tovars.cpp:79
ICOORDELT
Definition: points.h:160
count_block_votes
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
Definition: topitch.cpp:650
textord_debug_pitch_test
bool textord_debug_pitch_test
Definition: topitch.cpp:38
STATS::set_range
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
Definition: statistc.cpp:53
try_block_fixed
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:534
to_win
ScrollView * to_win
Definition: drawtord.cpp:34
row_pitch_stats
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
Definition: topitch.cpp:696
TO_ROW::compute_vertical_projection
void compute_vertical_projection()
Definition: blobbox.cpp:784
MAX_ALLOWED_PITCH
#define MAX_ALLOWED_PITCH
Definition: topitch.cpp:56
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599
STATS::clear
void clear()
Definition: statistc.cpp:71
TBOX
Definition: rect.h:33