tesseract
5.0.0-alpha-619-ge9db
|
Go to the source code of this file.
|
void | compute_fixed_pitch (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on) |
|
void | fix_row_pitch (TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target) |
|
void | compute_block_pitch (TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on) |
|
bool | compute_rows_pitch (TO_BLOCK *block, int32_t block_index, bool testing_on) |
|
bool | try_doc_fixed (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient) |
|
bool | try_block_fixed (TO_BLOCK *block, int32_t block_index) |
|
bool | try_rows_fixed (TO_BLOCK *block, int32_t block_index, bool testing_on) |
|
void | print_block_counts (TO_BLOCK *block, int32_t block_index) |
|
void | count_block_votes (TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno) |
|
bool | row_pitch_stats (TO_ROW *row, int32_t maxwidth, bool testing_on) |
|
bool | find_row_pitch (TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on) |
|
bool | fixed_pitch_row (TO_ROW *row, BLOCK *block, int32_t block_index) |
|
bool | count_pitch_stats (TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap) |
|
float | tune_row_pitch (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on) |
|
float | tune_row_pitch2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on) |
|
float | compute_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start=0, int16_t end=0) |
|
float | compute_pitch_sd2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start=0, int16_t end=0) |
|
void | print_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch) |
|
void | find_repeated_chars (TO_BLOCK *block, bool testing_on) |
|
void | plot_fp_word (TO_BLOCK *block, float pitch, float nonspace) |
|
◆ compute_block_pitch()
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
Definition at line 310 of file topitch.cpp.
322 tprintf (
"Block %d at (%d,%d)->(%d,%d)\n",
325 block_box.
right (), block_box.
top ());
336 if (!block->
get_rows ()->empty ()) {
339 #ifndef GRAPHICS_DISABLED
◆ compute_fixed_pitch()
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition at line 78 of file topitch.cpp.
91 #ifndef GRAPHICS_DISABLED
98 block_it.set_to_list (port_blocks);
100 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
101 block_it.forward ()) {
102 block = block_it.data ();
109 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
110 block_it.forward ()) {
111 block = block_it.data ();
119 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
120 block_it.forward()) {
121 block = block_it.data ();
123 if (pb !=
nullptr && !pb->
IsText())
continue;
125 TO_ROW_IT row_it(block->
get_rows());
127 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
128 row = row_it.data ();
129 fix_row_pitch(row, block, port_blocks, row_index, block_index);
134 #ifndef GRAPHICS_DISABLED
◆ compute_pitch_sd()
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start = 0, int16_t end = 0)
Definition at line 1359 of file topitch.cpp.
1392 BLOBNBOX_IT blob_it = row->
blob_list ();
1393 BLOBNBOX_IT start_it;
1394 BLOBNBOX_IT plot_it;
1401 FPSEGPT_LIST seg_list;
1406 ICOORDELT_IT cell_it = row_cells;
1412 int32_t total_count;
1414 if ((pitsync_linear_version & 3) > 1) {
1416 projection_right, initial_pitch,
1417 occupation, mid_cuts, row_cells,
1418 testing_on, start, end);
1429 if (blob_it.empty ())
1430 return space_size * 10;
1431 #ifndef GRAPHICS_DISABLED
1432 if (testing_on &&
to_win !=
nullptr) {
1433 blob_box = blob_it.data ()->bounding_box ();
1441 blob_it.mark_cycle_pt ();
1443 for (; blob_count > 0; blob_count--)
1446 prev_box = blob_box;
1450 while (!blob_it.cycled_list ()
1451 && blob_box.
left () - prev_box.
right () < space_size);
1453 if (pitsync_linear_version & 3)
1455 check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1456 projection, projection_left, projection_right,
1458 occupation, &seg_list, start, end);
1461 check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1462 projection, &seg_list);
1464 tprintf (
"Word ending at (%d,%d), len=%d, sync rating=%g, ",
1465 prev_box.
right (), prev_box.
top (),
1466 seg_list.length () - 1, word_sync);
1467 seg_it.set_to_list (&seg_list);
1468 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1469 seg_it.forward ()) {
1470 if (seg_it.data ()->faked)
1472 tprintf (
"%d, ", seg_it.data ()->position ());
1480 #ifndef GRAPHICS_DISABLED
1484 seg_it.set_to_list (&seg_list);
1485 if (prev_right >= 0) {
1486 sp_var = seg_it.data ()->position () - prev_right;
1487 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1492 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1493 segpos = seg_it.data ()->position ();
1494 if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1496 while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1497 cell =
new ICOORDELT (cellpos + static_cast<int16_t>(initial_pitch), 0);
1498 cell_it.add_after_then_move (cell);
1499 cellpos += static_cast<int16_t>(initial_pitch);
1503 cell_it.add_after_then_move (cell);
1506 else if (segpos > cellpos - initial_pitch / 2) {
1507 cell = cell_it.data ();
1509 cell->
set_x ((cellpos + segpos) / 2);
1510 cellpos = cell->
x ();
◆ compute_pitch_sd2()
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start = 0, int16_t end = 0)
Definition at line 1521 of file topitch.cpp.
1528 return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1542 int16_t projection_left,
1543 int16_t projection_right,
1544 float initial_pitch,
1545 int16_t& occupation,
1547 ICOORDELT_LIST* row_cells,
1553 BLOBNBOX_IT blob_it = row->
blob_list ();
1554 BLOBNBOX_IT plot_it;
1557 FPSEGPT_LIST seg_list;
1561 ICOORDELT_IT cell_it = row_cells;
1566 if (blob_it.empty ()) {
1568 return initial_pitch * 10;
1570 #ifndef GRAPHICS_DISABLED
1571 if (testing_on &&
to_win !=
nullptr) {
1577 blob_it.mark_cycle_pt ();
1583 while (!blob_it.cycled_list ());
1585 word_sync =
check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch),
1586 2, projection, projection_left,
1589 occupation, &seg_list, start, end);
1591 tprintf (
"Row ending at (%d,%d), len=%d, sync rating=%g, ",
1592 blob_box.
right (), blob_box.
top (),
1593 seg_list.length () - 1, word_sync);
1594 seg_it.set_to_list (&seg_list);
1595 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1596 if (seg_it.data ()->faked)
1598 tprintf (
"%d, ", seg_it.data ()->position ());
◆ compute_rows_pitch()
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition at line 352 of file topitch.cpp.
366 TO_ROW_IT row_it = block->
get_rows ();
369 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
370 row = row_it.data ();
377 row_index, testing_on)) {
◆ count_block_votes()
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
Definition at line 650 of file topitch.cpp.
670 TO_ROW_IT row_it = block->
get_rows ();
672 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
673 row = row_it.data ();
◆ count_pitch_stats()
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
Definition at line 1050 of file topitch.cpp.
1077 BLOBNBOX_IT blob_it = row->
blob_list ();
1079 int32_t prev_centre;
1082 int32_t width_units;
1087 gap_stats->
clear ();
1088 pitch_stats->
clear ();
1089 if (blob_it.empty ())
1094 joined_box = blob_it.data ()->bounding_box ();
1097 blob = blob_it.data ();
1100 if ((blob_box.
left () - joined_box.
right () < dm_gap
1101 && !blob_it.at_first ())
1102 || blob->
cblob() ==
nullptr)
1103 joined_box += blob_box;
1105 blob_width = joined_box.
width ();
1106 if (split_outsize) {
1108 static_cast<int32_t>(floor (static_cast<float>(blob_width) / initial_pitch + 0.5));
1109 if (width_units < 1)
1113 else if (ignore_outsize) {
1114 width = static_cast<float>(blob_width) / initial_pitch;
1120 x_centre = static_cast<int32_t>(joined_box.
left ()
1122 width_units * initial_pitch) / 2);
1123 if (prev_valid && width_units >= 0) {
◆ find_repeated_chars()
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition at line 1739 of file topitch.cpp.
1747 (
"row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1748 word_sync, word_sync / initial_pitch,
1750 occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1762 if (pb !=
nullptr && !pb->
IsText())
1767 BLOBNBOX_IT search_it;
1770 int blobcount, repeated_set;
1772 TO_ROW_IT row_it = block->
get_rows();
1773 if (row_it.empty())
return;
1774 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1775 row = row_it.data();
1777 if (box_it.empty())
continue;
1785 if (box_it.data()->repeated_set() != 0 &&
1786 !box_it.data()->joined_to_prev()) {
1788 repeated_set = box_it.data()->repeated_set();
1790 search_it.forward();
1791 while (!search_it.at_first() &&
1792 search_it.data()->repeated_set() == repeated_set) {
1794 search_it.forward();
◆ find_row_pitch()
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
Definition at line 828 of file topitch.cpp.
859 STATS gap_stats (0, maxwidth);
861 STATS pitch_stats (0, maxwidth);
868 if (non_space > initial_pitch)
869 non_space = initial_pitch;
870 min_space = (initial_pitch + non_space) / 2;
873 initial_pitch, min_space,
true,
false, dm_gap)) {
875 dm_pitch_iqr = maxwidth * 2.0f;
876 dm_pitch = initial_pitch;
879 dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
880 dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
881 dm_pitch = pitch_stats.ile (0.5);
884 pitch_stats.clear ();
886 initial_pitch, min_space,
true,
false, 0)) {
888 pitch_iqr = maxwidth * 3.0f;
891 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
892 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
895 (
"First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
896 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
897 initial_pitch = pitch_stats.ile (0.5);
898 if (min_space > initial_pitch
900 initial_pitch, initial_pitch,
true,
false, 0)) {
901 min_space = initial_pitch;
902 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
903 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
906 (
"Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
907 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
908 initial_pitch = pitch_stats.ile (0.5);
912 tprintf(
"Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
913 block_index, row_index,
'X',
914 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
915 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ?
'D' :
916 (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ?
'S' :
'M'));
917 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
923 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
926 (
"Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
927 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
928 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
929 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
930 pitch = pitch_stats.ile (0.5);
931 used_dm_model =
false;
936 (
"Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
937 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
938 gap_iqr = dm_gap_iqr;
939 pitch_iqr = dm_pitch_iqr;
941 used_dm_model =
true;
944 tprintf (
"rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
945 pitch_iqr, gap_iqr, pitch);
946 tprintf (
"p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
947 pitch_iqr / gap_iqr, pitch_iqr / block->
xheight,
◆ fix_row_pitch()
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
Definition at line 146 of file topitch.cpp.
161 TO_BLOCK_IT block_it = blocks;
168 block_votes = like_votes = other_votes = 0;
175 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
176 block_it.forward()) {
177 block = block_it.data();
179 if (pb !=
nullptr && !pb->
IsText())
continue;
181 TO_ROW_IT row_it(block->
get_rows());
182 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
184 row = row_it.data ();
198 if (block_index == block_target) {
253 else if (block_votes <= textord_words_veto_power && like_votes > 0) {
259 if (block_votes == 0 && like_votes == 0 && other_votes > 0
262 (
"Warning:row %d of block %d set prop with no like rows against trend\n",
263 row_target, block_target);
267 tprintf(
":b_votes=%d:l_votes=%d:o_votes=%d",
268 block_votes, like_votes, other_votes);
275 else if (block_votes == 0 && like_votes > 0)
279 (
"Warning:guessing pitch as xheight on row %d, block %d\n",
280 row_target, block_target);
297 sp_sd, mid_cuts, &bad_row->
char_cells,
false);
◆ fixed_pitch_row()
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
Definition at line 967 of file topitch.cpp.
984 const char *res_string;
994 if (textord_all_prop || (pb !=
nullptr && !pb->
IsText())) {
1005 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1006 && ((pitsync_linear_version & 3) < 3
1007 || ((pitsync_linear_version & 3) >= 3 && (row->
used_dm_model
1009 || (pitch_sd == 0 && sp_sd > 10))))) {
1010 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1012 && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1017 else if ((pitsync_linear_version & 3) < 3
1021 if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
◆ plot_fp_word()
void plot_fp_word(TO_BLOCK *block, float pitch, float nonspace)
Definition at line 1804 of file topitch.cpp.
1813 }
while (!box_it.at_first());
◆ print_block_counts()
void print_block_counts(TO_BLOCK *block, int32_t block_index)
Definition at line 614 of file topitch.cpp.
627 int32_t def_fixed = 0;
628 int32_t def_prop = 0;
629 int32_t maybe_fixed = 0;
630 int32_t maybe_prop = 0;
632 int32_t corr_fixed = 0;
633 int32_t corr_prop = 0;
◆ print_pitch_sd()
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
Definition at line 1612 of file topitch.cpp.
1620 : initial_pitch * 10;
1634 int16_t projection_left,
1635 int16_t projection_right,
1643 BLOBNBOX_IT blob_it = row->
blob_list ();
1644 BLOBNBOX_IT start_it;
1645 BLOBNBOX_IT row_start;
1647 int16_t total_blob_count;
1653 FPSEGPT_LIST seg_list;
1661 if (blob_it.empty ())
1663 row_start = blob_it;
1664 total_blob_count = 0;
1671 blob_it = row_start;
1675 blob_it.mark_cycle_pt ();
1677 for (; blob_count > 0; blob_count--)
1680 prev_box = blob_box;
1684 while (!blob_it.cycled_list ()
1685 && blob_box.
left () - prev_box.
right () < space_size);
1687 check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1688 projection, projection_left, projection_right,
1690 occupation, &seg_list, 0, 0);
1691 total_blob_count += blob_count;
1692 seg_it.set_to_list (&seg_list);
1693 if (prev_right >= 0) {
1694 sp_var = seg_it.data ()->position () - prev_right;
1695 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1700 seg_it.move_to_last ();
1701 prev_right = seg_it.data ()->position ();
1703 scale_factor = (seg_list.length () - 2) / 2;
1704 if (scale_factor < 1)
1709 sqsum += word_sync * scale_factor;
1710 total_count += (seg_list.length () - 1) * scale_factor;
1713 while (!blob_it.cycled_list ());
1714 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1715 word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1716 tprintf (
"new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1717 word_sync, word_sync / initial_pitch, sp_sd,
1721 start_it = row_start;
1722 blob_it = row_start;
1724 check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
1725 projection, projection_left, projection_right,
1729 word_sync /= occupation;
1730 word_sync = sqrt (word_sync);
1732 #ifndef GRAPHICS_DISABLED
◆ row_pitch_stats()
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
Definition at line 696 of file topitch.cpp.
715 int32_t cluster_count;
717 int32_t smooth_factor;
724 STATS gap_stats (0, maxwidth);
730 if (!blob_it.empty ()) {
731 prev_x = blob_it.data ()->bounding_box ().right ();
733 while (!blob_it.at_first ()) {
734 blob = blob_it.data ();
737 if (blob_box.
left () - prev_x < maxwidth)
738 gap_stats.add (blob_box.
left () - prev_x, 1);
739 prev_x = blob_box.
right ();
744 if (gap_stats.get_total () == 0) {
750 gap_stats.smooth (smooth_factor);
752 prev_count = cluster_count;
753 cluster_count = gap_stats.cluster (lower, upper,
758 if (cluster_count < 1) {
761 for (gap_index = 0; gap_index < cluster_count; gap_index++)
762 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
765 tprintf (
"cluster_count=%d:", cluster_count);
766 for (gap_index = 0; gap_index < cluster_count; gap_index++)
767 tprintf (
" %g(%d)", gaps[gap_index],
768 cluster_stats[gap_index + 1].get_total ());
771 qsort (gaps, cluster_count,
sizeof (
float), sort_floats);
776 for (gap_index = 0; gap_index < cluster_count
777 && gaps[gap_index] < lower; gap_index++);
778 if (gap_index == 0) {
780 tprintf (
"No clusters below nonspace threshold!!\n");
781 if (cluster_count > 1) {
791 row->
pr_nonsp = gaps[gap_index - 1];
792 while (gap_index < cluster_count && gaps[gap_index] < upper)
794 if (gap_index == cluster_count) {
796 tprintf (
"No clusters above nonspace threshold!!\n");
805 for (gap_index = 0; gap_index < cluster_count
806 && gaps[gap_index] < upper; gap_index++);
807 if (gap_index == 0) {
809 tprintf (
"No clusters below space threshold!!\n");
814 row->
fp_nonsp = gaps[gap_index - 1];
815 if (gap_index == cluster_count) {
817 tprintf (
"No clusters above space threshold!!\n");
◆ try_block_fixed()
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
◆ try_doc_fixed()
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
Definition at line 395 of file topitch.cpp.
413 TO_BLOCK_IT block_it = port_blocks;
416 int16_t projection_left;
417 int16_t projection_right;
420 ICOORDELT_LIST *master_cells;
433 if (block_it.empty ()
437 shift_factor = gradient / (gradient * gradient + 1);
439 TO_ROW_IT row_it(block_it.data ()->get_rows());
440 master_x = row_it.data ()->projection_left;
441 master_y = row_it.data ()->baseline.y (master_x);
442 projection_left = INT16_MAX;
443 projection_right = -INT16_MAX;
448 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
449 block_it.forward ()) {
450 block = block_it.data ();
451 row_it.set_to_list (block->
get_rows ());
452 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
453 row = row_it.data ();
456 pitches.add (static_cast<int32_t>(row->
fixed_pitch), 1);
461 shift_factor * (master_y - row_y));
464 shift_factor * (master_y - row_y));
465 if (row_left < projection_left)
466 projection_left = row_left;
467 if (row_right > projection_right)
468 projection_right = row_right;
471 if (pitches.get_total () == 0)
473 projection.
set_range (projection_left, projection_right);
475 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
476 block_it.forward ()) {
477 block = block_it.data ();
478 row_it.set_to_list (block->
get_rows ());
479 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
480 row = row_it.data ();
484 shift_factor * (master_y - row_y));
492 row_it.set_to_list (block_it.data ()->get_rows ());
493 row = row_it.data ();
494 #ifndef GRAPHICS_DISABLED
499 final_pitch = pitches.ile (0.5);
500 pitch = static_cast<int16_t>(final_pitch);
502 tune_row_pitch (row, &projection, projection_left, projection_right,
503 pitch * 0.75, final_pitch, sp_sd, mid_cuts,
508 (
"try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
509 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
510 pitch_sd / total_row_count, pitch_sd / pitch,
511 pitch_sd / total_row_count / pitch);
513 #ifndef GRAPHICS_DISABLED
516 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
517 block_it.forward ()) {
518 block = block_it.data ();
519 row_it.set_to_list (block->
get_rows ());
520 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
522 row = row_it.data ();
524 row_shift = shift_factor * (master_y - row_y);
◆ try_rows_fixed()
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition at line 547 of file topitch.cpp.
562 int32_t def_fixed = 0;
563 int32_t def_prop = 0;
564 int32_t maybe_fixed = 0;
565 int32_t maybe_prop = 0;
567 int32_t corr_fixed = 0;
568 int32_t corr_prop = 0;
570 TO_ROW_IT row_it = block->
get_rows ();
573 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
574 row = row_it.data ();
605 else if (def_fixed > 0 || def_prop > 0)
◆ tune_row_pitch()
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition at line 1137 of file topitch.cpp.
1171 ICOORDELT_LIST test_cells;
1172 ICOORDELT_IT best_it;
1176 projection_right, space_size, initial_pitch,
1179 best_mid_cuts, best_cells, testing_on);
1180 if (textord_disable_pitch_test) {
1181 best_sp_sd = initial_pitch;
1182 return initial_pitch;
1195 best_sd = initial_sd;
1196 best_pitch = initial_pitch;
1198 tprintf (
"tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1202 space_size, initial_pitch + pitch_delta, sp_sd,
1203 mid_cuts, &test_cells, testing_on);
1205 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1207 if (pitch_sd < best_sd) {
1209 best_mid_cuts = mid_cuts;
1211 best_pitch = initial_pitch + pitch_delta;
1212 best_cells->clear ();
1213 best_it.set_to_list (best_cells);
1214 best_it.add_list_after (&test_cells);
1217 test_cells.clear ();
1218 if (pitch_sd > initial_sd)
1224 space_size, initial_pitch - pitch_delta, sp_sd,
1225 mid_cuts, &test_cells, testing_on);
1227 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1229 if (pitch_sd < best_sd) {
1231 best_mid_cuts = mid_cuts;
1233 best_pitch = initial_pitch - pitch_delta;
1234 best_cells->clear ();
1235 best_it.set_to_list (best_cells);
1236 best_it.add_list_after (&test_cells);
1239 test_cells.clear ();
◆ tune_row_pitch2()
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition at line 1248 of file topitch.cpp.
1286 best_sp_sd = initial_pitch;
1288 best_pitch = static_cast<int>(initial_pitch);
1290 return initial_pitch;
1299 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1303 (pixel - projection_left) % (best_pitch + pitch_delta),
1312 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1317 pitch_delta].pile_count (pixel);
1318 best_delta = pitch_delta;
1324 tprintf (
"tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1325 initial_pitch, best_delta, best_count);
1326 best_pitch += best_delta;
1327 initial_pitch = best_pitch;
1329 best_count += best_count;
1330 for (start = best_pixel - 2; start > best_pixel - best_pitch
1332 best_delta].pile_count (start % best_pitch) <= best_count;
1334 for (end = best_pixel + 2;
1335 end < best_pixel + best_pitch
1337 best_delta].pile_count (end % best_pitch) <= best_count;
◆ textord_balance_factor
double textord_balance_factor = 2.0 |
"Ding rate for unbalanced char cells"
Definition at line 53 of file topitch.cpp.
◆ textord_blockndoc_fixed
bool textord_blockndoc_fixed = true |
"Attempt whole doc/block fixed pitch"
Definition at line 50 of file topitch.cpp.
◆ textord_debug_pitch_metric
bool textord_debug_pitch_metric = false |
"Write full metric stuff"
Definition at line 44 of file topitch.cpp.
◆ textord_debug_pitch_test
bool textord_debug_pitch_test = false |
"Debug on fixed pitch test"
Definition at line 38 of file topitch.cpp.
◆ textord_fast_pitch_test
bool textord_fast_pitch_test = false |
"Do even faster pitch algorithm"
Definition at line 42 of file topitch.cpp.
◆ textord_pitch_cheat
bool textord_pitch_cheat = false |
"Use correct answer for fixed/prop"
Definition at line 48 of file topitch.cpp.
◆ textord_projection_scale
double textord_projection_scale = 0.125 |
"Ding rate for mid-cuts"
Definition at line 51 of file topitch.cpp.
◆ textord_show_page_cuts
bool textord_show_page_cuts = false |
"Draw page-level cuts"
Definition at line 46 of file topitch.cpp.
◆ textord_show_row_cuts
bool textord_show_row_cuts = false |
void set_x(int16_t xin)
rewrite function
double textord_fpiqr_ratio
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
bool textord_blocksall_prop
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
double textord_projection_scale
ScrollView * create_to_win(ICOORD page_tr)
double words_default_fixed_space
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
#define BLOCK_STATS_CLUSTERS
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
double textord_words_default_maxspace
int32_t pile_count(int32_t value) const
bool textord_show_fixed_cuts
int textord_dotmatrix_gap
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
double textord_words_pitchsd_threshold
double words_default_prop_nonspace
PITCH_TYPE pitch_decision
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
int16_t x() const
access function
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
double textord_pitch_rowsimilarity
double words_default_fixed_limit
bool textord_pitch_scalebigwords
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
bool textord_blocksall_fixed
bool textord_show_initial_words
double textord_wordstats_smooth_factor
int textord_words_veto_power
bool textord_debug_pitch_metric
bool rep_chars_marked() const
bool textord_fast_pitch_test
bool textord_show_page_cuts
PDBLK pdblk
Page Description Block.
void mark_repeated_chars(TO_ROW *row)
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
POLY_BLOCK * poly_block() const
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
double words_initial_upper
void print_block_counts(TO_BLOCK *block, int32_t block_index)
int num_repeated_sets() const
bool joined_to_prev() const
double textord_spacesize_ratioprop
bool textord_blockndoc_fixed
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
PITCH_TYPE pitch_decision
double textord_words_maxspace
const TBOX & bounding_box() const
double textord_words_def_prop
double ile(double frac) const
double words_initial_lower
TBOX box_next(BLOBNBOX_IT *it)
double textord_words_default_minspace
void add(int32_t value, int32_t count)
double textord_words_min_minspace
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
DLLSYM void tprintf(const char *format,...)
ICOORDELT_LIST char_cells
double textord_words_default_nonspace
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
double textord_max_pitch_iqr
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
bool textord_debug_pitch_test
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
void compute_vertical_projection()
#define MAX_ALLOWED_PITCH
BLOBNBOX_LIST * blob_list()