34 #include "config_auto.h" 41 "Force proportional word segmentation on all rows");
43 "Chopper is being tested.");
45 #define FIXED_WIDTH_MULTIPLE 5 46 #define BLOCK_STATS_CLUSTERS 10 57 TO_ROW_IT to_row_it(rows);
58 ROW_IT row_it(real_rows);
59 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
60 to_row_it.forward()) {
61 TO_ROW* row = to_row_it.data();
65 C_BLOB_IT cblob_it(&cblobs);
67 for (;!box_it.empty(); box_it.forward()) {
70 if (bblob->
cblob() !=
nullptr) {
71 C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
72 cout_it.move_to_last();
74 delete bblob->
cblob();
77 if (bblob->
cblob() !=
nullptr)
78 cblob_it.add_after_then_move(bblob->
cblob());
86 WERD* word =
new WERD(&cblobs, 0,
nullptr);
90 word_it.add_after_then_move(word);
91 row_it.add_after_then_move(real_row);
104 TO_BLOCK_LIST *port_blocks) {
105 TO_BLOCK_IT block_it;
115 block_it.set_to_list(port_blocks);
116 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
117 block = block_it.data();
136 TO_ROW_IT row_it = block->
get_rows ();
140 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
141 row = row_it.data ();
152 tprintf (
"Assigning defaults %d non, %d space to row at %g\n",
159 #ifndef GRAPHICS_DISABLED 184 int32_t cluster_count;
186 int32_t smooth_factor;
194 STATS gap_stats (0, maxwidth);
195 STATS cluster_stats[4];
205 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
206 blob = blob_it.data ();
210 gap_stats.
add (blob_box.
width (), 1);
213 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
214 blob = blob_it.data ();
217 if (prev_valid && blob_box.
left () - prev_x < maxwidth) {
218 gap_stats.
add (blob_box.
left () - prev_x, 1);
221 prev_x = blob_box.
right ();
229 gap_stats.
smooth (smooth_factor);
232 cluster_count = gap_stats.
cluster (lower, upper,
235 while (cluster_count < 2 && ceil (lower) < floor (upper)) {
237 upper = (upper * 3 + lower) / 4;
238 lower = (lower * 3 + upper) / 4;
239 cluster_count = gap_stats.
cluster (lower, upper,
243 if (cluster_count < 2) {
248 for (gap_index = 0; gap_index < cluster_count; gap_index++)
249 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
251 if (cluster_count > 2) {
253 tprintf (
"Row at %g has 3 sizes of gap:%g,%g,%g\n",
255 cluster_stats[1].
ile (0.5),
256 cluster_stats[2].
ile (0.5), cluster_stats[3].
ile (0.5));
259 if (gaps[1] > lower) {
262 && gaps[2] > gaps[1]) {
266 else if (gaps[2] > lower
273 tprintf (
"Had to switch most common from lower to upper!!\n");
284 if (gaps[1] < gaps[0]) {
286 tprintf (
"Had to switch most common from lower to upper!!\n");
305 tprintf (
"Disagreement between block and row at %g!!\n",
307 tprintf (
"Lower=%g, upper=%g, Stats:\n", lower, upper);
327 tprintf (
"Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
354 int32_t cluster_count;
357 int32_t smooth_factor;
364 STATS gap_stats (0, maxwidth);
377 const bool testing_row =
false;
379 min_width = (int32_t) block->
pr_space;
381 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
382 blob = blob_it.data ();
385 this_valid = blob_box.
width () >= min_width;
386 if (this_valid && prev_valid
387 && blob_box.
left () - prev_x < maxwidth) {
388 gap_stats.
add (blob_box.
left () - prev_x, 1);
391 prev_x = blob_box.
right ();
392 prev_valid = this_valid;
399 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
400 blob_it.forward ()) {
401 blob = blob_it.data ();
404 if (blob_box.
left () - prev_x < maxwidth) {
405 gap_stats.
add (blob_box.
left () - prev_x, 1);
407 prev_x = blob_box.
right ();
420 gap_stats.
smooth (smooth_factor);
422 prev_count = cluster_count;
423 cluster_count = gap_stats.
cluster (lower, upper,
428 if (cluster_count < 1) {
433 for (gap_index = 0; gap_index < cluster_count; gap_index++)
434 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
437 tprintf (
"cluster_count=%d:", cluster_count);
438 for (gap_index = 0; gap_index < cluster_count; gap_index++)
439 tprintf (
" %g(%d)", gaps[gap_index],
440 cluster_stats[gap_index + 1].get_total ());
445 for (gap_index = 0; gap_index < cluster_count
447 if (gap_index < cluster_count)
448 lower = gaps[gap_index];
451 tprintf (
"No cluster below block threshold!, using default=%g\n",
455 for (gap_index = 0; gap_index < cluster_count
456 && gaps[gap_index] <= block->
max_nonspace; gap_index++);
457 if (gap_index < cluster_count)
458 upper = gaps[gap_index];
461 tprintf (
"No cluster above block threshold!, using default=%g\n",
481 tprintf (
"Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
501 TO_ROW_IT row_it = block->
get_rows ();
502 ROW *real_row =
nullptr;
507 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
508 row = row_it.data ();
521 (pb !=
nullptr && !pb->
IsText()) ||
532 if (real_row !=
nullptr) {
534 real_row_it.add_after_then_move (real_row);
560 if (word_it.empty ())
562 word_box = word_it.data ()->bounding_box ();
563 for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
564 word_box += word_it.data ()->bounding_box ();
566 real_row =
new ROW(row,
568 word_it.set_to_list (real_row->
word_list ());
570 word_it.add_list_after (&row->
rep_words);
588 C_OUTLINE_IT cout_it;
590 C_BLOB_IT cblob_it = &cblobs;
595 for (blobindex = 0; blobindex < blobcount; blobindex++) {
596 bblob = box_it->extract();
598 if (bblob->
cblob() !=
nullptr) {
599 cout_it.set_to_list(cblob_it.data()->out_list());
600 cout_it.move_to_last();
602 delete bblob->
cblob();
606 if (bblob->
cblob() !=
nullptr)
607 cblob_it.add_after_then_move(bblob->
cblob());
616 word =
new WERD(&cblobs, blanks,
nullptr);
620 if (box_it->at_first())
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
bool use_cjk_fp_model() const
EXTERN double textord_words_min_minspace
EXTERN double textord_words_initial_upper
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
#define BOOL_VAR(name, val, comment)
int32_t row_words2(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
void set_row_spaces(TO_BLOCK *block, FCOORD rotation, bool testing_on)
ROW_LIST * row_list()
get rows
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
EXTERN bool textord_fp_chopping
bool textord_test_landscape
#define BLOCK_STATS_CLUSTERS
int32_t row_words(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
void set_flag(WERD_FLAGS mask, bool value)
EXTERN double textord_spacesize_ratioprop
EXTERN double textord_words_initial_lower
void set_stats(BOOL8 prop, int16_t kern, int16_t space, int16_t ch_pitch)
EXTERN double words_initial_upper
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
void print_summary() const
double ile(double frac) const
bool joined_to_prev() const
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
void smooth(int32_t factor)
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
EXTERN double textord_words_definite_spread
POLY_BLOCK * poly_block() const
DLLSYM void tprintf(const char *format,...)
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
EXTERN double textord_wordstats_smooth_factor
void add(int32_t value, int32_t count)
EXTERN ScrollView * to_win
EXTERN bool textord_show_initial_words
void recalc_bounding_box()
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
bool contains(const FCOORD pt) const
EXTERN bool textord_force_make_prop_words
C_OUTLINE_LIST * out_list()
const TBOX & bounding_box() const
void check_pitch()
check proportional
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
EXTERN double words_initial_lower
EXTERN bool textord_chopper_test
EXTERN double textord_words_minlarge
BLOBNBOX_LIST * blob_list()
int32_t get_total() const
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
PITCH_TYPE pitch_decision