tesseract
5.0.0-alpha-619-ge9db
|
#include "params.h"
#include "blobbox.h"
#include "textord.h"
Go to the source code of this file.
|
void | make_single_word (bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows) |
|
void | make_words (tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks) |
|
void | set_row_spaces (TO_BLOCK *block, FCOORD rotation, bool testing_on) |
|
int32_t | row_words (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on) |
|
int32_t | row_words2 (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on) |
|
void | make_real_words (tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation) |
|
ROW * | make_rep_words (TO_ROW *row, TO_BLOCK *block) |
|
WERD * | make_real_word (BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks) |
|
◆ make_real_word()
WERD* make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
Definition at line 578 of file wordseg.cpp.
584 C_OUTLINE_IT cout_it;
586 C_BLOB_IT cblob_it = &cblobs;
591 for (blobindex = 0; blobindex < blobcount; blobindex++) {
592 bblob = box_it->extract();
594 if (bblob->
cblob() !=
nullptr) {
595 cout_it.set_to_list(cblob_it.data()->out_list());
596 cout_it.move_to_last();
598 delete bblob->
cblob();
602 if (bblob->
cblob() !=
nullptr)
603 cblob_it.add_after_then_move(bblob->
cblob());
612 word =
new WERD(&cblobs, blanks,
nullptr);
616 if (box_it->at_first())
◆ make_real_words()
Definition at line 490 of file wordseg.cpp.
497 TO_ROW_IT row_it = block->
get_rows ();
498 ROW *real_row =
nullptr;
503 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
504 row = row_it.data ();
517 (pb !=
nullptr && !pb->
IsText()) ||
528 if (real_row !=
nullptr) {
530 real_row_it.add_after_then_move (real_row);
◆ make_rep_words()
Definition at line 546 of file wordseg.cpp.
556 if (word_it.empty ())
558 word_box = word_it.data ()->bounding_box ();
559 for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
560 word_box += word_it.data ()->bounding_box ();
562 real_row =
new ROW(row,
564 word_it.set_to_list (real_row->
word_list ());
566 word_it.add_list_after (&row->
rep_words);
◆ make_single_word()
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition at line 51 of file wordseg.cpp.
53 TO_ROW_IT to_row_it(rows);
54 ROW_IT row_it(real_rows);
55 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
56 to_row_it.forward()) {
57 TO_ROW* row = to_row_it.data();
61 C_BLOB_IT cblob_it(&cblobs);
63 for (;!box_it.empty(); box_it.forward()) {
66 if (bblob->
cblob() !=
nullptr) {
67 C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
68 cout_it.move_to_last();
70 delete bblob->
cblob();
73 if (bblob->
cblob() !=
nullptr)
74 cblob_it.add_after_then_move(bblob->
cblob());
82 WERD* word =
new WERD(&cblobs, 0,
nullptr);
83 word->set_flag(
W_BOL,
true);
84 word->set_flag(
W_EOL,
true);
86 word_it.add_after_then_move(word);
87 row_it.add_after_then_move(real_row);
◆ make_words()
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
make_words
Arrange the blobs into words.
Definition at line 95 of file wordseg.cpp.
101 TO_BLOCK_IT block_it;
111 block_it.set_to_list(port_blocks);
112 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
113 block = block_it.data();
◆ row_words()
int32_t row_words(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
Definition at line 169 of file wordseg.cpp.
180 int32_t cluster_count;
182 int32_t smooth_factor;
190 STATS gap_stats (0, maxwidth);
191 STATS cluster_stats[4];
201 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
202 blob = blob_it.data ();
206 gap_stats.add (blob_box.
width (), 1);
209 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
210 blob = blob_it.data ();
213 if (prev_valid && blob_box.
left () - prev_x < maxwidth) {
214 gap_stats.add (blob_box.
left () - prev_x, 1);
217 prev_x = blob_box.
right ();
220 if (gap_stats.get_total () == 0) {
225 gap_stats.smooth (smooth_factor);
228 cluster_count = gap_stats.cluster (lower, upper,
231 while (cluster_count < 2 && ceil (lower) < floor (upper)) {
233 upper = (upper * 3 + lower) / 4;
234 lower = (lower * 3 + upper) / 4;
235 cluster_count = gap_stats.cluster (lower, upper,
239 if (cluster_count < 2) {
244 for (gap_index = 0; gap_index < cluster_count; gap_index++)
245 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
247 if (cluster_count > 2) {
249 tprintf (
"Row at %g has 3 sizes of gap:%g,%g,%g\n",
251 cluster_stats[1].
ile (0.5),
252 cluster_stats[2].
ile (0.5), cluster_stats[3].
ile (0.5));
255 if (gaps[1] > lower) {
258 && gaps[2] > gaps[1]) {
262 else if (gaps[2] > lower
269 tprintf (
"Had to switch most common from lower to upper!!\n");
280 if (gaps[1] < gaps[0]) {
282 tprintf (
"Had to switch most common from lower to upper!!\n");
301 tprintf (
"Disagreement between block and row at %g!!\n",
303 tprintf (
"Lower=%g, upper=%g, Stats:\n", lower, upper);
323 tprintf (
"Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
◆ row_words2()
int32_t row_words2(TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
Definition at line 336 of file wordseg.cpp.
350 int32_t cluster_count;
353 int32_t smooth_factor;
360 STATS gap_stats (0, maxwidth);
373 const bool testing_row =
false;
375 min_width = static_cast<int32_t>(block->
pr_space);
377 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
378 blob = blob_it.data ();
381 this_valid = blob_box.
width () >= min_width;
382 if (this_valid && prev_valid
383 && blob_box.
left () - prev_x < maxwidth) {
384 gap_stats.add (blob_box.
left () - prev_x, 1);
387 prev_x = blob_box.
right ();
388 prev_valid = this_valid;
391 valid_count = gap_stats.get_total ();
395 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
396 blob_it.forward ()) {
397 blob = blob_it.data ();
400 if (blob_box.
left () - prev_x < maxwidth) {
401 gap_stats.add (blob_box.
left () - prev_x, 1);
403 prev_x = blob_box.
right ();
407 if (gap_stats.get_total () == 0) {
416 gap_stats.smooth (smooth_factor);
418 prev_count = cluster_count;
419 cluster_count = gap_stats.cluster (lower, upper,
424 if (cluster_count < 1) {
429 for (gap_index = 0; gap_index < cluster_count; gap_index++)
430 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
433 tprintf (
"cluster_count=%d:", cluster_count);
434 for (gap_index = 0; gap_index < cluster_count; gap_index++)
435 tprintf (
" %g(%d)", gaps[gap_index],
436 cluster_stats[gap_index + 1].get_total ());
441 for (gap_index = 0; gap_index < cluster_count
443 if (gap_index < cluster_count)
444 lower = gaps[gap_index];
447 tprintf (
"No cluster below block threshold!, using default=%g\n",
451 for (gap_index = 0; gap_index < cluster_count
452 && gaps[gap_index] <= block->
max_nonspace; gap_index++);
453 if (gap_index < cluster_count)
454 upper = gaps[gap_index];
457 tprintf (
"No cluster above block threshold!, using default=%g\n",
477 tprintf (
"Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
◆ set_row_spaces()
void set_row_spaces(TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition at line 125 of file wordseg.cpp.
132 TO_ROW_IT row_it = block->
get_rows ();
136 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
137 row = row_it.data ();
140 static_cast<int32_t>(ceil (row->
pr_space -
144 static_cast<int32_t>(floor (row->
pr_nonsp +
148 tprintf (
"Assigning defaults %d non, %d space to row at %g\n",
155 #ifndef GRAPHICS_DISABLED
◆ textord_chopper_test
bool textord_chopper_test = false
"Chopper is being tested."
Definition at line 39 of file wordseg.cpp.
◆ textord_force_make_prop_words
bool textord_force_make_prop_words = false
"Force proportional word segmentation on all rows"
Definition at line 37 of file wordseg.cpp.
◆ textord_fp_chopping
bool textord_fp_chopping = true
"Do fixed pitch chopping"
Definition at line 35 of file wordseg.cpp.
int32_t get_total() const
void check_pitch()
check proportional
C_OUTLINE_LIST * out_list()
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
void print_summary() const
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
ROW_LIST * row_list()
get rows
double textord_words_initial_upper
bool textord_test_landscape
void recalc_bounding_box()
bool contains(const FCOORD pt) const
PITCH_TYPE pitch_decision
double textord_words_minlarge
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
#define BLOCK_STATS_CLUSTERS
bool textord_show_initial_words
double textord_wordstats_smooth_factor
PDBLK pdblk
Page Description Block.
POLY_BLOCK * poly_block() const
double textord_words_initial_lower
void set_flag(WERD_FLAGS mask, bool value)
double words_initial_upper
bool joined_to_prev() const
double textord_spacesize_ratioprop
bool use_cjk_fp_model() const
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
bool textord_chopper_test
const TBOX & bounding_box() const
double textord_words_definite_spread
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
double ile(double frac) const
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
double words_initial_lower
double textord_words_min_minspace
DLLSYM void tprintf(const char *format,...)
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
bool textord_force_make_prop_words
void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
BLOBNBOX_LIST * blob_list()
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)