22 #include "config_auto.h" 82 : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83 median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
84 median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
85 blob_type_(blob_type), flow_(
BTFT_NONE), good_blob_score_(0),
86 good_width_(false), good_column_(false),
87 left_key_tab_(false), right_key_tab_(false),
88 left_key_(0), right_key_(0), type_(
PT_UNKNOWN), vertical_(vertical),
89 working_set_(
nullptr), last_add_was_vertical_(false), block_owned_(false),
90 desperately_merged_(false),
91 first_column_(-1), last_column_(-1), column_set_(
nullptr),
92 side_step_(0), top_spacing_(0), bottom_spacing_(0),
93 type_before_table_(
PT_UNKNOWN), inside_table_column_(false),
94 nearest_neighbor_above_(
nullptr), nearest_neighbor_below_(
nullptr),
95 space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
97 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
127 ColPartition_LIST* big_part_list) {
136 if (big_part_list !=
nullptr) {
137 ColPartition_IT part_it(big_part_list);
138 part_it.add_to_end(single);
146 ColPartition_C_IT it(&upper_partners_);
147 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
148 it.data()->RemovePartner(
false,
this);
150 it.set_to_list(&lower_partners_);
151 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
152 it.data()->RemovePartner(
true,
this);
160 int left,
int bottom,
161 int right,
int top) {
163 part->bounding_box_ =
TBOX(left, bottom, right, top);
164 part->median_bottom_ = bottom;
165 part->median_top_ = top;
166 part->median_height_ = top - bottom;
167 part->median_left_ = left;
168 part->median_right_ = right;
169 part->median_width_ = right - left;
182 if (boxes_.length() == 0) {
185 bounding_box_ += box;
189 if (!last_add_was_vertical_) {
190 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
191 last_add_was_vertical_ =
true;
193 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
195 if (last_add_was_vertical_) {
196 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
197 last_add_was_vertical_ =
false;
199 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
206 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
208 bounding_box_.
left(), bounding_box_.
right());
213 BLOBNBOX_C_IT bb_it(&boxes_);
214 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
215 if (box == bb_it.data()) {
227 BLOBNBOX_C_IT bb_it(&boxes_);
228 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
231 if (biggest ==
nullptr ||
235 if (biggest ==
nullptr ||
246 BLOBNBOX_C_IT bb_it(&boxes_);
247 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
248 if (box != bb_it.data()) {
249 result += bb_it.data()->bounding_box();
258 BLOBNBOX_C_IT bb_it(&boxes_);
259 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
262 if (other ==
nullptr) {
274 BLOBNBOX_C_IT bb_it(&boxes_);
275 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
287 BLOBNBOX_C_IT bb_it(&boxes_);
288 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
290 if (bblob->
owner() ==
this)
300 BLOBNBOX_C_IT bb_it(&boxes_);
301 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
308 if (bb_it.empty())
return false;
319 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
321 delete bblob->
cblob();
331 BLOBNBOX_CLIST reversed_boxes;
332 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
334 BLOBNBOX_C_IT bb_it(&boxes_);
335 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
336 reversed_it.add_before_then_move(bb_it.extract());
338 bb_it.add_list_after(&reversed_boxes);
340 int tmp = left_margin_;
341 left_margin_ = -right_margin_;
342 right_margin_ = -tmp;
353 if (bounding_box_.
left() > bounding_box_.
right()) {
355 tprintf(
"Bounding box invalid\n");
360 if (left_margin_ > bounding_box_.
left() ||
361 right_margin_ < bounding_box_.
right()) {
370 tprintf(
"Key inside box: %d v %d or %d v %d\n",
381 int y = (
MidY() + other.
MidY()) / 2;
424 if (bounding_box_.
right() < other.bounding_box_.
left() &&
427 if (other.bounding_box_.
right() < bounding_box_.
left() &&
430 if (bounding_box_.
left() > other.bounding_box_.
right() &&
433 if (other.bounding_box_.
left() > bounding_box_.
right() &&
441 double fractional_tolerance,
442 double constant_tolerance)
const {
444 int nonmatch_count = 0;
445 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
446 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
447 box_it.mark_cycle_pt();
448 other_it.mark_cycle_pt();
449 while (!box_it.cycled_list() && !other_it.cycled_list()) {
450 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
451 fractional_tolerance,
459 return match_count > nonmatch_count;
470 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
471 int min_top = INT32_MAX;
472 int max_bottom = -INT32_MAX;
473 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
477 tprintf(
"Blob is not a diacritic:");
489 bool result = min_top > candidate.median_bottom_ &&
490 max_bottom < candidate.median_top_;
495 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n",
496 max_bottom, min_top, median_bottom_, median_top_);
505 if (tab_vector !=
nullptr) {
509 left_key_tab_ =
false;
517 if (tab_vector !=
nullptr) {
518 right_key_ = tab_vector->
sort_key();
521 right_key_tab_ =
false;
530 left_key_tab_ = take_box ? false : src.left_key_tab_;
532 left_key_ = src.left_key_;
537 if (left_margin_ > bounding_box_.
left())
538 left_margin_ = src.left_margin_;
543 right_key_tab_ = take_box ? false : src.right_key_tab_;
544 if (right_key_tab_) {
545 right_key_ = src.right_key_;
550 if (right_margin_ < bounding_box_.
right())
551 right_margin_ = src.right_margin_;
556 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
557 return it.data()->left_rule();
561 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
563 return it.data()->right_rule();
568 return special_blobs_densities_[
type];
573 BLOBNBOX_C_IT blob_it(&boxes_);
575 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
589 special_blobs_densities_[
type] = density;
593 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
594 if (boxes_.empty()) {
598 BLOBNBOX_C_IT blob_it(&boxes_);
599 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
602 special_blobs_densities_[
type]++;
606 special_blobs_densities_[
type] /= boxes_.length();
615 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
617 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
619 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
621 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
629 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
630 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631 if (it.data() == partner) {
640 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
641 if (!partners->singleton())
643 ColPartition_C_IT it(partners);
655 bounding_box_.
bottom()) ||
657 other->bounding_box_.
bottom())) {
664 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
666 unsigned w1 = boxes_.length();
667 unsigned w2 = other->boxes_.length();
668 float new_val = special_blobs_densities_[
type] * w1 +
669 other->special_blobs_densities_[
type] * w2;
672 special_blobs_densities_[
type] = new_val / (w1 + w2);
677 BLOBNBOX_C_IT it(&boxes_);
678 BLOBNBOX_C_IT it2(&other->boxes_);
679 for (; !it2.empty(); it2.forward()) {
682 if (prev_owner != other && prev_owner !=
nullptr) {
686 ASSERT_HOST(prev_owner == other || prev_owner ==
nullptr);
687 if (prev_owner == other)
689 it.add_to_end(bbox2);
691 left_margin_ = std::min(left_margin_, other->left_margin_);
692 right_margin_ = std::max(right_margin_, other->right_margin_);
693 if (other->left_key_ < left_key_) {
694 left_key_ = other->left_key_;
695 left_key_tab_ = other->left_key_tab_;
697 if (other->right_key_ > right_key_) {
698 right_key_ = other->right_key_;
699 right_key_tab_ = other->right_key_tab_;
704 flow_ = other->flow_;
705 blob_type_ = other->blob_type_;
709 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
710 last_add_was_vertical_ =
true;
712 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
713 last_add_was_vertical_ =
false;
718 for (
int upper = 0; upper < 2; ++upper) {
719 ColPartition_CLIST partners;
720 ColPartition_C_IT part_it(&partners);
721 part_it.add_list_after(upper ? &other->upper_partners_
722 : &other->lower_partners_);
723 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
748 int ok_box_overlap,
bool debug) {
752 tprintf(
"Vertical partition\n");
766 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
767 merged_box.bottom() < bounding_box_.
top() - ok_box_overlap &&
768 merged_box.top() > bounding_box_.
bottom() + ok_box_overlap) {
770 tprintf(
"Excessive box overlap\n");
780 if (boxes_.empty() || boxes_.singleton())
782 BLOBNBOX_C_IT it(&boxes_);
783 TBOX left_box(it.data()->bounding_box());
784 for (it.forward(); !it.at_first(); it.forward()) {
787 if (left_box.overlap(box))
800 BLOBNBOX_C_IT it(&boxes_);
801 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
805 if (bbox == split_blob || !split_part->boxes_.empty()) {
806 split_part->
AddBox(it.extract());
818 right_key_tab_ =
false;
819 split_part->left_key_tab_ =
false;
834 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right())
838 BLOBNBOX_C_IT it(&boxes_);
839 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
844 if (box.
left() >= split_x) {
845 split_part->
AddBox(it.extract());
852 it.add_list_after(&split_part->boxes_);
861 right_key_tab_ =
false;
862 split_part->left_key_tab_ =
false;
863 right_margin_ = split_x;
864 split_part->left_margin_ = split_x;
872 bounding_box_ =
TBOX();
873 BLOBNBOX_C_IT it(&boxes_);
875 int non_leader_count = 0;
877 bounding_box_.
set_left(left_margin_);
882 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
894 tprintf(
"Computed left-illegal partition\n");
900 tprintf(
"Computed right-illegal partition\n");
907 median_top_ = bounding_box_.
top();
908 median_bottom_ = bounding_box_.
bottom();
909 median_height_ = bounding_box_.
height();
910 median_left_ = bounding_box_.
left();
911 median_right_ = bounding_box_.
right();
912 median_width_ = bounding_box_.
width();
915 STATS bottom_stats(bounding_box_.
bottom(), bounding_box_.
top() + 1);
917 STATS left_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
918 STATS right_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
919 STATS width_stats(0, bounding_box_.
width() + 1);
920 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
924 int area = box.
area();
925 top_stats.add(box.
top(), area);
926 bottom_stats.add(box.
bottom(), area);
927 height_stats.add(box.
height(), area);
928 left_stats.add(box.
left(), area);
929 right_stats.add(box.
right(), area);
930 width_stats.add(box.
width(), area);
933 median_top_ =
static_cast<int>(top_stats.median() + 0.5);
934 median_bottom_ =
static_cast<int>(bottom_stats.median() + 0.5);
935 median_height_ =
static_cast<int>(height_stats.median() + 0.5);
936 median_left_ =
static_cast<int>(left_stats.median() + 0.5);
937 median_right_ =
static_cast<int>(right_stats.median() + 0.5);
938 median_width_ =
static_cast<int>(width_stats.median() + 0.5);
942 tprintf(
"Made partition with bad right coords");
946 tprintf(
"Made partition with bad left coords");
952 for (
int upper = 0; upper < 2; ++upper) {
953 ColPartition_CLIST partners;
954 ColPartition_C_IT part_it(&partners);
955 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
956 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
963 bounding_box_.
bottom())) {
964 tprintf(
"Recomputed box for partition %p\n",
this);
971 BLOBNBOX_C_IT it(&boxes_);
972 int overlap_count = 0;
973 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
978 return overlap_count;
984 int first_spanned_col = -1;
987 bounding_box_.
left(), bounding_box_.
right(),
988 std::min(bounding_box_.
height(), bounding_box_.
width()),
989 MidY(), left_margin_, right_margin_,
990 &first_column_, &last_column_,
992 column_set_ = columns;
993 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
997 if (first_spanned_col >= 0) {
998 first_column_ = first_spanned_col;
999 last_column_ = first_spanned_col;
1001 if ((first_column_ & 1) == 0)
1002 last_column_ = first_column_;
1003 else if ((last_column_ & 1) == 0)
1004 first_column_ = last_column_;
1006 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1024 switch (blob_type_) {
1067 int* first_col,
int* last_col) {
1068 int first_spanned_col = -1;
1071 bounding_box_.
left(), bounding_box_.
right(),
1072 std::min(bounding_box_.
height(), bounding_box_.
width()),
1073 MidY(), left_margin_, right_margin_,
1074 first_col, last_col,
1075 &first_spanned_col);
1083 good_width_ = cb->
Run(width);
1084 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
1094 bool result =
false;
1096 int part_width = bounding_box_.
width();
1097 STATS gap_stats(0, part_width);
1098 STATS width_stats(0, part_width);
1099 BLOBNBOX_C_IT it(&boxes_);
1104 for (it.forward(); !it.at_first(); it.forward()) {
1109 width_stats.
add(right - left, 1);
1114 double median_gap = gap_stats.
median();
1118 double gap_iqr = gap_stats.
ile(0.75f) - gap_stats.
ile(0.25f);
1120 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1130 int offset =
static_cast<int>(ceil(gap_iqr * 2));
1131 int min_step =
static_cast<int>(median_gap +
median_width + 0.5);
1132 int max_step = min_step + offset;
1135 int part_left = bounding_box_.
left() - min_step / 2;
1136 part_width += min_step;
1138 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1143 for (
int x = left; x < right; ++x) {
1149 part_width, projection);
1150 if (best_end !=
nullptr && best_end->
total_cost() < blob_count) {
1153 bool modified_blob_list =
false;
1154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1158 if (it.at_first()) {
1159 int gap = it.data_relative(1)->bounding_box().
left() -
1163 modified_blob_list =
true;
1169 it.data_relative(-1)->bounding_box().right();
1172 modified_blob_list =
true;
1183 if (best_end ==
nullptr) {
1190 delete [] projection;
1204 int good_blob_score_ = 0;
1205 int noisy_count = 0;
1206 int hline_count = 0;
1207 int vline_count = 0;
1208 BLOBNBOX_C_IT it(&boxes_);
1209 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1219 if (hline_count > vline_count) {
1222 }
else if (vline_count > hline_count) {
1225 }
else if (value < -1 || 1 < value) {
1229 long_side = bounding_box_.
width();
1230 short_side = bounding_box_.
height();
1233 long_side = bounding_box_.
height();
1234 short_side = bounding_box_.
width();
1250 if (flow_ ==
BTFT_CHAIN && strong_score == 3)
1258 if (noisy_count >= blob_count) {
1264 bounding_box_.
bottom())) {
1265 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1266 blob_count, noisy_count, good_blob_score_);
1267 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
1268 value, flow_, blob_type_);
1279 BLOBNBOX_C_IT it(&boxes_);
1280 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1297 int total_height = 0;
1299 int height_count = 0;
1301 BLOBNBOX_C_IT it(&boxes_);
1302 TBOX box(it.data()->bounding_box());
1308 ICOORD first_pt(box.right(), box.bottom());
1311 linepoints.
Add(first_pt);
1312 for (it.forward(); !it.at_last(); it.forward()) {
1315 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1316 linepoints.
Add(box_pt);
1317 total_height += box.width();
1318 coverage += box.height();
1321 box = it.data()->bounding_box();
1322 ICOORD last_pt(box.right(), box.top());
1323 linepoints.
Add(last_pt);
1324 width = last_pt.y() - first_pt.y();
1328 TBOX box(it.data()->bounding_box());
1331 ICOORD first_pt(box.left(), box.bottom());
1332 linepoints.
Add(first_pt);
1333 for (it.forward(); !it.at_last(); it.forward()) {
1336 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1337 linepoints.
Add(box_pt);
1338 total_height += box.height();
1339 coverage += box.width();
1342 box = it.data()->bounding_box();
1343 ICOORD last_pt(box.right(), box.bottom());
1344 linepoints.
Add(last_pt);
1345 width = last_pt.x() - first_pt.x();
1348 if (height_count == 0)
1352 double error = linepoints.
Fit(&start_pt, &end_pt);
1360 ColPartition_LIST* used_parts,
1361 WorkingPartSet_LIST* working_sets) {
1364 block_owned_ =
true;
1365 WorkingPartSet_IT it(working_sets);
1368 if (partner !=
nullptr && partner->working_set_ !=
nullptr) {
1369 working_set_ = partner->working_set_;
1374 tprintf(
"Partition with partner has no working set!:");
1382 for (it.mark_cycle_pt(); !it.cycled_list() &&
1383 col_index != first_column_;
1384 it.forward(), ++col_index);
1386 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1390 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1393 work_set = it.data();
1396 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1398 BLOCK_LIST completed_blocks;
1399 TO_BLOCK_LIST to_blocks;
1400 for (; !it.cycled_list() && col_index <= last_column_;
1401 it.forward(), ++col_index) {
1404 &completed_blocks, &to_blocks);
1406 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1408 working_set_ = work_set;
1420 ColPartition_LIST* block_parts,
1421 ColPartition_LIST* used_parts,
1422 BLOCK_LIST* completed_blocks,
1423 TO_BLOCK_LIST* to_blocks) {
1424 int page_height = tright.
y() - bleft.
y();
1426 ColPartition_IT it(block_parts);
1428 int max_line_height = 0;
1434 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1440 BLOBNBOX_C_IT blob_it(part->
boxes());
1441 int prev_bottom = blob_it.data()->bounding_box().bottom();
1442 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1445 int step = bottom - prev_bottom;
1448 side_steps.add(step, 1);
1449 prev_bottom = bottom;
1451 part->
set_side_step(static_cast<int>(side_steps.median() + 0.5));
1452 if (!it.at_last()) {
1463 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1468 if (part_count == 0)
1471 SmoothSpacings(resolution, page_height, block_parts);
1474 BLOCK_IT block_it(completed_blocks);
1475 TO_BLOCK_IT to_block_it(to_blocks);
1476 ColPartition_LIST spacing_parts;
1477 ColPartition_IT sp_block_it(&spacing_parts);
1479 for (it.mark_cycle_pt(); !it.empty();) {
1481 sp_block_it.add_to_end(part);
1483 if (it.empty() || part->
bottom_spacing() > same_block_threshold ||
1484 !part->SpacingsEqual(*it.data(), resolution)) {
1487 if (!it.empty() && part->
bottom_spacing() <= same_block_threshold) {
1491 ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1493 tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d," 1494 " sizes %d %d %d\n",
1502 if (part->SizesSimilar(*next_part) &&
1509 if (third_part ==
nullptr ||
1510 !next_part->SizesSimilar(*third_part) ||
1517 sp_block_it.add_to_end(it.extract());
1520 tprintf(
"Added line to current block.\n");
1526 if (to_block !=
nullptr) {
1527 to_block_it.add_to_end(to_block);
1528 block_it.add_to_end(to_block->
block);
1530 sp_block_it.set_to_list(&spacing_parts);
1534 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1545 if (pos->
x() < bleft.
x())
1547 if (pos->
x() > tright.
x())
1549 if (pos->
y() < bleft.
y())
1551 if (pos->
y() > tright.
y())
1559 static TO_BLOCK* MoveBlobsToBlock(
bool vertical_text,
int line_spacing,
1561 ColPartition_LIST* block_parts,
1562 ColPartition_LIST* used_parts) {
1568 STATS sizes(0, std::max(block_box.width(), block_box.height()));
1570 ColPartition_IT it(block_parts);
1572 BLOBNBOX_IT blob_it(&to_block->
blobs);
1573 ColPartition_IT used_it(used_parts);
1574 for (it.move_to_first(); !it.empty(); it.forward()) {
1575 ColPartition* part = it.extract();
1579 for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty();
1582 if (bblob->
owner() != part) {
1583 tprintf(
"Ownership incorrect for blob:");
1587 if (bblob->
owner() ==
nullptr) {
1600 C_OUTLINE_IT ol_it(outlines);
1601 ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1606 blob_it.add_after_then_move(bblob);
1608 used_it.add_to_end(part);
1610 if (text_type && blob_it.empty()) {
1616 if (vertical_text) {
1618 if (block_width < line_spacing)
1619 line_spacing = block_width;
1620 to_block->
line_spacing =
static_cast<float>(line_spacing);
1621 to_block->
max_blob_size =
static_cast<float>(block_width + 1);
1624 if (block_height < line_spacing)
1625 line_spacing = block_height;
1626 to_block->
line_spacing =
static_cast<float>(line_spacing);
1627 to_block->
max_blob_size =
static_cast<float>(block_height + 1);
1635 ColPartition_LIST* block_parts,
1636 ColPartition_LIST* used_parts) {
1637 if (block_parts->empty())
1643 ColPartition_IT it(block_parts);
1654 ICOORDELT_LIST vertices;
1655 ICOORDELT_IT vert_it(&vertices);
1657 int min_x = INT32_MAX;
1658 int max_x = -INT32_MAX;
1659 int min_y = INT32_MAX;
1660 int max_y = -INT32_MAX;
1664 ColPartition::LeftEdgeRun(&it, &start, &end);
1666 ColPartition::RightEdgeRun(&it, &start, &end);
1667 ClipCoord(bleft, tright, &start);
1668 ClipCoord(bleft, tright, &end);
1669 vert_it.add_after_then_move(
new ICOORDELT(start));
1670 vert_it.add_after_then_move(
new ICOORDELT(end));
1675 if ((iteration == 0 && it.at_first()) ||
1676 (iteration == 1 && it.at_last())) {
1680 }
while (iteration < 2);
1682 tprintf(
"Making block at (%d,%d)->(%d,%d)\n",
1683 min_x, min_y, max_x, max_y);
1684 BLOCK* block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1686 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
1693 ColPartition_LIST* block_parts,
1694 ColPartition_LIST* used_parts) {
1695 if (block_parts->empty())
1697 ColPartition_IT it(block_parts);
1700 int line_spacing = block_box.
width();
1702 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1703 block_box += it.data()->bounding_box();
1710 block_box.
right(), block_box.
top());
1712 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
1718 BLOBNBOX_C_IT blob_it(&boxes_);
1720 int line_size =
IsVerticalType() ? median_width_ : median_height_;
1722 for (; !blob_it.empty(); blob_it.forward()) {
1723 BLOBNBOX* blob = blob_it.extract();
1727 if (row ==
nullptr) {
1728 row =
new TO_ROW(blob, static_cast<float>(top),
1729 static_cast<float>(bottom),
1730 static_cast<float>(line_size));
1732 row->
add_blob(blob, static_cast<float>(top),
1733 static_cast<float>(bottom),
1734 static_cast<float>(line_size));
1744 part->left_margin_ = left_margin_;
1745 part->right_margin_ = right_margin_;
1746 part->bounding_box_ = bounding_box_;
1747 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1748 sizeof(special_blobs_densities_));
1749 part->median_bottom_ = median_bottom_;
1750 part->median_top_ = median_top_;
1751 part->median_height_ = median_height_;
1752 part->median_left_ = median_left_;
1753 part->median_right_ = median_right_;
1754 part->median_width_ = median_width_;
1755 part->good_width_ = good_width_;
1756 part->good_column_ = good_column_;
1757 part->left_key_tab_ = left_key_tab_;
1758 part->right_key_tab_ = right_key_tab_;
1759 part->type_ = type_;
1760 part->flow_ = flow_;
1761 part->left_key_ = left_key_;
1762 part->right_key_ = right_key_;
1763 part->first_column_ = first_column_;
1764 part->last_column_ = last_column_;
1765 part->owns_blobs_ =
false;
1772 BLOBNBOX_C_IT inserter(copy->
boxes());
1773 BLOBNBOX_C_IT traverser(
boxes());
1774 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1775 inserter.add_after_then_move(traverser.data());
1779 #ifndef GRAPHICS_DISABLED 1787 #endif // GRAPHICS_DISABLED 1790 static char kBlobTypes[
BRT_COUNT + 1] =
"NHSRIUVT";
1795 tprintf(
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" 1796 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" 1797 " ts=%d bs=%d ls=%d rs=%d\n",
1798 boxes_.empty() ?
'E' :
' ',
1799 left_margin_, left_key_tab_ ?
'T' :
'B',
LeftAtY(y),
1800 bounding_box_.
left(), median_left_,
1801 bounding_box_.
bottom(), median_bottom_,
1802 bounding_box_.
right(),
RightAtY(y), right_key_tab_ ?
'T' :
'B',
1803 right_margin_, median_right_, bounding_box_.
top(), median_top_,
1804 good_width_, good_column_, type_,
1805 kBlobTypes[blob_type_], flow_,
1806 first_column_, last_column_, boxes_.length(),
1807 space_above_, space_below_, space_to_left_, space_to_right_);
1812 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1813 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1814 color1_[L_ALPHA_CHANNEL],
1815 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1820 STATS left_stats(0, working_set_count);
1821 STATS right_stats(0, working_set_count);
1826 if (partner->type_ > max_type)
1827 max_type = partner->type_;
1828 if (column_set_ == partner->column_set_) {
1829 left_stats.
add(partner->first_column_, 1);
1830 right_stats.
add(partner->last_column_, 1);
1838 first_column_ = left_stats.
mode();
1839 last_column_ = right_stats.
mode();
1840 if (last_column_ < first_column_)
1841 last_column_ = first_column_;
1846 partner->type_ = max_type;
1847 #if 0 // See TODO above 1848 if (column_set_ == partner->column_set_) {
1849 partner->first_column_ = first_column_;
1850 partner->last_column_ = last_column_;
1891 RefinePartnersInternal(
true, get_desperate, grid);
1892 RefinePartnersInternal(
false, get_desperate, grid);
1896 RefinePartnersByType(
true, &upper_partners_);
1897 RefinePartnersByType(
false, &lower_partners_);
1901 if (!upper_partners_.empty() && !upper_partners_.singleton())
1902 RefinePartnersByOverlap(
true, &upper_partners_);
1903 if (!lower_partners_.empty() && !lower_partners_.singleton())
1904 RefinePartnersByOverlap(
false, &lower_partners_);
// Narrows one of this partition's partner lists (upper_partners_ when
// `upper` is true, otherwise lower_partners_) through successively stronger
// refinement passes. Each pass runs only while the list still holds more
// than one partner (neither empty nor a singleton).
// NOTE(review): the listing omits the line declaring the remaining
// parameter(s); `grid` is used below, so it is presumably declared there.
1913 void ColPartition::RefinePartnersInternal(
bool upper,
bool get_desperate,
1915 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
1916 if (!partners->empty() && !partners->singleton()) {
// Pass 1: refine by partition type.
1917 RefinePartnersByType(upper, partners);
1918 if (!partners->empty() && !partners->singleton()) {
// Pass 2: refine using partner shortcuts.
1920 RefinePartnerShortcuts(upper, partners);
1921 if (!partners->empty() && !partners->singleton()) {
// Pass 3: merge-based refinement, first with desperate=false, then,
// if more than one partner remains, again with desperate=true.
// NOTE(review): the condition guarding this step (and where
// get_desperate is consulted) is on lines missing from this listing.
1925 RefineTextPartnersByMerge(upper,
false, partners, grid);
1926 if (!partners->empty() && !partners->singleton())
1927 RefineTextPartnersByMerge(upper,
true, partners, grid);
// Final pass: refine by overlap with this partition.
1930 if (!partners->empty() && !partners->singleton())
1931 RefinePartnersByOverlap(upper, partners);
1940 void ColPartition::RefinePartnersByType(
bool upper,
1941 ColPartition_CLIST* partners) {
1945 tprintf(
"Refining %d %s partners by type for:\n",
1946 partners->length(), upper ?
"Upper" :
"Lower");
1949 ColPartition_C_IT it(partners);
1955 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1962 partner->RemovePartner(!upper,
this);
1971 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1979 partner->RemovePartner(!upper,
this);
1994 void ColPartition::RefinePartnerShortcuts(
bool upper,
1995 ColPartition_CLIST* partners) {
1996 bool done_any =
false;
1999 ColPartition_C_IT it(partners);
2000 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2004 ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
2005 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
2010 a->RemovePartner(!upper,
this);
2013 ColPartition_C_IT it2(partners);
2014 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2019 b2->RemovePartner(!upper,
this);
2032 }
while (done_any && !partners->empty() && !partners->singleton());
// Tries to reduce `partners` by merging partners with one another, keeping
// `grid` in step with every merge. Loops while more than one partner remains.
//   upper     - used here only to label the debug output ("Upper"/"Lower").
//   desperate - when true, a merge is accepted even if it increases overlap.
//   partners  - the partner list being refined.
//   grid      - supplies BestMergeCandidate and is updated around each merge.
// NOTE(review): several lines (the selection of `part`, the loop exit path)
// are missing from this listing; comments cover only the visible code.
2043 void ColPartition::RefineTextPartnersByMerge(
bool upper,
bool desperate,
2044 ColPartition_CLIST* partners,
2045 ColPartitionGrid* grid) {
2049 tprintf(
"Refining %d %s partners by merge for:\n",
2050 partners->length(), upper ?
"Upper" :
"Lower");
2053 while (!partners->empty() && !partners->singleton()) {
2056 ColPartition_C_IT it(partners);
// Collect merge candidates from the remaining partners: start one past the
// iterator's initial element and stop on wrapping back to the first.
2060 ColPartition_CLIST candidates;
2061 ColPartition_C_IT cand_it(&candidates);
2062 for (it.forward(); !it.at_first(); it.forward()) {
// NOTE(review): the test is crossed (first vs last, last vs first). If
// first_column_ <= last_column_ holds for both partitions, this matches only
// when both occupy the same single column -- confirm that is intended.
2064 if (part->first_column_ == candidate->last_column_ &&
2065 part->last_column_ == candidate->first_column_)
2066 cand_it.add_after_then_move(it.data());
// Ask the grid for the best candidate; overlap_increase is written by the
// call through the pointer passed as its last argument.
2068 int overlap_increase;
2069 ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
2070 nullptr, &overlap_increase);
// Accept the merge when it does not increase overlap, or unconditionally
// in desperate mode.
2071 if (candidate !=
nullptr && (overlap_increase <= 0 || desperate)) {
2073 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2074 part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
// Both partitions are removed from the grid before part absorbs the
// candidate, and part is re-inserted afterwards.
2078 grid->RemoveBBox(candidate);
2079 grid->RemoveBBox(part);
2080 part->Absorb(candidate,
nullptr);
2082 grid->InsertBBox(
true,
true, part);
// Flag merges that were accepted despite increasing overlap.
2083 if (overlap_increase > 0)
2084 part->desperately_merged_ =
true;
// Picks the partner whose bounding box has the largest horizontal overlap
// with this partition's bounding box and drops the link to the others.
//   upper    - selects which back-link to remove on the dropped partners
//              (RemovePartner is called with !upper) and labels debug output.
//   partners - the partner list being refined.
2093 void ColPartition::RefinePartnersByOverlap(
bool upper,
2094 ColPartition_CLIST* partners) {
2098 tprintf(
"Refining %d %s partners by overlap for:\n",
2099 partners->length(), upper ?
"Upper" :
"Lower");
2102 ColPartition_C_IT it(partners);
// First pass: find the partner with the greatest x-overlap. best_overlap
// starts at 0, so only a strictly positive overlap can be selected.
2105 int best_overlap = 0;
2106 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
// Overlap = min of the right edges minus max of the left edges; the result
// is zero or negative when the boxes do not overlap horizontally.
2108 int overlap = std::min(bounding_box_.
right(), partner->bounding_box_.right())
2109 - std::max(bounding_box_.
left(), partner->bounding_box_.left());
2110 if (overlap > best_overlap) {
2111 best_overlap = overlap;
2112 best_partner = partner;
// Second pass: for every partner other than the best, remove this partition
// from that partner's opposite-direction list.
// NOTE(review): removal of the partner from `partners` itself is presumably
// on lines missing from this listing.
2116 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2118 if (partner != best_partner) {
2123 partner->RemovePartner(!upper,
this);
// Compares this partition against `other` as a home for the blob `bbox`,
// using the blob's box against each partition's margins and median
// top/bottom band.
// NOTE(review): the statements executed by each `if` below (presumably
// `return true;` / `return false;`) are missing from this listing, so the
// outcome of each test is not stated here.
2130 bool ColPartition::ThisPartitionBetter(
BLOBNBOX* bbox,
2131 const ColPartition& other) {
2134 int left = box.
left();
2135 int right = box.
right();
// Margin checks: does the box stick out of this partition's margins, and
// does it stick out of the other partition's margins?
2136 if (left < left_margin_ || right > right_margin_)
2138 if (left < other.left_margin_ || right > other.right_margin_)
2140 int top = box.
top();
2141 int bottom = box.
bottom();
// Vertical overlap of the box with each partition's median band
// [median_bottom_, median_top_]; negative when they do not overlap.
2142 int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_);
2143 int other_overlap = std::min(top, other.median_top_) -
2144 std::max(bottom, other.median_bottom_);
// "Miss" = height of the median band not covered by the overlap.
2145 int this_miss = median_top_ - median_bottom_ - this_overlap;
2146 int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2148 tprintf(
"Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2150 this_overlap, other_overlap, this_miss, other_miss,
2151 median_top_, other.median_top_);
// Decision order: compare misses first, then overlaps.
2153 if (this_miss < other_miss)
2155 if (this_miss > other_miss)
2157 if (this_overlap > other_overlap)
2159 if (this_overlap < other_overlap)
// Full tie: this partition is reported better when its median top is at
// least as high as the other's.
2161 return median_top_ >= other.median_top_;
// Returns the median, rounded to the nearest int, of the bottom and top
// spacings of the partitions visited by `it`.
//   page_height - second argument of the STATS range (0, page_height).
//   it          - iterator taken by value, so the caller's copy is not moved.
// NOTE(review): the statement advancing `it` inside the loop and the closing
// braces are on lines missing from this listing.
2168 static int MedianSpacing(
int page_height, ColPartition_IT it) {
2169 STATS stats(0, page_height);
2170 while (!it.cycled_list()) {
2171 ColPartition* part = it.data();
// Each partition contributes both of its spacings, each with a count of 1.
2173 stats.add(part->bottom_spacing(), 1);
2174 stats.add(part->top_spacing(), 1);
// Adding 0.5 before the truncating cast rounds the median to the nearest int.
2176 return static_cast<int>(stats.median() + 0.5);
2190 return (last_column_ >= part.first_column_) &&
2191 (first_column_ <= part.last_column_);
// Walks the list of partitions with a sliding window (neighbourhood) of
// PN_COUNT entries, detects where a run of compatible line spacings ends,
// and computes the rounded mean top and bottom spacing over each run.
// resolution is forwarded to the spacing comparison helpers; page_height is
// forwarded to MedianSpacing.
// NOTE(review): many lines are missing from this listing (the embedded
// numbers skip), including the declarations of neighbourhood, upper and
// last_part and the statements that advance the iterators. What the second
// pass over the run does with the averages is not visible - presumably it
// writes them back to the partitions; confirm against the full file.
2197 void ColPartition::SmoothSpacings(
int resolution,
int page_height,
2198 ColPartition_LIST* parts) {
2206 ColPartition_IT it(parts);
// Initial median over the spacings reachable from it.
2213 int median_space = MedianSpacing(page_height, it);
2214 ColPartition_IT start_it(it);
2215 ColPartition_IT end_it(it);
// Prime the window: slots before PN_UPPER, and any slot reached after the
// list has cycled, are set to nullptr; the others take the current element.
2216 for (
int i = 0; i < PN_COUNT; ++i) {
2217 if (i < PN_UPPER || it.cycled_list()) {
2218 neighbourhood[i] =
nullptr;
2222 neighbourhood[i] = it.data();
// Process until the PN_UPPER slot of the window runs off the list.
2226 while (neighbourhood[PN_UPPER] !=
nullptr) {
// A run ends when there is no PN_LOWER entry, or when PN_UPPER and PN_LOWER
// have unequal spacings and none of the OKSpacingBlip exceptions applies
// (tested on the window as is and on the window offset by -1 and by +1).
2248 if (neighbourhood[PN_LOWER] ==
nullptr ||
2249 (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
2251 !OKSpacingBlip(resolution, median_space, neighbourhood) &&
2252 (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) ||
2253 !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2254 (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) ||
2255 !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
// First pass over the run, starting from start_it: accumulate the spacings.
2258 ColPartition_IT sum_it(start_it);
2260 double total_bottom = 0.0;
2261 double total_top = 0.0;
2262 int total_count = 0;
2265 while (upper != last_part) {
2266 total_bottom += upper->bottom_spacing();
2267 total_top += upper->top_spacing();
2270 upper = sum_it.data();
2272 if (total_count > 0) {
// Mean spacings, rounded to the nearest int.
2274 int top_spacing =
static_cast<int>(total_top / total_count + 0.5);
2275 int bottom_spacing =
static_cast<int>(total_bottom / total_count + 0.5);
// Debug trace of why the run ended; its guard is not visible here.
2277 tprintf(
"Spacing run ended. Cause:");
2278 if (neighbourhood[PN_LOWER] ==
nullptr) {
2281 tprintf(
"Spacing change. Spacings:\n");
2282 for (
int i = 0; i < PN_COUNT; ++i) {
2283 if (neighbourhood[i] ==
nullptr) {
2285 if (i > 0 && neighbourhood[i - 1] !=
nullptr) {
2290 tprintf(
" nullptr lower partner:\n");
2296 tprintf(
"Top = %d, bottom = %d\n",
// Second pass over the same run; its loop body is not visible here.
2305 upper = sum_it.data();
2306 while (upper != last_part) {
2314 upper = sum_it.data();
// Recompute the median from end_it for the next run.
2321 median_space = MedianSpacing(page_height, end_it);
// Slide the window down by one slot and refill the last slot from it,
// using nullptr once the list has cycled.
2324 for (
int j = 1; j < PN_COUNT; ++j) {
2325 neighbourhood[j - 1] = neighbourhood[j];
2327 if (it.cycled_list()) {
2328 neighbourhood[PN_COUNT - 1] =
nullptr;
2330 neighbourhood[PN_COUNT - 1] = it.data();
// Tests whether the spacings of parts[PN_UPPER] and parts[PN_LOWER] form an
// acceptable irregularity relative to median_spacing. The visible result is
// true when SummedSpacingOK holds for the upper/lower pair and at least one
// of parts[PN_ABOVE1] / parts[PN_BELOW1] is non-null and has spacing equal
// to median_spacing within the tolerance derived from resolution.
// parts is indexed with the PN_* constants.
// NOTE(review): the statement executed when PN_UPPER or PN_LOWER is null is
// on a line not visible in this excerpt - presumably an early return.
2340 bool ColPartition::OKSpacingBlip(
int resolution,
int median_spacing,
2341 ColPartition** parts) {
2342 if (parts[PN_UPPER] ==
nullptr || parts[PN_LOWER] ==
nullptr)
2346 return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER],
2347 median_spacing, resolution) &&
2348 ((parts[PN_ABOVE1] !=
nullptr &&
2349 parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2350 (parts[PN_BELOW1] !=
nullptr &&
2351 parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
// Compares this partition's spacing with the given spacing value, using
// tolerances obtained from BottomSpacingMargin and TopSpacingMargin at the
// given resolution. The visible part requires bottom_spacing_ to be
// NearlyEqual to spacing within bottom_error.
// NOTE(review): the rest of the return expression is cut off in this
// listing; top_error is computed, so presumably the top spacing is compared
// as well - confirm against the full file.
2356 bool ColPartition::SpacingEqual(
int spacing,
int resolution)
const {
2357 int bottom_error = BottomSpacingMargin(resolution);
2358 int top_error = TopSpacingMargin(resolution);
2359 return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
// Compares the spacings of this partition with those of other. Each
// tolerance is the larger of the two partitions' margins at the given
// resolution. The bottom spacings must be NearlyEqual, and in addition
// either the top spacings are NearlyEqual or the sum of the two top
// spacings is NearlyEqual to twice this partition's bottom spacing.
// NOTE(review): the tolerance argument of the last NearlyEqual call and the
// end of the expression are cut off in this listing.
2365 bool ColPartition::SpacingsEqual(
const ColPartition& other,
2366 int resolution)
const {
2367 int bottom_error = std::max(BottomSpacingMargin(resolution),
2368 other.BottomSpacingMargin(resolution));
2369 int top_error = std::max(TopSpacingMargin(resolution),
2370 other.TopSpacingMargin(resolution));
2371 return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2372 (
NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2373 NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
// Tests the summed spacings of this partition and other against spacing.
// Each tolerance is the larger of the two partitions' margins at the given
// resolution. Two alternatives are visible: the summed bottom spacing is
// NearlyEqual to spacing, or it is NearlyEqual to twice spacing.
// NOTE(review): the second conjunct of each alternative and the operator
// joining the two are cut off in this listing; top_total and top_error are
// computed, so presumably they are used there - confirm against the full
// file.
2380 bool ColPartition::SummedSpacingOK(
const ColPartition& other,
2381 int spacing,
int resolution)
const {
2382 int bottom_error = std::max(BottomSpacingMargin(resolution),
2383 other.BottomSpacingMargin(resolution));
2384 int top_error = std::max(TopSpacingMargin(resolution),
2385 other.TopSpacingMargin(resolution));
2386 int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2387 int top_total = top_spacing_ + other.top_spacing_;
2388 return (
NearlyEqual(spacing, bottom_total, bottom_error) &&
2390 (
NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
// Returns the tolerance that SpacingEqual, SpacingsEqual and SummedSpacingOK
// pass to NearlyEqual when comparing bottom spacings at the given
// resolution.
// NOTE(review): the body is not visible in this listing, so how the margin
// is derived from resolution cannot be stated here.
2396 int ColPartition::BottomSpacingMargin(
int resolution)
const {
// Returns the tolerance that SpacingEqual, SpacingsEqual and SummedSpacingOK
// use when comparing top spacings at the given resolution.
// NOTE(review): only the tail of the body is visible; it shows that
// BottomSpacingMargin(resolution) is a term of the expression, but the
// leading part of that expression is on a line missing from this listing.
2402 int ColPartition::TopSpacingMargin(
int resolution)
const {
2404 BottomSpacingMargin(resolution);
// Compares the median heights of this partition and other. The visible
// condition requires median_height_ to be no more than kMaxSizeRatio times
// other.median_height_.
// NOTE(review): the expression continues after && on a line missing from
// this listing - presumably the symmetric test; confirm against the full
// file.
2409 bool ColPartition::SizesSimilar(
const ColPartition& other)
const {
2410 return median_height_ <= other.median_height_ *
kMaxSizeRatio &&
// Folds the left-hand gap of part into a running range of sort keys.
// The gap of part runs from its left margin to the left edge of its
// bounding box, evaluated with SortKey at both the top and the bottom of the
// box: left_key is the larger of the two margin keys and right_key the
// smaller of the two box-edge keys. If [left_key, right_key] overlaps
// [*margin_left, *margin_right], the running range is narrowed to the
// intersection.
// NOTE(review): the return statements are on lines missing from this
// listing. Callers use the result as a loop-continuation condition, so
// presumably it is true when the ranges overlapped and false otherwise.
2417 static bool UpdateLeftMargin(
const ColPartition& part,
2418 int* margin_left,
int* margin_right) {
2419 const TBOX& part_box = part.bounding_box();
2420 int top = part_box.
top();
2421 int bottom = part_box.
bottom();
// Keys of the left margin and of the box's left edge at top and bottom.
2422 int tl_key = part.SortKey(part.left_margin(), top);
2423 int tr_key = part.SortKey(part_box.
left(), top);
2424 int bl_key = part.SortKey(part.left_margin(), bottom);
2425 int br_key = part.SortKey(part_box.
left(), bottom);
// Tightest key range that holds at both heights.
2426 int left_key = std::max(tl_key, bl_key);
2427 int right_key = std::min(tr_key, br_key);
// Overlap test, then shrink the running range to the intersection.
2428 if (left_key <= *margin_right && right_key >= *margin_left) {
2430 *margin_right = std::min(*margin_right, right_key);
2431 *margin_left = std::max(*margin_left, left_key);
// Computes the start and end points of a run along the left edges of
// successive partitions in the list traversed by part_it. While
// UpdateLeftMargin keeps accepting partitions, their left-gap key ranges are
// intersected into [margin_left, margin_right]; the run's x coordinates are
// then taken from XAtY(margin_right, y) at start_y and end_y.
// NOTE(review): the rest of the signature (lines 2443-2445), the
// declarations of part, start_part, start and end, the openings of the
// do-loops, and the call that sets end's y are on lines missing from this
// listing - confirm against the full file.
2442 void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
// The run starts at the top of the current partition, adjusted against the
// bottom of the previous partition when there is one: it moves to that
// bottom if it is lower, or to the midpoint if it is higher.
2446 int start_y = part->bounding_box_.top();
2447 if (!part_it->at_first()) {
2448 int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2449 if (prev_bottom < start_y)
2450 start_y = prev_bottom;
2451 else if (prev_bottom > start_y)
2452 start_y = (start_y + prev_bottom) / 2;
2454 int end_y = part->bounding_box_.bottom();
// Start from an unbounded key range and narrow it with each partition.
2455 int margin_right = INT32_MAX;
2456 int margin_left = -INT32_MAX;
2457 UpdateLeftMargin(*part, &margin_left, &margin_right);
// Extend the run until the list wraps to its first element or a partition
// is rejected by UpdateLeftMargin.
2460 part = part_it->data();
2461 }
while (!part_it->at_first() &&
2462 UpdateLeftMargin(*part, &margin_left, &margin_right));
// Key range of the partition that stopped the run, starting from scratch.
2466 int next_margin_right = INT32_MAX;
2467 int next_margin_left = -INT32_MAX;
2468 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
// The next run lies entirely beyond margin_right: look ahead along it with
// a copy of the iterator, then step part_it backward while partitions are
// accepted into that next range and start_part has not been reached.
2469 if (next_margin_left > margin_right) {
2470 ColPartition_IT next_it(*part_it);
2473 part = next_it.data();
2474 }
while (!next_it.at_first() &&
2475 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2479 part_it->backward();
2480 part = part_it->data();
2481 }
while (part != start_part &&
2482 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// The run ends at the bottom of the partition before the iterator's current
// position, or at the midpoint with the current partition's top when that
// top is lower.
2486 part = part_it->data_relative(-1);
2487 end_y = part->bounding_box_.bottom();
2488 if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
2489 end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
// Output points: x is derived from the margin_right key at each y.
2490 start->
set_y(start_y);
2491 start->
set_x(part->XAtY(margin_right, start_y));
2493 end->
set_x(part->XAtY(margin_right, end_y));
// Debug trace; its guard is not visible here.
2495 tprintf(
"Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2496 start_y, end_y, part->XAtY(margin_left, end_y),
2497 end->
x(), part->left_margin_, part->bounding_box_.left());
// Folds the right-hand gap of part into a running range of sort keys.
// The gap of part runs from the right edge of its bounding box to its right
// margin, evaluated with SortKey at both the top and the bottom of the box:
// left_key is the larger of the two box-edge keys and right_key the smaller
// of the two margin keys. If [left_key, right_key] overlaps
// [*margin_left, *margin_right], the running range is narrowed to the
// intersection.
// NOTE(review): the return statements are on lines missing from this
// listing. Callers use the result as a loop-continuation condition, so
// presumably it is true when the ranges overlapped and false otherwise.
2503 static bool UpdateRightMargin(
const ColPartition& part,
2504 int* margin_left,
int* margin_right) {
2505 const TBOX& part_box = part.bounding_box();
2506 int top = part_box.
top();
2507 int bottom = part_box.
bottom();
// Keys of the box's right edge and of the right margin at top and bottom.
2508 int tl_key = part.SortKey(part_box.
right(), top);
2509 int tr_key = part.SortKey(part.right_margin(), top);
2510 int bl_key = part.SortKey(part_box.
right(), bottom);
2511 int br_key = part.SortKey(part.right_margin(), bottom);
// Tightest key range that holds at both heights.
2512 int left_key = std::max(tl_key, bl_key);
2513 int right_key = std::min(tr_key, br_key);
// Overlap test, then shrink the running range to the intersection.
2514 if (left_key <= *margin_right && right_key >= *margin_left) {
2516 *margin_right = std::min(*margin_right, right_key);
2517 *margin_left = std::max(*margin_left, left_key);
// Computes the start and end points of a run along the right edges of
// successive partitions, traversing the list backward through part_it.
// While UpdateRightMargin keeps accepting partitions, their right-gap key
// ranges are intersected into [margin_left, margin_right]; the run's x
// coordinates are then taken from XAtY(margin_left, y) at start_y and end_y.
// NOTE(review): the rest of the signature (lines 2530-2532), the
// declarations of part, start_part, start and end, the openings of the
// do-loops, the statement guarded by the first next_y test, and the call
// that sets end's y are on lines missing from this listing - confirm
// against the full file.
2529 void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
// The run starts at the bottom of the current partition, adjusted against
// the top of the following partition when there is one.
2533 int start_y = part->bounding_box_.bottom();
2534 if (!part_it->at_last()) {
2535 int next_y = part_it->data_relative(1)->bounding_box_.top();
2536 if (next_y > start_y)
2538 else if (next_y < start_y)
2539 start_y = (start_y + next_y) / 2;
2541 int end_y = part->bounding_box_.top();
// Start from an unbounded key range and narrow it with each partition.
2542 int margin_right = INT32_MAX;
2543 int margin_left = -INT32_MAX;
2544 UpdateRightMargin(*part, &margin_left, &margin_right);
// Step backward through the list until it wraps to its last element or a
// partition is rejected by UpdateRightMargin.
2546 part_it->backward();
2547 part = part_it->data();
2548 }
while (!part_it->at_last() &&
2549 UpdateRightMargin(*part, &margin_left, &margin_right));
// Key range of the partition that stopped the run, starting from scratch.
2552 int next_margin_right = INT32_MAX;
2553 int next_margin_left = -INT32_MAX;
2554 UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
// The next run lies entirely before margin_left: look ahead along it with a
// copy of the iterator, then re-walk with part_it while partitions are
// accepted into that next range and start_part has not been reached.
2555 if (next_margin_right < margin_left) {
2556 ColPartition_IT next_it(*part_it);
2559 part = next_it.data();
2560 }
while (!next_it.at_last() &&
2561 UpdateRightMargin(*part, &next_margin_left,
2562 &next_margin_right));
2567 part = part_it->data();
2568 }
while (part != start_part &&
2569 UpdateRightMargin(*part, &next_margin_left,
2570 &next_margin_right));
2571 part_it->backward();
// The run ends at the top of the partition after the iterator's current
// position, or at the midpoint with the current partition's bottom when
// that bottom is higher.
2574 part = part_it->data_relative(1);
2575 end_y = part->bounding_box().top();
2576 if (!part_it->at_last() &&
2577 part_it->data()->bounding_box_.bottom() > end_y)
2578 end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
// Output points: x is derived from the margin_left key at each y.
2579 start->
set_y(start_y);
2580 start->
set_x(part->XAtY(margin_left, start_y));
2582 end->
set_x(part->XAtY(margin_left, end_y));
// Debug trace; its guard is not visible here.
2584 tprintf(
"Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2585 start_y, end_y, end->
x(), part->XAtY(margin_right, end_y),
2586 part->bounding_box_.right(), part->right_margin_);
int64_t CostWithVariance(const DPPoint *prev)
const double kMaxLeaderGapFractionOfMax
void CopyRightTab(const ColPartition &src, bool take_box)
const double kMaxLeaderGapFractionOfMin
const double kMaxBaselineError
static bool DifferentSizes(int size1, int size2)
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
PolyBlockType PartitionType(ColumnSpanningType flow) const
ScrollView::Color BoxColor() const
void set_poly_block(POLY_BLOCK *blk)
set the poly block
const int kMinChainTextValue
void RemoveBox(BLOBNBOX *box)
void SetRightTab(const TabVector *tab_vector)
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
void set_right_margin(int margin)
bool IsInSameColumnAs(const ColPartition &part) const
void set_top_spacing(int spacing)
bool ConfirmNoTabViolation(const ColPartition &other) const
int NoisyNeighbours() const
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
void set_x(int16_t xin)
rewrite function
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
bool MatchingColumns(const ColPartition &other) const
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
BlobRegionType blob_type() const
int16_t y() const
access function
int RightBlobRule() const
const int kMaxColorDistance
const int kHorzStrongTextlineHeight
const int kHorzStrongTextlineAspect
BlobTextFlowType flow() const
const double kMaxSpacingDrift
int median_height() const
ColPartition * SingletonPartner(bool upper)
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
bool MatchingSizes(const ColPartition &other) const
static bool WithinTestRegion(int detail_level, int x, int y)
const double kMinBaselineCoverage
float SpecialBlobsDensity(const BlobSpecialTextType type) const
int base_char_top() const
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
int bottom_spacing() const
bool MatchingTextColor(const ColPartition &other) const
void AddPartition(ColPartition *part)
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
void Absorb(ColPartition *other, WidthCallback *cb)
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
const int kColumnWidthFactor
int base_char_bottom() const
#define ELIST2IZE(CLASSNAME)
void CopyLeftTab(const ColPartition &src, bool take_box)
void set_flow(BlobTextFlowType f)
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
int RightAtY(int y) const
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
void SmoothPartnerRun(int working_set_count)
const int kHorzStrongTextlineCount
const int kMinLeaderCount
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
bool NearlyEqual(T x, T y, T tolerance)
bool MarkAsLeaderIfMonospaced()
const int kMinStrongTextValue
void set_region_type(BlobRegionType new_type)
void Add(const ICOORD &pt)
int median_bottom() const
void set_owner(tesseract::ColPartition *new_owner)
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
void set_left_margin(int margin)
int16_t x() const
access function
ColPartition * SplitAt(int split_x)
void DisownBoxesNoAssert()
void set_owns_blobs(bool owns_blobs)
double ile(double frac) const
BlobRegionType region_type() const
int textord_debug_tabfind
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void set_side_step(int step)
static int SortByBBox(const void *p1, const void *p2)
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
POLY_BLOCK * poly_block() const
void AddLocalCost(int new_cost)
void set_bottom_spacing(int spacing)
BlobSpecialTextType special_text_type() const
double Fit(ICOORD *pt1, ICOORD *pt2)
ColPartition * CopyButDontOwnBlobs()
DLLSYM void tprintf(const char *format,...)
bool IsPulloutType() const
void AddPartner(bool upper, ColPartition *partner)
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
int XAtY(int sort_key, int y) const
void add(int32_t value, int32_t count)
void AddBox(BLOBNBOX *box)
bool ReleaseNonLeaderBoxes()
BlobTextFlowType flow() const
void set_flow(BlobTextFlowType value)
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
const TBOX & bounding_box() const
bool VSignificantCoreOverlap(const ColPartition &other) const
const double kMaxSameBlockLineSpacing
int CountOverlappingBoxes(const TBOX &box)
int SpecialBlobsCount(const BlobSpecialTextType type)
bool overlap(const TBOX &box) const
int VCoreOverlap(const ColPartition &other) const
void SetRegionAndFlowTypesFromProjectionValue(int value)
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
void SetPartitionType(int resolution, ColPartitionSet *columns)
C_OUTLINE_LIST * out_list()
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
const TBOX & bounding_box() const
bool IsVerticalType() const
const double kMaxTopSpacingFraction
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
tesseract::ColPartition * owner() const
void ComputeSpecialBlobsDensity()
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
TBOX BoundsWithoutBox(BLOBNBOX *box)
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
void SetLeftTab(const TabVector *tab_vector)
const int kMaxRMSColorNoise
void SetColumnGoodness(WidthCallback *cb)
void set_y(int16_t yin)
rewrite function
ColPartition * ShallowCopy() const
static C_BLOB * FakeBlob(const TBOX &box)
void RemovePartner(bool upper, ColPartition *partner)
void set_type(PolyBlockType t)
const double kMaxSizeRatio
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
void set_block_owned(bool owned)
PolyBlockType type() const
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)