22 #pragma warning(disable:4244) // Conversion warnings
26 #include "config_auto.h"
89 : left_margin_(-
MAX_INT32), right_margin_(MAX_INT32),
90 median_bottom_(MAX_INT32), median_top_(-MAX_INT32), median_size_(0),
91 median_left_(MAX_INT32), median_right_(-MAX_INT32), median_width_(0),
92 blob_type_(blob_type), flow_(
BTFT_NONE), good_blob_score_(0),
93 good_width_(false), good_column_(false),
94 left_key_tab_(false), right_key_tab_(false),
95 left_key_(0), right_key_(0), type_(
PT_UNKNOWN), vertical_(vertical),
96 working_set_(
NULL), last_add_was_vertical_(false), block_owned_(false),
97 desperately_merged_(false),
98 first_column_(-1), last_column_(-1), column_set_(NULL),
99 side_step_(0), top_spacing_(0), bottom_spacing_(0),
100 type_before_table_(PT_UNKNOWN), inside_table_column_(false),
101 nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL),
102 space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
104 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
134 ColPartition_LIST* big_part_list) {
143 if (big_part_list !=
NULL) {
144 ColPartition_IT part_it(big_part_list);
145 part_it.add_to_end(single);
153 ColPartition_C_IT it(&upper_partners_);
154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
155 it.data()->RemovePartner(
false,
this);
157 it.set_to_list(&lower_partners_);
158 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
159 it.data()->RemovePartner(
true,
this);
167 int left,
int bottom,
168 int right,
int top) {
170 part->bounding_box_ =
TBOX(left, bottom, right, top);
171 part->median_bottom_ = bottom;
172 part->median_top_ = top;
173 part->median_size_ = top - bottom;
174 part->median_width_ = right - left;
187 if (boxes_.length() == 0) {
190 bounding_box_ += box;
194 if (!last_add_was_vertical_) {
195 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
196 last_add_was_vertical_ =
true;
198 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
200 if (last_add_was_vertical_) {
201 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
202 last_add_was_vertical_ =
false;
204 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
211 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
213 bounding_box_.
left(), bounding_box_.
right());
218 BLOBNBOX_C_IT bb_it(&boxes_);
219 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
220 if (box == bb_it.data()) {
232 BLOBNBOX_C_IT bb_it(&boxes_);
233 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
236 if (biggest ==
NULL ||
240 if (biggest ==
NULL ||
251 BLOBNBOX_C_IT bb_it(&boxes_);
252 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
253 if (box != bb_it.data()) {
254 result += bb_it.data()->bounding_box();
263 BLOBNBOX_C_IT bb_it(&boxes_);
264 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
279 BLOBNBOX_C_IT bb_it(&boxes_);
280 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
292 BLOBNBOX_C_IT bb_it(&boxes_);
293 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
295 if (bblob->
owner() ==
this)
305 BLOBNBOX_C_IT bb_it(&boxes_);
306 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
313 if (bb_it.empty())
return false;
324 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
326 delete bblob->
cblob();
336 BLOBNBOX_CLIST reversed_boxes;
337 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
339 BLOBNBOX_C_IT bb_it(&boxes_);
340 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
341 reversed_it.add_before_then_move(bb_it.extract());
343 bb_it.add_list_after(&reversed_boxes);
345 int tmp = left_margin_;
346 left_margin_ = -right_margin_;
347 right_margin_ = -tmp;
358 if (bounding_box_.
left() > bounding_box_.
right()) {
360 tprintf(
"Bounding box invalid\n");
365 if (left_margin_ > bounding_box_.
left() ||
366 right_margin_ < bounding_box_.
right()) {
375 tprintf(
"Key inside box: %d v %d or %d v %d\n",
386 int y = (
MidY() + other.
MidY()) / 2;
429 if (bounding_box_.
right() < other.bounding_box_.
left() &&
432 if (other.bounding_box_.
right() < bounding_box_.
left() &&
435 if (bounding_box_.
left() > other.bounding_box_.
right() &&
438 if (other.bounding_box_.
left() > bounding_box_.
right() &&
446 double fractional_tolerance,
447 double constant_tolerance)
const {
449 int nonmatch_count = 0;
450 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
451 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
452 box_it.mark_cycle_pt();
453 other_it.mark_cycle_pt();
454 while (!box_it.cycled_list() && !other_it.cycled_list()) {
455 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
456 fractional_tolerance,
464 return match_count > nonmatch_count;
475 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
478 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
482 tprintf(
"Blob is not a diacritic:");
494 bool result = min_top > candidate.median_bottom_ &&
495 max_bottom < candidate.median_top_;
500 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n",
501 max_bottom, min_top, median_bottom_, median_top_);
510 if (tab_vector !=
NULL) {
514 left_key_tab_ =
false;
522 if (tab_vector !=
NULL) {
523 right_key_ = tab_vector->
sort_key();
526 right_key_tab_ =
false;
535 left_key_tab_ = take_box ?
false : src.left_key_tab_;
537 left_key_ = src.left_key_;
542 if (left_margin_ > bounding_box_.
left())
543 left_margin_ = src.left_margin_;
548 right_key_tab_ = take_box ?
false : src.right_key_tab_;
549 if (right_key_tab_) {
550 right_key_ = src.right_key_;
555 if (right_margin_ < bounding_box_.
right())
556 right_margin_ = src.right_margin_;
561 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
562 return it.data()->left_rule();
566 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
568 return it.data()->right_rule();
573 return special_blobs_densities_[
type];
578 BLOBNBOX_C_IT blob_it(&boxes_);
580 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
583 if (blob_type == type) {
594 special_blobs_densities_[
type] = density;
598 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
599 if (boxes_.empty()) {
603 BLOBNBOX_C_IT blob_it(&boxes_);
604 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
607 special_blobs_densities_[
type]++;
611 special_blobs_densities_[
type] /= boxes_.length();
620 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
622 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
624 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
626 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
634 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
635 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
636 if (it.data() == partner) {
645 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
646 if (!partners->singleton())
648 ColPartition_C_IT it(partners);
660 bounding_box_.
bottom()) ||
662 other->bounding_box_.
bottom())) {
669 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
671 int w1 = boxes_.length(), w2 = other->boxes_.length();
672 float new_val = special_blobs_densities_[
type] * w1 +
673 other->special_blobs_densities_[
type] * w2;
675 special_blobs_densities_[
type] = new_val / (w1 + w2);
680 BLOBNBOX_C_IT it(&boxes_);
681 BLOBNBOX_C_IT it2(&other->boxes_);
682 for (; !it2.empty(); it2.forward()) {
685 if (prev_owner != other && prev_owner !=
NULL) {
690 if (prev_owner == other)
692 it.add_to_end(bbox2);
694 left_margin_ =
MIN(left_margin_, other->left_margin_);
695 right_margin_ =
MAX(right_margin_, other->right_margin_);
696 if (other->left_key_ < left_key_) {
697 left_key_ = other->left_key_;
698 left_key_tab_ = other->left_key_tab_;
700 if (other->right_key_ > right_key_) {
701 right_key_ = other->right_key_;
702 right_key_tab_ = other->right_key_tab_;
707 flow_ = other->flow_;
708 blob_type_ = other->blob_type_;
712 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
713 last_add_was_vertical_ =
true;
715 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
716 last_add_was_vertical_ =
false;
721 for (
int upper = 0; upper < 2; ++upper) {
722 ColPartition_CLIST partners;
723 ColPartition_C_IT part_it(&partners);
724 part_it.add_list_after(upper ? &other->upper_partners_
725 : &other->lower_partners_);
726 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
751 int ok_box_overlap,
bool debug) {
755 tprintf(
"Vertical partition\n");
769 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
770 merged_box.bottom() < bounding_box_.
top() - ok_box_overlap &&
771 merged_box.top() > bounding_box_.
bottom() + ok_box_overlap) {
773 tprintf(
"Excessive box overlap\n");
783 if (boxes_.empty() || boxes_.singleton())
785 BLOBNBOX_C_IT it(&boxes_);
786 TBOX left_box(it.data()->bounding_box());
787 for (it.forward(); !it.at_first(); it.forward()) {
790 if (left_box.overlap(box))
803 BLOBNBOX_C_IT it(&boxes_);
804 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
808 if (bbox == split_blob || !split_part->boxes_.empty()) {
809 split_part->
AddBox(it.extract());
821 right_key_tab_ =
false;
822 split_part->left_key_tab_ =
false;
837 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right())
841 BLOBNBOX_C_IT it(&boxes_);
842 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
847 if (box.
left() >= split_x) {
848 split_part->
AddBox(it.extract());
855 it.add_list_after(&split_part->boxes_);
864 right_key_tab_ =
false;
865 split_part->left_key_tab_ =
false;
866 right_margin_ = split_x;
867 split_part->left_margin_ = split_x;
875 bounding_box_ =
TBOX();
876 BLOBNBOX_C_IT it(&boxes_);
878 int non_leader_count = 0;
880 bounding_box_.
set_left(left_margin_);
885 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
897 tprintf(
"Computed left-illegal partition\n");
903 tprintf(
"Computed right-illegal partition\n");
910 median_top_ = bounding_box_.
top();
911 median_bottom_ = bounding_box_.
bottom();
912 median_size_ = bounding_box_.
height();
913 median_left_ = bounding_box_.
left();
914 median_right_ = bounding_box_.
right();
915 median_width_ = bounding_box_.
width();
918 STATS bottom_stats(bounding_box_.
bottom(), bounding_box_.
top() + 1);
920 STATS left_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
921 STATS right_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
922 STATS width_stats(0, bounding_box_.
width() + 1);
923 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
927 int area = box.
area();
928 top_stats.add(box.
top(), area);
929 bottom_stats.add(box.
bottom(), area);
930 size_stats.add(box.
height(), area);
931 left_stats.add(box.
left(), area);
932 right_stats.add(box.
right(), area);
933 width_stats.add(box.
width(), area);
936 median_top_ =
static_cast<int>(top_stats.median() + 0.5);
937 median_bottom_ =
static_cast<int>(bottom_stats.median() + 0.5);
938 median_size_ =
static_cast<int>(size_stats.median() + 0.5);
939 median_left_ =
static_cast<int>(left_stats.median() + 0.5);
940 median_right_ =
static_cast<int>(right_stats.median() + 0.5);
941 median_width_ =
static_cast<int>(width_stats.median() + 0.5);
945 tprintf(
"Made partition with bad right coords");
949 tprintf(
"Made partition with bad left coords");
955 for (
int upper = 0; upper < 2; ++upper) {
956 ColPartition_CLIST partners;
957 ColPartition_C_IT part_it(&partners);
958 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
959 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
966 bounding_box_.
bottom())) {
967 tprintf(
"Recomputed box for partition %p\n",
this);
974 BLOBNBOX_C_IT it(&boxes_);
975 int overlap_count = 0;
976 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
981 return overlap_count;
987 int first_spanned_col = -1;
990 bounding_box_.
left(), bounding_box_.
right(),
992 MidY(), left_margin_, right_margin_,
993 &first_column_, &last_column_,
995 column_set_ = columns;
996 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
1000 if (first_spanned_col >= 0) {
1001 first_column_ = first_spanned_col;
1002 last_column_ = first_spanned_col;
1004 if ((first_column_ & 1) == 0)
1005 last_column_ = first_column_;
1006 else if ((last_column_ & 1) == 0)
1007 first_column_ = last_column_;
1009 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1027 switch (blob_type_) {
1070 int* first_col,
int* last_col) {
1071 int first_spanned_col = -1;
1074 bounding_box_.
left(), bounding_box_.
right(),
1076 MidY(), left_margin_, right_margin_,
1077 first_col, last_col,
1078 &first_spanned_col);
1086 good_width_ = cb->
Run(width);
1087 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
1097 bool result =
false;
1099 int part_width = bounding_box_.
width();
1100 STATS gap_stats(0, part_width);
1101 STATS width_stats(0, part_width);
1102 BLOBNBOX_C_IT it(&boxes_);
1107 for (it.forward(); !it.at_first(); it.forward()) {
1112 width_stats.
add(right - left, 1);
1117 double median_gap = gap_stats.
median();
1119 double max_width =
MAX(median_gap, median_width);
1120 double min_width =
MIN(median_gap, median_width);
1121 double gap_iqr = gap_stats.
ile(0.75f) - gap_stats.
ile(0.25f);
1123 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1133 int offset =
static_cast<int>(ceil(gap_iqr * 2));
1134 int min_step =
static_cast<int>(median_gap + median_width + 0.5);
1135 int max_step = min_step + offset;
1138 int part_left = bounding_box_.
left() - min_step / 2;
1139 part_width += min_step;
1141 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1146 for (
int x = left; x < right; ++x) {
1152 part_width, projection);
1156 bool modified_blob_list =
false;
1157 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1161 if (it.at_first()) {
1162 int gap = it.data_relative(1)->bounding_box().
left() -
1166 modified_blob_list =
true;
1172 it.data_relative(-1)->bounding_box().right();
1175 modified_blob_list =
true;
1186 if (best_end ==
NULL) {
1189 tprintf(
"Total cost = %d vs allowed %d\n",
1193 delete [] projection;
1207 int good_blob_score_ = 0;
1208 int noisy_count = 0;
1209 int hline_count = 0;
1210 int vline_count = 0;
1211 BLOBNBOX_C_IT it(&boxes_);
1212 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1222 if (hline_count > vline_count) {
1225 }
else if (vline_count > hline_count) {
1228 }
else if (value < -1 || 1 < value) {
1232 long_side = bounding_box_.
width();
1233 short_side = bounding_box_.
height();
1236 long_side = bounding_box_.
height();
1237 short_side = bounding_box_.
width();
1253 if (flow_ ==
BTFT_CHAIN && strong_score == 3)
1261 if (noisy_count >= blob_count) {
1267 bounding_box_.
bottom())) {
1268 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1269 blob_count, noisy_count, good_blob_score_);
1270 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
1271 value, flow_, blob_type_);
1282 BLOBNBOX_C_IT it(&boxes_);
1283 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1300 int total_height = 0;
1302 int height_count = 0;
1304 BLOBNBOX_C_IT it(&boxes_);
1305 TBOX box(it.data()->bounding_box());
1311 ICOORD first_pt(box.right(), box.bottom());
1314 linepoints.
Add(first_pt);
1315 for (it.forward(); !it.at_last(); it.forward()) {
1318 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1319 linepoints.
Add(box_pt);
1320 total_height += box.width();
1321 coverage += box.height();
1324 box = it.data()->bounding_box();
1325 ICOORD last_pt(box.right(), box.top());
1326 linepoints.
Add(last_pt);
1327 width = last_pt.y() - first_pt.y();
1331 TBOX box(it.data()->bounding_box());
1334 ICOORD first_pt(box.left(), box.bottom());
1335 linepoints.
Add(first_pt);
1336 for (it.forward(); !it.at_last(); it.forward()) {
1339 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1340 linepoints.
Add(box_pt);
1341 total_height += box.height();
1342 coverage += box.width();
1345 box = it.data()->bounding_box();
1346 ICOORD last_pt(box.right(), box.bottom());
1347 linepoints.
Add(last_pt);
1348 width = last_pt.x() - first_pt.x();
1353 double error = linepoints.
Fit(&start_pt, &end_pt);
1361 ColPartition_LIST* used_parts,
1362 WorkingPartSet_LIST* working_sets) {
1365 block_owned_ =
true;
1366 WorkingPartSet_IT it(working_sets);
1369 if (partner !=
NULL && partner->working_set_ !=
NULL) {
1370 working_set_ = partner->working_set_;
1375 tprintf(
"Partition with partner has no working set!:");
1383 for (it.mark_cycle_pt(); !it.cycled_list() &&
1384 col_index != first_column_;
1385 it.forward(), ++col_index);
1387 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1391 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1394 work_set = it.data();
1397 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1399 BLOCK_LIST completed_blocks;
1400 TO_BLOCK_LIST to_blocks;
1401 for (; !it.cycled_list() && col_index <= last_column_;
1402 it.forward(), ++col_index) {
1405 &completed_blocks, &to_blocks);
1407 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1409 working_set_ = work_set;
1421 ColPartition_LIST* block_parts,
1422 ColPartition_LIST* used_parts,
1423 BLOCK_LIST* completed_blocks,
1424 TO_BLOCK_LIST* to_blocks) {
1425 int page_height = tright.
y() - bleft.
y();
1427 ColPartition_IT it(block_parts);
1429 int max_line_height = 0;
1435 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1441 BLOBNBOX_C_IT blob_it(part->
boxes());
1442 int prev_bottom = blob_it.data()->bounding_box().bottom();
1443 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1446 int step = bottom - prev_bottom;
1449 side_steps.add(step, 1);
1450 prev_bottom = bottom;
1452 part->
set_side_step(static_cast<int>(side_steps.median() + 0.5));
1453 if (!it.at_last()) {
1464 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1469 if (part_count == 0)
1472 SmoothSpacings(resolution, page_height, block_parts);
1475 BLOCK_IT block_it(completed_blocks);
1476 TO_BLOCK_IT to_block_it(to_blocks);
1477 ColPartition_LIST spacing_parts;
1478 ColPartition_IT sp_block_it(&spacing_parts);
1480 for (it.mark_cycle_pt(); !it.empty();) {
1482 sp_block_it.add_to_end(part);
1484 if (it.empty() || part->
bottom_spacing() > same_block_threshold ||
1485 !part->SpacingsEqual(*it.data(), resolution)) {
1488 if (!it.empty() && part->
bottom_spacing() <= same_block_threshold) {
1494 tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d,"
1495 " sizes %d %d %d\n",
1503 if (part->SizesSimilar(*next_part) &&
1504 next_part->
median_size() * kMaxSameBlockLineSpacing >
1510 if (third_part ==
NULL ||
1511 !next_part->SizesSimilar(*third_part) ||
1512 third_part->
median_size() * kMaxSameBlockLineSpacing <=
1514 next_part->
median_size() * kMaxSameBlockLineSpacing <=
1518 sp_block_it.add_to_end(it.extract());
1521 tprintf(
"Added line to current block.\n");
1527 if (to_block !=
NULL) {
1528 to_block_it.add_to_end(to_block);
1529 block_it.add_to_end(to_block->
block);
1531 sp_block_it.set_to_list(&spacing_parts);
1535 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d\n",
1546 if (pos->
x() < bleft.
x())
1548 if (pos->
x() > tright.
x())
1550 if (pos->
y() < bleft.
y())
1552 if (pos->
y() > tright.
y())
1560 static TO_BLOCK* MoveBlobsToBlock(
bool vertical_text,
int line_spacing,
1562 ColPartition_LIST* block_parts,
1563 ColPartition_LIST* used_parts) {
1569 STATS sizes(0,
MAX(block_box.width(), block_box.height()));
1571 ColPartition_IT it(block_parts);
1573 BLOBNBOX_IT blob_it(&to_block->
blobs);
1574 ColPartition_IT used_it(used_parts);
1575 for (it.move_to_first(); !it.empty(); it.forward()) {
1576 ColPartition* part = it.extract();
1580 for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty();
1583 if (bblob->
owner() != part) {
1584 tprintf(
"Ownership incorrect for blob:");
1601 C_OUTLINE_IT ol_it(outlines);
1602 ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1607 blob_it.add_after_then_move(bblob);
1609 used_it.add_to_end(part);
1611 if (text_type && blob_it.empty()) {
1617 if (vertical_text) {
1619 if (block_width < line_spacing)
1620 line_spacing = block_width;
1621 to_block->
line_spacing =
static_cast<float>(line_spacing);
1622 to_block->
max_blob_size =
static_cast<float>(block_width + 1);
1625 if (block_height < line_spacing)
1626 line_spacing = block_height;
1627 to_block->
line_spacing =
static_cast<float>(line_spacing);
1628 to_block->
max_blob_size =
static_cast<float>(block_height + 1);
1636 ColPartition_LIST* block_parts,
1637 ColPartition_LIST* used_parts) {
1638 if (block_parts->empty())
1640 ColPartition_IT it(block_parts);
1651 ICOORDELT_LIST vertices;
1652 ICOORDELT_IT vert_it(&vertices);
1661 ColPartition::LeftEdgeRun(&it, &start, &end);
1663 ColPartition::RightEdgeRun(&it, &start, &end);
1664 ClipCoord(bleft, tright, &start);
1665 ClipCoord(bleft, tright, &end);
1666 vert_it.add_after_then_move(
new ICOORDELT(start));
1667 vert_it.add_after_then_move(
new ICOORDELT(end));
1672 if ((iteration == 0 && it.at_first()) ||
1673 (iteration == 1 && it.at_last())) {
1677 }
while (iteration < 2);
1679 tprintf(
"Making block at (%d,%d)->(%d,%d)\n",
1680 min_x, min_y, max_x, max_y);
1681 BLOCK* block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1683 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
1690 ColPartition_LIST* block_parts,
1691 ColPartition_LIST* used_parts) {
1692 if (block_parts->empty())
1694 ColPartition_IT it(block_parts);
1697 int line_spacing = block_box.
width();
1699 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1700 block_box += it.data()->bounding_box();
1707 block_box.
right(), block_box.
top());
1709 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
1715 BLOBNBOX_C_IT blob_it(&boxes_);
1719 for (; !blob_it.empty(); blob_it.forward()) {
1720 BLOBNBOX* blob = blob_it.extract();
1725 row =
new TO_ROW(blob, static_cast<float>(top),
1726 static_cast<float>(bottom),
1727 static_cast<float>(line_size));
1729 row->
add_blob(blob, static_cast<float>(top),
1730 static_cast<float>(bottom),
1731 static_cast<float>(line_size));
1741 part->left_margin_ = left_margin_;
1742 part->right_margin_ = right_margin_;
1743 part->bounding_box_ = bounding_box_;
1744 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1745 sizeof(special_blobs_densities_));
1746 part->median_bottom_ = median_bottom_;
1747 part->median_top_ = median_top_;
1748 part->median_size_ = median_size_;
1749 part->median_left_ = median_left_;
1750 part->median_right_ = median_right_;
1751 part->median_width_ = median_width_;
1752 part->good_width_ = good_width_;
1753 part->good_column_ = good_column_;
1754 part->left_key_tab_ = left_key_tab_;
1755 part->right_key_tab_ = right_key_tab_;
1756 part->type_ = type_;
1757 part->flow_ = flow_;
1758 part->left_key_ = left_key_;
1759 part->right_key_ = right_key_;
1760 part->first_column_ = first_column_;
1761 part->last_column_ = last_column_;
1762 part->owns_blobs_ =
false;
1769 BLOBNBOX_C_IT inserter(copy->
boxes());
1770 BLOBNBOX_C_IT traverser(
boxes());
1771 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1772 inserter.add_after_then_move(traverser.data());
1776 #ifndef GRAPHICS_DISABLED
1784 #endif // GRAPHICS_DISABLED
1787 static char kBlobTypes[
BRT_COUNT + 1] =
"NHSRIUVT";
1792 tprintf(
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1793 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1794 " ts=%d bs=%d ls=%d rs=%d\n",
1795 boxes_.empty() ?
'E' :
' ',
1796 left_margin_, left_key_tab_ ?
'T' :
'B',
LeftAtY(y),
1797 bounding_box_.
left(), median_left_,
1798 bounding_box_.
bottom(), median_bottom_,
1799 bounding_box_.
right(),
RightAtY(y), right_key_tab_ ?
'T' :
'B',
1800 right_margin_, median_right_, bounding_box_.
top(), median_top_,
1801 good_width_, good_column_, type_,
1802 kBlobTypes[blob_type_], flow_,
1803 first_column_, last_column_, boxes_.length(),
1804 space_above_, space_below_, space_to_left_, space_to_right_);
1809 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1810 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1811 color1_[L_ALPHA_CHANNEL],
1812 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1817 STATS left_stats(0, working_set_count);
1818 STATS right_stats(0, working_set_count);
1823 if (partner->type_ > max_type)
1824 max_type = partner->type_;
1825 if (column_set_ == partner->column_set_) {
1826 left_stats.
add(partner->first_column_, 1);
1827 right_stats.
add(partner->last_column_, 1);
1835 first_column_ = left_stats.
mode();
1836 last_column_ = right_stats.
mode();
1837 if (last_column_ < first_column_)
1838 last_column_ = first_column_;
1843 partner->type_ = max_type;
1844 #if 0 // See TODO above
1845 if (column_set_ == partner->column_set_) {
1846 partner->first_column_ = first_column_;
1847 partner->last_column_ = last_column_;
1888 RefinePartnersInternal(
true, get_desperate, grid);
1889 RefinePartnersInternal(
false, get_desperate, grid);
1893 RefinePartnersByType(
true, &upper_partners_);
1894 RefinePartnersByType(
false, &lower_partners_);
1898 if (!upper_partners_.empty() && !upper_partners_.singleton())
1899 RefinePartnersByOverlap(
true, &upper_partners_);
1900 if (!lower_partners_.empty() && !lower_partners_.singleton())
1901 RefinePartnersByOverlap(
false, &lower_partners_);
1910 void ColPartition::RefinePartnersInternal(
bool upper,
bool get_desperate,
1912 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
1913 if (!partners->empty() && !partners->singleton()) {
1914 RefinePartnersByType(upper, partners);
1915 if (!partners->empty() && !partners->singleton()) {
1917 RefinePartnerShortcuts(upper, partners);
1918 if (!partners->empty() && !partners->singleton()) {
1922 RefineTextPartnersByMerge(upper,
false, partners, grid);
1923 if (!partners->empty() && !partners->singleton())
1924 RefineTextPartnersByMerge(upper,
true, partners, grid);
1927 if (!partners->empty() && !partners->singleton())
1928 RefinePartnersByOverlap(upper, partners);
1937 void ColPartition::RefinePartnersByType(
bool upper,
1938 ColPartition_CLIST* partners) {
1942 tprintf(
"Refining %d %s partners by type for:\n",
1943 partners->length(), upper ?
"Upper" :
"Lower");
1946 ColPartition_C_IT it(partners);
1952 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1959 partner->RemovePartner(!upper,
this);
1968 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1976 partner->RemovePartner(!upper,
this);
1991 void ColPartition::RefinePartnerShortcuts(
bool upper,
1992 ColPartition_CLIST* partners) {
1993 bool done_any =
false;
1996 ColPartition_C_IT it(partners);
1997 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2001 ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
2002 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
2007 a->RemovePartner(!upper,
this);
2010 ColPartition_C_IT it2(partners);
2011 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2016 b2->RemovePartner(!upper,
this);
2029 }
while (done_any && !partners->empty() && !partners->singleton());
2040 void ColPartition::RefineTextPartnersByMerge(
bool upper,
bool desperate,
2041 ColPartition_CLIST* partners,
2042 ColPartitionGrid* grid) {
2046 tprintf(
"Refining %d %s partners by merge for:\n",
2047 partners->length(), upper ?
"Upper" :
"Lower");
2050 while (!partners->empty() && !partners->singleton()) {
2053 ColPartition_C_IT it(partners);
2057 ColPartition_CLIST candidates;
2058 ColPartition_C_IT cand_it(&candidates);
2059 for (it.forward(); !it.at_first(); it.forward()) {
2061 if (part->first_column_ == candidate->last_column_ &&
2062 part->last_column_ == candidate->first_column_)
2063 cand_it.add_after_then_move(it.data());
2065 int overlap_increase;
2066 ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
2067 NULL, &overlap_increase);
2068 if (candidate !=
NULL && (overlap_increase <= 0 || desperate)) {
2070 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2071 part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
2075 grid->RemoveBBox(candidate);
2076 grid->RemoveBBox(part);
2077 part->Absorb(candidate,
NULL);
2079 grid->InsertBBox(
true,
true, part);
2080 if (overlap_increase > 0)
2081 part->desperately_merged_ =
true;
2090 void ColPartition::RefinePartnersByOverlap(
bool upper,
2091 ColPartition_CLIST* partners) {
2095 tprintf(
"Refining %d %s partners by overlap for:\n",
2096 partners->length(), upper ?
"Upper" :
"Lower");
2099 ColPartition_C_IT it(partners);
2102 int best_overlap = 0;
2103 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2105 int overlap =
MIN(bounding_box_.
right(), partner->bounding_box_.right())
2106 -
MAX(bounding_box_.
left(), partner->bounding_box_.left());
2107 if (overlap > best_overlap) {
2108 best_overlap = overlap;
2109 best_partner = partner;
2113 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2115 if (partner != best_partner) {
2120 partner->RemovePartner(!upper,
this);
2127 bool ColPartition::ThisPartitionBetter(
BLOBNBOX* bbox,
2128 const ColPartition& other) {
2131 int left = box.
left();
2132 int right = box.
right();
2133 if (left < left_margin_ || right > right_margin_)
2135 if (left < other.left_margin_ || right > other.right_margin_)
2137 int top = box.
top();
2138 int bottom = box.
bottom();
2139 int this_overlap =
MIN(top, median_top_) -
MAX(bottom, median_bottom_);
2140 int other_overlap =
MIN(top, other.median_top_) -
2141 MAX(bottom, other.median_bottom_);
2142 int this_miss = median_top_ - median_bottom_ - this_overlap;
2143 int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2145 tprintf(
"Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2147 this_overlap, other_overlap, this_miss, other_miss,
2148 median_top_, other.median_top_);
2150 if (this_miss < other_miss)
2152 if (this_miss > other_miss)
2154 if (this_overlap > other_overlap)
2156 if (this_overlap < other_overlap)
2158 return median_top_ >= other.median_top_;
2165 static int MedianSpacing(
int page_height, ColPartition_IT it) {
2166 STATS stats(0, page_height);
2167 while (!it.cycled_list()) {
2168 ColPartition* part = it.data();
2170 stats.add(part->bottom_spacing(), 1);
2171 stats.add(part->top_spacing(), 1);
2173 return static_cast<int>(stats.median() + 0.5);
2187 return (last_column_ >= part.first_column_) &&
2188 (first_column_ <= part.last_column_);
// Walks the partitions in `parts` with a sliding window (`neighbourhood`) of
// PN_COUNT consecutive partitions. When a run of matching line spacings ends,
// the top and bottom spacings of every partition in that run are replaced by
// the run's rounded mean values.
// resolution is forwarded to the spacing comparison helpers; page_height is
// forwarded to MedianSpacing.
// NOTE(review): many listing lines are absent from this extract (for example
// the declaration of `neighbourhood`, the iterator advances, the definitions
// of `upper` and `last_part`, and all closing braces). The comments below
// describe only the statements that are visible.
2194 void ColPartition::SmoothSpacings(
int resolution,
int page_height,
2195 ColPartition_LIST* parts) {
2203 ColPartition_IT it(parts);
// The median spacing of the list is the reference value for the "blip"
// tests below.
2210 int median_space = MedianSpacing(page_height, it);
// start_it and end_it are copies of `it` taken at this point. Later,
// start_it seeds the summing iterator and end_it seeds the median
// recomputation.
2211 ColPartition_IT start_it(it);
2212 ColPartition_IT end_it(it);
// Initial fill of the window: slots below PN_UPPER, and any slot reached
// after the list has cycled, are NULL; the others take the current partition.
2213 for (
int i = 0; i < PN_COUNT; ++i) {
2214 if (i < PN_UPPER || it.cycled_list()) {
2215 neighbourhood[i] =
NULL;
2219 neighbourhood[i] = it.data();
// Main loop: runs while the PN_UPPER slot still holds a partition.
2223 while (neighbourhood[PN_UPPER] !=
NULL) {
// The run ends when there is no PN_LOWER partition, or when upper and lower
// spacings differ and none of the three blip tests (window as-is, shifted
// back by one, shifted forward by one) excuses the difference.
// NOTE(review): `neighbourhood - 1` and `neighbourhood + 1` rely on
// OKSpacingBlip only indexing slots that stay inside the array - confirm
// against the PN_* enum values.
2245 if (neighbourhood[PN_LOWER] ==
NULL ||
2246 (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
2248 !OKSpacingBlip(resolution, median_space, neighbourhood) &&
2249 (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) ||
2250 !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2251 (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) ||
2252 !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
// First pass over the run: total the spacings, starting from start_it.
2255 ColPartition_IT sum_it(start_it);
2257 double total_bottom = 0.0;
2258 double total_top = 0.0;
2259 int total_count = 0;
2262 while (upper != last_part) {
2263 total_bottom += upper->bottom_spacing();
2264 total_top += upper->top_spacing();
2267 upper = sum_it.data();
// Guard against dividing by zero when the run turned out to be empty.
2269 if (total_count > 0) {
// Mean spacings, rounded to nearest by adding 0.5 before truncation.
2271 int top_spacing =
static_cast<int>(total_top / total_count + 0.5);
2272 int bottom_spacing =
static_cast<int>(total_bottom / total_count + 0.5);
// Diagnostic output describing why the run ended and the window contents.
// NOTE(review): the condition that enables this output is not visible here.
2274 tprintf(
"Spacing run ended. Cause:");
2275 if (neighbourhood[PN_LOWER] ==
NULL) {
2278 tprintf(
"Spacing change. Spacings:\n");
2279 for (
int i = 0; i < PN_COUNT; ++i) {
2280 if (neighbourhood[i] ==
NULL) {
2282 if (i > 0 && neighbourhood[i - 1] !=
NULL) {
2287 tprintf(
" NULL lower partner:\n");
2293 tprintf(
"Top = %d, bottom = %d\n",
2299 tprintf(
"Mean spacing = %d/%d\n", top_spacing, bottom_spacing);
// Second pass over the same run: overwrite each partition's spacings with
// the means computed above.
2302 upper = sum_it.data();
2303 while (upper != last_part) {
2304 upper->set_top_spacing(top_spacing);
2305 upper->set_bottom_spacing(bottom_spacing);
2311 upper = sum_it.data();
// Recompute the reference median from end_it onwards for the next run.
2318 median_space = MedianSpacing(page_height, end_it);
// Slide the window down by one slot and refill the last slot from `it`
// (NULL once the list has cycled).
2321 for (
int j = 1; j < PN_COUNT; ++j) {
2322 neighbourhood[j - 1] = neighbourhood[j];
2324 if (it.cycled_list()) {
2325 neighbourhood[PN_COUNT - 1] =
NULL;
2327 neighbourhood[PN_COUNT - 1] = it.data();
// Tests whether the window of partition pointers `parts` (indexed by the
// PN_* constants) qualifies as an acceptable spacing "blip" relative to
// median_spacing. resolution is forwarded to the spacing comparisons.
// NOTE(review): listing lines 2340-2342 and 2349 are absent from this
// extract, so the statement executed when PN_UPPER or PN_LOWER is NULL is
// not visible (presumably an early `return false`) - confirm.
2337 bool ColPartition::OKSpacingBlip(
int resolution,
int median_spacing,
2338 ColPartition** parts) {
2339 if (parts[PN_UPPER] ==
NULL || parts[PN_LOWER] ==
NULL)
// True when the upper and lower spacings sum acceptably (SummedSpacingOK)
// and at least one of the non-NULL PN_ABOVE1 / PN_BELOW1 partitions has
// spacing equal to the median.
2343 return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER],
2344 median_spacing, resolution) &&
2345 ((parts[PN_ABOVE1] !=
NULL &&
2346 parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2347 (parts[PN_BELOW1] !=
NULL &&
2348 parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
// Compares this partition's spacing against `spacing`, using tolerances
// derived from `resolution` via BottomSpacingMargin and TopSpacingMargin.
// NOTE(review): listing line 2357 (the right-hand operand of the `&&`) is
// absent from this extract. Since top_error is computed but not used in the
// visible text, it presumably compares top_spacing_ with top_error - confirm.
2353 bool ColPartition::SpacingEqual(
int spacing,
int resolution)
const {
2354 int bottom_error = BottomSpacingMargin(resolution);
2355 int top_error = TopSpacingMargin(resolution);
2356 return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
// Compares the spacings of this partition with those of `other`.
// The bottom spacings must be nearly equal, and either the top spacings are
// nearly equal or the sum of the two top spacings is nearly equal to twice
// this partition's bottom spacing.
// Each tolerance is the larger of the two partitions' own margins.
// NOTE(review): listing line 2371 (the tolerance argument of the last
// NearlyEqual call) is absent from this extract.
2362 bool ColPartition::SpacingsEqual(
const ColPartition& other,
2363 int resolution)
const {
// Use the more permissive of the two partitions' margins.
2364 int bottom_error =
MAX(BottomSpacingMargin(resolution),
2365 other.BottomSpacingMargin(resolution));
2366 int top_error =
MAX(TopSpacingMargin(resolution),
2367 other.TopSpacingMargin(resolution));
2368 return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2369 (
NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2370 NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
// Tests the summed spacings of this partition and `other` against `spacing`
// and against twice `spacing`.
// Each tolerance is the larger of the two partitions' own margins.
// NOTE(review): listing lines 2386 and 2388 are absent from this extract.
// They presumably hold the matching top_total comparisons, since top_error
// and top_total are computed but unused in the visible text - confirm.
2377 bool ColPartition::SummedSpacingOK(
const ColPartition& other,
2378 int spacing,
int resolution)
const {
2379 int bottom_error =
MAX(BottomSpacingMargin(resolution),
2380 other.BottomSpacingMargin(resolution));
2381 int top_error =
MAX(TopSpacingMargin(resolution),
2382 other.TopSpacingMargin(resolution));
// Sums of the two partitions' bottom and top spacings.
2383 int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2384 int top_total = top_spacing_ + other.top_spacing_;
// First alternative: the totals match `spacing` itself.
2385 return (
NearlyEqual(spacing, bottom_total, bottom_error) &&
// Second alternative: the totals match twice `spacing`.
2387 (
NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
// Returns the tolerance used when comparing bottom spacings, as a function
// of `resolution`. Callers in this file pass it to NearlyEqual as the
// allowed error.
// NOTE(review): the body (listing line 2394 onwards) is absent from this
// extract, so the exact formula cannot be documented here.
2393 int ColPartition::BottomSpacingMargin(
int resolution)
const {
// Returns the tolerance used when comparing top spacings, as a function of
// `resolution`.
// NOTE(review): listing line 2400 is absent from this extract. The visible
// line 2401 is only the trailing part of the returned expression, so the
// result involves BottomSpacingMargin(resolution) combined with a term that
// is not visible here.
2399 int ColPartition::TopSpacingMargin(
int resolution)
const {
2401 BottomSpacingMargin(resolution);
// Compares the median sizes of this partition and `other` using the ratio
// limit kMaxSizeRatio.
// The visible condition requires median_size_ to be no more than
// kMaxSizeRatio times other.median_size_.
// NOTE(review): listing line 2408 (the second operand of the `&&`) is absent
// from this extract; presumably it is the symmetric test - confirm.
2406 bool ColPartition::SizesSimilar(
const ColPartition& other)
const {
2407 return median_size_ <= other.median_size_ *
kMaxSizeRatio &&
// Folds the left-edge range of `part` into the running range
// [*margin_left, *margin_right], which is expressed in sort-key units.
// If the part's range overlaps the running range, the running range is
// narrowed to the intersection.
// NOTE(review): listing lines 2426 and 2429 onwards are absent from this
// extract, so the return statements are not visible. Callers use the result
// as a loop-continuation condition, so presumably it is true on overlap and
// false otherwise - confirm.
2414 static bool UpdateLeftMargin(
const ColPartition& part,
2415 int* margin_left,
int* margin_right) {
2416 const TBOX& part_box = part.bounding_box();
2417 int top = part_box.
top();
2418 int bottom = part_box.
bottom();
// Sort keys of the left margin and of the box's left edge, each evaluated
// at the top and at the bottom of the box.
2419 int tl_key = part.SortKey(part.left_margin(), top);
2420 int tr_key = part.SortKey(part_box.
left(), top);
2421 int bl_key = part.SortKey(part.left_margin(), bottom);
2422 int br_key = part.SortKey(part_box.
left(), bottom);
// Tightest range for this part: the larger margin key and the smaller
// box-edge key.
2423 int left_key =
MAX(tl_key, bl_key);
2424 int right_key =
MIN(tr_key, br_key);
// Overlap test between the part's range and the running range.
2425 if (left_key <= *margin_right && right_key >= *margin_left) {
// Narrow the running range to the intersection.
2427 *margin_right =
MIN(*margin_right, right_key);
2428 *margin_left =
MAX(*margin_left, left_key);
// Computes one run of the left edge, beginning at the partition under
// *part_it. It moves *part_it through the list while UpdateLeftMargin keeps
// succeeding, then writes the run's end points through `start` and `end`.
// NOTE(review): many listing lines are absent from this extract: the rest of
// the signature (2440), the declarations of part, start_part and the margin
// variables, the `do {` loop heads with their iterator advances, and
// end->set_y (2489). The comments below describe only the visible lines.
2439 void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
// start_y begins at the top of the first partition's box.
2443 int start_y = part->bounding_box_.top();
2444 if (!part_it->at_first()) {
// Adjust start_y using the previous partition's bottom: take it directly if
// it is lower, or use the midpoint if it is higher.
2445 int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2446 if (prev_bottom < start_y)
2447 start_y = prev_bottom;
2448 else if (prev_bottom > start_y)
2449 start_y = (start_y + prev_bottom) / 2;
2451 int end_y = part->bounding_box_.bottom();
// Seed the running margin range from the first partition.
2454 UpdateLeftMargin(*part, &margin_left, &margin_right);
// Keep folding partitions into the run until the iterator is back at the
// first element or UpdateLeftMargin rejects a partition.
2457 part = part_it->data();
2458 }
while (!part_it->at_first() &&
2459 UpdateLeftMargin(*part, &margin_left, &margin_right));
// Seed a second range from the partition that ended the run.
2465 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
// Only when the next range lies wholly beyond the current one: scan the next
// run on a copy of the iterator, then step *part_it backwards while
// partitions of the original run still fit the next range.
2466 if (next_margin_left > margin_right) {
2467 ColPartition_IT next_it(*part_it);
2470 part = next_it.data();
2471 }
while (!next_it.at_first() &&
2472 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2476 part_it->backward();
2477 part = part_it->data();
2478 }
while (part != start_part &&
2479 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// end_y comes from the bottom of the partition just before the iterator.
// If the current partition's top is lower, use the midpoint of the two.
2483 part = part_it->data_relative(-1);
2484 end_y = part->bounding_box_.bottom();
2485 if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
2486 end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
// Both end points take their x from margin_right (a sort key) converted by
// XAtY at the respective y.
2487 start->
set_y(start_y);
2488 start->
set_x(part->XAtY(margin_right, start_y));
2490 end->
set_x(part->XAtY(margin_right, end_y));
// Diagnostic output; the condition guarding it is not visible here.
2492 tprintf(
"Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2493 start_y, end_y, part->XAtY(margin_left, end_y),
2494 end->
x(), part->left_margin_, part->bounding_box_.left());
// Folds the right-edge range of `part` into the running range
// [*margin_left, *margin_right], which is expressed in sort-key units.
// If the part's range overlaps the running range, the running range is
// narrowed to the intersection.
// NOTE(review): listing lines 2512 and 2515 onwards are absent from this
// extract, so the return statements are not visible. Callers use the result
// as a loop-continuation condition, so presumably it is true on overlap and
// false otherwise - confirm.
2500 static bool UpdateRightMargin(
const ColPartition& part,
2501 int* margin_left,
int* margin_right) {
2502 const TBOX& part_box = part.bounding_box();
2503 int top = part_box.
top();
2504 int bottom = part_box.
bottom();
// Sort keys of the box's right edge and of the right margin, each evaluated
// at the top and at the bottom of the box.
2505 int tl_key = part.SortKey(part_box.
right(), top);
2506 int tr_key = part.SortKey(part.right_margin(), top);
2507 int bl_key = part.SortKey(part_box.
right(), bottom);
2508 int br_key = part.SortKey(part.right_margin(), bottom);
// Tightest range for this part: the larger box-edge key and the smaller
// margin key.
2509 int left_key =
MAX(tl_key, bl_key);
2510 int right_key =
MIN(tr_key, br_key);
// Overlap test between the part's range and the running range.
2511 if (left_key <= *margin_right && right_key >= *margin_left) {
// Narrow the running range to the intersection.
2513 *margin_right =
MIN(*margin_right, right_key);
2514 *margin_left =
MAX(*margin_left, left_key);
// Computes one run of the right edge, beginning at the partition under
// *part_it. It moves *part_it backwards through the list while
// UpdateRightMargin keeps succeeding, then writes the run's end points
// through `start` and `end`.
// NOTE(review): many listing lines are absent from this extract: the rest of
// the signature (2527), the declarations of part, start_part and the margin
// variables, the `do {` loop heads, the assignment at 2534, and end->set_y
// (2578). The comments below describe only the visible lines.
2526 void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
// start_y begins at the bottom of the first partition's box.
2530 int start_y = part->bounding_box_.bottom();
2531 if (!part_it->at_last()) {
// Adjust start_y using the next partition's top. The statement for the
// next_y > start_y case (2534) is not visible; when next_y is lower the
// midpoint is used.
2532 int next_y = part_it->data_relative(1)->bounding_box_.top();
2533 if (next_y > start_y)
2535 else if (next_y < start_y)
2536 start_y = (start_y + next_y) / 2;
2538 int end_y = part->bounding_box_.top();
// Seed the running margin range from the first partition.
2541 UpdateRightMargin(*part, &margin_left, &margin_right);
// Step backwards, folding partitions into the run until the iterator is at
// the last element or UpdateRightMargin rejects a partition.
2543 part_it->backward();
2544 part = part_it->data();
2545 }
while (!part_it->at_last() &&
2546 UpdateRightMargin(*part, &margin_left, &margin_right));
// Seed a second range from the partition that ended the run.
2551 UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
// Only when the next range lies wholly before the current one: scan the next
// run on a copy of the iterator, then move *part_it back through the
// original run while its partitions still fit the next range.
2552 if (next_margin_right < margin_left) {
2553 ColPartition_IT next_it(*part_it);
2556 part = next_it.data();
2557 }
while (!next_it.at_last() &&
2558 UpdateRightMargin(*part, &next_margin_left,
2559 &next_margin_right));
2564 part = part_it->data();
2565 }
while (part != start_part &&
2566 UpdateRightMargin(*part, &next_margin_left,
2567 &next_margin_right));
2568 part_it->backward();
// end_y comes from the top of the partition just after the iterator.
// If the current partition's bottom is higher, use the midpoint of the two.
2571 part = part_it->data_relative(1);
2572 end_y = part->bounding_box().top();
2573 if (!part_it->at_last() &&
2574 part_it->data()->bounding_box_.bottom() > end_y)
2575 end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
// Both end points take their x from margin_left (a sort key) converted by
// XAtY at the respective y.
2576 start->
set_y(start_y);
2577 start->
set_x(part->XAtY(margin_left, start_y));
2579 end->
set_x(part->XAtY(margin_left, end_y));
// Diagnostic output; the condition guarding it is not visible here.
2581 tprintf(
"Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2582 start_y, end_y, end->
x(), part->XAtY(margin_right, end_y),
2583 part->bounding_box_.right(), part->right_margin_);
void set_x(inT16 xin)
rewrite function
const int kMaxPartnerDepth
void RefinePartners(PolyBlockType type, bool get_desparate, ColPartitionGrid *grid)
const double kMaxTopSpacingFraction
const double kMinBaselineCoverage
bool IsVerticalType() const
int XAtY(int sort_key, int y) const
bool ReleaseNonLeaderBoxes()
static bool WithinTestRegion(int detail_level, int x, int y)
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
const int kHorzStrongTextlineHeight
float SpecialBlobsDensity(const BlobSpecialTextType type) const
bool ConfirmNoTabViolation(const ColPartition &other) const
void set_top_spacing(int spacing)
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
ColPartition * ShallowCopy() const
void SetRegionAndFlowTypesFromProjectionValue(int value)
void set_poly_block(POLY_BLOCK *blk)
set the poly block
static bool DifferentSizes(int size1, int size2)
void CopyRightTab(const ColPartition &src, bool take_box)
void SetRightTab(const TabVector *tab_vector)
const int kMinStrongTextValue
const TBOX & bounding_box() const
int NoisyNeighbours() const
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
void set_owner(tesseract::ColPartition *new_owner)
bool IsInSameColumnAs(const ColPartition &part) const
void set_bottom_spacing(int spacing)
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
#define ELIST2IZE(CLASSNAME)
void add(inT32 value, inT32 count)
BlobRegionType blob_type() const
void SetLeftTab(const TabVector *tab_vector)
void AddBox(BLOBNBOX *box)
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
ColPartition * SplitAt(int split_x)
void SmoothPartnerRun(int working_set_count)
void set_flow(BlobTextFlowType value)
const int kHorzStrongTextlineAspect
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
int median_bottom() const
int bottom_spacing() const
C_OUTLINE_LIST * out_list()
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
const double kMaxSameBlockLineSpacing
TBOX BoundsWithoutBox(BLOBNBOX *box)
void CopyLeftTab(const ColPartition &src, bool take_box)
void SetPartitionType(int resolution, ColPartitionSet *columns)
static double ColorDistanceFromLine(const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
int textord_debug_tabfind
void set_left_margin(int margin)
void set_owns_blobs(bool owns_blobs)
ScrollView::Color BoxColor() const
double ile(double frac) const
void set_right_margin(int margin)
#define CLISTIZE(CLASSNAME)
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
inT16 y() const
access function
bool NearlyEqual(T x, T y, T tolerance)
ColPartition * SingletonPartner(bool upper)
const int kMaxColorDistance
BlobRegionType region_type() const
const double kMaxSpacingDrift
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
int RightBlobRule() const
const int kMinChainTextValue
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
const double kMaxSizeRatio
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
bool IsPulloutType() const
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
BlobSpecialTextType special_text_type() const
bool MatchingSizes(const ColPartition &other) const
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
inT64 CostWithVariance(const DPPoint *prev)
int RightAtY(int y) const
const int kMinLeaderCount
tesseract::ColPartition * owner() const
ColPartition * CopyButDontOwnBlobs()
PolyBlockType type() const
void DisownBoxesNoAssert()
void Absorb(ColPartition *other, WidthCallback *cb)
bool MarkAsLeaderIfMonospaced()
void Add(const ICOORD &pt)
void set_flow(BlobTextFlowType f)
const double kMaxLeaderGapFractionOfMin
const double kMaxBaselineError
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
bool MatchingTextColor(const ColPartition &other) const
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
PolyBlockType PartitionType(ColumnSpanningType flow) const
void set_y(inT16 yin)
rewrite function
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
void set_block_owned(bool owned)
const int kMaxRMSColorNoise
static C_BLOB * FakeBlob(const TBOX &box)
inT16 x() const
access function
double Fit(ICOORD *pt1, ICOORD *pt2)
void set_side_step(int step)
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
void set_region_type(BlobRegionType new_type)
bool MatchingColumns(const ColPartition &other) const
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
const int kHorzStrongTextlineCount
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
int VCoreOverlap(const ColPartition &other) const
void SetColumnGoodness(WidthCallback *cb)
int SpecialBlobsCount(const BlobSpecialTextType type)
void RemovePartner(bool upper, ColPartition *partner)
void AddLocalCost(int new_cost)
const TBOX & bounding_box() const
const double kMaxLeaderGapFractionOfMax
const int kColumnWidthFactor
void AddPartition(ColPartition *part)
int CountOverlappingBoxes(const TBOX &box)
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
POLY_BLOCK * poly_block() const
int base_char_bottom() const
BlobTextFlowType flow() const
void ComputeSpecialBlobsDensity()
void AddPartner(bool upper, ColPartition *partner)
bool overlap(const TBOX &box) const
bool VSignificantCoreOverlap(const ColPartition &other) const
void RemoveBox(BLOBNBOX *box)
int base_char_top() const
void set_type(PolyBlockType t)