20 #include "config_auto.h"
55 "max fraction of mean blob width allowed for vertical gaps in vertical text");
58 "Fraction of box matches required to declare a line vertical");
65 auto* constraints =
new TabConstraint_LIST;
66 TabConstraint_IT it(constraints);
67 it.add_to_end(constraint);
69 vector->set_top_constraints(constraints);
71 vector->set_bottom_constraints(constraints);
76 TabConstraint_LIST* list2) {
79 int y_min = -INT32_MAX;
80 int y_max = INT32_MAX;
82 tprintf(
"Testing constraint compatibility\n");
83 GetConstraints(list1, &y_min, &y_max);
84 GetConstraints(list2, &y_min, &y_max);
86 tprintf(
"Resulting range = [%d,%d]\n", y_min, y_max);
87 return y_max >= y_min;
93 TabConstraint_LIST* list2) {
96 TabConstraint_IT it(list2);
98 tprintf(
"Merging constraints\n");
100 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
103 constraint->vector_->
Print(
"Merge");
104 if (constraint->is_top_)
110 it.add_list_before(list2);
117 int y_min = -INT32_MAX;
118 int y_max = INT32_MAX;
119 GetConstraints(constraints, &y_min, &y_max);
120 int y = (y_min + y_max) / 2;
121 TabConstraint_IT it(constraints);
122 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
125 if (constraint->is_top_) {
137 : vector_(vector), is_top_(is_top) {
139 y_min_ = vector->
endpt().
y();
148 void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
149 int* y_min,
int* y_max) {
150 TabConstraint_IT it(constraints);
151 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
154 tprintf(
"Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
155 constraint->vector_->Print(
" for");
157 *y_min = std::max(*y_min, constraint->y_min_);
158 *y_max = std::min(*y_max, constraint->y_max_);
177 int extended_start_y,
int extended_end_y,
178 BLOBNBOX_CLIST* good_points,
179 int* vertical_x,
int* vertical_y) {
180 auto* vector =
new TabVector(extended_start_y, extended_end_y,
181 alignment, good_points);
182 if (!vector->
Fit(vertical,
false)) {
187 vertical = vector->endpt_ - vector->startpt_;
189 *vertical_x += vertical.x() * weight;
190 *vertical_y += vertical.y() * weight;
200 : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
201 needs_refit_(true), needs_evaluation_(true),
202 alignment_(alignment) {
203 BLOBNBOX_C_IT it(&boxes_);
213 sort_key_ =
SortKey(vertical_skew,
214 (startpt_.
x() + endpt_.
x()) / 2,
215 (startpt_.
y() + endpt_.
y()) / 2);
217 Print(
"Constructed a new tab vector:");
227 copy->startpt_ = startpt_;
228 copy->endpt_ = endpt_;
229 copy->alignment_ = alignment_;
230 copy->extended_ymax_ = extended_ymax_;
231 copy->extended_ymin_ = extended_ymin_;
232 copy->intersects_other_lines_ = intersects_other_lines_;
240 BLOBNBOX_C_IT it(&boxes_);
244 while (!it.at_last() && box.
top() <= new_box.
top()) {
245 if (blob == new_blob)
251 if (box.
top() >= new_box.
top()) {
252 it.add_before_stay_put(new_blob);
258 it.add_after_stay_put(new_blob);
264 startpt_.
set_y(start_y);
274 startpt_.
rotate(rotation);
276 int dx = endpt_.
x() - startpt_.
x();
277 int dy = endpt_.
y() - startpt_.
y();
278 if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
298 TabVector_C_IT it(&partners_);
300 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
302 if (partner->top_constraints_ ==
nullptr ||
303 partner->bottom_constraints_ ==
nullptr) {
304 partner->
Print(
"Impossible: has no constraints");
305 Print(
"This vector has it as a partner");
308 if (prev_partner ==
nullptr) {
311 partner->bottom_constraints_))
313 partner->bottom_constraints_);
317 partner->bottom_constraints_))
319 partner->bottom_constraints_);
321 prev_partner = partner;
325 partner->top_constraints_))
327 partner->top_constraints_);
335 partner->bottom_constraints_))
337 partner->bottom_constraints_);
339 partner->top_constraints_))
341 partner->top_constraints_);
346 if (top_constraints_ !=
nullptr)
348 if (bottom_constraints_ !=
nullptr)
354 TabVector_LIST* vectors,
356 TabVector_IT it1(vectors);
357 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
359 TabVector_IT it2(it1);
360 for (it2.forward(); !it2.at_first(); it2.forward()) {
362 if (v2->
SimilarTo(vertical, *v1, grid)) {
366 v2->
Print(
"Merging");
367 v1->
Print(
"by deleting");
371 v2->
Print(
"Producing");
374 merged_vector -= v2->
startpt();
376 v2->
Print(
"Garbage result of merge?");
394 int v_scale = abs(vertical.
y());
406 if (grid ==
nullptr) {
414 sort_key_ < other.sort_key_) ?
this : &other;
415 int top_y = mover->endpt_.
y();
416 int bottom_y = mover->startpt_.
y();
417 int left = std::min(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
418 int right = std::max(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
419 int shift = abs(sort_key_ - other.sort_key_) / v_scale;
431 if (box.
top() > bottom_y)
436 int right_at_box = left_at_box;
438 right_at_box += shift;
440 left_at_box -= shift;
441 if (std::min(right_at_box, static_cast<int>(box.
right())) > std::max(left_at_box, static_cast<int>(box.
left())))
451 extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
452 extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
454 alignment_ = other->alignment_;
457 BLOBNBOX_C_IT it1(&boxes_);
458 BLOBNBOX_C_IT it2(&other->boxes_);
459 while (!it2.empty()) {
465 while (box1.
bottom() < box2.
bottom() && !it1.at_last()) {
471 it1.add_to_end(bbox2);
472 }
else if (bbox1 != bbox2) {
473 it1.add_before_stay_put(bbox2);
487 TabVector_C_IT it(&partners_);
490 if (it.data() == partner)
493 it.add_after_then_move(partner);
498 TabVector_C_IT it(&partners_);
499 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
500 if (it.data() == other)
507 static const char*
const kAlignmentNames[] = {
519 "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
521 prefix, kAlignmentNames[alignment_], startpt_.
x(), startpt_.
y(),
522 endpt_.
x(), endpt_.
y(), mean_width_, percent_score_, sort_key_,
523 boxes_.length(), partners_.length());
529 BLOBNBOX_C_IT it(&boxes_);
530 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
533 tprintf(
"Box at (%d,%d)->(%d,%d)\n",
540 #ifndef GRAPHICS_DISABLED
553 tab_win->
Line(startpt_.
x(), startpt_.
y(), endpt_.
x(), endpt_.
y());
555 tab_win->
Line(startpt_.
x(), startpt_.
y(), startpt_.
x(), extended_ymin_);
556 tab_win->
Line(endpt_.
x(), extended_ymax_, endpt_.
x(), endpt_.
y());
558 snprintf(score_buf,
sizeof(score_buf),
"%d", percent_score_);
560 tab_win->
Text(startpt_.
x(), startpt_.
y(), score_buf);
569 if (needs_evaluation_)
581 needs_evaluation_ =
false;
582 int length = endpt_.
y() - startpt_.
y();
583 if (length == 0 || boxes_.empty()) {
585 Print(
"Zero length in evaluate");
589 BLOBNBOX_C_IT it(&boxes_);
591 int height_count = 0;
592 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
595 int height = box.
height();
596 mean_height += height;
599 if (height_count > 0) mean_height /= height_count;
607 STATS gutters(0, max_gutter + 1);
611 int num_deleted_boxes = 0;
612 bool text_on_image =
false;
614 const TBOX* prev_good_box =
nullptr;
615 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
618 int mid_y = (box.
top() + box.
bottom()) / 2;
621 tprintf(
"After already deleting %d boxes, ", num_deleted_boxes);
622 Print(
"Starting evaluation");
630 int tab_x =
XAtY(mid_y);
634 bbox, &gutter_width, &neighbour_gap);
636 tprintf(
"Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
638 gutter_width, neighbour_gap);
644 gutters.
add(gutter_width, 1);
648 if (prev_good_box !=
nullptr) {
649 int vertical_gap = box.
bottom() - prev_good_box->
top();
650 double size1 = sqrt(static_cast<double>(prev_good_box->
area()));
651 double size2 = sqrt(static_cast<double>(box.
area()));
653 good_length += vertical_gap;
655 tprintf(
"Box and prev good, gap=%d, target %g, goodlength=%d\n",
663 prev_good_box = &box;
665 text_on_image =
true;
669 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
671 gutter_width, neighbour_gap);
678 Print(
"Evaluating:");
683 int search_top = endpt_.
y();
684 int search_bottom = startpt_.
y();
687 prev_good_box =
nullptr;
688 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
691 int mid_y = (box.
top() + box.
bottom()) / 2;
695 int tab_x =
XAtY(mid_y);
705 bbox, &gutter_width, &neighbour_gap);
708 if (prev_good_box ==
nullptr) {
711 search_bottom = box.
top();
713 prev_good_box = &box;
714 search_top = box.
bottom();
718 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
720 gutter_width, median_gutter);
728 if (prev_good_box !=
nullptr) {
731 int length = endpt_.
y() - startpt_.
y();
732 percent_score_ = 100 * good_length / length;
733 if (num_deleted_boxes > 0) {
741 if (search_bottom > search_top) {
742 search_bottom = startpt_.
y();
743 search_top = endpt_.
y();
747 min_gutter_width *= mean_height;
749 if (median_gutter > max_gutter_width)
750 max_gutter_width = median_gutter;
751 int gutter_width = finder->
GutterWidth(search_bottom, search_top, *
this,
752 text_on_image, max_gutter_width,
754 if (gutter_width < min_gutter_width) {
756 tprintf(
"Rejecting bad tab Vector with %d gutter vs %g min\n",
757 gutter_width, min_gutter_width);
759 boxes_.shallow_clear();
762 tprintf(
"Final gutter %d, vs limit of %g, required shift = %d\n",
763 gutter_width, min_gutter_width, required_shift);
771 Print(
"Evaluation complete:");
781 needs_refit_ =
false;
782 if (boxes_.empty()) {
791 sort_key_ =
SortKey(vertical, midpt.
x(), midpt.
y());
792 return startpt_.
y() != endpt_.
y();
794 if (!force_parallel && !
IsRagged()) {
797 BLOBNBOX_C_IT it(&boxes_);
799 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
804 linepoints.
Add(boxpt);
807 linepoints.
Add(top_pt);
810 linepoints.
Fit(&startpt_, &endpt_);
811 if (startpt_.
y() != endpt_.
y()) {
813 vertical -= startpt_;
816 int start_y = startpt_.
y();
817 int end_y = endpt_.
y();
818 sort_key_ =
IsLeftTab() ? INT32_MAX : -INT32_MAX;
819 BLOBNBOX_C_IT it(&boxes_);
824 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
827 mean_width_ += box.
width();
832 int bottom_y = box.
bottom();
833 int top_y = box.
top();
834 int key =
SortKey(vertical, x1, bottom_y);
837 startpt_ =
ICOORD(x1, bottom_y);
839 key =
SortKey(vertical, x1, top_y);
842 startpt_ =
ICOORD(x1, top_y);
849 if (width_count > 0) {
850 mean_width_ = (mean_width_ + width_count - 1) / width_count;
852 endpt_ = startpt_ + vertical;
853 needs_evaluation_ =
true;
854 if (start_y != end_y) {
856 startpt_.
set_x(
XAtY(vertical, sort_key_, start_y));
857 startpt_.
set_y(start_y);
858 endpt_.
set_x(
XAtY(vertical, sort_key_, end_y));
867 if (!partners_.singleton())
869 TabVector_C_IT partner_it(&partners_);
877 if (!partners_.singleton())
879 TabVector_C_IT partner_it(&partners_);
881 BLOBNBOX_C_IT box_it1(&boxes_);
882 BLOBNBOX_C_IT box_it2(&partner->boxes_);
886 Print(
"Testing for vertical text");
887 partner->
Print(
" partner");
890 int num_unmatched = 0;
891 int total_widths = 0;
895 STATS gaps(0, width * 2);
897 box_it2.mark_cycle_pt();
898 for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
901 if (prev_bbox !=
nullptr) {
904 while (!box_it2.cycled_list() && box_it2.data() != bbox &&
908 if (!box_it2.cycled_list() && box_it2.data() == bbox &&
914 total_widths += box.
width();
917 if (num_unmatched + num_matched == 0)
return nullptr;
918 double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
920 int min_box_match = static_cast<int>((num_matched + num_unmatched) *
922 bool is_vertical = (gaps.
get_total() > 0 &&
923 num_matched >= min_box_match &&
924 gaps.
median() <= max_gap);
926 tprintf(
"gaps=%d, matched=%d, unmatched=%d, min_match=%d "
927 "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
928 gaps.
get_total(), num_matched, num_unmatched, min_box_match,
929 gaps.
median(), avg_width, max_gap, is_vertical?
"Yes":
"No");
931 return (is_vertical) ? partner :
nullptr;
937 : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
938 sort_key_(0), percent_score_(0), mean_width_(0),
939 needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
940 top_constraints_(nullptr), bottom_constraints_(nullptr) {
941 BLOBNBOX_C_IT it(&boxes_);
942 it.add_list_after(boxes);
948 void TabVector::Delete(TabVector* replacement) {
949 TabVector_C_IT it(&partners_);
950 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
952 TabVector_C_IT p_it(&partner->partners_);
955 TabVector* partner_replacement = replacement;
956 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
958 if (p_partner == partner_replacement) {
959 partner_replacement =
nullptr;
964 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
966 if (p_partner ==
this) {
968 if (partner_replacement !=
nullptr)
969 p_it.add_before_stay_put(partner_replacement);
972 if (partner_replacement !=
nullptr) {
973 partner_replacement->AddPartner(partner);