21 #include "config_auto.h"
87 TabVector_LIST* vlines,
int vertical_x,
int vertical_y,
90 resolution_(resolution),
91 image_origin_(0, tright.y() - 1) {
93 v_it_.set_to_list(&vectors_);
94 v_it_.add_list_after(vlines);
95 SetVerticalSkewAndParellelize(vertical_x, vertical_y);
100 if (width_cb_ !=
NULL)
114 BLOBNBOX_LIST* blobs,
116 BLOBNBOX_C_IT>* grid) {
117 BLOBNBOX_IT blob_it(blobs);
119 int reject_count = 0;
120 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
123 if (
InsertBlob(h_spread, v_spread, blob, grid)) {
130 tprintf(
"Inserted %d blobs into grid, %d rejected.\n",
131 b_count, reject_count);
142 BLOBNBOX_C_IT>* grid) {
150 grid->InsertBBox(h_spread, v_spread, blob);
165 BLOBNBOX_IT blob_it(blobs);
166 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
184 bool ignore_unmergeables,
int max_gutter_width,
185 int* required_shift) {
187 int bottom_x = v.
XAtY(bottom_y);
188 int top_x = v.
XAtY(top_y);
189 int start_x = right_to_left ?
MAX(top_x, bottom_x) :
MIN(top_x, bottom_x);
192 int min_gap = max_gutter_width;
197 if (box.
bottom() >= top_y || box.
top() <= bottom_y)
206 int mid_y = (box.
bottom() + box.
top()) / 2;
211 int tab_x = v.
XAtY(mid_y);
214 gap = tab_x - box.
right();
215 if (gap < 0 && box.
left() - tab_x < *required_shift)
216 *required_shift = box.
left() - tab_x;
218 gap = box.
left() - tab_x;
219 if (gap < 0 && box.
right() - tab_x > *required_shift)
220 *required_shift = box.
right() - tab_x;
222 if (gap > 0 && gap < min_gap)
226 return min_gap - abs(*required_shift);
231 int max_gutter,
bool left,
233 int* neighbour_gap ) {
236 int gutter_x = left ? box.
left() : box.
right();
237 int internal_x = left ? box.
right() : box.
left();
239 int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
240 *gutter_width = max_gutter;
244 *gutter_width += tab_gap;
247 tprintf(
"Looking in gutter\n");
249 BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
252 if (gutter_bbox !=
NULL) {
254 *gutter_width = left ? tab_x - gutter_box.
right()
255 : gutter_box.
left() - tab_x;
257 if (*gutter_width >= max_gutter) {
259 TBOX gutter_box(box);
261 gutter_box.
set_left(tab_x - max_gutter - 1);
262 gutter_box.
set_right(tab_x - max_gutter);
264 if (tab_gutter < tab_x - 1)
265 *gutter_width = tab_x - tab_gutter;
267 gutter_box.
set_left(tab_x + max_gutter);
268 gutter_box.
set_right(tab_x + max_gutter + 1);
270 if (tab_gutter > tab_x + 1)
271 *gutter_width = tab_gutter - tab_x;
274 if (*gutter_width > max_gutter)
275 *gutter_width = max_gutter;
278 tprintf(
"Looking for neighbour\n");
279 BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
284 if (neighbour !=
NULL) {
290 if (left && n_box.
left() < neighbour_edge)
291 neighbour_edge = n_box.
left();
292 else if (!left && n_box.
right() > neighbour_edge)
293 neighbour_edge = n_box.
right();
295 *neighbour_gap = left ? neighbour_edge - internal_x
296 : internal_x - neighbour_edge;
330 int top_y = box.
top();
331 int bottom_y = box.
bottom();
332 int mid_y = (top_y + bottom_y) / 2;
333 int right = crossing ? (box.
left() + box.
right()) / 2 : box.
right();
334 int min_key, max_key;
337 while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
339 while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
347 int x = v->
XAtY(mid_y);
349 (v->
VOverlap(top_y, bottom_y) > 0 ||
351 if (best_v ==
NULL || x < best_x) {
356 key_limit = v->
sort_key() + max_key - min_key;
361 if (v_it_.at_last() ||
365 }
while (!v_it_.at_first());
374 int top_y = box.
top();
375 int bottom_y = box.
bottom();
376 int mid_y = (top_y + bottom_y) / 2;
377 int left = crossing ? (box.
left() + box.
right()) / 2 : box.
left();
378 int min_key, max_key;
381 while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
383 while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
392 int x = v->
XAtY(mid_y);
394 (v->
VOverlap(top_y, bottom_y) > 0 ||
396 if (best_v ==
NULL || x > best_x) {
401 key_limit = v->
sort_key() - (max_key - min_key);
406 if (v_it_.at_first() ||
410 }
while (!v_it_.at_last());
418 ICOORDELT_IT it(&column_widths_);
419 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
421 if (w->
x() - 1 <= width && width <= w->
y() + 1)
430 return size1 > size2 * 2 || size2 > size1 * 2;
436 return size1 > size2 * 5 || size2 > size1 * 5;
445 BLOBNBOX_LIST* image_blobs,
TO_BLOCK* block,
446 int min_gutter_width,
447 double tabfind_aligned_gap_fraction,
451 tabfind_aligned_gap_fraction,
453 ComputeColumnWidths(tab_win, part_grid);
457 if (!Deskew(hlines, image_blobs, block, deskew, reskew))
459 part_grid->
Deskew(*deskew);
460 ApplyTabConstraints();
461 #ifndef GRAPHICS_DISABLED
466 image_origin_.
x(), image_origin_.
y());
473 #endif // GRAPHICS_DISABLED
494 BLOBNBOX_IT blob_it = &block->
blobs;
496 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
497 BLOBNBOX* large_blob = large_it.data();
499 blob_it.add_to_end(large_it.extract());
504 tprintf(
"Moved %d large blobs to normal list\n",
506 #ifndef GRAPHICS_DISABLED
511 #endif // GRAPHICS_DISABLED
520 *min_key =
MIN(key1, key2);
521 *max_key =
MAX(key1, key2);
525 #ifndef GRAPHICS_DISABLED
527 TabVector_IT it(&vectors_);
528 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
542 int min_gutter_width,
543 double tabfind_aligned_gap_fraction,
550 if (image_blobs !=
NULL)
553 ScrollView* initial_win = FindTabBoxes(min_gutter_width,
554 tabfind_aligned_gap_fraction);
555 FindAllTabVectors(min_gutter_width);
569 #ifndef GRAPHICS_DISABLED
570 for (
int i = 0; i < boxes.
size(); ++i) {
571 TBOX box = boxes[i]->bounding_box();
572 int left_x = box.
left();
573 int right_x = box.
right();
574 int top_y = box.
top();
575 int bottom_y = box.
bottom();
578 win->
Rectangle(left_x, bottom_y, right_x, top_y);
581 #endif // GRAPHICS_DISABLED
// Fragment of TabFind::FindTabBoxes. NOTE(review): this view is a listing
// scrape - the leading numbers are listing line numbers and several source
// lines are absent, so only the visible behaviour is described here.
// Visible behaviour: empties both candidate vectors, runs a full search over
// this grid testing each blob with TestBoxForTabs, sorts the two candidate
// vectors, and (when graphics are compiled in) displays them.
586 ScrollView* TabFind::FindTabBoxes(
int min_gutter_width,
587 double tabfind_aligned_gap_fraction) {
// Start from empty candidate lists.
588 left_tab_boxes_.
clear();
589 right_tab_boxes_.
clear();
// Full-search iteration over the grid represented by `this`.
591 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(
this);
592 gsearch.StartFullSearch();
594 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
// Both tuning parameters are forwarded unchanged to the per-blob test.
595 if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
// Left candidates are ordered with SortByBoxLeft, right candidates with
// SortRightToLeft.
605 left_tab_boxes_.
sort(SortByBoxLeft<BLOBNBOX>);
606 right_tab_boxes_.
sort(SortRightToLeft<BLOBNBOX>);
// Debug display only; compiled out when GRAPHICS_DISABLED is defined.
608 #ifndef GRAPHICS_DISABLED
614 DisplayBoxVector(left_tab_boxes_, tab_win);
615 DisplayBoxVector(right_tab_boxes_, tab_win);
618 #endif // GRAPHICS_DISABLED
// Fragment of TabFind::TestBoxForTabs. NOTE(review): listing line numbers are
// fused into the text and many source lines are missing from this view.
// Visible behaviour: runs a radius search around bbox and, for each
// neighbour, updates a set of flags/counters that decide whether bbox is a
// left tab, a right tab, or a ragged-edge candidate on either side.
622 bool TabFind::TestBoxForTabs(
BLOBNBOX* bbox,
int min_gutter_width,
623 double tabfind_aligned_gap_fraction) {
624 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(
this);
627 int left_column_edge = bbox->
left_rule();
// Cache the blob's box geometry (the declaration of `box` is not visible).
630 int left_x = box.
left();
631 int right_x = box.
right();
632 int top_y = box.
top();
633 int bottom_y = box.
bottom();
634 int height = box.
height();
637 tprintf(
"Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
638 left_x, top_y, right_x, bottom_y,
639 left_column_edge, right_column_edge);
// The search is centred on the middle of the blob's box.
643 radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
// Tail of the min_spacing initialiser: a fraction of the blob height ...
649 static_cast<int>(height * tabfind_aligned_gap_fraction);
// ... raised to min_gutter_width when that is larger.
650 if (min_gutter_width > min_spacing)
651 min_spacing = min_gutter_width;
// Ragged gutter: kRaggedGutterMultiple grid cells, again floored at
// min_gutter_width.
652 int min_ragged_gutter = kRaggedGutterMultiple *
gridsize();
653 if (min_gutter_width > min_ragged_gutter)
654 min_ragged_gutter = min_gutter_width;
// x positions min_spacing outside the box on each side; neighbours reaching
// past these are compared against them in the loop below.
655 int target_right = left_x - min_spacing;
656 int target_left = right_x + min_spacing;
// Every hypothesis starts true/zero and is disproved or scored by neighbours.
672 bool is_left_tab =
true;
673 bool is_right_tab =
true;
674 bool maybe_ragged_left =
true;
675 bool maybe_ragged_right =
true;
676 int maybe_left_tab_up = 0;
677 int maybe_right_tab_up = 0;
678 int maybe_left_tab_down = 0;
679 int maybe_right_tab_down = 0;
// The conditions guarding the next three resets are on missing lines.
682 maybe_ragged_left =
false;
687 is_right_tab =
false;
688 maybe_ragged_right =
false;
694 while ((neighbour = radsearch.NextRadSearch()) !=
NULL) {
// The blob itself is returned by the search; the action taken is not visible.
695 if (neighbour == bbox)
698 int n_left = nbox.
left();
699 int n_right = nbox.
right();
701 tprintf(
"Neighbour at (%d,%d)->(%d,%d)\n",
702 n_left, nbox.
bottom(), n_right, nbox.
top());
// Tests whether the neighbour lies outside bbox's column edges, or bbox
// lies outside the neighbour's rule edges (consequence not visible).
705 if (n_right > right_column_edge || n_left < left_column_edge ||
706 left_x < neighbour->left_rule() || right_x > neighbour->
right_rule())
708 int n_mid_x = (n_left + n_right) / 2;
709 int n_mid_y = (nbox.
top() + nbox.
bottom()) / 2;
// ---- Left-side evidence ----
// Case 1: neighbour centred at/left of our left edge and reaching into the
// required spacing (n_right >= target_right).
710 if (n_mid_x <= left_x && n_right >= target_right) {
716 if (n_mid_y > bottom_y)
718 }
else if (
NearlyEqual(left_x, n_left, alignment_tolerance)) {
// Case 2: neighbour's left edge aligns with ours within tolerance.
// NOTE(review): counters are only touched while above -MAX_INT32, which
// looks like a "permanently vetoed" sentinel set on a missing line - confirm.
721 if (n_mid_y > top_y && maybe_left_tab_up > -
MAX_INT32)
723 if (n_mid_y < bottom_y && maybe_left_tab_down > -
MAX_INT32)
724 ++maybe_left_tab_down;
725 }
else if (n_left < left_x && n_right >= left_x) {
// Case 3: neighbour straddles our left edge - counts against a left tab.
728 tprintf(
"Maybe Not a left tab\n");
729 if (n_mid_y > top_y && maybe_left_tab_up > -
MAX_INT32)
731 if (n_mid_y < bottom_y && maybe_left_tab_down > -
MAX_INT32)
732 --maybe_left_tab_down;
// A y-overlapping neighbour that starts left of us and reaches target_right
// rules out a ragged left edge.
734 if (n_left < left_x && nbox.
y_overlap(box) && n_right >= target_right) {
735 maybe_ragged_left =
false;
737 tprintf(
"Not a ragged left\n");
// ---- Right-side evidence (mirror image of the left-side cases) ----
739 if (n_mid_x >= right_x && n_left <= target_left) {
742 is_right_tab =
false;
745 if (n_mid_y > bottom_y)
747 }
else if (
NearlyEqual(right_x, n_right, alignment_tolerance)) {
749 tprintf(
"Maybe a right tab\n");
750 if (n_mid_y > top_y && maybe_right_tab_up > -
MAX_INT32)
751 ++maybe_right_tab_up;
752 if (n_mid_y < bottom_y && maybe_right_tab_down > -
MAX_INT32)
753 ++maybe_right_tab_down;
754 }
else if (n_right > right_x && n_left <= right_x) {
757 tprintf(
"Maybe Not a right tab\n");
758 if (n_mid_y > top_y && maybe_right_tab_up > -
MAX_INT32)
759 --maybe_right_tab_up;
760 if (n_mid_y < bottom_y && maybe_right_tab_down > -
MAX_INT32)
761 --maybe_right_tab_down;
763 if (n_right > right_x && nbox.
y_overlap(box) && n_left <= target_left) {
764 maybe_ragged_right =
false;
766 tprintf(
"Not a ragged right\n");
// ---- Final classification ----
// Left: accepted if never disproved, or if the aligned-neighbour score above
// or below exceeds 1; otherwise a ragged left is tried via ConfirmRaggedLeft.
772 if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
774 }
else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
// Right: same rule using the right-side flags and ConfirmRaggedRight.
779 if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
781 }
else if (maybe_ragged_right &&
782 ConfirmRaggedRight(bbox, min_ragged_gutter)) {
788 tprintf(
"Left result = %s, Right result=%s\n",
// Fragment of TabFind::ConfirmRaggedLeft (the lines that declare and first
// adjust search_box are missing from this view).
// Returns the result of NothingYOverlapsInBox for a search box whose left
// edge has been pushed min_gutter further left.
799 bool TabFind::ConfirmRaggedLeft(
BLOBNBOX* bbox,
int min_gutter) {
// Widen the search region leftwards by the required gutter.
802 search_box.set_left(search_box.left() - min_gutter);
// bbox's own bounding box is passed as the target to test against.
803 return NothingYOverlapsInBox(search_box, bbox->
bounding_box());
// Fragment of TabFind::ConfirmRaggedRight (the declaration of search_box is
// missing from this view).
// Returns the result of NothingYOverlapsInBox for a strip min_gutter wide that
// starts at search_box's right edge.
808 bool TabFind::ConfirmRaggedRight(
BLOBNBOX* bbox,
int min_gutter) {
// Collapse the box onto its right edge, then extend right by min_gutter.
810 search_box.
set_left(search_box.right());
811 search_box.set_right(search_box.right() + min_gutter);
// bbox's own bounding box is passed as the target to test against.
812 return NothingYOverlapsInBox(search_box, bbox->
bounding_box());
// Fragment of TabFind::NothingYOverlapsInBox (declarations of rsearch, blob
// and box, and both return statements, are missing from this view).
// Visible behaviour: rectangle-searches search_box and checks each blob found
// for a y-overlap with target_box.
817 bool TabFind::NothingYOverlapsInBox(
const TBOX& search_box,
818 const TBOX& target_box) {
820 rsearch.StartRectSearch(search_box);
822 while ((blob = rsearch.NextRectSearch()) !=
NULL) {
// A hit is a box that y-overlaps the target and is not equal to the target
// itself, so the target cannot veto itself. The statement executed on a hit
// is not visible here.
824 if (box.
y_overlap(target_box) && !(box == target_box))
// Fragment of TabFind::FindAllTabVectors. NOTE(review): most call heads are on
// lines missing from this view; only the trailing argument lines survive, so
// the callee names are not asserted here.
// Visible behaviour: collects vectors into a local dummy_vectors list while
// updating vertical_x/vertical_y, clears it, revisits the left and right tab
// boxes, collects again, appends the result to vectors_ and finally calls
// SetVerticalSkewAndParellelize with the vertical estimate.
830 void TabFind::FindAllTabVectors(
int min_gutter_width) {
// Scratch list that receives vectors before they are moved into vectors_.
832 TabVector_LIST dummy_vectors;
843 &vertical_x, &vertical_y);
847 &vertical_x, &vertical_y);
848 if (vector_count > 0)
// Results of the first pass are discarded here.
852 dummy_vectors.clear();
853 for (
int i = 0; i < left_tab_boxes_.
size(); ++i) {
854 BLOBNBOX* bbox = left_tab_boxes_[i];
858 for (
int i = 0; i < right_tab_boxes_.
size(); ++i) {
859 BLOBNBOX* bbox = right_tab_boxes_[i];
864 tprintf(
"Beginning real tab search with vertical = %d,%d...\n",
865 vertical_x, vertical_y);
// Four calls (heads not visible) each fill dummy_vectors and may update the
// vertical estimate through the pointers.
871 &dummy_vectors, &vertical_x, &vertical_y);
873 &dummy_vectors, &vertical_x, &vertical_y);
875 &dummy_vectors, &vertical_x, &vertical_y);
877 &dummy_vectors, &vertical_x, &vertical_y);
// Move everything collected into the member list.
879 TabVector_IT v_it(&vectors_);
880 v_it.add_list_after(&dummy_vectors);
882 SetVerticalSkewAndParellelize(vertical_x, vertical_y);
887 int min_gutter_width, TabVector_LIST* vectors,
888 int* vertical_x,
int* vertical_y) {
889 TabVector_IT vector_it(vectors);
890 int vector_count = 0;
895 for (
int i = 0; i < boxes.
size(); ++i) {
899 TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width,
901 bbox, vertical_x, vertical_y);
902 if (vector !=
NULL) {
904 vector_it.add_to_end(vector);
918 TabVector* TabFind::FindTabVector(
int search_size_multiple,
919 int min_gutter_width,
922 int* vertical_x,
int* vertical_y) {
924 AlignedBlobParams align_params(*vertical_x, *vertical_y,
926 search_size_multiple, min_gutter_width,
// Fragment of TabFind::SetVerticalSkewAndParellelize (several lines, including
// the per-vector action inside the loop, are missing from this view).
// NOTE(review): "Parellelize" is a misspelling of "Parallelize", but the name
// is called from other places in this file, so it must not be renamed in
// isolation.
934 void TabFind::SetVerticalSkewAndParellelize(
int vertical_x,
int vertical_y) {
// Debug print; its argument line is not visible.
938 tprintf(
"Vertical skew vector=(%d,%d)\n",
// Re-seat the member iterator on vectors_ and visit every TabVector.
940 v_it_.set_to_list(&vectors_);
941 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
942 TabVector* v = v_it_.data();
// Fragment of TabFind::SortVectors. NOTE(review): the statement between the
// signature and the line below (presumably the sort itself) is missing from
// this view - confirm against the full source.
950 void TabFind::SortVectors() {
// Re-seat the member iterator on vectors_.
952 v_it_.set_to_list(&vectors_);
// Fragment of TabFind::EvaluateTabs (the evaluation call and the rejection
// condition are on lines missing from this view).
// Visible behaviour: walks vectors_, considers only non-separator vectors,
// and deletes the ones reported as having "Too few boxes".
956 void TabFind::EvaluateTabs() {
957 TabVector_IT rule_it(&vectors_);
958 for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
959 TabVector* tab = rule_it.data();
// Separators are not subject to the evaluation below.
960 if (!tab->IsSeparator()) {
964 tab->Print(
"Too few boxes");
// Unlink the rejected vector from the list and free it.
965 delete rule_it.extract();
// Re-seat v_it_ after the removal; presumably so it cannot be left pointing
// at the deleted element - confirm.
966 v_it_.set_to_list(&vectors_);
968 tab->Print(
"Evaluated tab");
// Fragment of TabFind::ComputeColumnWidths (the computation of
// col_widths_size and the bodies of the graphics sections are missing from
// this view).
// Visible behaviour: builds a STATS histogram, fills it from the partitions,
// turns it into column widths with MakeColumnWidths, then runs the partition
// pass a second time with a NULL histogram.
977 void TabFind::ComputeColumnWidths(
ScrollView* tab_win,
978 ColPartitionGrid* part_grid) {
979 #ifndef GRAPHICS_DISABLED
982 #endif // GRAPHICS_DISABLED
// Histogram over [0, col_widths_size + 1).
985 STATS col_widths(0, col_widths_size + 1);
// Pass 1: accumulate partition widths into the histogram.
986 ApplyPartitionsToColumnWidths(part_grid, &col_widths);
987 #ifndef GRAPHICS_DISABLED
988 if (tab_win !=
NULL) {
991 #endif // GRAPHICS_DISABLED
995 MakeColumnWidths(col_widths_size, &col_widths);
// Pass 2: NULL selects the non-histogram path inside the callee.
997 ApplyPartitionsToColumnWidths(part_grid,
NULL);
// Fragment of TabFind::ApplyPartitionsToColumnWidths (declarations of gsearch,
// part, left_vector, right_vector, line_left, line_right, w and true_width
// are on lines missing from this view).
// Visible behaviour: for every partition in part_grid, takes its first and
// last blob, derives a width from line_left/line_right, and then either
// (col_widths != NULL) registers a partner vector, or (otherwise) walks
// column_widths_ looking for an entry to refine.
1006 void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
1007 STATS* col_widths) {
1011 gsearch.StartFullSearch();
1013 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1014 BLOBNBOX_C_IT blob_it(part->boxes());
// Partitions without blobs are tested for here (action not visible).
1015 if (blob_it.empty())
// First and last element of the partition's box list.
1017 BLOBNBOX* left_blob = blob_it.data();
1018 blob_it.move_to_last();
1019 BLOBNBOX* right_blob = blob_it.data();
// A usable left vector must exist and must not be a right tab ...
1022 if (left_vector ==
NULL || left_vector->IsRightTab())
// ... and a usable right vector must exist and must not be a left tab.
1026 if (right_vector ==
NULL || right_vector->IsLeftTab())
1032 int width = line_right - line_left;
1033 if (col_widths !=
NULL) {
1034 AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
// Only widths of at least kMinColumnWidth pass this test.
1035 if (width >= kMinColumnWidth)
// Non-histogram path: look for a stored column width whose y() is within 1
// of this width.
1039 ICOORDELT_IT it(&column_widths_);
1040 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1042 if (NearlyEqual<int>(width, w->
y(), 1)) {
// Raise x() to true_width when true_width lies in (x(), y()].
1044 if (true_width <= w->y() && true_width > w->
x())
1045 w->
set_x(true_width);
// Fragment of TabFind::MakeColumnWidths (the enclosing loop, the second half
// of both for-conditions, and the construction of w are missing from this
// view).
// Visible behaviour: takes the mode of the histogram, absorbs neighbouring
// piles on both sides into it (removing them from the histogram as it goes),
// and appends an entry to column_widths_ when the merged count is large
// enough.
1056 void TabFind::MakeColumnWidths(
int col_widths_size,
STATS* col_widths) {
1057 ICOORDELT_IT w_it(&column_widths_);
// Total is captured before any piles are removed.
1058 int total_col_count = col_widths->
get_total();
1060 int width = col_widths->
mode();
1061 int col_count = col_widths->
pile_count(width);
// Adding the negated count empties the mode's pile.
1062 col_widths->
add(width, -col_count);
// Sweep downwards from the mode, merging and emptying each pile visited.
1064 for (
int left = width - 1; left > 0 &&
1067 int new_count = col_widths->
pile_count(left);
1068 col_count += new_count;
1069 col_widths->
add(left, -new_count);
// Sweep upwards from the mode in the same way.
1071 for (
int right = width + 1; right < col_widths_size &&
1074 int new_count = col_widths->
pile_count(right);
1075 col_count += new_count;
1076 col_widths->
add(right, -new_count);
// Keep the column only if it has enough lines both absolutely and as a
// fraction of all lines counted.
1078 if (col_count > kMinLinesInColumn &&
1079 col_count > kMinFractionalLinesInColumn * total_col_count) {
1081 w_it.add_after_then_move(w);
1083 tprintf(
"Column of width %d has %d = %.2f%% lines\n",
1085 100.0 * col_count / total_col_count);
// Fragment of TabFind::MarkVerticalText (declarations of gsearch and blob and
// the loop body are missing from this view).
// Visible behaviour: prints a debug message and iterates over every blob
// returned by a full search.
1092 void TabFind::MarkVerticalText() {
1094 tprintf(
"Checking for vertical lines\n");
1096 gsearch.StartFullSearch();
1098 while ((blob = gsearch.NextFullSearch()) !=
NULL) {
// Fragment of TabFind::FindMedianGutterWidth (closing braces and at least one
// statement near the end are missing from this view).
// Visible behaviour: walks `lines`, and for each left tab that is not a
// separator and has a single partner, records the horizontal gap between the
// previous partner's x and this tab's x. Returns the median gap as an int.
1107 int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1108 TabVector_IT it(lines);
// x of the previous pair's partner; -1 means "none seen yet".
1109 int prev_right = -1;
// Upper bound of both histograms, scaled by resolution_.
1110 int max_gap =
static_cast<int>(kMaxGutterWidthAbsolute *
resolution_);
1111 STATS gaps(0, max_gap);
1112 STATS heights(0, max_gap);
1113 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1114 TabVector* v = it.data();
1115 TabVector* partner = v->GetSinglePartner();
// Only left, non-separator tabs with a single partner contribute.
1116 if (!v->IsLeftTab() || v->IsSeparator() || !partner)
continue;
// NOTE(review): despite the name, `heights` stores an x difference between
// the tab and its partner (a horizontal extent), not a height.
1117 heights.add(partner->startpt().x() - v->startpt().x(), 1);
// A gap is counted only when this tab starts to the right of the previous
// partner; prev_right values <= 0 are treated as "no previous pair".
1118 if (prev_right > 0 && v->startpt().x() > prev_right) {
1119 gaps.add(v->startpt().x() - prev_right, 1);
1121 prev_right = partner->startpt().x();
1124 tprintf(
"TabGutter total %d median_gap %.2f median_hgt %.2f\n",
1125 gaps.get_total(), gaps.median(), heights.median());
// The median is truncated towards zero by the cast.
1127 return static_cast<int>(gaps.median());
1136 bool look_left,
bool ignore_images,
1137 double min_overlap_fraction,
1138 int gap_limit,
int top_y,
int bottom_y) {
1139 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(
this);
1141 int left = box.
left();
1142 int right = box.
right();
1143 int mid_x = (left + right) / 2;
1144 sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1149 while ((neighbour = sidesearch.NextSideSearch(look_left)) !=
NULL) {
1151 tprintf(
"Adjacent blob: considering box:");
1154 if (neighbour == bbox ||
1158 int n_top_y = nbox.
top();
1159 int n_bottom_y = nbox.
bottom();
1160 int v_overlap =
MIN(n_top_y, top_y) -
MAX(n_bottom_y, bottom_y);
1161 int height = top_y - bottom_y;
1162 int n_height = n_top_y - n_bottom_y;
1163 if (v_overlap > min_overlap_fraction *
MIN(height, n_height) &&
1164 (min_overlap_fraction == 0.0 || !
DifferentSizes(height, n_height))) {
1165 int n_left = nbox.
left();
1166 int n_right = nbox.
right();
1167 int h_gap =
MAX(n_left, left) -
MIN(n_right, right);
1168 int n_mid_x = (n_left + n_right) / 2;
1169 if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1170 if (h_gap > gap_limit) {
1173 tprintf(
"Giving up due to big gap = %d vs %d\n",
1182 tprintf(
"Collision with like tab of type %d at %d,%d\n",
1190 if (result ==
NULL || h_gap < best_gap) {
1203 tprintf(
"Insufficient overlap\n");
1207 tprintf(
"Giving up due to end of search\n");
1216 TabVector* left, TabVector* right) {
1219 if (left->IsSeparator()) {
1222 if (v !=
NULL && v != left && v->IsLeftTab() &&
1223 v->XAtY(left_box.
top()) > left->XAtY(left_box.
top())) {
1225 left->ExtendToBox(left_blob);
1230 v_it_.move_to_first();
1233 if (right->IsSeparator()) {
1236 tprintf(
"Box edge (%d,%d-%d)",
1238 right->Print(
" looking for improvement for");
1241 if (v !=
NULL && v != right && v->IsRightTab() &&
1242 v->XAtY(right_box.
top()) < right->XAtY(right_box.
top())) {
1244 right->ExtendToBox(right_blob);
1246 right->Print(
"Extended vector");
1253 v_it_.move_to_first();
1255 right->Print(
"Created new vector");
1259 left->AddPartner(right);
1260 right->AddPartner(left);
// Fragment of TabFind::CleanupTabs (the else-branch and closing braces are
// missing from this view).
// Visible behaviour: moves every separator or partnerless vector from
// vectors_ into dead_vectors_ instead of deleting it.
1265 void TabFind::CleanupTabs() {
1269 TabVector_IT it(&vectors_);
1270 TabVector_IT dead_it(&dead_vectors_);
1271 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1272 TabVector* v = it.data();
1273 if (v->IsSeparator() || v->Partnerless()) {
// Ownership moves to dead_vectors_; the object is kept alive.
1274 dead_it.add_after_then_move(it.extract());
// Re-seat the member iterator after vectors_ has been modified.
1275 v_it_.set_to_list(&vectors_);
1284 BLOBNBOX_IT it(blobs);
1285 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1286 it.data()->rotate_box(rotation);
1292 bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
1294 ComputeDeskewVectors(deskew, reskew);
1305 int width = pixGetWidth(pix_grey);
1306 int height = pixGetHeight(pix_grey);
1307 float angle = atan2(deskew->
y(), deskew->
x());
1309 Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP,
1310 L_BRING_IN_WHITE, width, height);
1313 ICOORD center_offset(width / 2, height / 2);
1314 ICOORD new_center_offset(center_offset);
1315 new_center_offset.rotate(*deskew);
1316 image_origin_ += new_center_offset - center_offset;
1320 ICOORD corner_offset((width - pixGetWidth(pix_rot)) / 2,
1321 (pixGetHeight(pix_rot) - height) / 2);
1322 image_origin_ += corner_offset;
1324 pixDestroy(&pix_grey);
1325 pixDestroy(&pix_rot);
1330 TabVector_IT h_it(hlines);
1331 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1332 TabVector* h = h_it.data();
1335 TabVector_IT d_it(&dead_vectors_);
1336 for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1337 TabVector* d = d_it.data();
1340 SetVerticalSkewAndParellelize(0, 1);
1343 grid_box.rotate_large(*deskew);
1344 Init(
gridsize(), grid_box.botleft(), grid_box.topright());
1354 TabVector_LIST* horizontal_lines,
1355 int* min_gutter_width) {
1359 TabVector_LIST ex_verticals;
1360 TabVector_IT ex_v_it(&ex_verticals);
1361 TabVector_LIST vlines;
1362 TabVector_IT v_it(&vlines);
1363 while (!v_it_.empty()) {
1367 ex_v_it.add_after_then_move(v);
1369 v_it.add_after_then_move(v);
1376 int median_gutter = FindMedianGutterWidth(&vlines);
1377 if (median_gutter > *min_gutter_width)
1378 *min_gutter_width = median_gutter;
1380 TabVector_IT h_it(horizontal_lines);
1381 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1385 v_it_.add_list_after(horizontal_lines);
1386 v_it_.move_to_first();
1387 h_it.set_to_list(horizontal_lines);
1388 h_it.add_list_after(&ex_verticals);
1399 v_it_.move_to_first();
1400 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1401 if (!v_it_.data()->IsSeparator())
1402 delete v_it_.extract();
1410 TabVector_LIST temp_list;
1411 TabVector_IT temp_it(&temp_list);
1412 v_it_.move_to_first();
1416 while (!v_it_.empty()) {
1420 temp_it.add_before_then_move(v);
1422 v_it_.add_list_after(&temp_list);
1423 v_it_.move_to_first();
1426 int tmp = grid_box.
left();
// Fragment of TabFind::ComputeDeskewVectors (the computation of `length` and
// of *deskew is on lines missing from this view).
// Visible behaviour: *reskew is set to *deskew with the y component negated.
1433 void TabFind::ComputeDeskewVectors(
FCOORD* deskew,
FCOORD* reskew) {
// `length` held a squared quantity before this line - presumably the squared
// norm of the vertical skew vector; confirm against the full source.
1435 length = sqrt(length);
// Same x, opposite y: the mirror of deskew about the x axis.
1438 reskew->
set_x(deskew->
x());
1439 reskew->
set_y(-deskew->
y());
// Fragment of TabFind::ApplyTabConstraints (closing braces and a few
// statements, e.g. the actions of the bare `if` tests, are missing from this
// view).
// Visible behaviour: four passes over vectors_ -
//   1. SetupConstraints() on every vector;
//   2. SetupPartnerConstraints() (no-argument form) on vectors;
//   3. for each right tab, SetupPartnerConstraints(partner) against later
//      left tabs that vertically overlap it;
//   4. ApplyConstraints() on every non-separator vector.
1444 void TabFind::ApplyTabConstraints() {
1445 TabVector_IT it(&vectors_);
// Pass 1.
1446 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1447 TabVector* v = it.data();
1448 v->SetupConstraints();
// Pass 2.
1450 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1451 TabVector* v = it.data();
1455 v->SetupPartnerConstraints();
// Pass 3: only right tabs act as the anchor.
1460 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1461 TabVector* v = it.data();
1462 if (!v->IsRightTab())
// Copy of the outer iterator; advancing until it wraps to the first element
// visits only the vectors after v in list order.
1465 TabVector_IT partner_it(it);
1466 for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1467 TabVector* partner = partner_it.data();
// Candidates must be left tabs with a non-zero vertical overlap with v.
1468 if (!partner->IsLeftTab() || !v->VOverlap(*partner))
1470 v->SetupPartnerConstraints(partner);
// Pass 4.
1474 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1475 TabVector* v = it.data();
1476 if (!v->IsSeparator())
1477 v->ApplyConstraints();
void set_x(inT16 xin)
rewrite function
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
const double kMinBaselineCoverage
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
void set_right_crossing_rule(int new_right)
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
static bool WithinTestRegion(int detail_level, int x, int y)
const ICOORD & botleft() const
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
const int kTabRadiusFactor
bool textord_debug_images
static bool UnMergeableType(BlobRegionType type)
static bool DifferentSizes(int size1, int size2)
bool leader_on_left() const
bool joined_to_prev() const
const int kMinVerticalSearch
static bool VeryDifferentSizes(int size1, int size2)
const double kAlignedFraction
const int kMaxTextLineBlobRatio
const double kMinGutterWidthAbsolute
const int kMaxVerticalSearch
int VOverlap(const TabVector &other) const
void add(inT32 value, inT32 count)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
#define BOOL_VAR(name, val, comment)
void Rotate(const FCOORD &rotation)
const double kMinImageArea
static const STRING & textord_debug_pix()
bool CommonWidth(int width)
void Image(struct Pix *image, int x_pos, int y_pos)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
const double kCosMaxSkewAngle
BBC * NextSideSearch(bool right_to_left)
TabType left_tab_type() const
void rotate_large(const FCOORD &vec)
BLOBNBOX_LIST small_blobs
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
void DisplayBoxes(ScrollView *window)
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
const int kRaggedGutterMultiple
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
void DeleteUnownedNoise()
int textord_debug_tabfind
int ExtendedOverlap(int top_y, int bottom_y) const
static int SortKey(const ICOORD &vertical, int x, int y)
const double kMaxHorizontalGap
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
inT16 y() const
access function
const double kMinFractionalLinesInColumn
bool NearlyEqual(T x, T y, T tolerance)
void set_x(float xin)
rewrite function
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
BlobRegionType region_type() const
ScrollView * DisplayTabVectors(ScrollView *tab_win)
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
void Deskew(const FCOORD &deskew)
void set_y(float yin)
rewrite function
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
const double kSmoothFactor
BLOBNBOX_LIST noise_blobs
void set_left_crossing_rule(int new_left)
bool y_overlap(const TBOX &box) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
const int kMinEvaluatedTabs
const ICOORD & bleft() const
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
const double kMaxGutterWidthAbsolute
tesseract::ColPartition * owner() const
static int SortVectorsByKey(const void *v1, const void *v2)
void StartSideSearch(int x, int ymin, int ymax)
void plot_graded_blobs(ScrollView *to_win)
void set_right_rule(int new_right)
const double kMaxBaselineError
ScrollView * MakeWindow(int x, int y, const char *window_name)
void TidyBlobs(TO_BLOCK *block)
inT16 x() const
access function
void SetBlockRuleEdges(TO_BLOCK *block)
void set_region_type(BlobRegionType new_type)
void set_left_rule(int new_left)
void Rectangle(int x1, int y1, int x2, int y2)
bool UniquelyVertical() const
bool textord_tabfind_show_initialtabs
inT32 pile_count(inT32 value) const
const ICOORD & tright() const
const ICOORD & topright() const
const TBOX & bounding_box() const
void set_right_tab_type(TabType new_type)
bool leader_on_right() const
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
const double kLineFragmentAspectRatio
const int kMinLinesInColumn
const int kColumnWidthFactor
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
void plot_noise_blobs(ScrollView *to_win)
const int kMinColumnWidth
bool textord_tabfind_show_finaltabs
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
BlobTextFlowType flow() const
BLOBNBOX_LIST large_blobs
TabType right_tab_type() const
const int kMinTextLineBlobRatio
const double kCharVerticalOverlapFraction
const int kMaxRaggedSearch
void Display(ScrollView *tab_win)
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
void set_left_tab_type(TabType new_type)