21 #include "config_auto.h" 
   87                  TabVector_LIST* vlines, 
int vertical_x, 
int vertical_y,
 
   90     resolution_(resolution),
 
   91     image_origin_(0, tright.y() - 1) {
 
   93   v_it_.set_to_list(&vectors_);
 
   94   v_it_.add_list_after(vlines);
 
   95   SetVerticalSkewAndParellelize(vertical_x, vertical_y);
 
  100   if (width_cb_ != 
NULL)
 
  114                                 BLOBNBOX_LIST* blobs,
 
  116                                        BLOBNBOX_C_IT>* grid) {
 
  117   BLOBNBOX_IT blob_it(blobs);
 
  119   int reject_count = 0;
 
  120   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
 
  123     if (
InsertBlob(h_spread, v_spread, blob, grid)) {
 
  130     tprintf(
"Inserted %d blobs into grid, %d rejected.\n",
 
  131             b_count, reject_count);
 
  142                                 BLOBNBOX_C_IT>* grid) {
 
  150   grid->InsertBBox(h_spread, v_spread, blob);
 
  165   BLOBNBOX_IT blob_it(blobs);
 
  166   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
 
  184                          bool ignore_unmergeables, 
int max_gutter_width,
 
  185                          int* required_shift) {
 
  187   int bottom_x = v.
XAtY(bottom_y);
 
  188   int top_x = v.
XAtY(top_y);
 
  189   int start_x = right_to_left ? 
MAX(top_x, bottom_x) : 
MIN(top_x, bottom_x);
 
  192   int min_gap = max_gutter_width;
 
  197     if (box.
bottom() >= top_y || box.
top() <= bottom_y)
 
  206     int mid_y = (box.
bottom() + box.
top()) / 2;
 
  211     int tab_x = v.
XAtY(mid_y);
 
  214       gap = tab_x - box.
right();
 
  215       if (gap < 0 && box.
left() - tab_x < *required_shift)
 
  216         *required_shift = box.
left() - tab_x;
 
  218       gap = box.
left() - tab_x;
 
  219       if (gap < 0 && box.
right() - tab_x > *required_shift)
 
  220         *required_shift = box.
right() - tab_x;
 
  222     if (gap > 0 && gap < min_gap)
 
  226   return min_gap - abs(*required_shift);
 
  231                                          int max_gutter, 
bool left,
 
  233                                          int* neighbour_gap ) {
 
  236   int gutter_x = left ? box.
left() : box.
right();
 
  237   int internal_x = left ? box.
right() : box.
left();
 
  239   int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
 
  240   *gutter_width = max_gutter;
 
  244     *gutter_width += tab_gap;
 
  247     tprintf(
"Looking in gutter\n");
 
  249   BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
 
  252   if (gutter_bbox != 
NULL) {
 
  254     *gutter_width = left ? tab_x - gutter_box.
right()
 
  255                         : gutter_box.
left() - tab_x;
 
  257   if (*gutter_width >= max_gutter) {
 
  259     TBOX gutter_box(box);
 
  261       gutter_box.
set_left(tab_x - max_gutter - 1);
 
  262       gutter_box.
set_right(tab_x - max_gutter);
 
  264       if (tab_gutter < tab_x - 1)
 
  265         *gutter_width = tab_x - tab_gutter;
 
  267       gutter_box.
set_left(tab_x + max_gutter);
 
  268       gutter_box.
set_right(tab_x + max_gutter + 1);
 
  270       if (tab_gutter > tab_x + 1)
 
  271         *gutter_width = tab_gutter - tab_x;
 
  274   if (*gutter_width > max_gutter)
 
  275     *gutter_width = max_gutter;
 
  278     tprintf(
"Looking for neighbour\n");
 
  279   BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
 
  284   if (neighbour != 
NULL) {
 
  290     if (left && n_box.
left() < neighbour_edge)
 
  291       neighbour_edge = n_box.
left();
 
  292     else if (!left && n_box.
right() > neighbour_edge)
 
  293       neighbour_edge = n_box.
right();
 
  295   *neighbour_gap = left ? neighbour_edge - internal_x
 
  296                         : internal_x - neighbour_edge;
 
  330   int top_y = box.
top();
 
  331   int bottom_y = box.
bottom();
 
  332   int mid_y = (top_y + bottom_y) / 2;
 
  333   int right = crossing ? (box.
left() + box.
right()) / 2 : box.
right();
 
  334   int min_key, max_key;
 
  337   while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
 
  339   while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
 
  347     int x = v->
XAtY(mid_y);
 
  349         (v->
VOverlap(top_y, bottom_y) > 0 ||
 
  351       if (best_v == 
NULL || x < best_x) {
 
  356         key_limit = v->
sort_key() + max_key - min_key;
 
  361     if (v_it_.at_last() ||
 
  365   } 
while (!v_it_.at_first());
 
  374   int top_y = box.
top();
 
  375   int bottom_y = box.
bottom();
 
  376   int mid_y = (top_y + bottom_y) / 2;
 
  377   int left = crossing ? (box.
left() + box.
right()) / 2 : box.
left();
 
  378   int min_key, max_key;
 
  381   while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
 
  383   while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
 
  392     int x = v->
XAtY(mid_y);
 
  394         (v->
VOverlap(top_y, bottom_y) > 0 ||
 
  396       if (best_v == 
NULL || x > best_x) {
 
  401         key_limit = v->
sort_key() - (max_key - min_key);
 
  406     if (v_it_.at_first() ||
 
  410   } 
while (!v_it_.at_last());
 
  418   ICOORDELT_IT it(&column_widths_);
 
  419   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
  421     if (w->
x() - 1 <= width && width <= w->
y() + 1)
 
  430   return size1 > size2 * 2 || size2 > size1 * 2;
 
  436   return size1 > size2 * 5 || size2 > size1 * 5;
 
  445                              BLOBNBOX_LIST* image_blobs, 
TO_BLOCK* block,
 
  446                              int min_gutter_width,
 
  447                              double tabfind_aligned_gap_fraction,
 
  451                                               tabfind_aligned_gap_fraction,
 
  453   ComputeColumnWidths(tab_win, part_grid);
 
  457   if (!Deskew(hlines, image_blobs, block, deskew, reskew))
 
  459   part_grid->
Deskew(*deskew);
 
  460   ApplyTabConstraints();
 
  461   #ifndef GRAPHICS_DISABLED 
  466                      image_origin_.
x(), image_origin_.
y());
 
  473   #endif  // GRAPHICS_DISABLED 
  494   BLOBNBOX_IT blob_it = &block->
blobs;
 
  496   for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
 
  497     BLOBNBOX* large_blob = large_it.data();
 
  499       blob_it.add_to_end(large_it.extract());
 
  504     tprintf(
"Moved %d large blobs to normal list\n",
 
  506     #ifndef GRAPHICS_DISABLED 
  511     #endif  // GRAPHICS_DISABLED 
  520   *min_key = 
MIN(key1, key2);
 
  521   *max_key = 
MAX(key1, key2);
 
  525 #ifndef GRAPHICS_DISABLED 
  527   TabVector_IT it(&vectors_);
 
  528   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
  542                                            int min_gutter_width,
 
  543                                            double tabfind_aligned_gap_fraction,
 
  550   if (image_blobs != 
NULL)
 
  553   ScrollView* initial_win = FindTabBoxes(min_gutter_width,
 
  554                                          tabfind_aligned_gap_fraction);
 
  555   FindAllTabVectors(min_gutter_width);
 
  569   #ifndef GRAPHICS_DISABLED 
  570   for (
int i = 0; i < boxes.
size(); ++i) {
 
  571     TBOX box = boxes[i]->bounding_box();
 
  572     int left_x = box.
left();
 
  573     int right_x = box.
right();
 
  574     int top_y = box.
top();
 
  575     int bottom_y = box.
bottom();
 
  578     win->
Rectangle(left_x, bottom_y, right_x, top_y);
 
  581   #endif  // GRAPHICS_DISABLED 
  586 ScrollView* TabFind::FindTabBoxes(
int min_gutter_width,
 
  587                                   double tabfind_aligned_gap_fraction) {
 
  588   left_tab_boxes_.
clear();
 
  589   right_tab_boxes_.
clear();
 
  591   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(
this);
 
  592   gsearch.StartFullSearch();
 
  594   while ((bbox = gsearch.NextFullSearch()) != 
NULL) {
 
  595     if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
 
  605   left_tab_boxes_.
sort(SortByBoxLeft<BLOBNBOX>);
 
  606   right_tab_boxes_.
sort(SortRightToLeft<BLOBNBOX>);
 
  608   #ifndef GRAPHICS_DISABLED 
  614     DisplayBoxVector(left_tab_boxes_, tab_win);
 
  615     DisplayBoxVector(right_tab_boxes_, tab_win);
 
  618   #endif  // GRAPHICS_DISABLED 
  622 bool TabFind::TestBoxForTabs(
BLOBNBOX* bbox, 
int min_gutter_width,
 
  623                              double tabfind_aligned_gap_fraction) {
 
  624   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(
this);
 
  627   int left_column_edge = bbox->
left_rule();
 
  630   int left_x = box.
left();
 
  631   int right_x = box.
right();
 
  632   int top_y = box.
top();
 
  633   int bottom_y = box.
bottom();
 
  634   int height = box.
height();
 
  637     tprintf(
"Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
 
  638             left_x, top_y, right_x, bottom_y,
 
  639             left_column_edge, right_column_edge);
 
  643   radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
 
  649       static_cast<int>(height * tabfind_aligned_gap_fraction);
 
  650   if (min_gutter_width > min_spacing)
 
  651     min_spacing = min_gutter_width;
 
  652   int min_ragged_gutter = kRaggedGutterMultiple * 
gridsize();
 
  653   if (min_gutter_width > min_ragged_gutter)
 
  654     min_ragged_gutter = min_gutter_width;
 
  655   int target_right = left_x - min_spacing;
 
  656   int target_left = right_x + min_spacing;
 
  672   bool is_left_tab = 
true;
 
  673   bool is_right_tab = 
true;
 
  674   bool maybe_ragged_left = 
true;
 
  675   bool maybe_ragged_right = 
true;
 
  676   int maybe_left_tab_up = 0;
 
  677   int maybe_right_tab_up = 0;
 
  678   int maybe_left_tab_down = 0;
 
  679   int maybe_right_tab_down = 0;
 
  682     maybe_ragged_left = 
false;
 
  687     is_right_tab = 
false;
 
  688     maybe_ragged_right = 
false;
 
  694   while ((neighbour = radsearch.NextRadSearch()) != 
NULL) {
 
  695     if (neighbour == bbox)
 
  698     int n_left = nbox.
left();
 
  699     int n_right = nbox.
right();
 
  701       tprintf(
"Neighbour at (%d,%d)->(%d,%d)\n",
 
  702               n_left, nbox.
bottom(), n_right, nbox.
top());
 
  705     if (n_right > right_column_edge || n_left < left_column_edge ||
 
  706         left_x < neighbour->left_rule() || right_x > neighbour->
right_rule())
 
  708     int n_mid_x = (n_left + n_right) / 2;
 
  709     int n_mid_y = (nbox.
top() + nbox.
bottom()) / 2;
 
  710     if (n_mid_x <= left_x && n_right >= target_right) {
 
  716       if (n_mid_y > bottom_y)
 
  718     } 
else if (
NearlyEqual(left_x, n_left, alignment_tolerance)) {
 
  721       if (n_mid_y > top_y && maybe_left_tab_up > -
MAX_INT32)
 
  723       if (n_mid_y < bottom_y && maybe_left_tab_down > -
MAX_INT32)
 
  724         ++maybe_left_tab_down;
 
  725     } 
else if (n_left < left_x && n_right >= left_x) {
 
  728         tprintf(
"Maybe Not a left tab\n");
 
  729       if (n_mid_y > top_y && maybe_left_tab_up > -
MAX_INT32)
 
  731       if (n_mid_y < bottom_y && maybe_left_tab_down > -
MAX_INT32)
 
  732         --maybe_left_tab_down;
 
  734     if (n_left < left_x && nbox.
y_overlap(box) && n_right >= target_right) {
 
  735       maybe_ragged_left = 
false;
 
  737         tprintf(
"Not a ragged left\n");
 
  739     if (n_mid_x >= right_x && n_left <= target_left) {
 
  742       is_right_tab = 
false;
 
  745       if (n_mid_y > bottom_y)
 
  747     } 
else if (
NearlyEqual(right_x, n_right, alignment_tolerance)) {
 
  749         tprintf(
"Maybe a right tab\n");
 
  750       if (n_mid_y > top_y && maybe_right_tab_up > -
MAX_INT32)
 
  751         ++maybe_right_tab_up;
 
  752       if (n_mid_y < bottom_y && maybe_right_tab_down > -
MAX_INT32)
 
  753         ++maybe_right_tab_down;
 
  754     } 
else if (n_right > right_x && n_left <= right_x) {
 
  757         tprintf(
"Maybe Not a right tab\n");
 
  758       if (n_mid_y > top_y && maybe_right_tab_up > -
MAX_INT32)
 
  759         --maybe_right_tab_up;
 
  760       if (n_mid_y < bottom_y && maybe_right_tab_down > -
MAX_INT32)
 
  761         --maybe_right_tab_down;
 
  763     if (n_right > right_x && nbox.
y_overlap(box) && n_left <= target_left) {
 
  764       maybe_ragged_right = 
false;
 
  766         tprintf(
"Not a ragged right\n");
 
  772   if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
 
  774   } 
else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
 
  779   if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
 
  781   } 
else if (maybe_ragged_right &&
 
  782              ConfirmRaggedRight(bbox, min_ragged_gutter)) {
 
  788     tprintf(
"Left result = %s, Right result=%s\n",
 
  799 bool TabFind::ConfirmRaggedLeft(
BLOBNBOX* bbox, 
int min_gutter) {
 
  802   search_box.set_left(search_box.left() - min_gutter);
 
  803   return NothingYOverlapsInBox(search_box, bbox->
bounding_box());
 
  808 bool TabFind::ConfirmRaggedRight(
BLOBNBOX* bbox, 
int min_gutter) {
 
  810   search_box.
set_left(search_box.right());
 
  811   search_box.set_right(search_box.right() + min_gutter);
 
  812   return NothingYOverlapsInBox(search_box, bbox->
bounding_box());
 
  817 bool TabFind::NothingYOverlapsInBox(
const TBOX& search_box,
 
  818                                     const TBOX& target_box) {
 
  820   rsearch.StartRectSearch(search_box);
 
  822   while ((blob = rsearch.NextRectSearch()) != 
NULL) {
 
  824     if (box.
y_overlap(target_box) && !(box == target_box))
 
  830 void TabFind::FindAllTabVectors(
int min_gutter_width) {
 
  832   TabVector_LIST dummy_vectors;
 
  843                                       &vertical_x, &vertical_y);
 
  847                                    &vertical_x, &vertical_y);
 
  848     if (vector_count > 0)
 
  852   dummy_vectors.clear();
 
  853   for (
int i = 0; i < left_tab_boxes_.
size(); ++i) {
 
  854     BLOBNBOX* bbox = left_tab_boxes_[i];
 
  858   for (
int i = 0; i < right_tab_boxes_.
size(); ++i) {
 
  859     BLOBNBOX* bbox = right_tab_boxes_[i];
 
  864     tprintf(
"Beginning real tab search with vertical = %d,%d...\n",
 
  865             vertical_x, vertical_y);
 
  871                  &dummy_vectors, &vertical_x, &vertical_y);
 
  873                  &dummy_vectors, &vertical_x, &vertical_y);
 
  875                  &dummy_vectors, &vertical_x, &vertical_y);
 
  877                  &dummy_vectors, &vertical_x, &vertical_y);
 
  879   TabVector_IT v_it(&vectors_);
 
  880   v_it.add_list_after(&dummy_vectors);
 
  882   SetVerticalSkewAndParellelize(vertical_x, vertical_y);
 
  887                             int min_gutter_width, TabVector_LIST* vectors,
 
  888                             int* vertical_x, 
int* vertical_y) {
 
  889   TabVector_IT vector_it(vectors);
 
  890   int vector_count = 0;
 
  895   for (
int i = 0; i < boxes.
size(); ++i) {
 
  899       TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width,
 
  901                                         bbox, vertical_x, vertical_y);
 
  902       if (vector != 
NULL) {
 
  904         vector_it.add_to_end(vector);
 
  918 TabVector* TabFind::FindTabVector(
int search_size_multiple,
 
  919                                   int min_gutter_width,
 
  922                                   int* vertical_x, 
int* vertical_y) {
 
  924   AlignedBlobParams align_params(*vertical_x, *vertical_y,
 
  926                                  search_size_multiple, min_gutter_width,
 
  934 void TabFind::SetVerticalSkewAndParellelize(
int vertical_x, 
int vertical_y) {
 
  938     tprintf(
"Vertical skew vector=(%d,%d)\n",
 
  940   v_it_.set_to_list(&vectors_);
 
  941   for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
 
  942     TabVector* v = v_it_.data();
 
  950 void TabFind::SortVectors() {
 
  952   v_it_.set_to_list(&vectors_);
 
  956 void TabFind::EvaluateTabs() {
 
  957   TabVector_IT rule_it(&vectors_);
 
  958   for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
 
  959     TabVector* tab = rule_it.data();
 
  960     if (!tab->IsSeparator()) {
 
  964           tab->Print(
"Too few boxes");
 
  965         delete rule_it.extract();
 
  966         v_it_.set_to_list(&vectors_);
 
  968         tab->Print(
"Evaluated tab");
 
  977 void TabFind::ComputeColumnWidths(
ScrollView* tab_win,
 
  978                                   ColPartitionGrid* part_grid) {
 
  979   #ifndef GRAPHICS_DISABLED 
  982   #endif  // GRAPHICS_DISABLED 
  985   STATS col_widths(0, col_widths_size + 1);
 
  986   ApplyPartitionsToColumnWidths(part_grid, &col_widths);
 
  987   #ifndef GRAPHICS_DISABLED 
  988   if (tab_win != 
NULL) {
 
  991   #endif  // GRAPHICS_DISABLED 
  995   MakeColumnWidths(col_widths_size, &col_widths);
 
  997   ApplyPartitionsToColumnWidths(part_grid, 
NULL);
 
 1006 void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
 
 1007                                             STATS* col_widths) {
 
 1011   gsearch.StartFullSearch();
 
 1013   while ((part = gsearch.NextFullSearch()) != 
NULL) {
 
 1014     BLOBNBOX_C_IT blob_it(part->boxes());
 
 1015     if (blob_it.empty())
 
 1017     BLOBNBOX* left_blob = blob_it.data();
 
 1018     blob_it.move_to_last();
 
 1019     BLOBNBOX* right_blob = blob_it.data();
 
 1022     if (left_vector == 
NULL || left_vector->IsRightTab())
 
 1026     if (right_vector == 
NULL || right_vector->IsLeftTab())
 
 1032     int width = line_right - line_left;
 
 1033     if (col_widths != 
NULL) {
 
 1034       AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
 
 1035       if (width >= kMinColumnWidth)
 
 1039       ICOORDELT_IT it(&column_widths_);
 
 1040       for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1042         if (NearlyEqual<int>(width, w->
y(), 1)) {
 
 1044           if (true_width <= w->y() && true_width > w->
x())
 
 1045             w->
set_x(true_width);
 
 1056 void TabFind::MakeColumnWidths(
int col_widths_size, 
STATS* col_widths) {
 
 1057   ICOORDELT_IT w_it(&column_widths_);
 
 1058   int total_col_count = col_widths->
get_total();
 
 1060     int width = col_widths->
mode();
 
 1061     int col_count = col_widths->
pile_count(width);
 
 1062     col_widths->
add(width, -col_count);
 
 1064     for (
int left = width - 1; left > 0 &&
 
 1067       int new_count = col_widths->
pile_count(left);
 
 1068       col_count += new_count;
 
 1069       col_widths->
add(left, -new_count);
 
 1071     for (
int right = width + 1; right < col_widths_size &&
 
 1074       int new_count = col_widths->
pile_count(right);
 
 1075       col_count += new_count;
 
 1076       col_widths->
add(right, -new_count);
 
 1078     if (col_count > kMinLinesInColumn &&
 
 1079         col_count > kMinFractionalLinesInColumn * total_col_count) {
 
 1081       w_it.add_after_then_move(w);
 
 1083         tprintf(
"Column of width %d has %d = %.2f%% lines\n",
 
 1085               100.0 * col_count / total_col_count);
 
 1092 void TabFind::MarkVerticalText() {
 
 1094     tprintf(
"Checking for vertical lines\n");
 
 1096   gsearch.StartFullSearch();
 
 1098   while ((blob = gsearch.NextFullSearch()) != 
NULL) {
 
 1107 int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
 
 1108   TabVector_IT it(lines);
 
 1109   int prev_right = -1;
 
 1110   int max_gap = 
static_cast<int>(kMaxGutterWidthAbsolute * 
resolution_);
 
 1111   STATS gaps(0, max_gap);
 
 1112   STATS heights(0, max_gap);
 
 1113   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1114     TabVector* v = it.data();
 
 1115     TabVector* partner = v->GetSinglePartner();
 
 1116     if (!v->IsLeftTab() || v->IsSeparator() || !partner) 
continue;
 
 1117     heights.add(partner->startpt().x() - v->startpt().x(), 1);
 
 1118     if (prev_right > 0 && v->startpt().x() > prev_right) {
 
 1119       gaps.add(v->startpt().x() - prev_right, 1);
 
 1121     prev_right = partner->startpt().x();
 
 1124     tprintf(
"TabGutter total %d  median_gap %.2f  median_hgt %.2f\n",
 
 1125             gaps.get_total(), gaps.median(), heights.median());
 
 1127   return static_cast<int>(gaps.median());
 
 1136                                 bool look_left, 
bool ignore_images,
 
 1137                                 double min_overlap_fraction,
 
 1138                                 int gap_limit, 
int top_y, 
int bottom_y) {
 
 1139   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(
this);
 
 1141   int left = box.
left();
 
 1142   int right = box.
right();
 
 1143   int mid_x = (left + right) / 2;
 
 1144   sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
 
 1149   while ((neighbour = sidesearch.NextSideSearch(look_left)) != 
NULL) {
 
 1151       tprintf(
"Adjacent blob: considering box:");
 
 1154     if (neighbour == bbox ||
 
 1158     int n_top_y = nbox.
top();
 
 1159     int n_bottom_y = nbox.
bottom();
 
 1160     int v_overlap = 
MIN(n_top_y, top_y) - 
MAX(n_bottom_y, bottom_y);
 
 1161     int height = top_y - bottom_y;
 
 1162     int n_height = n_top_y - n_bottom_y;
 
 1163     if (v_overlap > min_overlap_fraction * 
MIN(height, n_height) &&
 
 1164         (min_overlap_fraction == 0.0 || !
DifferentSizes(height, n_height))) {
 
 1165       int n_left = nbox.
left();
 
 1166       int n_right = nbox.
right();
 
 1167       int h_gap = 
MAX(n_left, left) - 
MIN(n_right, right);
 
 1168       int n_mid_x = (n_left + n_right) / 2;
 
 1169       if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
 
 1170         if (h_gap > gap_limit) {
 
 1173             tprintf(
"Giving up due to big gap = %d vs %d\n",
 
 1182             tprintf(
"Collision with like tab of type %d at %d,%d\n",
 
 1190         if (result == 
NULL || h_gap < best_gap) {
 
 1203       tprintf(
"Insufficient overlap\n");
 
 1207     tprintf(
"Giving up due to end of search\n");
 
 1216                                TabVector* left, TabVector* right) {
 
 1219   if (left->IsSeparator()) {
 
 1222     if (v != 
NULL && v != left && v->IsLeftTab() &&
 
 1223         v->XAtY(left_box.
top()) > left->XAtY(left_box.
top())) {
 
 1225       left->ExtendToBox(left_blob);
 
 1230       v_it_.move_to_first();
 
 1233   if (right->IsSeparator()) {
 
 1236       tprintf(
"Box edge (%d,%d-%d)",
 
 1238       right->Print(
" looking for improvement for");
 
 1241     if (v != 
NULL && v != right && v->IsRightTab() &&
 
 1242         v->XAtY(right_box.
top()) < right->XAtY(right_box.
top())) {
 
 1244       right->ExtendToBox(right_blob);
 
 1246         right->Print(
"Extended vector");
 
 1253       v_it_.move_to_first();
 
 1255         right->Print(
"Created new vector");
 
 1259   left->AddPartner(right);
 
 1260   right->AddPartner(left);
 
 1265 void TabFind::CleanupTabs() {
 
 1269   TabVector_IT it(&vectors_);
 
 1270   TabVector_IT dead_it(&dead_vectors_);
 
 1271   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1272     TabVector* v = it.data();
 
 1273     if (v->IsSeparator() || v->Partnerless()) {
 
 1274       dead_it.add_after_then_move(it.extract());
 
 1275       v_it_.set_to_list(&vectors_);
 
 1284   BLOBNBOX_IT it(blobs);
 
 1285   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1286     it.data()->rotate_box(rotation);
 
 1292 bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
 
 1294   ComputeDeskewVectors(deskew, reskew);
 
 1305     int width = pixGetWidth(pix_grey);
 
 1306     int height = pixGetHeight(pix_grey);
 
 1307     float angle = atan2(deskew->
y(), deskew->
x());
 
 1309     Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP,
 
 1310                              L_BRING_IN_WHITE, width, height);
 
 1313     ICOORD center_offset(width / 2, height / 2);
 
 1314     ICOORD new_center_offset(center_offset);
 
 1315     new_center_offset.rotate(*deskew);
 
 1316     image_origin_ += new_center_offset - center_offset;
 
 1320     ICOORD corner_offset((width - pixGetWidth(pix_rot)) / 2,
 
 1321                          (pixGetHeight(pix_rot) - height) / 2);
 
 1322     image_origin_ += corner_offset;
 
 1324     pixDestroy(&pix_grey);
 
 1325     pixDestroy(&pix_rot);
 
 1330   TabVector_IT h_it(hlines);
 
 1331   for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
 
 1332     TabVector* h = h_it.data();
 
 1335   TabVector_IT d_it(&dead_vectors_);
 
 1336   for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
 
 1337     TabVector* d = d_it.data();
 
 1340   SetVerticalSkewAndParellelize(0, 1);
 
 1343   grid_box.rotate_large(*deskew);
 
 1344   Init(
gridsize(), grid_box.botleft(), grid_box.topright());
 
 1354                                    TabVector_LIST* horizontal_lines,
 
 1355                                    int* min_gutter_width) {
 
 1359   TabVector_LIST ex_verticals;
 
 1360   TabVector_IT ex_v_it(&ex_verticals);
 
 1361   TabVector_LIST vlines;
 
 1362   TabVector_IT v_it(&vlines);
 
 1363   while (!v_it_.empty()) {
 
 1367       ex_v_it.add_after_then_move(v);
 
 1369       v_it.add_after_then_move(v);
 
 1376   int median_gutter = FindMedianGutterWidth(&vlines);
 
 1377   if (median_gutter > *min_gutter_width)
 
 1378     *min_gutter_width = median_gutter;
 
 1380   TabVector_IT h_it(horizontal_lines);
 
 1381   for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
 
 1385   v_it_.add_list_after(horizontal_lines);
 
 1386   v_it_.move_to_first();
 
 1387   h_it.set_to_list(horizontal_lines);
 
 1388   h_it.add_list_after(&ex_verticals);
 
 1399   v_it_.move_to_first();
 
 1400   for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
 
 1401     if (!v_it_.data()->IsSeparator())
 
 1402       delete v_it_.extract();
 
 1410   TabVector_LIST temp_list;
 
 1411   TabVector_IT temp_it(&temp_list);
 
 1412   v_it_.move_to_first();
 
 1416   while (!v_it_.empty()) {
 
 1420     temp_it.add_before_then_move(v);
 
 1422   v_it_.add_list_after(&temp_list);
 
 1423   v_it_.move_to_first();
 
 1426   int tmp = grid_box.
left();
 
 1433 void TabFind::ComputeDeskewVectors(
FCOORD* deskew, 
FCOORD* reskew) {
 
 1435   length = sqrt(length);
 
 1438   reskew->
set_x(deskew->
x());
 
 1439   reskew->
set_y(-deskew->
y());
 
 1444 void TabFind::ApplyTabConstraints() {
 
 1445   TabVector_IT it(&vectors_);
 
 1446   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1447     TabVector* v = it.data();
 
 1448     v->SetupConstraints();
 
 1450   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1451     TabVector* v = it.data();
 
 1455     v->SetupPartnerConstraints();
 
 1460   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1461     TabVector* v = it.data();
 
 1462     if (!v->IsRightTab())
 
 1465     TabVector_IT partner_it(it);
 
 1466     for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
 
 1467       TabVector* partner = partner_it.data();
 
 1468       if (!partner->IsLeftTab() || !v->VOverlap(*partner))
 
 1470       v->SetupPartnerConstraints(partner);
 
 1474   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
 
 1475     TabVector* v = it.data();
 
 1476     if (!v->IsSeparator())
 
 1477       v->ApplyConstraints();
 
void set_x(inT16 xin)
rewrite function 
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
const double kMinBaselineCoverage
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
void set_right_crossing_rule(int new_right)
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
static bool WithinTestRegion(int detail_level, int x, int y)
const ICOORD & botleft() const 
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
const int kTabRadiusFactor
bool textord_debug_images
static bool UnMergeableType(BlobRegionType type)
static bool DifferentSizes(int size1, int size2)
bool leader_on_left() const 
bool joined_to_prev() const 
const int kMinVerticalSearch
static bool VeryDifferentSizes(int size1, int size2)
const double kAlignedFraction
const int kMaxTextLineBlobRatio
const double kMinGutterWidthAbsolute
const int kMaxVerticalSearch
int VOverlap(const TabVector &other) const 
void add(inT32 value, inT32 count)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
#define BOOL_VAR(name, val, comment)
void Rotate(const FCOORD &rotation)
const double kMinImageArea
static const STRING & textord_debug_pix()
bool CommonWidth(int width)
void Image(struct Pix *image, int x_pos, int y_pos)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
const double kCosMaxSkewAngle
BBC * NextSideSearch(bool right_to_left)
TabType left_tab_type() const 
void rotate_large(const FCOORD &vec)
BLOBNBOX_LIST small_blobs
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
void DisplayBoxes(ScrollView *window)
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
const int kRaggedGutterMultiple
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
void DeleteUnownedNoise()
int textord_debug_tabfind
int ExtendedOverlap(int top_y, int bottom_y) const 
static int SortKey(const ICOORD &vertical, int x, int y)
const double kMaxHorizontalGap
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
inT16 y() const 
access_function 
const double kMinFractionalLinesInColumn
bool NearlyEqual(T x, T y, T tolerance)
void set_x(float xin)
rewrite function 
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
BlobRegionType region_type() const 
ScrollView * DisplayTabVectors(ScrollView *tab_win)
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
void Deskew(const FCOORD &deskew)
void set_y(float yin)
rewrite function 
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
const double kSmoothFactor
BLOBNBOX_LIST noise_blobs
void set_left_crossing_rule(int new_left)
bool y_overlap(const TBOX &box) const 
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
const int kMinEvaluatedTabs
const ICOORD & bleft() const 
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
const double kMaxGutterWidthAbsolute
tesseract::ColPartition * owner() const 
static int SortVectorsByKey(const void *v1, const void *v2)
void StartSideSearch(int x, int ymin, int ymax)
void plot_graded_blobs(ScrollView *to_win)
void set_right_rule(int new_right)
const double kMaxBaselineError
ScrollView * MakeWindow(int x, int y, const char *window_name)
void TidyBlobs(TO_BLOCK *block)
inT16 x() const 
access function 
void SetBlockRuleEdges(TO_BLOCK *block)
void set_region_type(BlobRegionType new_type)
void set_left_rule(int new_left)
void Rectangle(int x1, int y1, int x2, int y2)
bool UniquelyVertical() const 
bool textord_tabfind_show_initialtabs
inT32 pile_count(inT32 value) const 
const ICOORD & tright() const 
const ICOORD & topright() const 
const TBOX & bounding_box() const 
void set_right_tab_type(TabType new_type)
bool leader_on_right() const 
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
const double kLineFragmentAspectRatio
const int kMinLinesInColumn
const int kColumnWidthFactor
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed. 
void plot_noise_blobs(ScrollView *to_win)
const int kMinColumnWidth
bool textord_tabfind_show_finaltabs
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
BlobTextFlowType flow() const 
BLOBNBOX_LIST large_blobs
TabType right_tab_type() const 
const int kMinTextLineBlobRatio
const double kCharVerticalOverlapFraction
const int kMaxRaggedSearch
void Display(ScrollView *tab_win)
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
void set_left_tab_type(TabType new_type)