22 #pragma warning(disable:4244) // Conversion warnings
27 #include "config_auto.h"
67 false,
"Show partition bounds");
69 false,
"Show blobs rejected as noise");
71 "Show partition bounds, waiting if >1");
86 int resolution,
bool cjk_script,
87 double aligned_gap_fraction,
88 TabVector_LIST* vlines, TabVector_LIST* hlines,
89 int vertical_x,
int vertical_y)
90 :
TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
92 cjk_script_(cjk_script),
93 min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
94 mean_column_gap_(tright.x() - bleft.x()),
95 tabfind_aligned_gap_fraction_(aligned_gap_fraction),
96 reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
97 best_columns_(
NULL), stroke_width_(
NULL),
98 part_grid_(gridsize, bleft, tright), nontext_map_(
NULL),
99 projection_(resolution),
100 denorm_(
NULL), input_blobs_win_(
NULL), equation_detect_(
NULL) {
101 TabVector_IT h_it(&horizontal_lines_);
102 h_it.add_list_after(hlines);
107 if (best_columns_ !=
NULL) {
108 delete [] best_columns_;
110 if (stroke_width_ !=
NULL)
111 delete stroke_width_;
112 delete input_blobs_win_;
113 pixDestroy(&nontext_map_);
114 while (denorm_ !=
NULL) {
115 DENORM* dead_denorm = denorm_;
122 ColPartition_IT part_it(&noise_parts_);
123 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
130 part_it.set_to_list(&good_parts_);
131 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
139 BLOBNBOX_IT bb_it(&image_bblobs_);
140 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
142 delete bblob->
cblob();
157 if (stroke_width_ !=
NULL)
158 delete stroke_width_;
160 min_gutter_width_ =
static_cast<int>(kMinGutterWidthGrid *
gridsize());
162 #ifndef GRAPHICS_DISABLED
164 input_blobs_win_ =
MakeWindow(0, 0,
"Filtered Input Blobs");
167 #endif // GRAPHICS_DISABLED
169 pixDestroy(&nontext_map_);
175 photo_mask_pix, input_block);
179 stroke_width_->
Clear();
193 BLOBNBOX_CLIST* osd_blobs) {
210 bool vertical_text_lines,
211 int recognition_rotation) {
212 const FCOORD anticlockwise90(0.0f, 1.0f);
213 const FCOORD clockwise90(0.0f, -1.0f);
214 const FCOORD rotation180(-1.0f, 0.0f);
215 const FCOORD norotation(1.0f, 0.0f);
217 text_rotation_ = norotation;
220 rotation_ = norotation;
221 if (recognition_rotation == 1) {
222 rotation_ = anticlockwise90;
223 }
else if (recognition_rotation == 2) {
224 rotation_ = rotation180;
225 }
else if (recognition_rotation == 3) {
226 rotation_ = clockwise90;
232 if (recognition_rotation & 1) {
233 vertical_text_lines = !vertical_text_lines;
239 if (vertical_text_lines) {
240 rotation_.
rotate(anticlockwise90);
241 text_rotation_.
rotate(clockwise90);
244 rerotate_ =
FCOORD(rotation_.
x(), -rotation_.
y());
245 if (rotation_.
x() != 1.0f || rotation_.
y() != 0.0f) {
261 tprintf(
"Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
262 vertical_text_lines, recognition_rotation,
263 rotation_.
x(), rotation_.
y(),
264 text_rotation_.
x(), text_rotation_.
y());
270 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
298 int scaled_factor,
TO_BLOCK* input_block,
299 Pix* photo_mask_pix, Pix* thresholds_pix,
300 Pix* grey_pix, BLOCK_LIST* blocks,
301 BLOBNBOX_LIST* diacritic_blobs,
302 TO_BLOCK_LIST* to_blocks) {
303 pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
310 pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
311 &projection_, diacritic_blobs, &part_grid_, &big_parts_);
314 input_block,
this, &part_grid_, &big_parts_);
318 input_block,
this, &part_grid_, &big_parts_);
324 ColPartition_IT p_it(&big_parts_);
325 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
326 p_it.data()->DisownBoxesNoAssert();
328 delete stroke_width_;
329 stroke_width_ =
NULL;
355 ReflectForRtl(input_block, &image_bblobs_);
368 min_gutter_width_, tabfind_aligned_gap_fraction_,
369 &part_grid_, &deskew_, &reskew_);
373 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
374 denorm_ = new_denorm;
380 if (!MakeColumns(
false)) {
389 #ifndef GRAPHICS_DISABLED
394 #endif // GRAPHICS_DISABLED
400 GridSplitPartitions();
404 GridMergePartitions();
407 InsertRemainingNoise(input_block);
409 GridInsertHLinePartitions();
410 GridInsertVLinePartitions();
422 if (equation_detect_) {
437 GridRemoveUnderlinePartitions();
447 #ifndef GRAPHICS_DISABLED
450 if (window !=
NULL) {
462 #endif // GRAPHICS_DISABLED
468 ReleaseBlobsAndCleanupUnused(input_block);
476 TransformToBlocks(blocks, to_blocks);
478 tprintf(
"Found %d blocks, %d to_blocks\n",
479 blocks->length(), to_blocks->length());
482 DisplayBlocks(blocks);
483 RotateAndReskewBlocks(input_is_rtl, to_blocks);
485 #ifndef GRAPHICS_DISABLED
486 if (blocks_win_ !=
NULL) {
487 bool waiting =
false;
492 if (*event->parameter ==
'd')
504 #endif // GRAPHICS_DISABLED
512 deskew->
set_y(-deskew->
y());
516 equation_detect_ = detect;
522 void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
523 #ifndef GRAPHICS_DISABLED
525 if (blocks_win_ ==
NULL)
528 blocks_win_->
Clear();
534 BLOCK_IT block_it(blocks);
536 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
537 block_it.forward()) {
538 BLOCK* block = block_it.data();
539 block->
plot(blocks_win_, serial++,
550 void ColumnFinder::DisplayColumnBounds(
PartSetVector* sets) {
551 #ifndef GRAPHICS_DISABLED
560 ColPartitionSet* columns = best_columns_[i];
569 bool ColumnFinder::MakeColumns(
bool single_column) {
574 if (!single_column) {
579 bool good_only =
true;
582 ColPartitionSet* line_set = part_sets.get(i);
583 if (line_set !=
NULL && line_set->LegalColumnCandidate()) {
584 ColPartitionSet* column_candidate = line_set->Copy(good_only);
585 if (column_candidate !=
NULL)
586 column_candidate->AddToColumnSetsIfUnique(&column_sets_,
WidthCB());
589 good_only = !good_only;
590 }
while (column_sets_.
empty() && !good_only);
592 PrintColumnCandidates(
"Column candidates");
594 ImproveColumnCandidates(&column_sets_, &column_sets_);
596 PrintColumnCandidates(
"Improved columns");
598 ImproveColumnCandidates(&part_sets, &column_sets_);
600 ColPartitionSet* single_column_set =
602 if (single_column_set !=
NULL) {
608 PrintColumnCandidates(
"Final Columns");
609 bool has_columns = !column_sets_.
empty();
612 bool any_multi_column = AssignColumns(part_sets);
614 DisplayColumnBounds(&part_sets);
616 ComputeMeanColumnGap(any_multi_column);
618 for (
int i = 0; i < part_sets.size(); ++i) {
619 ColPartitionSet* line_set = part_sets.get(i);
620 if (line_set !=
NULL) {
621 line_set->RelinquishParts();
632 void ColumnFinder::ImproveColumnCandidates(
PartSetVector* src_sets,
635 temp_cols.
move(column_sets);
636 if (src_sets == column_sets)
637 src_sets = &temp_cols;
638 int set_size = temp_cols.size();
640 bool good_only =
true;
642 for (
int i = 0; i < set_size; ++i) {
643 ColPartitionSet* column_candidate = temp_cols.get(i);
645 ColPartitionSet* improved = column_candidate->Copy(good_only);
646 if (improved !=
NULL) {
647 improved->ImproveColumnCandidate(
WidthCB(), src_sets);
648 improved->AddToColumnSetsIfUnique(column_sets,
WidthCB());
651 good_only = !good_only;
652 }
while (column_sets->empty() && !good_only);
653 if (column_sets->empty())
654 column_sets->move(&temp_cols);
656 temp_cols.delete_data_pointers();
660 void ColumnFinder::PrintColumnCandidates(
const char* title) {
661 int set_size = column_sets_.
size();
662 tprintf(
"Found %d %s:\n", set_size, title);
664 for (
int i = 0; i < set_size; ++i) {
665 ColPartitionSet* column_set = column_sets_.
get(i);
680 bool ColumnFinder::AssignColumns(
const PartSetVector& part_sets) {
681 int set_count = part_sets.size();
684 best_columns_ =
new ColPartitionSet*[set_count];
685 for (
int y = 0; y < set_count; ++y)
686 best_columns_[y] =
NULL;
687 int column_count = column_sets_.
size();
697 bool* any_columns_possible =
new bool[set_count];
698 int* assigned_costs =
new int[set_count];
699 int** column_set_costs =
new int*[set_count];
702 for (
int part_i = 0; part_i < set_count; ++part_i) {
703 ColPartitionSet* line_set = part_sets.get(part_i);
704 bool debug = line_set !=
NULL &&
706 line_set->bounding_box().bottom());
707 column_set_costs[part_i] =
new int[column_count];
708 any_columns_possible[part_i] =
false;
710 for (
int col_i = 0; col_i < column_count; ++col_i) {
711 if (line_set !=
NULL &&
712 column_sets_.
get(col_i)->CompatibleColumns(debug, line_set,
714 column_set_costs[part_i][col_i] =
715 column_sets_.
get(col_i)->UnmatchedWidth(line_set);
716 any_columns_possible[part_i] =
true;
718 column_set_costs[part_i][col_i] =
MAX_INT32;
720 tprintf(
"Set id %d did not match at y=%d, lineset =%p\n",
721 col_i, part_i, line_set);
725 bool any_multi_column =
false;
729 while (BiggestUnassignedRange(set_count, any_columns_possible,
732 tprintf(
"Biggest unassigned range = %d- %d\n", start, end);
734 int column_set_id = RangeModalColumnSet(column_set_costs,
735 assigned_costs, start, end);
737 tprintf(
"Range modal column id = %d\n", column_set_id);
738 column_sets_.
get(column_set_id)->Print();
741 ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
742 any_columns_possible,
743 column_set_id, &start, &end);
745 tprintf(
"Shrunk range = %d- %d\n", start, end);
749 ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
750 any_columns_possible,
751 column_set_id, -1, -1, &start);
753 ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
754 any_columns_possible,
755 column_set_id, 1, set_count, &end);
758 tprintf(
"Column id %d applies to range = %d - %d\n",
759 column_set_id, start, end);
761 AssignColumnToRange(column_set_id, start, end, column_set_costs,
763 if (column_sets_.
get(column_set_id)->GoodColumnCount() > 1)
764 any_multi_column =
true;
768 if (best_columns_[0] ==
NULL) {
769 AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs);
772 for (
int i = 0; i < set_count; ++i) {
773 delete [] column_set_costs[i];
775 delete [] assigned_costs;
776 delete [] any_columns_possible;
777 delete [] column_set_costs;
778 return any_multi_column;
783 bool ColumnFinder::BiggestUnassignedRange(
int set_count,
784 const bool* any_columns_possible,
785 int* best_start,
int* best_end) {
786 int best_range_size = 0;
787 *best_start = set_count;
788 *best_end = set_count;
790 for (
int start = 0; start <
gridheight_; start = end) {
792 while (start < set_count) {
793 if (best_columns_[start] ==
NULL && any_columns_possible[start])
800 while (end < set_count) {
801 if (best_columns_[end] !=
NULL)
803 if (any_columns_possible[end])
807 if (start < set_count && range_size > best_range_size) {
808 best_range_size = range_size;
813 return *best_start < *best_end;
817 int ColumnFinder::RangeModalColumnSet(
int** column_set_costs,
818 const int* assigned_costs,
819 int start,
int end) {
820 int column_count = column_sets_.
size();
821 STATS column_stats(0, column_count);
822 for (
int part_i = start; part_i < end; ++part_i) {
823 for (
int col_j = 0; col_j < column_count; ++col_j) {
824 if (column_set_costs[part_i][col_j] < assigned_costs[part_i])
825 column_stats.add(col_j, 1);
829 return column_stats.mode();
836 void ColumnFinder::ShrinkRangeToLongestRun(
int** column_set_costs,
837 const int* assigned_costs,
838 const bool* any_columns_possible,
840 int* best_start,
int* best_end) {
842 int orig_start = *best_start;
843 int orig_end = *best_end;
844 int best_range_size = 0;
845 *best_start = orig_end;
846 *best_end = orig_end;
848 for (
int start = orig_start; start < orig_end; start = end) {
850 while (start < orig_end) {
851 if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
852 !any_columns_possible[start])
858 while (end < orig_end) {
859 if (column_set_costs[end][column_set_id] >= assigned_costs[start] &&
860 any_columns_possible[end])
864 if (start < orig_end && end - start > best_range_size) {
865 best_range_size = end - start;
875 void ColumnFinder::ExtendRangePastSmallGaps(
int** column_set_costs,
876 const int* assigned_costs,
877 const bool* any_columns_possible,
879 int step,
int end,
int* start) {
881 tprintf(
"Starting expansion at %d, step=%d, limit=%d\n",
886 int barrier_size = 0;
892 for (i = *start + step; i != end; i += step) {
893 if (column_set_costs[i][column_set_id] < assigned_costs[i])
896 if (any_columns_possible[i])
900 tprintf(
"At %d, Barrier size=%d\n", i, barrier_size);
901 if (barrier_size > kMaxIncompatibleColumnCount)
910 for (i += step; i != end; i += step) {
911 if (column_set_costs[i][column_set_id] < assigned_costs[i])
913 else if (any_columns_possible[i])
917 tprintf(
"At %d, good size = %d\n", i, good_size);
919 if (good_size >= barrier_size)
921 }
while (good_size >= barrier_size);
925 void ColumnFinder::AssignColumnToRange(
int column_set_id,
int start,
int end,
926 int** column_set_costs,
927 int* assigned_costs) {
928 ColPartitionSet* column_set = column_sets_.
get(column_set_id);
929 for (
int i = start; i < end; ++i) {
930 assigned_costs[i] = column_set_costs[i][column_set_id];
931 best_columns_[i] = column_set;
936 void ColumnFinder::ComputeMeanColumnGap(
bool any_multi_column) {
940 int width_samples = 0;
948 mean_column_gap_ = any_multi_column && gap_samples > 0
949 ? total_gap / gap_samples : total_width / width_samples;
957 static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
958 for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) {
961 delete blob->
cblob();
970 void ColumnFinder::ReleaseBlobsAndCleanupUnused(
TO_BLOCK* block) {
971 ReleaseAllBlobsAndDeleteUnused(&block->
blobs);
972 ReleaseAllBlobsAndDeleteUnused(&block->
small_blobs);
973 ReleaseAllBlobsAndDeleteUnused(&block->
noise_blobs);
974 ReleaseAllBlobsAndDeleteUnused(&block->
large_blobs);
975 ReleaseAllBlobsAndDeleteUnused(&image_bblobs_);
979 void ColumnFinder::GridSplitPartitions() {
981 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
982 gsearch(&part_grid_);
983 gsearch.StartFullSearch();
984 ColPartition* dont_repeat =
NULL;
986 while ((part = gsearch.NextFullSearch()) !=
NULL) {
987 if (part->blob_type() <
BRT_UNKNOWN || part == dont_repeat)
989 ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
993 part->ColumnRange(
resolution_, column_set, &first_col, &last_col);
1002 if (last_col != first_col + 1)
1005 int y = part->MidY();
1006 TBOX margin_box = part->bounding_box();
1010 tprintf(
"Considering partition for GridSplit:");
1013 ColPartition* column = column_set->GetColumnByIndex(first_col);
1016 margin_box.
set_left(column->RightAtY(y) + 2);
1017 column = column_set->GetColumnByIndex(last_col);
1020 margin_box.
set_right(column->LeftAtY(y) - 2);
1024 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(
this);
1026 tprintf(
"Searching box (%d,%d)->(%d,%d)\n",
1028 margin_box.
right(), margin_box.
top());
1031 rectsearch.StartRectSearch(margin_box);
1033 while ((bbox = rectsearch.NextRectSearch()) !=
NULL) {
1039 gsearch.RemoveBBox();
1040 int x_middle = (margin_box.
left() + margin_box.
right()) / 2;
1042 tprintf(
"Splitting part at %d:", x_middle);
1045 ColPartition* split_part = part->SplitAt(x_middle);
1046 if (split_part !=
NULL) {
1050 split_part->Print();
1052 part_grid_.
InsertBBox(
true,
true, split_part);
1056 tprintf(
"Split had no effect\n");
1060 gsearch.RepositionIterator();
1062 tprintf(
"Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
1071 void ColumnFinder::GridMergePartitions() {
1073 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1074 gsearch(&part_grid_);
1075 gsearch.StartFullSearch();
1077 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1078 if (part->IsUnMergeableType())
1081 ColPartitionSet* columns = best_columns_[gsearch.GridY()];
1082 TBOX box = part->bounding_box();
1085 tprintf(
"Considering part for merge at:");
1088 int y = part->MidY();
1089 ColPartition* left_column = columns->ColumnContaining(box.
left(), y);
1090 ColPartition* right_column = columns->ColumnContaining(box.
right(), y);
1091 if (left_column ==
NULL || right_column != left_column) {
1093 tprintf(
"In different columns\n");
1096 box.
set_left(left_column->LeftAtY(y));
1097 box.
set_right(right_column->RightAtY(y));
1099 bool modified_box =
false;
1100 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1101 rsearch(&part_grid_);
1102 rsearch.SetUniqueMode(
true);
1103 rsearch.StartRectSearch(box);
1104 ColPartition* neighbour;
1106 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1107 if (neighbour == part || neighbour->IsUnMergeableType())
1109 const TBOX& neighbour_box = neighbour->bounding_box();
1111 tprintf(
"Considering merge with neighbour at:");
1114 if (neighbour_box.
right() < box.
left() ||
1117 if (part->VSignificantCoreOverlap(*neighbour) &&
1118 part->TypesMatch(*neighbour)) {
1124 const TBOX& part_box = part->bounding_box();
1127 if (neighbour_box.
left() > part->right_margin() &&
1128 part_box.
right() < neighbour->left_margin())
1130 if (neighbour_box.
right() < part->left_margin() &&
1131 part_box.
left() > neighbour->right_margin())
1133 int h_gap =
MAX(part_box.
left(), neighbour_box.
left()) -
1135 if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction ||
1136 part_box.
width() < mean_column_gap_ ||
1137 neighbour_box.
width() < mean_column_gap_) {
1139 tprintf(
"Running grid-based merge between:\n");
1143 rsearch.RemoveBBox();
1144 if (!modified_box) {
1146 gsearch.RemoveBBox();
1147 rsearch.RepositionIterator();
1148 modified_box =
true;
1150 part->Absorb(neighbour,
WidthCB());
1152 tprintf(
"Neighbour failed hgap test\n");
1155 tprintf(
"Neighbour failed overlap or typesmatch test\n");
1166 gsearch.RepositionIterator();
1173 void ColumnFinder::InsertRemainingNoise(
TO_BLOCK* block) {
1175 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1183 rsearch.SetUniqueMode(
true);
1184 rsearch.StartRectSearch(search_box);
1186 ColPartition* best_part =
NULL;
1187 int best_distance = 0;
1188 while ((part = rsearch.NextRectSearch()) !=
NULL) {
1189 if (part->IsUnMergeableType())
1193 if (best_part ==
NULL || distance < best_distance) {
1195 best_distance = distance;
1198 if (best_part !=
NULL &&
1199 best_distance < kMaxDistToPartSizeRatio * best_part->median_size()) {
1202 tprintf(
"Adding noise blob with distance %d, thr=%g:box:",
1204 kMaxDistToPartSizeRatio * best_part->median_size());
1210 best_part->AddBox(blob);
1211 part_grid_.
InsertBBox(
true,
true, best_part);
1225 static TBOX BoxFromHLine(
const TabVector* hline) {
1226 int top =
MAX(hline->startpt().y(), hline->endpt().y());
1227 int bottom =
MIN(hline->startpt().y(), hline->endpt().y());
1228 top += hline->mean_width();
1229 if (top == bottom) {
1235 return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top);
1240 void ColumnFinder::GridRemoveUnderlinePartitions() {
1241 TabVector_IT hline_it(&horizontal_lines_);
1242 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1243 TabVector* hline = hline_it.data();
1244 if (hline->intersects_other_lines())
1246 TBOX line_box = BoxFromHLine(hline);
1247 TBOX search_box = line_box;
1250 part_search.SetUniqueMode(
true);
1251 part_search.StartRectSearch(search_box);
1252 ColPartition* covered;
1253 bool touched_table =
false;
1254 bool touched_text =
false;
1255 ColPartition* line_part =
NULL;
1256 while ((covered = part_search.NextRectSearch()) !=
NULL) {
1258 touched_table =
true;
1260 }
else if (covered->IsTextType()) {
1262 int text_bottom = covered->median_bottom();
1263 if (line_box.
bottom() <= text_bottom && text_bottom <= search_box.
top())
1264 touched_text =
true;
1265 }
else if (covered->blob_type() ==
BRT_HLINE &&
1266 line_box.
contains(covered->bounding_box())) {
1267 line_part = covered;
1270 if (line_part !=
NULL && !touched_table && touched_text) {
1278 void ColumnFinder::GridInsertHLinePartitions() {
1279 TabVector_IT hline_it(&horizontal_lines_);
1280 for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
1281 TabVector* hline = hline_it.data();
1282 TBOX line_box = BoxFromHLine(hline);
1287 bool any_image =
false;
1289 part_search.SetUniqueMode(
true);
1290 part_search.StartRectSearch(line_box);
1291 ColPartition* covered;
1292 while ((covered = part_search.NextRectSearch()) !=
NULL) {
1293 if (covered->IsImageType()) {
1306 void ColumnFinder::GridInsertVLinePartitions() {
1308 for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
1309 TabVector* vline = vline_it.data();
1310 if (!vline->IsSeparator())
1312 int left =
MIN(vline->startpt().x(), vline->endpt().x());
1313 int right =
MAX(vline->startpt().x(), vline->endpt().x());
1314 right += vline->mean_width();
1315 if (left == right) {
1323 left, vline->startpt().
y(), right, vline->endpt().y());
1325 bool any_image =
false;
1327 part_search.SetUniqueMode(
true);
1328 part_search.StartRectSearch(part->bounding_box());
1329 ColPartition* covered;
1330 while ((covered = part_search.NextRectSearch()) !=
NULL) {
1331 if (covered->IsImageType()) {
1345 void ColumnFinder::SetPartitionTypes() {
1346 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1347 gsearch(&part_grid_);
1348 gsearch.StartFullSearch();
1350 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1351 part->SetPartitionType(
resolution_, best_columns_[gsearch.GridY()]);
1357 void ColumnFinder::SmoothPartnerRuns() {
1359 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1360 gsearch(&part_grid_);
1361 gsearch.StartFullSearch();
1363 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1364 ColPartition* partner = part->SingletonPartner(
true);
1365 if (partner !=
NULL) {
1366 if (partner->SingletonPartner(
false) != part) {
1367 tprintf(
"Ooops! Partition:(%d partners)",
1368 part->upper_partners()->length());
1370 tprintf(
"has singleton partner:(%d partners",
1371 partner->lower_partners()->length());
1373 tprintf(
"but its singleton partner is:");
1374 if (partner->SingletonPartner(
false) ==
NULL)
1377 partner->SingletonPartner(
false)->Print();
1379 ASSERT_HOST(partner->SingletonPartner(
false) == part);
1380 }
else if (part->SingletonPartner(
false) !=
NULL) {
1381 ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
1383 part->SmoothPartnerRun(column_count * 2 + 1);
1390 void ColumnFinder::AddToTempPartList(ColPartition* part,
1391 ColPartition_CLIST* temp_list) {
1392 int mid_y = part->MidY();
1393 ColPartition_C_IT it(temp_list);
1394 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1395 ColPartition* test_part = it.data();
1398 if (test_part == part->SingletonPartner(
false))
1400 int neighbour_bottom = test_part->median_bottom();
1401 int neighbour_top = test_part->median_top();
1402 int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
1403 if (neighbour_y < mid_y)
1405 if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part))
1408 if (it.cycled_list()) {
1409 it.add_to_end(part);
1411 it.add_before_stay_put(part);
1416 void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
1417 WorkingPartSet_LIST* work_set) {
1418 ColPartition_C_IT it(temp_list);
1419 while (!it.empty()) {
1421 &good_parts_, work_set);
1427 void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
1428 TO_BLOCK_LIST* to_blocks) {
1429 WorkingPartSet_LIST work_set;
1430 ColPartitionSet* column_set =
NULL;
1431 ColPartition_IT noise_it(&noise_parts_);
1435 ColPartition_CLIST temp_part_list;
1437 GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1438 gsearch(&part_grid_);
1439 gsearch.StartFullSearch();
1440 int prev_grid_y = -1;
1442 while ((part = gsearch.NextFullSearch()) !=
NULL) {
1443 int grid_y = gsearch.GridY();
1444 if (grid_y != prev_grid_y) {
1445 EmptyTempPartList(&temp_part_list, &work_set);
1446 prev_grid_y = grid_y;
1448 if (best_columns_[grid_y] != column_set) {
1449 column_set = best_columns_[grid_y];
1453 &good_parts_, &work_set);
1455 tprintf(
"Changed column groups at grid index %d, y=%d\n",
1456 gsearch.GridY(), gsearch.GridY() *
gridsize());
1459 noise_it.add_to_end(part);
1461 AddToTempPartList(part, &temp_part_list);
1464 EmptyTempPartList(&temp_part_list, &work_set);
1466 WorkingPartSet_IT work_it(&work_set);
1467 while (!work_it.empty()) {
1468 WorkingPartSet* working_set = work_it.extract();
1470 &good_parts_, blocks, to_blocks);
1478 static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
1479 BLOBNBOX_IT it(bblobs);
1480 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1481 it.data()->reflect_box_in_y_axis();
1491 void ColumnFinder::ReflectForRtl(
TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
1492 ReflectBlobList(bblobs);
1493 ReflectBlobList(&input_block->
blobs);
1500 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
1501 denorm_ = new_denorm;
1507 static void RotateAndExplodeBlobList(
const FCOORD& blob_rotation,
1508 BLOBNBOX_LIST* bblobs,
1511 BLOBNBOX_IT it(bblobs);
1512 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1515 C_OUTLINE_LIST* outlines = cblob->
out_list();
1516 C_OUTLINE_IT ol_it(outlines);
1517 if (!outlines->singleton()) {
1520 for (;!ol_it.empty(); ol_it.forward()) {
1526 it.add_after_stay_put(new_blob);
1532 if (blob_rotation.
x() != 1.0f || blob_rotation.
y() != 0.0f) {
1533 cblob->
rotate(blob_rotation);
1554 void ColumnFinder::RotateAndReskewBlocks(
bool input_is_rtl,
1555 TO_BLOCK_LIST* blocks) {
1562 TO_BLOCK_IT it(blocks);
1563 int block_index = 1;
1564 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1578 FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
1583 RotateAndExplodeBlobList(blob_rotation, &to_block->
blobs,
1585 TO_ROW_IT row_it(to_block->
get_rows());
1586 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1587 TO_ROW* row = row_it.data();
1588 RotateAndExplodeBlobList(blob_rotation, row->
blob_list(),
1592 static_cast<int>(heights.
median() + 0.5));
1594 tprintf(
"Block median size = (%d, %d)\n",
1604 FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(
BLOCK* block) {
1613 FCOORD classify_rotation(text_rotation_);
1614 FCOORD block_rotation(1.0f, 0.0f);
1620 if (rerotate_.
x() == 0.0f)
1621 block_rotation = rerotate_;
1623 block_rotation =
FCOORD(0.0f, -1.0f);
1624 block->
rotate(block_rotation);
1625 classify_rotation =
FCOORD(1.0f, 0.0f);
1627 block_rotation.rotate(rotation_);
1631 FCOORD blob_rotation(block_rotation);
1632 block_rotation.set_y(-block_rotation.y());
1636 tprintf(
"Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
1639 classify_rotation.x(), classify_rotation.y());
1642 return blob_rotation;
void set_skew(const FCOORD &skew)
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
bool textord_tabfind_find_tables
static bool WithinTestRegion(int detail_level, int x, int y)
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
ColumnFinder(int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
bool right_to_left() const
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
void rotate(const FCOORD &rotation)
const ICOORD & median_size() const
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
bool PSM_SPARSE(int pageseg_mode)
void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void plot(ScrollView *window, inT32 serial, ScrollView::Color colour)
bool textord_debug_images
void set_median_size(int x, int y)
void set_resolution(int resolution)
const double kMaxDistToPartSizeRatio
void set_owner(tesseract::ColPartition *new_owner)
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
void SetTabStops(TabFind *tabgrid)
void rotate(const FCOORD vec)
void add(inT32 value, inT32 count)
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
#define BOOL_VAR(name, val, comment)
static const STRING & textord_debug_pix()
void ReSetAndReFilterBlobs()
void set_re_rotation(const FCOORD &rotation)
const double kMinNonNoiseFraction
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback *cb)
void Image(struct Pix *image, int x_pos, int y_pos)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
void GridFindMargins(ColPartitionSet **best_columns)
BLOBNBOX_LIST * blob_list()
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
virtual int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns)=0
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
void set_flow(BlobTextFlowType value)
bool textord_tabfind_show_reject_blobs
static BLOBNBOX * RealBlob(C_OUTLINE *outline)
bool textord_tabfind_show_columns
const DENORM * predecessor() const
void pad(int xpad, int ypad)
BLOBNBOX_LIST small_blobs
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
C_OUTLINE_LIST * out_list()
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
void InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
void DisplayBoxes(ScrollView *window)
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
void DeleteUnownedNoise()
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
int textord_debug_tabfind
bool textord_tabfind_show_initial_partitions
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
inT16 y() const
access_function
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
FCOORD re_rotation() const
void SetEquationDetect(EquationDetectBase *detect)
void DeleteUnknownParts(TO_BLOCK *block)
void delete_data_pointers()
const double kMarginOverlapFraction
bool textord_debug_printable
ScrollView * DisplayTabVectors(ScrollView *tab_win)
SVEvent * AwaitEvent(SVEventType type)
void set_y(float yin)
rewrite function
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
BLOBNBOX_LIST noise_blobs
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
const double kHorizontalGapMergeFraction
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
const ICOORD & bleft() const
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
void FindPartitionPartners()
void rotate(const FCOORD &rotation)
#define INT_VAR(name, val, comment)
void ReTypeBlobs(BLOBNBOX_LIST *im_blobs)
void compute_bounding_box()
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
ColPartitionSet * MakeSingleColumnSet(WidthCallback *cb)
tesseract::ColPartition * owner() const
Pix * ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block)
void plot_graded_blobs(ScrollView *to_win)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
GenericVector< ColPartitionSet * > PartSetVector
ScrollView * MakeWindow(int x, int y, const char *window_name)
void TidyBlobs(TO_BLOCK *block)
void AssertNoDuplicates()
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
bool textord_tabfind_show_blocks
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb, const FCOORD &reskew)
inT16 x() const
access function
void SetBlockRuleEdges(TO_BLOCK *block)
void set_region_type(BlobRegionType new_type)
const double kMinGutterWidthGrid
WidthCallback * WidthCB()
void reflect_polygon_in_y_axis()
bool contains(const FCOORD pt) const
void set_index(int value)
const ICOORD & tright() const
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
void move(GenericVector< T > *from)
void set_left_to_right_language(bool order)
TabVector_LIST * dead_vectors()
const TBOX & bounding_box() const
void FindFigureCaptions()
void Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
const ICOORD & image_origin() const
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
const int kMaxIncompatibleColumnCount
void RemoveBBox(BBC *bbox)
const int kMinColumnWidth
POLY_BLOCK * poly_block() const
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
BLOBNBOX_LIST large_blobs
bool MakeColPartSets(PartSetVector *part_sets)
PolyBlockType isA() const
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
void set_right_to_left(bool value)
bool overlap(const TBOX &box) const
void set_classify_rotation(const FCOORD &rotation)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
int textord_tabfind_show_partitions
void RefinePartitionPartners(bool get_desperate)
void set_type(PolyBlockType t)
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)