24 #include "config_auto.h" 70 : text_grid_(nullptr),
77 median_cell_height_(0),
78 median_cell_width_(0),
79 max_text_height_(INT32_MAX) {
233 int column_start,
int column_end) {
235 ASSERT_HOST(0 <= column_start && column_start <= column_end &&
239 for (
int row = row_start; row <= row_end; ++row) {
242 for (
int col = column_start; col <= column_end; ++col) {
274 gsearch.SetUniqueMode(
true);
275 gsearch.StartRectSearch(kCellBox);
276 double area_covered = 0;
278 while ((text = gsearch.NextRectSearch()) !=
nullptr) {
282 const int32_t current_area = kCellBox.
area();
283 if (current_area == 0) {
286 return std::min(1.0, area_covered / current_area);
290 #ifndef GRAPHICS_DISABLED 377 if (left_sides.
length() == 0 || right_sides.
length() == 0)
405 int min_bottom = INT32_MAX;
406 int max_top = INT32_MIN;
422 max_top = std::max(max_top, static_cast<int>(text->
bounding_box().
top()));
443 if (bottom_sides.
length() == 0 || top_sides.
length() == 0)
485 bool decrease)
const {
502 bool decrease)
const {
519 const int kMaxCellHeight = 1000;
520 const int kMaxCellWidth = 1000;
521 STATS height_stats(0, kMaxCellHeight + 1);
522 STATS width_stats(0, kMaxCellWidth + 1);
598 if (min_list.
length() == 0)
607 int stacked_partitions = 0;
608 int last_cross_position = INT32_MAX;
612 while (min_index < min_list.
length()) {
614 if (min_list[min_index] < max_list[max_index]) {
615 ++stacked_partitions;
616 if (last_cross_position != INT32_MAX &&
617 stacked_partitions > max_merged) {
618 int mid = (last_cross_position + min_list[min_index]) / 2;
620 last_cross_position = INT32_MAX;
625 --stacked_partitions;
626 if (last_cross_position == INT32_MAX &&
627 stacked_partitions <= max_merged) {
628 last_cross_position = max_list[max_index];
643 vertical_box.
set_left(x - kGridSize);
668 horizontal_box.
set_top(y + kGridSize);
706 : text_grid_(nullptr),
710 max_text_height_(INT32_MAX) {
762 TBOX line_bound = guess_box;
780 int vertical_count = 0;
781 int horizontal_count = 0;
825 int old_area = bounding_box->
area();
830 changed = (bounding_box->
area() > old_area);
842 bool first_line =
true;
876 TBOX best_box = guess_box;
879 TBOX adjusted = guess_box;
884 const int kMidGuessY = (guess_box.
bottom() + guess_box.
top()) / 2;
889 bool found_good_border =
false;
894 int last_bottom = INT32_MAX;
904 int previous_below = 0;
905 const int kMaxChances = 10;
906 int chances = kMaxChances;
907 while (bottom != last_bottom) {
927 chances = kMaxChances;
936 found_good_border =
true;
947 last_bottom = bottom;
951 if (!found_good_border)
955 found_good_border =
false;
956 int last_top = INT32_MIN;
959 int previous_above = 0;
960 chances = kMaxChances;
963 while (last_top != top) {
974 chances = kMaxChances;
979 table->
row_height(last_row) < max_row_height)) {
983 found_good_border =
true;
999 if (!found_good_border)
1019 bool top_to_bottom) {
1032 if (top_to_bottom && (last_y >= y || last_y <= text_box.
top())) {
1033 last_y = std::min(last_y, static_cast<int>(text_box.
bottom()));
1036 if (!top_to_bottom && (last_y <= y || last_y >= text_box.
bottom())) {
1037 last_y = std::max(last_y, static_cast<int>(text_box.
top()));
1056 double threshold = 0.0;
bool IsHorizontalType() const
ColPartitionGrid * text_grid_
const int kLinedTableMinVerticalLines
void StartSideSearch(int x, int ymin, int ymax)
const double kMinFilledArea
TBOX intersection(const TBOX &box) const
double CalculateCellFilledPercentage(int row, int column)
void FindWhitespacedRows()
static bool IsWeakTableRow(StructuredTable *table, int row)
int CountHorizontalIntersections(int y)
ColPartitionGrid * text_grid_
bool FindLinesBoundingBoxIteration(TBOX *bounding_box)
GenericVectorEqEq< int > cell_x_
int CountFilledCellsInRow(int row)
bool VerifyWhitespacedTable()
int FindHorizontalMargin(ColPartitionGrid *grid, int start_y, bool decrease) const
StructuredTable * RecognizeTable(const TBOX &guess_box)
void set_max_text_height(int height)
const TBOX & bounding_box() const
void set_line_grid(ColPartitionGrid *lines)
void set_bounding_box(const TBOX &box)
bool FindLinedStructure()
void StartRectSearch(const TBOX &rect)
int FindVerticalMargin(ColPartitionGrid *grid, int start_x, bool decrease) const
const double kHorizontalSpacing
void Display(ScrollView *window, ScrollView::Color color)
bool HasSignificantLines(const TBOX &guess)
void set_text_grid(ColPartitionGrid *text)
BBC * NextSideSearch(bool right_to_left)
void set_max_text_height(int height)
static void FindCellSplitLocations(const GenericVector< int > &min_list, const GenericVector< int > &max_list, int max_merged, GenericVector< int > *locations)
bool VerifyLinedTableCells()
const double kGoodRowNumberOfColumnsSmall[]
const int kCellSplitColumnThreshold
void UpdateMargins(ColPartitionGrid *grid)
void FindWhitespacedColumns()
bool VerifyRowFilled(int row)
void set_min_height(int height)
ColPartitionGrid * line_grid_
void StartVerticalSearch(int xmin, int xmax, int y)
bool RecognizeWhitespacedTable(const TBOX &guess_box, StructuredTable *table)
void set_line_grid(ColPartitionGrid *lines)
const int kLinedTableMinHorizontalLines
const int kGoodRowNumberOfColumnsSmallSize
void add(int32_t value, int32_t count)
const TBOX & bounding_box() const
const double kVerticalSpacing
bool IsVerticalLine() const
bool FindWhitespacedStructure()
const double kRequiredColumns
int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom)
GenericVectorEqEq< int > cell_y_
int CountFilledCellsInColumn(int column)
const int kCellSplitRowThreshold
void Rectangle(int x1, int y1, int x2, int y2)
bool FindLinesBoundingBox(TBOX *bounding_box)
void set_min_width(int width)
int row_height(int row) const
const double kGoodRowNumberOfColumnsLarge
BBC * NextVerticalSearch(bool top_to_bottom)
bool IsHorizontalLine() const
int column_width(int column) const
int CountVerticalIntersections(int x)
void SetUniqueMode(bool mode)
const double kMarginFactor
bool RecognizeLinedTable(const TBOX &guess_box, StructuredTable *table)
void Line(int x1, int y1, int x2, int y2)
void set_text_grid(ColPartitionGrid *text)
bool DoesPartitionFit(const ColPartition &part) const
int CountPartitions(const TBOX &box)
ColPartitionGrid * line_grid_