21 #include "config_auto.h"    28 #include "allheaders.h"   145          "Debug table marking steps in detail");
   147          "Show page stats used in table finding");
   149          "Enables the table recognizer for table layout and filtering.");
   162       global_median_xheight_(0),
   163       global_median_blob_width_(0),
   164       global_median_ledding_(0),
   165       left_to_right_language_(true) {
   183                        const ICOORD& top_right) {
   222     BLOBNBOX_CLIST* part_boxes = part->
boxes();
   223     BLOBNBOX_C_IT pit(part_boxes);
   224     for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
   231         if (leader_part == 
nullptr) {
   235         leader_part->
AddBox(pblob);
   237         clean_part->
AddBox(pblob);
   244     if (leader_part != 
nullptr) {
   267 #ifndef GRAPHICS_DISABLED   276     table_win = 
MakeWindow(100, 300, 
"Fragmented Text");
   279 #endif  // GRAPHICS_DISABLED   286   ColSegment_LIST column_blocks;
   300   ColSegment_LIST table_columns;
   306   ColSegment_LIST table_regions;
   309 #ifndef GRAPHICS_DISABLED   315 #endif  // GRAPHICS_DISABLED   331 #ifndef GRAPHICS_DISABLED   338 #endif  // GRAPHICS_DISABLED   345 #ifndef GRAPHICS_DISABLED   352 #endif  // GRAPHICS_DISABLED   360 #ifndef GRAPHICS_DISABLED   367 #endif  // GRAPHICS_DISABLED   440   if (part->
boxes()->empty()) {
   450   bool found_split = 
true;
   451   while (found_split) {
   453     BLOBNBOX_C_IT box_it(right_part->
boxes());
   458     int previous_right = INT32_MIN;
   461     for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
   462       const TBOX& box = box_it.data()->bounding_box();
   463       if (previous_right != INT32_MIN &&
   464           box.
left() - previous_right > kThreshold) {
   467         int mid_x = (box.
left() + previous_right) / 2;
   469         right_part = left_part->
SplitAt(mid_x);
   477       previous_right = std::max(previous_right, static_cast<int>(box.
right()));
   510   return box.
height() > kHeightRequired &&
   511          box.
width() > kWidthRequired &&
   512          box.
area() > kAreaRequired;
   525                                   ColSegment_LIST* column_blocks) {
   528     if (columns != 
nullptr) {
   529       ColSegment_LIST new_blocks;
   540                                     ColSegment_LIST* column_blocks) {
   541   ColSegment_IT src_it(new_blocks);
   542   ColSegment_IT dest_it(column_blocks);
   544   for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
   547     bool match_found = 
false;
   549     for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
   557         delete src_it.extract();
   563       dest_it.add_after_then_move(src_it.extract());
   572   return (abs(b1.
left() - b2.
left()) < x_margin) &&
   596     int y = part->
MidY();
   601       int left_space = std::max(0, box.
left() - left_column->
LeftAtY(y));
   606       int right_space = std::max(0, right_column->
RightAtY(y) - box.
right());
   620         if (right < box.
left()) {
   633         if (left > box.
right()) {
   685     if (neighbor == part)
   691       if (neighbor_box.
top() < part_box.
bottom() &&
   692           gap < min_space_below) {
   693         min_space_below = gap;
   694         below_neighbor = neighbor;
   696       else if (part_box.
top() < neighbor_box.
bottom() &&
   697                gap < min_space_above) {
   698         min_space_above = gap;
   699         above_neighbor = neighbor;
   732       BLOBNBOX_C_IT it(part->
boxes());
   733       for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
   734         xheight_stats.
add(it.data()->bounding_box().height(), 1);
   735         width_stats.
add(it.data()->bounding_box().width(), 1);
   746   #ifndef GRAPHICS_DISABLED   748     const char* kWindowName = 
"X-height (R), X-width (G), and ledding (B)";
   754   #endif  // GRAPHICS_DISABLED   862   BLOBNBOX_CLIST* part_boxes = part->
boxes();
   863   BLOBNBOX_C_IT it(part_boxes);
   873   int previous_x1 = -1;
   875   int largest_partition_gap_found = -1;
   882   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
   886     if (previous_x1 != -1) {
   887       int gap = current_x0 - previous_x1;
   899           previous_x1 = std::max(previous_x1, current_x1);
   913       if (gap > largest_partition_gap_found)
   914         largest_partition_gap_found = gap;
   916     previous_x1 = current_x1;
   930   if (largest_partition_gap_found == -1)
   936   return largest_partition_gap_found < min_gap;
   956   const int top = box.
top() + search_size;
   957   const int bottom = box.
bottom() - search_size;
   961     int x = right_to_left ? box.
right() : box.
left();
   964     while ((leader = hsearch.
NextSideSearch(right_to_left)) != 
nullptr) {
  1023     int current_spacing = 0;  
  1024     int upper_spacing = 0;    
  1030       current_spacing = mid - left;
  1031       upper_spacing = upper_mid - left;
  1037       current_spacing = right - mid;
  1038       upper_spacing = right - upper_mid;
  1079   int max_top = INT32_MIN;
  1080   int min_bottom = INT32_MAX;
  1089     if (top > max_top) {
  1093     if (bottom < min_bottom) {
  1094       min_bottom = bottom;
  1119     if (!upper_part || !lower_part)
  1145   ColSegment_IT it(column_blocks);
  1146   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
  1149     int num_table_cells = 0;
  1150     int num_text_cells = 0;
  1165     if (!num_table_cells && !num_text_cells) {
  1166       delete it.extract();
  1179   ColSegment_IT it(segments);
  1180   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
  1207     bool neighbor_found = 
false;
  1208     bool modified = 
false;  
  1214       int top_range = std::min(box.
top() + margin, 
static_cast<int>(
tright().
y()));
  1215       int bottom_range = std::max(box.
bottom() - margin, 
static_cast<int>(
bleft().
y()));
  1218       neighbor_found = 
false;
  1224         if (neighbor == seg)
  1246           neighbor_found = 
true;
  1253     } 
while (neighbor_found);
  1275   ColSegment_IT it(table_columns);
  1295     bool found_neighbours = 
false;
  1311       found_neighbours = 
true;
  1313     if (found_neighbours) {
  1314       it.add_after_then_move(col);
  1325                                   ColSegment_LIST* table_regions) {
  1326   ColSegment_IT cit(table_columns);
  1327   ColSegment_IT rit(table_regions);
  1336   bool* table_region = 
new bool[page_height];
  1340     for (
int i = 0; i < page_height; i++) {
  1341       table_region[i] = 
false;
  1345     cit.move_to_first();
  1346     for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) {
  1347       TBOX col_box = cit.data()->bounding_box();
  1351       for (
int i = intersection_box.
bottom(); i < intersection_box.
top(); i++) {
  1352         table_region[i - 
bleft().
y()] = 
true;
  1356     TBOX current_table_box;
  1361     for (
int i = 1; i < page_height; i++) {
  1363       if (!table_region[i - 1] && table_region[i]) {
  1368       if (table_region[i - 1] && !table_region[i]) {
  1370         if (!current_table_box.
null_box()) {
  1373           rit.add_after_then_move(seg);
  1378   delete[] table_region;
  1394     bool neighbor_found = 
false;
  1395     bool modified = 
false;  
  1399       TBOX search_region(box);
  1402       neighbor_found = 
false;
  1408         if (neighbor == seg)
  1426           neighbor_found = 
true;
  1433     } 
while (neighbor_found);
  1489   ColSegment_CLIST adjusted_tables;
  1490   ColSegment_C_IT it(&adjusted_tables);
  1496     TBOX grown_box = table_box;
  1504       it.add_after_then_move(col);
  1515   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
  1530   TBOX search_box = table_box;
  1544                                              const TBOX& search_range,
  1548   for (
int i = 0; i < 2; ++i) {
  1572                                           const TBOX& search_range,
  1587     if (result_box->
contains(part_box))
  1602                                       const TBOX& table_box) {
  1620   int num_extra_partitions = 0;
  1621   int extra_space_to_right = 0;
  1622   int extra_space_to_left = 0;
  1625   for (
int i = 0; i < 2; ++i) {
  1642       num_extra_partitions++;
  1646         extra_space_to_right++;
  1647         extra_space_to_left++;
  1652         extra_space_to_right++;
  1654         extra_space_to_left++;
  1659   return (extra_space_to_right > num_extra_partitions / 2) ||
  1660       (extra_space_to_left > num_extra_partitions / 2);
  1676     int table_top = table_box->
top();
  1679     if (box.
bottom() - table_top > max_distance)
  1685       previous_neighbor = 
nullptr;
  1690     if (previous_neighbor == 
nullptr) {
  1691       previous_neighbor = neighbor;
  1708   int* table_xprojection = 
new int[page_width];
  1717     for (
int i = 0; i < page_width; i++) {
  1718       table_xprojection[i] = 0;
  1735       BLOBNBOX_CLIST* part_boxes = part->
boxes();
  1736       BLOBNBOX_C_IT pit(part_boxes);
  1743       int next_position_to_write = 0;
  1745       for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
  1752         xstart = std::max(xstart, next_position_to_write);
  1753         for (
int i = xstart; i < xend; i++)
  1754           table_xprojection[i - 
bleft().
x()]++;
  1755         next_position_to_write = xend;
  1764   delete[] table_xprojection;
  1772   for (
int i = 0; i < length; i++) {
  1773     if (xprojection[i] > peak_value) {
  1774       peak_value = xprojection[i];
  1786   for (
int i = 0; i < length; i++) {
  1787     xprojection[i] = (xprojection[i] >= projection_threshold) ? 1 : 0;
  1790   int largest_gap = 0;
  1792   for (
int i = 1; i < length; i++) {
  1794     if (xprojection[i - 1] && !xprojection[i]) {
  1798     if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) {
  1799       int gap = i - run_start;
  1800       if (gap > largest_gap)
  1821     table_win = 
MakeWindow(0, 0, 
"Table Structure");
  1836   ColSegment_CLIST good_tables;
  1837   ColSegment_C_IT good_it(&good_tables);
  1852     if (table_structure != 
nullptr) {
  1857       delete table_structure;
  1858       good_it.add_after_then_move(found_table);
  1867   for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward())
  1873                                      ColSegment_LIST *segments,
  1875 #ifndef GRAPHICS_DISABLED  1878   ColSegment_IT it(segments);
  1879   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
  1882     int left_x = box.
left();
  1883     int right_x = box.
right();
  1884     int top_y = box.
top();
  1885     int bottom_y = box.
bottom();
  1886     win->
Rectangle(left_x, bottom_y, right_x, top_y);
  1894 #ifndef GRAPHICS_DISABLED  1902     int left_x = box.
left();
  1903     int right_x = box.
right();
  1904     int top_y = box.
top();
  1905     int bottom_y = box.
bottom();
  1908     win->
Rectangle(left_x, bottom_y, right_x, top_y);
  1921 #ifndef GRAPHICS_DISABLED  1929     color = default_color;
  1931       color = table_color;
  1934     int left_x = box.
left();
  1935     int right_x = box.
right();
  1936     int top_y = box.
top();
  1937     int bottom_y = box.
bottom();
  1940     win->
Rectangle(left_x, bottom_y, right_x, top_y);
  1955 #ifndef GRAPHICS_DISABLED  1963     int left_x = box.
left();
  1964     int right_x = box.
right();
  1965     int top_y = box.
top();
  1966     int bottom_y = box.
bottom();
  1971       int mid_x = (left_x + right_x) / 2;
  1972       int mid_y = (top_y + bottom_y) / 2;
  1973       int other_x = (upper_box.
left() + upper_box.
right()) / 2;
  1974       int other_y = (upper_box.
top() + upper_box.
bottom()) / 2;
  1977       win->
Line(mid_x, mid_y, other_x, other_y);
  1982       int mid_x = (left_x + right_x) / 2;
  1983       int mid_y = (top_y + bottom_y) / 2;
  1984       int other_x = (lower_box.
left() + lower_box.
right()) / 2;
  1985       int other_y = (lower_box.
top() + lower_box.
bottom()) / 2;
  1988       win->
Line(mid_x, mid_y, other_x, other_y);
  2036         if (table_partition) {
  2037           table_partition->
Absorb(part, width_cb);
  2039           table_partition = part;
  2044     if (table_partition) {
  2056       grid->
InsertBBox(
true, 
true, table_partition);
  2065       num_table_cells_(0),
  2078   return kBoxColors[type_];
  2091   else if (num_text_cells_ > num_table_cells_)
 const double kLargeTableProjectionThreshold
 
const double kAllowTextArea
 
bool BelongToOneTable(const TBOX &box1, const TBOX &box2)
 
void RepositionIterator()
 
bool major_y_overlap(const TBOX &box) const
 
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
 
const double kMaxTableCellXheight
 
ScrollView * MakeWindow(int x, int y, const char *window_name)
 
const ICOORD & bleft() const
 
const double kTableColumnThreshold
 
const double kMaxGapInTextPartition
 
void FindPartitionPartners()
 
#define BOOL_VAR(name, val, comment)
 
const int kMaxBoxesInDataPartition
 
bool HasLeaderAdjacent(const ColPartition &part)
 
void set_global_median_xheight(int xheight)
 
bool IsInSameColumnAs(const ColPartition &part) const
 
void SetVerticalSpacing(ColPartition *part)
 
int global_median_blob_width_
 
const double kMinMaxGapInTextPartition
 
void StartSideSearch(int x, int ymin, int ymax)
 
void set_space_to_right(int space)
 
const double kParagraphEndingPreviousLineRatio
 
TBOX intersection(const TBOX &box) const
 
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
 
int space_to_left() const
 
void SmoothTablePartitionRuns()
 
void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
void SetGlobalSpacings(ColPartitionGrid *grid)
 
BlobRegionType blob_type() const
 
const ICOORD & bleft() const
 
int16_t y() const
access_function 
 
ColPartitionGrid leader_and_ruling_grid_
 
const int kMaxVerticalSpacing
 
void InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
 
void DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
bool textord_tablefind_show_stats
 
BlobTextFlowType flow() const
 
bool HasWideOrNoInterWordGap(ColPartition *part) const
 
const double kAllowTextWidth
 
const double kAllowBlobArea
 
int median_height() const
 
ColPartition * SingletonPartner(bool upper)
 
int global_median_xheight_
 
void MarkTablePartitions()
 
bool MatchingSizes(const ColPartition &other) const
 
static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns)
 
int direction(EDGEPT *point)
 
void set_space_above(int space)
 
bool GapInXProjection(int *xprojection, int length)
 
void SetColumnsType(ColSegment_LIST *col_segments)
 
void DeleteObject(T *object)
 
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2)
 
StructuredTable * RecognizeTable(const TBOX &guess_box)
 
bool AllowBlob(const BLOBNBOX &blob) const
 
void Absorb(ColPartition *other, WidthCallback *cb)
 
void set_max_text_height(int height)
 
const double kMinOverlapWithTable
 
void FilterParagraphEndings()
 
const TBOX & bounding_box() const
 
void InsertTextPartition(ColPartition *part)
 
ScrollView * MakeWindow(int x, int y, const char *window_name)
 
void set_line_grid(ColPartitionGrid *lines)
 
const int kMaxColumnHeaderDistance
 
void InsertBox(const TBOX &other)
 
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
 
void AdjustTableBoundaries()
 
void StartRectSearch(const TBOX &rect)
 
void DeleteSingleColumnTables()
 
double overlap_fraction(const TBOX &box) const
 
void set_inside_table_column(bool val)
 
const int kSideSpaceMargin
 
void DisplayColSegmentGrid(ScrollView *win, ColSegmentGrid *grid, ScrollView::Color color)
 
const double kMaxBlobOverlapFactor
 
void MarkPartitionsUsingLocalInformation()
 
void set_nearest_neighbor_above(ColPartition *part)
 
void set_blob_type(BlobRegionType t)
 
void set_flow(BlobTextFlowType f)
 
const int kMinBoxesInTextPartition
 
void InsertRulingPartition(ColPartition *part)
 
int RightAtY(int y) const
 
void Display(ScrollView *window, ScrollView::Color color)
 
void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments)
 
bool textord_tablefind_show_mark
 
void InsertLeaderPartition(ColPartition *part)
 
const double kMinParagraphEndingTextToWhitespaceRatio
 
const ICOORD & tright() const
 
const int kMinRowsInTable
 
void set_global_median_ledding(int ledding)
 
const double kSmallTableProjectionThreshold
 
bool inside_table_column()
 
void set_space_to_left(int space)
 
const double kMaxParagraphEndingLeftSpaceMultiple
 
BBC * NextSideSearch(bool right_to_left)
 
void GrowTableBox(const TBOX &table_box, TBOX *result_box)
 
int median_bottom() const
 
const double kStrokeWidthConstantTolerance
 
int16_t x() const
access function 
 
ColPartition * SplitAt(int split_x)
 
BlobRegionType region_type() const
 
bool major_x_overlap(const TBOX &box) const
 
int global_median_ledding_
 
void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
 
void IncludeLeftOutColumnHeaders(TBOX *table_box)
 
ScrollView::Color BoxColor() const
 
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
 
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)
 
#define ELISTIZE(CLASSNAME)
 
void set_bounding_box(const TBOX &other)
 
ColSegmentGrid col_seg_grid_
 
const int kRulingVerticalMargin
 
void set_min_height(int height)
 
void set_nearest_neighbor_below(ColPartition *part)
 
void StartVerticalSearch(int xmin, int xmax, int y)
 
ColPartition * CopyButDontOwnBlobs()
 
const double kAllowBlobHeight
 
ColSegmentGrid table_grid_
 
const double kAllowTextHeight
 
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb)
 
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
 
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool left_to_right_language_
 
ColPartitionGrid clean_part_grid_
 
void add(int32_t value, int32_t count)
 
void AddBox(BLOBNBOX *box)
 
ColPartition * nearest_neighbor_below() const
 
BlobTextFlowType flow() const
 
void set_space_below(int space)
 
bool AllowTextPartition(const ColPartition &part) const
 
void FilterHeaderAndFooter()
 
const TBOX & bounding_box() const
 
bool VSignificantCoreOverlap(const ColPartition &other) const
 
void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
 
bool overlap(const TBOX &box) const
 
void GridMergeColumnBlocks()
 
const int kLargeTableRowCount
 
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb, const FCOORD &reskew)
 
bool contains(const FCOORD pt) const
 
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void SetPartitionType(int resolution, ColPartitionSet *columns)
 
void RefinePartitionPartners(bool get_desperate)
 
bool textord_tablefind_recognize_tables
 
const TBOX & bounding_box() const
 
void ClearGridData(void(*free_method)(BBC *))
 
int space_to_right() const
 
const int kAdjacentLeaderSearchPadding
 
void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
 
void Rectangle(int x1, int y1, int x2, int y2)
 
const double kMaxXProjectionGapFactor
 
const TBOX & bounding_box() const
 
TBOX bounding_union(const TBOX &box) const
 
const double kStrokeWidthFractionalTolerance
 
void set_num_text_cells(int n)
 
BBC * NextVerticalSearch(bool top_to_bottom)
 
bool IsHorizontalLine() const
 
CLISTIZE(BLOCK_RES) ELISTIZE(ROW_RES) ELISTIZE(WERD_RES) static const double kStopperAmbiguityThresholdGain
 
ColPartition * ColumnContaining(int x, int y)
 
void set_left_to_right_language(bool order)
 
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
 
void Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
 
void SetUniqueMode(bool mode)
 
ColPartition * ShallowCopy() const
 
void GridMergeTableRegions()
 
void InsertFragmentedTextPartition(ColPartition *part)
 
bool HLineBelongsToTable(const ColPartition &part, const TBOX &table_box)
 
void InitializePartitions(ColPartitionSet **all_columns)
 
void InsertImagePartition(ColPartition *part)
 
void GetTableColumns(ColSegment_LIST *table_columns)
 
ColPartitionGrid fragmented_text_grid_
 
void DisplayBoxes(ScrollView *window)
 
void set_num_table_cells(int n)
 
const double kAllowBlobWidth
 
void Line(int x1, int y1, int x2, int y2)
 
void set_text_grid(ColPartitionGrid *text)
 
const ICOORD & tright() const
 
const double kSplitPartitionSize
 
void set_global_median_blob_width(int width)
 
PolyBlockType type() const
 
ColPartition * nearest_neighbor_above() const