tesseract  5.0.0-alpha-619-ge9db
tesseract::TableFinder Class Reference

#include <tablefind.h>

Public Member Functions

 TableFinder ()
 
 ~TableFinder ()
 
void set_resolution (int resolution)
 
void set_left_to_right_language (bool order)
 
void Init (int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
 
void InsertCleanPartitions (ColPartitionGrid *grid, TO_BLOCK *block)
 
void LocateTables (ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb, const FCOORD &reskew)
 

Protected Member Functions

int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void InsertTextPartition (ColPartition *part)
 
void InsertFragmentedTextPartition (ColPartition *part)
 
void InsertLeaderPartition (ColPartition *part)
 
void InsertRulingPartition (ColPartition *part)
 
void InsertImagePartition (ColPartition *part)
 
void SplitAndInsertFragmentedTextPartition (ColPartition *part)
 
bool AllowTextPartition (const ColPartition &part) const
 
bool AllowBlob (const BLOBNBOX &blob) const
 
void MoveColSegmentsToGrid (ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
 
void InitializePartitions (ColPartitionSet **all_columns)
 
void SetVerticalSpacing (ColPartition *part)
 
void SetGlobalSpacings (ColPartitionGrid *grid)
 
void set_global_median_xheight (int xheight)
 
void set_global_median_blob_width (int width)
 
void set_global_median_ledding (int ledding)
 
void FindNeighbors ()
 
void MarkTablePartitions ()
 
void MarkPartitionsUsingLocalInformation ()
 
bool HasWideOrNoInterWordGap (ColPartition *part) const
 
bool HasLeaderAdjacent (const ColPartition &part)
 
void FilterFalseAlarms ()
 
void FilterParagraphEndings ()
 
void FilterHeaderAndFooter ()
 
void SmoothTablePartitionRuns ()
 
void GetColumnBlocks (ColPartitionSet **columns, ColSegment_LIST *col_segments)
 
void GroupColumnBlocks (ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
 
bool ConsecutiveBoxes (const TBOX &b1, const TBOX &b2)
 
void SetColumnsType (ColSegment_LIST *col_segments)
 
void GridMergeColumnBlocks ()
 
void GetTableColumns (ColSegment_LIST *table_columns)
 
void GetTableRegions (ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
 
void GridMergeTableRegions ()
 
bool BelongToOneTable (const TBOX &box1, const TBOX &box2)
 
void AdjustTableBoundaries ()
 
void GrowTableBox (const TBOX &table_box, TBOX *result_box)
 
void GrowTableToIncludePartials (const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
void GrowTableToIncludeLines (const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
 
bool HLineBelongsToTable (const ColPartition &part, const TBOX &table_box)
 
void IncludeLeftOutColumnHeaders (TBOX *table_box)
 
void DeleteSingleColumnTables ()
 
bool GapInXProjection (int *xprojection, int length)
 
void RecognizeTables ()
 
void DisplayColSegments (ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
 
void DisplayColPartitions (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
 
void DisplayColPartitions (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
void DisplayColPartitionConnections (ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
 
void DisplayColSegmentGrid (ScrollView *win, ColSegmentGrid *grid, ScrollView::Color color)
 
void MakeTableBlocks (ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb)
 

Static Protected Member Functions

static void SetPartitionSpacings (ColPartitionGrid *grid, ColPartitionSet **all_columns)
 

Protected Attributes

int resolution_
 
int global_median_xheight_
 
int global_median_blob_width_
 
int global_median_ledding_
 
ColPartitionGrid clean_part_grid_
 
ColPartitionGrid leader_and_ruling_grid_
 
ColPartitionGrid fragmented_text_grid_
 
ColSegmentGrid col_seg_grid_
 
ColSegmentGrid table_grid_
 
bool left_to_right_language_
 

Detailed Description

Definition at line 130 of file tablefind.h.

Constructor & Destructor Documentation

◆ TableFinder()

tesseract::TableFinder::TableFinder ( )

Definition at line 159 of file tablefind.cpp.

160  : resolution_(0),
165 }

◆ ~TableFinder()

tesseract::TableFinder::~TableFinder ( )

Definition at line 167 of file tablefind.cpp.

167  {
168  // ColPartitions and ColSegments created by this class for storage in grids
169  // need to be deleted explicitly.
170  clean_part_grid_.ClearGridData(&DeleteObject<ColPartition>);
171  leader_and_ruling_grid_.ClearGridData(&DeleteObject<ColPartition>);
172  fragmented_text_grid_.ClearGridData(&DeleteObject<ColPartition>);
173  col_seg_grid_.ClearGridData(&DeleteObject<ColSegment>);
174  table_grid_.ClearGridData(&DeleteObject<ColSegment>);
175 }

Member Function Documentation

◆ AdjustTableBoundaries()

void tesseract::TableFinder::AdjustTableBoundaries ( )
protected

Definition at line 1486 of file tablefind.cpp.

1486  {
1487  // Iterate the table regions in the grid
1488  ColSegment_CLIST adjusted_tables;
1489  ColSegment_C_IT it(&adjusted_tables);
1491  gsearch.StartFullSearch();
1492  ColSegment* table = nullptr;
1493  while ((table = gsearch.NextFullSearch()) != nullptr) {
1494  const TBOX& table_box = table->bounding_box();
1495  TBOX grown_box = table_box;
1496  GrowTableBox(table_box, &grown_box);
1497  // To prevent a table from expanding again, do not insert the
1498  // modified box back to the grid. Instead move it to a list and
1499  // and remove it from the grid. The list is moved later back to the grid.
1500  if (!grown_box.null_box()) {
1501  auto* col = new ColSegment();
1502  col->InsertBox(grown_box);
1503  it.add_after_then_move(col);
1504  }
1505  gsearch.RemoveBBox();
1506  delete table;
1507  }
1508  // clear table grid to move final tables in it
1509  // TODO(nbeato): table_grid_ should already be empty. The above loop
1510  // removed everything. Maybe just assert it is empty?
1511  table_grid_.Clear();
1512  it.move_to_first();
1513  // move back final tables to table_grid_
1514  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1515  ColSegment* seg = it.extract();
1516  table_grid_.InsertBBox(true, true, seg);
1517  }
1518 }

◆ AllowBlob()

bool tesseract::TableFinder::AllowBlob ( const BLOBNBOX & blob) const
protected

Definition at line 502 of file tablefind.cpp.

502  {
503  const TBOX& box = blob.bounding_box();
504  const double kHeightRequired = global_median_xheight_ * kAllowBlobHeight;
505  const double kWidthRequired = global_median_blob_width_ * kAllowBlobWidth;
506  const int median_area = global_median_xheight_ * global_median_blob_width_;
507  const double kAreaRequired = median_area * kAllowBlobArea;
508  // Keep comparisons strictly greater to disallow 0!
509  return box.height() > kHeightRequired &&
510  box.width() > kWidthRequired &&
511  box.area() > kAreaRequired;
512 }

◆ AllowTextPartition()

bool tesseract::TableFinder::AllowTextPartition ( const ColPartition & part) const
protected

Definition at line 489 of file tablefind.cpp.

489  {
490  const double kHeightRequired = global_median_xheight_ * kAllowTextHeight;
491  const double kWidthRequired = global_median_blob_width_ * kAllowTextWidth;
492  const int median_area = global_median_xheight_ * global_median_blob_width_;
493  const double kAreaPerBlobRequired = median_area * kAllowTextArea;
494  // Keep comparisons strictly greater to disallow 0!
495  return part.median_height() > kHeightRequired &&
496  part.median_width() > kWidthRequired &&
497  part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
498 }

◆ BelongToOneTable()

bool tesseract::TableFinder::BelongToOneTable ( const TBOX & box1,
const TBOX & box2 
)
protected

Definition at line 1444 of file tablefind.cpp.

1444  {
1445  // Check the obvious case. Most likely not true because overlapping boxes
1446  // should already be merged, but seems like a good thing to do in case things
1447  // change.
1448  if (box1.overlap(box2))
1449  return true;
1450  // Check for ColPartitions spanning both table regions
1451  TBOX bbox = box1.bounding_union(box2);
1452  // Start a rect search on bbox
1453  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1454  rectsearch(&clean_part_grid_);
1455  rectsearch.StartRectSearch(bbox);
1456  ColPartition* part = nullptr;
1457  while ((part = rectsearch.NextRectSearch()) != nullptr) {
1458  const TBOX& part_box = part->bounding_box();
1459  // return true if a colpartition spanning both table regions is found
1460  if (part_box.overlap(box1) && part_box.overlap(box2) &&
1461  !part->IsImageType())
1462  return true;
1463  }
1464  return false;
1465 }

◆ bleft()

const ICOORD & tesseract::TableFinder::bleft ( ) const
protected

Definition at line 387 of file tablefind.cpp.

387  {
388  return clean_part_grid_.bleft();
389 }

◆ ConsecutiveBoxes()

bool tesseract::TableFinder::ConsecutiveBoxes ( const TBOX & b1,
const TBOX & b2 
)
protected

Definition at line 568 of file tablefind.cpp.

568  {
569  int x_margin = 20;
570  int y_margin = 5;
571  return (abs(b1.left() - b2.left()) < x_margin) &&
572  (abs(b1.right() - b2.right()) < x_margin) &&
573  (abs(b1.top()-b2.bottom()) < y_margin ||
574  abs(b2.top()-b1.bottom()) < y_margin);
575 }

◆ DeleteSingleColumnTables()

void tesseract::TableFinder::DeleteSingleColumnTables ( )
protected

Definition at line 1703 of file tablefind.cpp.

1703  {
1704  int page_width = tright().x() - bleft().x();
1705  ASSERT_HOST(page_width > 0);
1706  // create an integer array to hold projection on x-axis
1707  int* table_xprojection = new int[page_width];
1708  // Iterate through all tables in the table grid
1709  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1710  table_search(&table_grid_);
1711  table_search.StartFullSearch();
1712  ColSegment* table;
1713  while ((table = table_search.NextFullSearch()) != nullptr) {
1714  TBOX table_box = table->bounding_box();
1715  // reset the projection array
1716  for (int i = 0; i < page_width; i++) {
1717  table_xprojection[i] = 0;
1718  }
1719  // Start a rect search on table_box
1720  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1721  rectsearch(&clean_part_grid_);
1722  rectsearch.SetUniqueMode(true);
1723  rectsearch.StartRectSearch(table_box);
1724  ColPartition* part;
1725  while ((part = rectsearch.NextRectSearch()) != nullptr) {
1726  if (!part->IsTextType())
1727  continue; // Do not consider non-text partitions
1728  if (part->flow() == BTFT_LEADER)
1729  continue; // Assume leaders are in tables
1730  TBOX part_box = part->bounding_box();
1731  // Do not consider partitions partially covered by the table
1732  if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable)
1733  continue;
1734  BLOBNBOX_CLIST* part_boxes = part->boxes();
1735  BLOBNBOX_C_IT pit(part_boxes);
1736 
1737  // Make sure overlapping blobs don't artificially inflate the number
1738  // of rows in the table. This happens frequently with things such as
1739  // decimals and split characters. Do this by assuming the column
1740  // partition is sorted mostly left to right and just clip
1741  // bounding boxes by the previous box's extent.
1742  int next_position_to_write = 0;
1743 
1744  for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
1745  BLOBNBOX *pblob = pit.data();
1746  // ignore blob height for the purpose of projection since we
1747  // are only interested in finding valleys
1748  int xstart = pblob->bounding_box().left();
1749  int xend = pblob->bounding_box().right();
1750 
1751  xstart = std::max(xstart, next_position_to_write);
1752  for (int i = xstart; i < xend; i++)
1753  table_xprojection[i - bleft().x()]++;
1754  next_position_to_write = xend;
1755  }
1756  }
1757  // Find largest valley between two reasonable peaks in the table
1758  if (!GapInXProjection(table_xprojection, page_width)) {
1759  table_search.RemoveBBox();
1760  delete table;
1761  }
1762  }
1763  delete[] table_xprojection;
1764 }

◆ DisplayColPartitionConnections()

void tesseract::TableFinder::DisplayColPartitionConnections ( ScrollView * win,
ColPartitionGrid * grid,
ScrollView::Color  default_color 
)
protected

Definition at line 1950 of file tablefind.cpp.

1953  {
1954 #ifndef GRAPHICS_DISABLED
1955  // Iterate the ColPartitions in the grid.
1956  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1957  gsearch(grid);
1958  gsearch.StartFullSearch();
1959  ColPartition* part = nullptr;
1960  while ((part = gsearch.NextFullSearch()) != nullptr) {
1961  const TBOX& box = part->bounding_box();
1962  int left_x = box.left();
1963  int right_x = box.right();
1964  int top_y = box.top();
1965  int bottom_y = box.bottom();
1966 
1967  ColPartition* upper_part = part->nearest_neighbor_above();
1968  if (upper_part) {
1969  const TBOX& upper_box = upper_part->bounding_box();
1970  int mid_x = (left_x + right_x) / 2;
1971  int mid_y = (top_y + bottom_y) / 2;
1972  int other_x = (upper_box.left() + upper_box.right()) / 2;
1973  int other_y = (upper_box.top() + upper_box.bottom()) / 2;
1974  win->Brush(ScrollView::NONE);
1975  win->Pen(color);
1976  win->Line(mid_x, mid_y, other_x, other_y);
1977  }
1978  ColPartition* lower_part = part->nearest_neighbor_below();
1979  if (lower_part) {
1980  const TBOX& lower_box = lower_part->bounding_box();
1981  int mid_x = (left_x + right_x) / 2;
1982  int mid_y = (top_y + bottom_y) / 2;
1983  int other_x = (lower_box.left() + lower_box.right()) / 2;
1984  int other_y = (lower_box.top() + lower_box.bottom()) / 2;
1985  win->Brush(ScrollView::NONE);
1986  win->Pen(color);
1987  win->Line(mid_x, mid_y, other_x, other_y);
1988  }
1989  }
1990  win->UpdateWindow();
1991 #endif
1992 }

◆ DisplayColPartitions() [1/2]

void tesseract::TableFinder::DisplayColPartitions ( ScrollView * win,
ColPartitionGrid * grid,
ScrollView::Color  default_color 
)
protected

Definition at line 1944 of file tablefind.cpp.

1946  {
1947  DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
1948 }

◆ DisplayColPartitions() [2/2]

void tesseract::TableFinder::DisplayColPartitions ( ScrollView * win,
ColPartitionGrid * grid,
ScrollView::Color  text_color,
ScrollView::Color  table_color 
)
protected

Definition at line 1916 of file tablefind.cpp.

1919  {
1920 #ifndef GRAPHICS_DISABLED
1921  ScrollView::Color color = default_color;
1922  // Iterate the ColPartitions in the grid.
1923  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1924  gsearch(grid);
1925  gsearch.StartFullSearch();
1926  ColPartition* part = nullptr;
1927  while ((part = gsearch.NextFullSearch()) != nullptr) {
1928  color = default_color;
1929  if (part->type() == PT_TABLE)
1930  color = table_color;
1931 
1932  const TBOX& box = part->bounding_box();
1933  int left_x = box.left();
1934  int right_x = box.right();
1935  int top_y = box.top();
1936  int bottom_y = box.bottom();
1937  win->Brush(ScrollView::NONE);
1938  win->Pen(color);
1939  win->Rectangle(left_x, bottom_y, right_x, top_y);
1940  }
1941  win->UpdateWindow();
1942 #endif
1943 }

◆ DisplayColSegmentGrid()

void tesseract::TableFinder::DisplayColSegmentGrid ( ScrollView * win,
ColSegmentGrid * grid,
ScrollView::Color  color 
)
protected

Definition at line 1891 of file tablefind.cpp.

1892  {
1893 #ifndef GRAPHICS_DISABLED
1894  // Iterate the ColPartitions in the grid.
1895  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1896  gsearch(grid);
1897  gsearch.StartFullSearch();
1898  ColSegment* seg = nullptr;
1899  while ((seg = gsearch.NextFullSearch()) != nullptr) {
1900  const TBOX& box = seg->bounding_box();
1901  int left_x = box.left();
1902  int right_x = box.right();
1903  int top_y = box.top();
1904  int bottom_y = box.bottom();
1905  win->Brush(ScrollView::NONE);
1906  win->Pen(color);
1907  win->Rectangle(left_x, bottom_y, right_x, top_y);
1908  }
1909  win->UpdateWindow();
1910 #endif
1911 }

◆ DisplayColSegments()

void tesseract::TableFinder::DisplayColSegments ( ScrollView * win,
ColSegment_LIST *  cols,
ScrollView::Color  color 
)
protected

Definition at line 1871 of file tablefind.cpp.

1873  {
1874 #ifndef GRAPHICS_DISABLED
1875  win->Pen(color);
1876  win->Brush(ScrollView::NONE);
1877  ColSegment_IT it(segments);
1878  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1879  ColSegment* col = it.data();
1880  const TBOX& box = col->bounding_box();
1881  int left_x = box.left();
1882  int right_x = box.right();
1883  int top_y = box.top();
1884  int bottom_y = box.bottom();
1885  win->Rectangle(left_x, bottom_y, right_x, top_y);
1886  }
1887  win->UpdateWindow();
1888 #endif
1889 }

◆ FilterFalseAlarms()

void tesseract::TableFinder::FilterFalseAlarms ( )
protected

Definition at line 988 of file tablefind.cpp.

988  {
991  // TODO(nbeato): Fully justified text as non-table?
992 }

◆ FilterHeaderAndFooter()

void tesseract::TableFinder::FilterHeaderAndFooter ( )
protected

Definition at line 1074 of file tablefind.cpp.

1074  {
1075  // Consider top-most text colpartition as header and bottom most as footer
1076  ColPartition* header = nullptr;
1077  ColPartition* footer = nullptr;
1078  int max_top = INT32_MIN;
1079  int min_bottom = INT32_MAX;
1081  gsearch.StartFullSearch();
1082  ColPartition* part = nullptr;
1083  while ((part = gsearch.NextFullSearch()) != nullptr) {
1084  if (!part->IsTextType())
1085  continue; // Consider only text partitions
1086  int top = part->bounding_box().top();
1087  int bottom = part->bounding_box().bottom();
1088  if (top > max_top) {
1089  max_top = top;
1090  header = part;
1091  }
1092  if (bottom < min_bottom) {
1093  min_bottom = bottom;
1094  footer = part;
1095  }
1096  }
1097  if (header)
1098  header->clear_table_type();
1099  if (footer)
1100  footer->clear_table_type();
1101 }

◆ FilterParagraphEndings()

void tesseract::TableFinder::FilterParagraphEndings ( )
protected

Definition at line 994 of file tablefind.cpp.

994  {
995  // Detect last line of paragraph
996  // Iterate the ColPartitions in the grid.
998  gsearch.StartFullSearch();
999  ColPartition* part = nullptr;
1000  while ((part = gsearch.NextFullSearch()) != nullptr) {
1001  if (part->type() != PT_TABLE)
1002  continue; // Consider only table partitions
1003 
1004  // Paragraph ending should have flowing text above it.
1005  ColPartition* upper_part = part->nearest_neighbor_above();
1006  if (!upper_part)
1007  continue;
1008  if (upper_part->type() != PT_FLOWING_TEXT)
1009  continue;
1010  if (upper_part->bounding_box().width() <
1011  2 * part->bounding_box().width())
1012  continue;
1013  // Check if its the last line of a paragraph.
1014  // In most cases, a paragraph ending should be left-aligned to text line
1015  // above it. Sometimes, it could be a 2 line paragraph, in which case
1016  // the line above it is indented.
1017  // To account for that, check if the partition center is to
1018  // the left of the one above it.
1019  int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2;
1020  int upper_mid = (upper_part->bounding_box().left() +
1021  upper_part->bounding_box().right()) / 2;
1022  int current_spacing = 0; // spacing of the current line to margin
1023  int upper_spacing = 0; // spacing of the previous line to the margin
1025  // Left to right languages, use mid - left to figure out the distance
1026  // the middle is from the left margin.
1027  int left = std::min(part->bounding_box().left(),
1028  upper_part->bounding_box().left());
1029  current_spacing = mid - left;
1030  upper_spacing = upper_mid - left;
1031  } else {
1032  // Right to left languages, use right - mid to figure out the distance
1033  // the middle is from the right margin.
1034  int right = std::max(part->bounding_box().right(),
1035  upper_part->bounding_box().right());
1036  current_spacing = right - mid;
1037  upper_spacing = right - upper_mid;
1038  }
1039  if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing)
1040  continue;
1041 
1042  // Paragraphs should have similar fonts.
1043  if (!part->MatchingSizes(*upper_part) ||
1044  !part->MatchingStrokeWidth(*upper_part, kStrokeWidthFractionalTolerance,
1046  continue;
1047  }
1048 
1049  // The last line of a paragraph should be left aligned.
1050  // TODO(nbeato): This would be untrue if the text was right aligned.
1051  // How often is that?
1052  if (part->space_to_left() >
1053  kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
1054  continue;
1055  // The line above it should be right aligned (assuming justified format).
1056  // Since we can't assume justified text, we compare whitespace to text.
1057  // The above line should have majority spanning text (or the current
1058  // line could have fit on the previous line). So compare
1059  // whitespace to text.
1060  if (upper_part->bounding_box().width() <
1061  kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right())
1062  continue;
1063 
1064  // Ledding above the line should be less than ledding below
1065  if (part->space_above() >= part->space_below() ||
1066  part->space_above() > 2 * global_median_ledding_)
1067  continue;
1068 
1069  // If all checks failed, it is probably text.
1070  part->clear_table_type();
1071  }
1072 }

◆ FindNeighbors()

void tesseract::TableFinder::FindNeighbors ( )
protected

Definition at line 766 of file tablefind.cpp.

766  {
768  gsearch.StartFullSearch();
769  ColPartition* part = nullptr;
770  while ((part = gsearch.NextFullSearch()) != nullptr) {
771  // TODO(nbeato): Rename this function, meaning is different now.
772  // IT is finding nearest neighbors its own way
773  //SetVerticalSpacing(part);
774 
775  ColPartition* upper = part->SingletonPartner(true);
776  if (upper)
777  part->set_nearest_neighbor_above(upper);
778 
779  ColPartition* lower = part->SingletonPartner(false);
780  if (lower)
781  part->set_nearest_neighbor_below(lower);
782  }
783 }

◆ GapInXProjection()

bool tesseract::TableFinder::GapInXProjection ( int *  xprojection,
int  length 
)
protected

Definition at line 1768 of file tablefind.cpp.

1768  {
1769  // Find peak value of the histogram
1770  int peak_value = 0;
1771  for (int i = 0; i < length; i++) {
1772  if (xprojection[i] > peak_value) {
1773  peak_value = xprojection[i];
1774  }
1775  }
1776  // Peak value represents the maximum number of horizontally
1777  // overlapping colpartitions, so this can be considered as the
1778  // number of rows in the table
1779  if (peak_value < kMinRowsInTable)
1780  return false;
1781  double projection_threshold = kSmallTableProjectionThreshold * peak_value;
1782  if (peak_value >= kLargeTableRowCount)
1783  projection_threshold = kLargeTableProjectionThreshold * peak_value;
1784  // Threshold the histogram
1785  for (int i = 0; i < length; i++) {
1786  xprojection[i] = (xprojection[i] >= projection_threshold) ? 1 : 0;
1787  }
1788  // Find the largest run of zeros between two ones
1789  int largest_gap = 0;
1790  int run_start = -1;
1791  for (int i = 1; i < length; i++) {
1792  // detect start of a run of zeros
1793  if (xprojection[i - 1] && !xprojection[i]) {
1794  run_start = i;
1795  }
1796  // detect end of a run of zeros and update the value of largest gap
1797  if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) {
1798  int gap = i - run_start;
1799  if (gap > largest_gap)
1800  largest_gap = gap;
1801  run_start = -1;
1802  }
1803  }
1804  return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
1805 }

◆ GetColumnBlocks()

void tesseract::TableFinder::GetColumnBlocks ( ColPartitionSet **  columns,
ColSegment_LIST *  col_segments 
)
protected

Definition at line 523 of file tablefind.cpp.

524  {
525  for (int i = 0; i < gridheight(); ++i) {
526  ColPartitionSet* columns = all_columns[i];
527  if (columns != nullptr) {
528  ColSegment_LIST new_blocks;
529  // Get boxes from the current vertical position on the grid
530  columns->GetColumnBoxes(i * gridsize(), (i+1) * gridsize(), &new_blocks);
531  // Merge the new_blocks boxes into column_blocks if they are well-aligned
532  GroupColumnBlocks(&new_blocks, column_blocks);
533  }
534  }
535 }

◆ GetTableColumns()

void tesseract::TableFinder::GetTableColumns ( ColSegment_LIST *  table_columns)
protected

Definition at line 1273 of file tablefind.cpp.

1273  {
1274  ColSegment_IT it(table_columns);
1275  // Iterate the ColPartitions in the grid.
1276  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1277  gsearch(&clean_part_grid_);
1278  gsearch.StartFullSearch();
1279  ColPartition* part;
1280  while ((part = gsearch.NextFullSearch()) != nullptr) {
1281  if (part->inside_table_column() || part->type() != PT_TABLE)
1282  continue; // prevent a partition to be assigned to multiple columns
1283  const TBOX& box = part->bounding_box();
1284  auto* col = new ColSegment();
1285  col->InsertBox(box);
1286  part->set_inside_table_column(true);
1287  // Start a search below the current cell to find bottom neighbours
1288  // Note: a full search will always process things above it first, so
1289  // this should be starting at the highest cell and working its way down.
1290  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1291  vsearch(&clean_part_grid_);
1292  vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom());
1293  ColPartition* neighbor = nullptr;
1294  bool found_neighbours = false;
1295  while ((neighbor = vsearch.NextVerticalSearch(true)) != nullptr) {
1296  // only consider neighbors not assigned to any column yet
1297  if (neighbor->inside_table_column())
1298  continue;
1299  // Horizontal lines should not break the flow
1300  if (neighbor->IsHorizontalLine())
1301  continue;
1302  // presence of a non-table neighbor marks the end of current
1303  // table column
1304  if (neighbor->type() != PT_TABLE)
1305  break;
1306  // add the neighbor partition to the table column
1307  const TBOX& neighbor_box = neighbor->bounding_box();
1308  col->InsertBox(neighbor_box);
1309  neighbor->set_inside_table_column(true);
1310  found_neighbours = true;
1311  }
1312  if (found_neighbours) {
1313  it.add_after_then_move(col);
1314  } else {
1315  part->set_inside_table_column(false);
1316  delete col;
1317  }
1318  }
1319 }

◆ GetTableRegions()

void tesseract::TableFinder::GetTableRegions ( ColSegment_LIST *  table_columns,
ColSegment_LIST *  table_regions 
)
protected

Definition at line 1323 of file tablefind.cpp.

1324  {
1325  ColSegment_IT cit(table_columns);
1326  ColSegment_IT rit(table_regions);
1327  // Iterate through column blocks
1328  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1329  gsearch(&col_seg_grid_);
1330  gsearch.StartFullSearch();
1331  ColSegment* part;
1332  int page_height = tright().y() - bleft().y();
1333  ASSERT_HOST(page_height > 0);
1334  // create a bool array to hold projection on y-axis
1335  bool* table_region = new bool[page_height];
1336  while ((part = gsearch.NextFullSearch()) != nullptr) {
1337  const TBOX& part_box = part->bounding_box();
1338  // reset the projection array
1339  for (int i = 0; i < page_height; i++) {
1340  table_region[i] = false;
1341  }
1342  // iterate through all table columns to find regions in the current
1343  // page column block
1344  cit.move_to_first();
1345  for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) {
1346  TBOX col_box = cit.data()->bounding_box();
1347  // find intersection region of table column and page column
1348  TBOX intersection_box = col_box.intersection(part_box);
1349  // project table column on the y-axis
1350  for (int i = intersection_box.bottom(); i < intersection_box.top(); i++) {
1351  table_region[i - bleft().y()] = true;
1352  }
1353  }
1354  // set x-limits of table regions to page column width
1355  TBOX current_table_box;
1356  current_table_box.set_left(part_box.left());
1357  current_table_box.set_right(part_box.right());
1358  // go through the y-axis projection to find runs of table
1359  // regions. Each run makes one table region.
1360  for (int i = 1; i < page_height; i++) {
1361  // detect start of a table region
1362  if (!table_region[i - 1] && table_region[i]) {
1363  current_table_box.set_bottom(i + bleft().y());
1364  }
1365  // TODO(nbeato): Is it guaranteed that the last row is not a table region?
1366  // detect end of a table region
1367  if (table_region[i - 1] && !table_region[i]) {
1368  current_table_box.set_top(i + bleft().y());
1369  if (!current_table_box.null_box()) {
1370  auto* seg = new ColSegment();
1371  seg->InsertBox(current_table_box);
1372  rit.add_after_then_move(seg);
1373  }
1374  }
1375  }
1376  }
1377  delete[] table_region;
1378 }

◆ gridheight()

int tesseract::TableFinder::gridheight ( ) const
protected

Definition at line 384 of file tablefind.cpp.

384  {
385  return clean_part_grid_.gridheight();
386 }

◆ GridMergeColumnBlocks()

void tesseract::TableFinder::GridMergeColumnBlocks ( )
protected

Definition at line 1195 of file tablefind.cpp.

1195  {
1196  int margin = gridsize();
1197 
1198  // Iterate the Column Blocks in the grid.
1199  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1200  gsearch(&col_seg_grid_);
1201  gsearch.StartFullSearch();
1202  ColSegment* seg;
1203  while ((seg = gsearch.NextFullSearch()) != nullptr) {
1204  if (seg->type() != COL_TEXT)
1205  continue; // only consider text blocks for split detection
1206  bool neighbor_found = false;
1207  bool modified = false; // Modified at least once
1208  // keep expanding current box as long as neighboring table columns
1209  // are found above or below it.
1210  do {
1211  TBOX box = seg->bounding_box();
1212  // slightly expand the search region vertically
1213  int top_range = std::min(box.top() + margin, static_cast<int>(tright().y()));
1214  int bottom_range = std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
1215  box.set_top(top_range);
1216  box.set_bottom(bottom_range);
1217  neighbor_found = false;
1218  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1219  rectsearch(&col_seg_grid_);
1220  rectsearch.StartRectSearch(box);
1221  ColSegment* neighbor = nullptr;
1222  while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
1223  if (neighbor == seg)
1224  continue;
1225  const TBOX& neighbor_box = neighbor->bounding_box();
1226  // If the neighbor box significantly overlaps with the current
1227  // box (due to the expansion of the current box in the
1228  // previous iteration of this loop), remove the neighbor box
1229  // and expand the current box to include it.
1230  if (neighbor_box.overlap_fraction(box) >= 0.9) {
1231  seg->InsertBox(neighbor_box);
1232  modified = true;
1233  rectsearch.RemoveBBox();
1234  gsearch.RepositionIterator();
1235  delete neighbor;
1236  continue;
1237  }
1238  // Only expand if the neighbor box is of table type
1239  if (neighbor->type() != COL_TABLE)
1240  continue;
1241  // Insert the neighbor box into the current column block
1242  if (neighbor_box.major_x_overlap(box) &&
1243  !box.contains(neighbor_box)) {
1244  seg->InsertBox(neighbor_box);
1245  neighbor_found = true;
1246  modified = true;
1247  rectsearch.RemoveBBox();
1248  gsearch.RepositionIterator();
1249  delete neighbor;
1250  }
1251  }
1252  } while (neighbor_found);
1253  if (modified) {
1254  // Because the box has changed, it has to be removed first.
1255  gsearch.RemoveBBox();
1256  col_seg_grid_.InsertBBox(true, true, seg);
1257  gsearch.RepositionIterator();
1258  }
1259  }
1260 }

◆ GridMergeTableRegions()

void tesseract::TableFinder::GridMergeTableRegions ( )
protected

Definition at line 1386 of file tablefind.cpp.

1386  {
1387  // Iterate the table regions in the grid.
1388  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1389  gsearch(&table_grid_);
1390  gsearch.StartFullSearch();
1391  ColSegment* seg = nullptr;
1392  while ((seg = gsearch.NextFullSearch()) != nullptr) {
1393  bool neighbor_found = false;
1394  bool modified = false; // Modified at least once
1395  do {
1396  // Start a rectangle search x-bounded by the image and y by the table
1397  const TBOX& box = seg->bounding_box();
1398  TBOX search_region(box);
1399  search_region.set_left(bleft().x());
1400  search_region.set_right(tright().x());
1401  neighbor_found = false;
1402  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
1403  rectsearch(&table_grid_);
1404  rectsearch.StartRectSearch(search_region);
1405  ColSegment* neighbor = nullptr;
1406  while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
1407  if (neighbor == seg)
1408  continue;
1409  const TBOX& neighbor_box = neighbor->bounding_box();
1410  // Check if a neighbor box has a large overlap with the table
1411  // region. This may happen as a result of merging two table
1412  // regions in the previous iteration.
1413  if (neighbor_box.overlap_fraction(box) >= 0.9) {
1414  seg->InsertBox(neighbor_box);
1415  rectsearch.RemoveBBox();
1416  gsearch.RepositionIterator();
1417  delete neighbor;
1418  modified = true;
1419  continue;
1420  }
1421  // Check if two table regions belong together based on a common
1422  // horizontal ruling line
1423  if (BelongToOneTable(box, neighbor_box)) {
1424  seg->InsertBox(neighbor_box);
1425  neighbor_found = true;
1426  modified = true;
1427  rectsearch.RemoveBBox();
1428  gsearch.RepositionIterator();
1429  delete neighbor;
1430  }
1431  }
1432  } while (neighbor_found);
1433  if (modified) {
1434  // Because the box has changed, it has to be removed first.
1435  gsearch.RemoveBBox();
1436  table_grid_.InsertBBox(true, true, seg);
1437  gsearch.RepositionIterator();
1438  }
1439  }
1440 }

◆ gridsize()

int tesseract::TableFinder::gridsize ( ) const
protected

Definition at line 378 of file tablefind.cpp.

378  {
379  return clean_part_grid_.gridsize();
380 }

◆ gridwidth()

int tesseract::TableFinder::gridwidth ( ) const
protected

Definition at line 381 of file tablefind.cpp.

381  {
382  return clean_part_grid_.gridwidth();
383 }

◆ GroupColumnBlocks()

void tesseract::TableFinder::GroupColumnBlocks ( ColSegment_LIST *  current_segments,
ColSegment_LIST *  col_segments 
)
protected

Definition at line 538 of file tablefind.cpp.

539  {
540  ColSegment_IT src_it(new_blocks);
541  ColSegment_IT dest_it(column_blocks);
542  // iterate through the source list
543  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
544  ColSegment* src_seg = src_it.data();
545  const TBOX& src_box = src_seg->bounding_box();
546  bool match_found = false;
547  // iterate through the destination list to find a matching column block
548  for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
549  ColSegment* dest_seg = dest_it.data();
550  TBOX dest_box = dest_seg->bounding_box();
551  if (ConsecutiveBoxes(src_box, dest_box)) {
552  // If matching block is found, insert the current block into it
553  // and delete the source block.
554  dest_seg->InsertBox(src_box);
555  match_found = true;
556  delete src_it.extract();
557  break;
558  }
559  }
560  // If no match is found, just append the source block to column_blocks
561  if (!match_found) {
562  dest_it.add_after_then_move(src_it.extract());
563  }
564  }
565 }

◆ GrowTableBox()

void tesseract::TableFinder::GrowTableBox ( const TBOX &  table_box,
TBOX *  result_box
)
protected

Definition at line 1520 of file tablefind.cpp.

1520  {
1521  // TODO(nbeato): The growing code is a bit excessive right now.
1522  // By removing these lines, the partitions considered need
1523  // to have some overlap or be special cases. These lines could
1524  // be added again once a check is put in place to make sure that
1525  // growing tables don't stomp on a lot of non-table partitions.
1526 
1527  // search for horizontal ruling lines within the vertical margin
1528  // int vertical_margin = kRulingVerticalMargin * gridsize();
1529  TBOX search_box = table_box;
1530  // int top = MIN(search_box.top() + vertical_margin, tright().y());
1531  // int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y());
1532  // search_box.set_top(top);
1533  // search_box.set_bottom(bottom);
1534 
1535  GrowTableToIncludePartials(table_box, search_box, result_box);
1536  GrowTableToIncludeLines(table_box, search_box, result_box);
1537  IncludeLeftOutColumnHeaders(result_box);
1538 }

◆ GrowTableToIncludeLines()

void tesseract::TableFinder::GrowTableToIncludeLines ( const TBOX &  table_box,
const TBOX &  search_range,
TBOX *  result_box
)
protected

Definition at line 1570 of file tablefind.cpp.

1572  {
1574  rsearch.SetUniqueMode(true);
1575  rsearch.StartRectSearch(search_range);
1576  ColPartition* part = nullptr;
1577  while ((part = rsearch.NextRectSearch()) != nullptr) {
1578  // TODO(nbeato) This should also do vertical, but column
1579  // boundaries are breaking things. This function needs to be
1580  // updated to allow vertical lines as well.
1581  if (!part->IsLineType())
1582  continue;
1583  // Avoid the following function call if the result of the
1584  // function is irrelevant.
1585  const TBOX& part_box = part->bounding_box();
1586  if (result_box->contains(part_box))
1587  continue;
1588  // Include a partially overlapping horizontal line only if the
1589  // extra ColPartitions that will be included due to expansion
1590  // have large side spacing w.r.t. columns containing them.
1591  if (HLineBelongsToTable(*part, table_box))
1592  *result_box = result_box->bounding_union(part_box);
1593  // TODO(nbeato): Vertical
1594  }
1595 }

◆ GrowTableToIncludePartials()

void tesseract::TableFinder::GrowTableToIncludePartials ( const TBOX &  table_box,
const TBOX &  search_range,
TBOX *  result_box
)
protected

Definition at line 1542 of file tablefind.cpp.

1544  {
1545  // Rulings are in a different grid, so search 2 grids for rulings, text,
1546  // and table partitions that are not entirely within the new box.
1547  for (int i = 0; i < 2; ++i) {
1548  ColPartitionGrid* grid = (i == 0) ? &fragmented_text_grid_ :
1550  ColPartitionGridSearch rectsearch(grid);
1551  rectsearch.StartRectSearch(search_range);
1552  ColPartition* part = nullptr;
1553  while ((part = rectsearch.NextRectSearch()) != nullptr) {
1554  // Only include text and table types.
1555  if (part->IsImageType())
1556  continue;
1557  const TBOX& part_box = part->bounding_box();
1558  // Include partition in the table if more than half of it
1559  // is covered by the table
1560  if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
1561  *result_box = result_box->bounding_union(part_box);
1562  continue;
1563  }
1564  }
1565  }
1566 }

◆ HasLeaderAdjacent()

bool tesseract::TableFinder::HasLeaderAdjacent ( const ColPartition &  part)
protected

Definition at line 946 of file tablefind.cpp.

946  {
947  if (part.flow() == BTFT_LEADER)
948  return true;
949  // Search range is left and right bounded by an offset of the
950  // median xheight. This offset is to allow some tolerance to the
951  // the leaders on the page in the event that the alignment is still
952  // a bit off.
953  const TBOX& box = part.bounding_box();
954  const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_;
955  const int top = box.top() + search_size;
956  const int bottom = box.bottom() - search_size;
958  for (int direction = 0; direction < 2; ++direction) {
959  bool right_to_left = (direction == 0);
960  int x = right_to_left ? box.right() : box.left();
961  hsearch.StartSideSearch(x, bottom, top);
962  ColPartition* leader = nullptr;
963  while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) {
964  // The leader could be a horizontal ruling in the grid.
965  // Make sure it is actually a leader.
966  if (leader->flow() != BTFT_LEADER)
967  continue;
968  // This should not happen, they are in different grids.
969  ASSERT_HOST(&part != leader);
970  // Make sure the leader shares a page column with the partition,
971  // otherwise we are spreading across columns.
972  if (!part.IsInSameColumnAs(*leader))
973  break;
974  // There should be a significant vertical overlap
975  if (!leader->VSignificantCoreOverlap(part))
976  continue;
977  // Leader passed all tests, so it is adjacent.
978  return true;
979  }
980  }
981  // No leaders are adjacent to the given partition.
982  return false;
983 }

◆ HasWideOrNoInterWordGap()

bool tesseract::TableFinder::HasWideOrNoInterWordGap ( ColPartition *  part) const
protected

Definition at line 857 of file tablefind.cpp.

857  {
858  // Should only get text partitions.
859  ASSERT_HOST(part->IsTextType());
860  // Blob access
861  BLOBNBOX_CLIST* part_boxes = part->boxes();
862  BLOBNBOX_C_IT it(part_boxes);
863  // Check if this is a relatively small partition (such as a single word)
864  if (part->bounding_box().width() <
865  kMinBoxesInTextPartition * part->median_height() &&
866  part_boxes->length() < kMinBoxesInTextPartition)
867  return true;
868 
869  // Variables used to compute inter-blob spacing.
870  int current_x0 = -1;
871  int current_x1 = -1;
872  int previous_x1 = -1;
873  // Stores the maximum gap detected.
874  int largest_partition_gap_found = -1;
875  // Text partition gap limits. If this is text (and not a table),
876  // there should be at least one gap larger than min_gap and no gap
877  // larger than max_gap.
878  const double max_gap = kMaxGapInTextPartition * part->median_height();
879  const double min_gap = kMinMaxGapInTextPartition * part->median_height();
880 
881  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
882  BLOBNBOX* blob = it.data();
883  current_x0 = blob->bounding_box().left();
884  current_x1 = blob->bounding_box().right();
885  if (previous_x1 != -1) {
886  int gap = current_x0 - previous_x1;
887 
888  // TODO(nbeato): Boxes may overlap? Huh?
889  // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors
890  // on the top right of the page are filtered out with this line.
891  // Note 2: Iterating over blobs in a partition, so we are looking for
892  // spacing between the words.
893  if (gap < 0) {
894  // More likely case, the blobs slightly overlap. This can happen
895  // with diacritics (accents) or broken alphabet symbols (characters).
896  // Merge boxes together by taking max of right sides.
897  if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
898  previous_x1 = std::max(previous_x1, current_x1);
899  continue;
900  }
901  // Extreme case, blobs overlap significantly in the same partition...
902  // This should not happen often (if at all), but it does.
903  // TODO(nbeato): investigate cases when this happens.
904  else {
905  // The behavior before was to completely ignore this case.
906  }
907  }
908 
909  // If a large enough gap is found, mark it as a table cell (return true)
910  if (gap > max_gap)
911  return true;
912  if (gap > largest_partition_gap_found)
913  largest_partition_gap_found = gap;
914  }
915  previous_x1 = current_x1;
916  }
917  // Since no large gap was found, return false if the partition is too
918  // long to be a data cell
919  if (part->bounding_box().width() >
920  kMaxBoxesInDataPartition * part->median_height() ||
921  part_boxes->length() > kMaxBoxesInDataPartition)
922  return false;
923 
924  // A partition may be a single blob. In this case, it's an isolated symbol
925  // or non-text (such as a ruling or image).
926  // Detect these as table partitions? Shouldn't this be case by case?
927  // The behavior before was to ignore this, making max_partition_gap < 0
928  // and implicitly return true. Just making it explicit.
929  if (largest_partition_gap_found == -1)
930  return true;
931 
932  // return true if the maximum gap found is smaller than the minimum allowed
933  // max_gap in a text partition. This indicates that there is no significant
934  // space in the partition, hence it is likely a single word.
935  return largest_partition_gap_found < min_gap;
936 }

◆ HLineBelongsToTable()

bool tesseract::TableFinder::HLineBelongsToTable ( const ColPartition &  part,
const TBOX &  table_box
)
protected

Definition at line 1600 of file tablefind.cpp.

1601  {
1602  if (!part.IsHorizontalLine())
1603  return false;
1604  const TBOX& part_box = part.bounding_box();
1605  if (!part_box.major_x_overlap(table_box))
1606  return false;
1607  // Do not consider top-most horizontal line since it usually
1608  // originates from noise.
1609  // TODO(nbeato): I had to comment this out because the ruling grid doesn't
1610  // have neighbors solved.
1611  // if (!part.nearest_neighbor_above())
1612  // return false;
1613  const TBOX bbox = part_box.bounding_union(table_box);
1614  // In the "unioned table" box (the table extents expanded by the line),
1615  // keep track of how many partitions have significant padding to the left
1616  // and right. If more than half of the partitions covered by the new table
1617  // have significant spacing, the line belongs to the table and the table
1618  // grows to include all of the partitions.
1619  int num_extra_partitions = 0;
1620  int extra_space_to_right = 0;
1621  int extra_space_to_left = 0;
1622  // Rulings are in a different grid, so search 2 grids for rulings, text,
1623  // and table partitions that are introduced by the new box.
1624  for (int i = 0; i < 2; ++i) {
1625  ColPartitionGrid* grid = (i == 0) ? &clean_part_grid_ :
1627  // Start a rect search on bbox
1628  ColPartitionGridSearch rectsearch(grid);
1629  rectsearch.SetUniqueMode(true);
1630  rectsearch.StartRectSearch(bbox);
1631  ColPartition* extra_part = nullptr;
1632  while ((extra_part = rectsearch.NextRectSearch()) != nullptr) {
1633  // ColPartition already in table
1634  const TBOX& extra_part_box = extra_part->bounding_box();
1635  if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable)
1636  continue;
1637  // Non-text ColPartitions do not contribute
1638  if (extra_part->IsImageType())
1639  continue;
1640  // Consider this partition.
1641  num_extra_partitions++;
1642  // presence of a table cell is a strong hint, so just increment the scores
1643  // without looking at the spacing.
1644  if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) {
1645  extra_space_to_right++;
1646  extra_space_to_left++;
1647  continue;
1648  }
1649  int space_threshold = kSideSpaceMargin * part.median_height();
1650  if (extra_part->space_to_right() > space_threshold)
1651  extra_space_to_right++;
1652  if (extra_part->space_to_left() > space_threshold)
1653  extra_space_to_left++;
1654  }
1655  }
1656  // tprintf("%d %d %d\n",
1657  // num_extra_partitions,extra_space_to_right,extra_space_to_left);
1658  return (extra_space_to_right > num_extra_partitions / 2) ||
1659  (extra_space_to_left > num_extra_partitions / 2);
1660 }

◆ IncludeLeftOutColumnHeaders()

void tesseract::TableFinder::IncludeLeftOutColumnHeaders ( TBOX *  table_box)
protected

Definition at line 1664 of file tablefind.cpp.

1664  {
1665  // Start a search above the current table to look for column headers
1667  vsearch.StartVerticalSearch(table_box->left(), table_box->right(),
1668  table_box->top());
1669  ColPartition* neighbor = nullptr;
1670  ColPartition* previous_neighbor = nullptr;
1671  while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
1672  // Max distance to find a table heading.
1673  const int max_distance = kMaxColumnHeaderDistance *
1674  neighbor->median_height();
1675  int table_top = table_box->top();
1676  const TBOX& box = neighbor->bounding_box();
1677  // Do not continue if the next box is way above
1678  if (box.bottom() - table_top > max_distance)
1679  break;
1680  // Unconditionally include partitions of type TABLE or LINE
1681  // TODO(faisal): add some reasonable conditions here
1682  if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) {
1683  table_box->set_top(box.top());
1684  previous_neighbor = nullptr;
1685  continue;
1686  }
1687  // If there are two text partitions, one above the other, without a table
1688  // cell on their left or right side, consider them a barrier and quit
1689  if (previous_neighbor == nullptr) {
1690  previous_neighbor = neighbor;
1691  } else {
1692  const TBOX& previous_box = previous_neighbor->bounding_box();
1693  if (!box.major_y_overlap(previous_box))
1694  break;
1695  }
1696  }
1697 }

◆ Init()

void tesseract::TableFinder::Init ( int  grid_size,
const ICOORD &  bottom_left,
const ICOORD &  top_right
)

Definition at line 181 of file tablefind.cpp.

182  {
183  // Initialize clean partitions list and grid
184  clean_part_grid_.Init(grid_size, bottom_left, top_right);
185  leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
186  fragmented_text_grid_.Init(grid_size, bottom_left, top_right);
187  col_seg_grid_.Init(grid_size, bottom_left, top_right);
188  table_grid_.Init(grid_size, bottom_left, top_right);
189 }

◆ InitializePartitions()

void tesseract::TableFinder::InitializePartitions ( ColPartitionSet **  all_columns)
protected

Definition at line 579 of file tablefind.cpp.

579  {
580  FindNeighbors();
581  SetPartitionSpacings(&clean_part_grid_, all_columns);
583 }

◆ InsertCleanPartitions()

void tesseract::TableFinder::InsertCleanPartitions ( ColPartitionGrid *  grid,
TO_BLOCK *  block
)

Definition at line 193 of file tablefind.cpp.

194  {
195  // Calculate stats. This lets us filter partitions in AllowTextPartition()
196  // and filter blobs in AllowBlob().
197  SetGlobalSpacings(grid);
198 
199  // Iterate the ColPartitions in the grid.
200  ColPartitionGridSearch gsearch(grid);
201  gsearch.SetUniqueMode(true);
202  gsearch.StartFullSearch();
203  ColPartition* part = nullptr;
204  while ((part = gsearch.NextFullSearch()) != nullptr) {
205  // Reject partitions with nothing useful inside of them.
206  if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0)
207  continue;
208  ColPartition* clean_part = part->ShallowCopy();
209  ColPartition* leader_part = nullptr;
210  if (part->IsLineType()) {
211  InsertRulingPartition(clean_part);
212  continue;
213  }
214  // Insert all non-text partitions to clean_parts
215  if (!part->IsTextType()) {
216  InsertImagePartition(clean_part);
217  continue;
218  }
219  // Insert text colpartitions after removing noisy components from them
220  // The leaders are split into a separate grid.
221  BLOBNBOX_CLIST* part_boxes = part->boxes();
222  BLOBNBOX_C_IT pit(part_boxes);
223  for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
224  BLOBNBOX *pblob = pit.data();
225  // Bad blobs... happens in UNLV set.
226  // news.3G1, page 17 (around x=6)
227  if (!AllowBlob(*pblob))
228  continue;
229  if (pblob->flow() == BTFT_LEADER) {
230  if (leader_part == nullptr) {
231  leader_part = part->ShallowCopy();
232  leader_part->set_flow(BTFT_LEADER);
233  }
234  leader_part->AddBox(pblob);
235  } else if (pblob->region_type() != BRT_NOISE) {
236  clean_part->AddBox(pblob);
237  }
238  }
239  clean_part->ComputeLimits();
240  ColPartition* fragmented = clean_part->CopyButDontOwnBlobs();
241  InsertTextPartition(clean_part);
243  if (leader_part != nullptr) {
244  // TODO(nbeato): Note that ComputeLimits does not update the column
245  // information. So the leader may appear to span more columns than it
246  // really does later on when IsInSameColumnAs gets called to test
247  // for adjacent leaders.
248  leader_part->ComputeLimits();
249  InsertLeaderPartition(leader_part);
250  }
251  }
252 
253  // Make the partition partners better for upper and lower neighbors.
256 }

◆ InsertFragmentedTextPartition()

void tesseract::TableFinder::InsertFragmentedTextPartition ( ColPartition *  part)
protected

Definition at line 402 of file tablefind.cpp.

402  {
403  ASSERT_HOST(part != nullptr);
404  if (AllowTextPartition(*part)) {
405  fragmented_text_grid_.InsertBBox(true, true, part);
406  } else {
407  delete part;
408  }
409 }

◆ InsertImagePartition()

void tesseract::TableFinder::InsertImagePartition ( ColPartition *  part)
protected

Definition at line 421 of file tablefind.cpp.

421  {
422  // NOTE: If images are placed into a different grid in the future,
423  // the function SetPartitionSpacings needs to be updated. It should
424  // be the only thing that cares about image partitions.
425  clean_part_grid_.InsertBBox(true, true, part);
426 }

◆ InsertLeaderPartition()

void tesseract::TableFinder::InsertLeaderPartition ( ColPartition *  part)
protected

Definition at line 410 of file tablefind.cpp.

410  {
411  ASSERT_HOST(part != nullptr);
412  if (!part->IsEmpty() && part->bounding_box().area() > 0) {
413  leader_and_ruling_grid_.InsertBBox(true, true, part);
414  } else {
415  delete part;
416  }
417 }

◆ InsertRulingPartition()

void tesseract::TableFinder::InsertRulingPartition ( ColPartition *  part)
protected

Definition at line 418 of file tablefind.cpp.

418  {
419  leader_and_ruling_grid_.InsertBBox(true, true, part);
420 }

◆ InsertTextPartition()

void tesseract::TableFinder::InsertTextPartition ( ColPartition *  part)
protected

Definition at line 394 of file tablefind.cpp.

394  {
395  ASSERT_HOST(part != nullptr);
396  if (AllowTextPartition(*part)) {
397  clean_part_grid_.InsertBBox(true, true, part);
398  } else {
399  delete part;
400  }
401 }

◆ LocateTables()

void tesseract::TableFinder::LocateTables ( ColPartitionGrid *  grid,
ColPartitionSet **  columns,
WidthCallback  width_cb,
const FCOORD &  reskew
)

Definition at line 259 of file tablefind.cpp.

262  {
263  // initialize spacing, neighbors, and columns
264  InitializePartitions(all_columns);
265 
266 #ifndef GRAPHICS_DISABLED
267  if (textord_show_tables) {
268  ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
274 
275  table_win = MakeWindow(100, 300, "Fragmented Text");
277  }
278 #endif // GRAPHICS_DISABLED
279 
280  // mark, filter, and smooth candidate table partitions
282 
283  // Make single-column blocks from good_columns_ partitions. col_segments are
284  // moved to a grid later which takes the ownership
285  ColSegment_LIST column_blocks;
286  GetColumnBlocks(all_columns, &column_blocks);
287  // Set the ratio of candidate table partitions in each column
288  SetColumnsType(&column_blocks);
289 
290  // Move column segments to col_seg_grid_
291  MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_);
292 
293  // Detect split in column layout that might have occurred due to the
294  // presence of a table. In such a case, merge the corresponding columns.
296 
297  // Group horizontally overlapping table partitions into table columns.
298  // table_columns created here get deleted at the end of this method.
299  ColSegment_LIST table_columns;
300  GetTableColumns(&table_columns);
301 
302  // Within each column, mark the range table regions occupy based on the
303  // table columns detected. table_regions are moved to a grid later which
304  // takes the ownership
305  ColSegment_LIST table_regions;
306  GetTableRegions(&table_columns, &table_regions);
307 
308 #ifndef GRAPHICS_DISABLED
309  if (textord_tablefind_show_mark) {
310  ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions");
311  DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE);
312  DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW);
313  }
314 #endif // GRAPHICS_DISABLED
315 
316  // Merge table regions across columns for tables spanning multiple
317  // columns
318  MoveColSegmentsToGrid(&table_regions, &table_grid_);
320 
321  // Adjust table boundaries by including nearby horizontal lines and left
322  // out column headers
325 
326  if (textord_tablefind_recognize_tables) {
327  // Remove false alarms consisting of a single column
329 
330 #ifndef GRAPHICS_DISABLED
331  if (textord_show_tables) {
332  ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations");
334  DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI);
335  table_grid_.DisplayBoxes(table_win);
336  }
337 #endif // GRAPHICS_DISABLED
338 
339  // Find table grid structure and reject tables that are malformed.
340  RecognizeTables();
342  RecognizeTables();
343 
344 #ifndef GRAPHICS_DISABLED
345  if (textord_show_tables) {
346  ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables");
349  table_grid_.DisplayBoxes(table_win);
350  }
351 #endif // GRAPHICS_DISABLED
352  } else {
353  // Remove false alarms consisting of a single column
354  // TODO(nbeato): verify this is a NOP after structured table rejection.
355  // Right now it isn't. If the recognize function is doing what it is
356  // supposed to do, this function is obsolete.
358 
359 #ifndef GRAPHICS_DISABLED
360  if (textord_show_tables) {
361  ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables");
364  table_grid_.DisplayBoxes(table_win);
365  }
366 #endif // GRAPHICS_DISABLED
367  }
368 
369  // Merge all colpartitions in table regions to make them a single
370  // colpartition and revert types of isolated table cells not
371  // assigned to any table to their original types.
372  MakeTableBlocks(grid, all_columns, width_cb);
373 }

◆ MakeTableBlocks()

void tesseract::TableFinder::MakeTableBlocks ( ColPartitionGrid *  grid,
ColPartitionSet **  columns,
WidthCallback  width_cb 
)
protected

Definition at line 1997 of file tablefind.cpp.

1999  {
2000  // Since we have table blocks already, remove table tags from all
2001  // colpartitions
2002  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
2003  gsearch(grid);
2004  gsearch.StartFullSearch();
2005  ColPartition* part = nullptr;
2006 
2007  while ((part = gsearch.NextFullSearch()) != nullptr) {
2008  if (part->type() == PT_TABLE) {
2009  part->clear_table_type();
2010  }
2011  }
2012  // Now make a single colpartition out of each table block and remove
2013  // all colpartitions contained within a table
2014  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
2015  table_search(&table_grid_);
2016  table_search.StartFullSearch();
2017  ColSegment* table;
2018  while ((table = table_search.NextFullSearch()) != nullptr) {
2019  const TBOX& table_box = table->bounding_box();
2020  // Start a rect search on table_box
2021  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
2022  rectsearch(grid);
2023  rectsearch.StartRectSearch(table_box);
2024  ColPartition* part;
2025  ColPartition* table_partition = nullptr;
2026  while ((part = rectsearch.NextRectSearch()) != nullptr) {
2027  // Do not consider image partitions
2028  if (!part->IsTextType())
2029  continue;
2030  TBOX part_box = part->bounding_box();
2031  // Include partition in the table if more than half of it
2032  // is covered by the table
2033  if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
2034  rectsearch.RemoveBBox();
2035  if (table_partition) {
2036  table_partition->Absorb(part, width_cb);
2037  } else {
2038  table_partition = part;
2039  }
2040  }
2041  }
2042  // Insert table colpartition back to part_grid_
2043  if (table_partition) {
2044  // To match the columns used when transforming to blocks, the new table
2045  // partition must have its first and last column set at the grid y that
2046  // corresponds to its bottom.
2047  const TBOX& table_box = table_partition->bounding_box();
2048  int grid_x, grid_y;
2049  grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y);
2050  table_partition->SetPartitionType(resolution_, all_columns[grid_y]);
2051  table_partition->set_table_type();
2052  table_partition->set_blob_type(BRT_TEXT);
2053  table_partition->set_flow(BTFT_CHAIN);
2054  table_partition->SetBlobTypes();
2055  grid->InsertBBox(true, true, table_partition);
2056  }
2057  }
2058 }

◆ MakeWindow()

ScrollView * tesseract::TableFinder::MakeWindow ( int  x,
int  y,
const char *  window_name 
)
protected

Definition at line 518 of file tablefind.cpp.

518  {
519  return clean_part_grid_.MakeWindow(x, y, window_name);
520 }

◆ MarkPartitionsUsingLocalInformation()

void tesseract::TableFinder::MarkPartitionsUsingLocalInformation ( )
protected

Definition at line 827 of file tablefind.cpp.

827  {
828  // Iterate the ColPartitions in the grid.
829  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
830  gsearch(&clean_part_grid_);
831  gsearch.StartFullSearch();
832  ColPartition* part = nullptr;
833  while ((part = gsearch.NextFullSearch()) != nullptr) {
834  if (!part->IsTextType()) // Only consider text partitions
835  continue;
836  // Only consider partitions in dominant font size or smaller
837  if (part->median_height() > kMaxTableCellXheight * global_median_xheight_)
838  continue;
839  // Mark partitions with a large gap, or no significant gap as
840  // table partitions.
841  // Comments: It produces several false alarms at:
842  // - last line of a paragraph (fixed)
843  // - single word section headings
844  // - page headers and footers
845  // - numbered equations
846  // - line drawing regions
847  // TODO(faisal): detect and fix above-mentioned cases
848  if (HasWideOrNoInterWordGap(part) ||
849  HasLeaderAdjacent(*part)) {
850  part->set_table_type();
851  }
852  }
853 }

◆ MarkTablePartitions()

void tesseract::TableFinder::MarkTablePartitions ( )
protected

Definition at line 789 of file tablefind.cpp.

789  {
791  if (textord_tablefind_show_mark) {
792  ScrollView* table_win = MakeWindow(300, 300, "Initial Table Partitions");
796  }
798  if (textord_tablefind_show_mark) {
799  ScrollView* table_win = MakeWindow(600, 300, "Filtered Table Partitions");
803  }
805  if (textord_tablefind_show_mark) {
806  ScrollView* table_win = MakeWindow(900, 300, "Smoothed Table Partitions");
810  }
812  if (textord_tablefind_show_mark || textord_show_tables) {
813  ScrollView* table_win = MakeWindow(900, 300, "Final Table Partitions");
817  }
818 }

◆ MoveColSegmentsToGrid()

void tesseract::TableFinder::MoveColSegmentsToGrid ( ColSegment_LIST *  segments,
ColSegmentGrid *  col_seg_grid
)
protected

Definition at line 1176 of file tablefind.cpp.

1177  {
1178  ColSegment_IT it(segments);
1179  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1180  ColSegment* seg = it.extract();
1181  col_seg_grid->InsertBBox(true, true, seg);
1182  }
1183 }

◆ RecognizeTables()

void tesseract::TableFinder::RecognizeTables ( )
protected

Definition at line 1817 of file tablefind.cpp.

1817  {
1818  ScrollView* table_win = nullptr;
1819  if (textord_show_tables) {
1820  table_win = MakeWindow(0, 0, "Table Structure");
1823  // table_grid_.DisplayBoxes(table_win);
1824  }
1825 
1826 
1827  TableRecognizer recognizer;
1828  recognizer.Init();
1829  recognizer.set_line_grid(&leader_and_ruling_grid_);
1830  recognizer.set_text_grid(&fragmented_text_grid_);
1831  recognizer.set_max_text_height(global_median_xheight_ * 2.0);
1832  recognizer.set_min_height(1.5 * gridheight());
1833  // Loop over all of the tables and try to fit them.
1834  // Store the good tables here.
1835  ColSegment_CLIST good_tables;
1836  ColSegment_C_IT good_it(&good_tables);
1837 
1839  gsearch.StartFullSearch();
1840  ColSegment* found_table = nullptr;
1841  while ((found_table = gsearch.NextFullSearch()) != nullptr) {
1842  gsearch.RemoveBBox();
1843 
1844  // The goal is to make the tables persistent in a list.
1845  // When that happens, this will move into the search loop.
1846  const TBOX& found_box = found_table->bounding_box();
1847  StructuredTable* table_structure = recognizer.RecognizeTable(found_box);
1848 
1849  // Process a table. Good tables are inserted into the grid again later on
1850  // We can't change boxes in the grid while it is running a search.
1851  if (table_structure != nullptr) {
1852  if (textord_show_tables) {
1853  table_structure->Display(table_win, ScrollView::LIME_GREEN);
1854  }
1855  found_table->set_bounding_box(table_structure->bounding_box());
1856  delete table_structure;
1857  good_it.add_after_then_move(found_table);
1858  } else {
1859  delete found_table;
1860  }
1861  }
1862  // TODO(nbeato): MERGE!! There is awesome info now available for merging.
1863 
1864  // At this point, the grid is empty. We can safely insert the good tables
1865  // back into grid.
1866  for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward())
1867  table_grid_.InsertBBox(true, true, good_it.extract());
1868 }

◆ set_global_median_blob_width()

void tesseract::TableFinder::set_global_median_blob_width ( int  width)
protected

Definition at line 759 of file tablefind.cpp.

759  {
760  global_median_blob_width_ = width;
761 }

◆ set_global_median_ledding()

void tesseract::TableFinder::set_global_median_ledding ( int  ledding)
protected

Definition at line 762 of file tablefind.cpp.

762  {
763  global_median_ledding_ = ledding;
764 }

◆ set_global_median_xheight()

void tesseract::TableFinder::set_global_median_xheight ( int  xheight)
protected

Definition at line 756 of file tablefind.cpp.

756  {
757  global_median_xheight_ = xheight;
758 }

◆ set_left_to_right_language()

void tesseract::TableFinder::set_left_to_right_language ( bool  order)

Definition at line 177 of file tablefind.cpp.

177  {
178  left_to_right_language_ = order;
179 }

◆ set_resolution()

void tesseract::TableFinder::set_resolution ( int  resolution)
inline

Definition at line 137 of file tablefind.h.

137  {
138  resolution_ = resolution;
139  }

◆ SetColumnsType()

void tesseract::TableFinder::SetColumnsType ( ColSegment_LIST *  col_segments)
protected

Definition at line 1143 of file tablefind.cpp.

1143  {
1144  ColSegment_IT it(column_blocks);
1145  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1146  ColSegment* seg = it.data();
1147  TBOX box = seg->bounding_box();
1148  int num_table_cells = 0;
1149  int num_text_cells = 0;
1150  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
1151  rsearch(&clean_part_grid_);
1152  rsearch.SetUniqueMode(true);
1153  rsearch.StartRectSearch(box);
1154  ColPartition* part = nullptr;
1155  while ((part = rsearch.NextRectSearch()) != nullptr) {
1156  if (part->type() == PT_TABLE) {
1157  num_table_cells++;
1158  } else if (part->type() == PT_FLOWING_TEXT) {
1159  num_text_cells++;
1160  }
1161  }
1162  // If a column block has no text or table partition in it, it is not needed
1163  // for table detection.
1164  if (!num_table_cells && !num_text_cells) {
1165  delete it.extract();
1166  } else {
1167  seg->set_num_table_cells(num_table_cells);
1168  seg->set_num_text_cells(num_text_cells);
1169  // set column type based on the ratio of table to text cells
1170  seg->set_type();
1171  }
1172  }
1173 }

◆ SetGlobalSpacings()

void tesseract::TableFinder::SetGlobalSpacings ( ColPartitionGrid *  grid)
protected

Definition at line 709 of file tablefind.cpp.

709  {
710  STATS xheight_stats(0, kMaxVerticalSpacing + 1);
711  STATS width_stats(0, kMaxBlobWidth + 1);
712  STATS ledding_stats(0, kMaxVerticalSpacing + 1);
713  // Iterate the ColPartitions in the grid.
714  ColPartitionGridSearch gsearch(grid);
715  gsearch.SetUniqueMode(true);
716  gsearch.StartFullSearch();
717  ColPartition* part = nullptr;
718  while ((part = gsearch.NextFullSearch()) != nullptr) {
719  // TODO(nbeato): HACK HACK HACK! medians are equal to partition length.
720  // ComputeLimits needs to get called somewhere outside of TableFinder
721  // to make sure the partitions are properly initialized.
722  // When this is called, SmoothPartitionPartners dies in an assert after
723  // table find runs. Alternative solution.
724  // part->ComputeLimits();
725  if (part->IsTextType()) {
726  // xheight_stats.add(part->median_height(), part->boxes_count());
727  // width_stats.add(part->median_width(), part->boxes_count());
728 
729  // This loop can be removed when above issues are fixed.
730  // Replace it with the 2 lines commented out above.
731  BLOBNBOX_C_IT it(part->boxes());
732  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
733  xheight_stats.add(it.data()->bounding_box().height(), 1);
734  width_stats.add(it.data()->bounding_box().width(), 1);
735  }
736 
737  ledding_stats.add(part->space_above(), 1);
738  ledding_stats.add(part->space_below(), 1);
739  }
740  }
741  // Set estimates based on median of statistics obtained
742  set_global_median_xheight(static_cast<int>(xheight_stats.median() + 0.5));
743  set_global_median_blob_width(static_cast<int>(width_stats.median() + 0.5));
744  set_global_median_ledding(static_cast<int>(ledding_stats.median() + 0.5));
745  #ifndef GRAPHICS_DISABLED
746  if (textord_tablefind_show_stats) {
747  const char* kWindowName = "X-height (R), X-width (G), and ledding (B)";
748  ScrollView* stats_win = MakeWindow(500, 10, kWindowName);
749  xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED);
750  width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN);
751  ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE);
752  }
753  #endif // GRAPHICS_DISABLED
754 }

◆ SetPartitionSpacings()

void tesseract::TableFinder::SetPartitionSpacings ( ColPartitionGrid *  grid,
ColPartitionSet **  all_columns 
)
static protected

Definition at line 586 of file tablefind.cpp.

587  {
588  // Iterate the ColPartitions in the grid.
589  ColPartitionGridSearch gsearch(grid);
590  gsearch.StartFullSearch();
591  ColPartition* part = nullptr;
592  while ((part = gsearch.NextFullSearch()) != nullptr) {
593  ColPartitionSet* columns = all_columns[gsearch.GridY()];
594  TBOX box = part->bounding_box();
595  int y = part->MidY();
596  ColPartition* left_column = columns->ColumnContaining(box.left(), y);
597  ColPartition* right_column = columns->ColumnContaining(box.right(), y);
598  // set distance from left column as space to the left
599  if (left_column) {
600  int left_space = std::max(0, box.left() - left_column->LeftAtY(y));
601  part->set_space_to_left(left_space);
602  }
603  // set distance from right column as space to the right
604  if (right_column) {
605  int right_space = std::max(0, right_column->RightAtY(y) - box.right());
606  part->set_space_to_right(right_space);
607  }
608 
609  // Look for images that may be closer.
610  // NOTE: used to be part_grid_, might cause issues now
611  ColPartitionGridSearch hsearch(grid);
612  hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
613  ColPartition* neighbor = nullptr;
614  while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) {
615  if (neighbor->type() == PT_PULLOUT_IMAGE ||
616  neighbor->type() == PT_FLOWING_IMAGE ||
617  neighbor->type() == PT_HEADING_IMAGE) {
618  int right = neighbor->bounding_box().right();
619  if (right < box.left()) {
620  int space = std::min(box.left() - right, part->space_to_left());
621  part->set_space_to_left(space);
622  }
623  }
624  }
625  hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
626  neighbor = nullptr;
627  while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) {
628  if (neighbor->type() == PT_PULLOUT_IMAGE ||
629  neighbor->type() == PT_FLOWING_IMAGE ||
630  neighbor->type() == PT_HEADING_IMAGE) {
631  int left = neighbor->bounding_box().left();
632  if (left > box.right()) {
633  int space = std::min(left - box.right(), part->space_to_right());
634  part->set_space_to_right(space);
635  }
636  }
637  }
638 
639  ColPartition* upper_part = part->SingletonPartner(true);
640  if (upper_part) {
641  int space = std::max(0, static_cast<int>(upper_part->bounding_box().bottom() -
642  part->bounding_box().bottom()));
643  part->set_space_above(space);
644  } else {
645  // TODO(nbeato): What constitutes a good value?
646  // 0 is the default value when not set, explicitly noting it needs to
647  // be something else.
648  part->set_space_above(INT32_MAX);
649  }
650 
651  ColPartition* lower_part = part->SingletonPartner(false);
652  if (lower_part) {
653  int space = std::max(0, static_cast<int>(part->bounding_box().bottom() -
654  lower_part->bounding_box().bottom()));
655  part->set_space_below(space);
656  } else {
657  // TODO(nbeato): What constitutes a good value?
658  // 0 is the default value when not set, explicitly noting it needs to
659  // be something else.
660  part->set_space_below(INT32_MAX);
661  }
662  }
663 }

◆ SetVerticalSpacing()

void tesseract::TableFinder::SetVerticalSpacing ( ColPartition *  part)
protected

Definition at line 666 of file tablefind.cpp.

666  {
667  TBOX box = part->bounding_box();
668  int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y()));
669  int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast<int>(bleft().y()));
670  box.set_top(top_range);
671  box.set_bottom(bottom_range);
672 
673  TBOX part_box = part->bounding_box();
674  // Start a rect search
675  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
676  rectsearch(&clean_part_grid_);
677  rectsearch.StartRectSearch(box);
678  ColPartition* neighbor;
679  int min_space_above = kMaxVerticalSpacing;
680  int min_space_below = kMaxVerticalSpacing;
681  ColPartition* above_neighbor = nullptr;
682  ColPartition* below_neighbor = nullptr;
683  while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
684  if (neighbor == part)
685  continue;
686  TBOX neighbor_box = neighbor->bounding_box();
687  if (neighbor_box.major_x_overlap(part_box)) {
688  int gap = abs(part->median_bottom() - neighbor->median_bottom());
689  // If neighbor is below current partition
690  if (neighbor_box.top() < part_box.bottom() &&
691  gap < min_space_below) {
692  min_space_below = gap;
693  below_neighbor = neighbor;
694  } // If neighbor is above current partition
695  else if (part_box.top() < neighbor_box.bottom() &&
696  gap < min_space_above) {
697  min_space_above = gap;
698  above_neighbor = neighbor;
699  }
700  }
701  }
702  part->set_space_above(min_space_above);
703  part->set_space_below(min_space_below);
704  part->set_nearest_neighbor_above(above_neighbor);
705  part->set_nearest_neighbor_below(below_neighbor);
706 }

◆ SmoothTablePartitionRuns()

void tesseract::TableFinder::SmoothTablePartitionRuns ( )
protected

Definition at line 1108 of file tablefind.cpp.

1108  {
1109  // Iterate the ColPartitions in the grid.
1110  ColPartitionGridSearch gsearch(&clean_part_grid_);
1111  gsearch.StartFullSearch();
1112  ColPartition* part = nullptr;
1113  while ((part = gsearch.NextFullSearch()) != nullptr) {
1114  if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN)
1115  continue; // Consider only text partitions
1116  ColPartition* upper_part = part->nearest_neighbor_above();
1117  ColPartition* lower_part = part->nearest_neighbor_below();
1118  if (!upper_part || !lower_part)
1119  continue;
1120  if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE)
1121  part->set_table_type();
1122  }
1123 
1124  // Pass 2, do the opposite. If both the upper and lower neighbors
1125  // exist and are not tables, this probably shouldn't be a table.
1126  gsearch.StartFullSearch();
1127  part = nullptr;
1128  while ((part = gsearch.NextFullSearch()) != nullptr) {
1129  if (part->type() != PT_TABLE)
1130  continue; // Consider only text partitions
1131  ColPartition* upper_part = part->nearest_neighbor_above();
1132  ColPartition* lower_part = part->nearest_neighbor_below();
1133 
1134  // table can't be by itself
1135  if ((upper_part && upper_part->type() != PT_TABLE) &&
1136  (lower_part && lower_part->type() != PT_TABLE)) {
1137  part->clear_table_type();
1138  }
1139  }
1140 }

◆ SplitAndInsertFragmentedTextPartition()

void tesseract::TableFinder::SplitAndInsertFragmentedTextPartition ( ColPartition *  part)
protected

Definition at line 436 of file tablefind.cpp.

436  {
437  ASSERT_HOST(part != nullptr);
438  // Bye bye empty partitions!
439  if (part->boxes()->empty()) {
440  delete part;
441  return;
442  }
443 
444  // The AllowBlob function prevents this.
445  ASSERT_HOST(part->median_width() > 0);
446  const double kThreshold = part->median_width() * kSplitPartitionSize;
447 
448  ColPartition* right_part = part;
449  bool found_split = true;
450  while (found_split) {
451  found_split = false;
452  BLOBNBOX_C_IT box_it(right_part->boxes());
453  // Blobs are sorted left side first. If blobs overlap,
454  // the previous blob may have a "more right" right side.
455  // Account for this by always keeping the largest "right"
456  // so far.
457  int previous_right = INT32_MIN;
458 
459  // Look for the next split in the partition.
460  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
461  const TBOX& box = box_it.data()->bounding_box();
462  if (previous_right != INT32_MIN &&
463  box.left() - previous_right > kThreshold) {
464  // We have a split position. Split the partition in two pieces.
465  // Insert the left piece in the grid and keep processing the right.
466  int mid_x = (box.left() + previous_right) / 2;
467  ColPartition* left_part = right_part;
468  right_part = left_part->SplitAt(mid_x);
469 
470  InsertFragmentedTextPartition(left_part);
471  found_split = true;
472  break;
473  }
474 
475  // The right side of the previous blobs.
476  previous_right = std::max(previous_right, static_cast<int>(box.right()));
477  }
478  }
479  // When a split is not found, the right part is minimized
480  // as much as possible, so process it.
481  InsertFragmentedTextPartition(right_part);
482 }

◆ tright()

const ICOORD & tesseract::TableFinder::tright ( ) const
protected

Definition at line 390 of file tablefind.cpp.

390  {
391  return clean_part_grid_.tright();
392 }

Member Data Documentation

◆ clean_part_grid_

ColPartitionGrid tesseract::TableFinder::clean_part_grid_
protected

Definition at line 412 of file tablefind.h.

◆ col_seg_grid_

ColSegmentGrid tesseract::TableFinder::col_seg_grid_
protected

Definition at line 420 of file tablefind.h.

◆ fragmented_text_grid_

ColPartitionGrid tesseract::TableFinder::fragmented_text_grid_
protected

Definition at line 418 of file tablefind.h.

◆ global_median_blob_width_

int tesseract::TableFinder::global_median_blob_width_
protected

Definition at line 406 of file tablefind.h.

◆ global_median_ledding_

int tesseract::TableFinder::global_median_ledding_
protected

Definition at line 408 of file tablefind.h.

◆ global_median_xheight_

int tesseract::TableFinder::global_median_xheight_
protected

Definition at line 404 of file tablefind.h.

◆ leader_and_ruling_grid_

ColPartitionGrid tesseract::TableFinder::leader_and_ruling_grid_
protected

Definition at line 414 of file tablefind.h.

◆ left_to_right_language_

bool tesseract::TableFinder::left_to_right_language_
protected

Definition at line 424 of file tablefind.h.

◆ resolution_

int tesseract::TableFinder::resolution_
protected

Definition at line 402 of file tablefind.h.

◆ table_grid_

ColSegmentGrid tesseract::TableFinder::table_grid_
protected

Definition at line 422 of file tablefind.h.


The documentation for this class was generated from the following files:
tesseract::kMaxBlobWidth
const int kMaxBlobWidth
Definition: tablefind.cpp:39
tesseract::TableFinder::InsertImagePartition
void InsertImagePartition(ColPartition *part)
Definition: tablefind.cpp:421
tesseract::ColSegmentGridSearch
GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT > ColSegmentGridSearch
Definition: tablefind.h:120
ScrollView
Definition: scrollview.h:97
tesseract::TableFinder::FilterHeaderAndFooter
void FilterHeaderAndFooter()
Definition: tablefind.cpp:1074
ScrollView::Brush
void Brush(Color color)
Definition: scrollview.cpp:723
tesseract::TableFinder::GroupColumnBlocks
void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
Definition: tablefind.cpp:538
tesseract::kLargeTableRowCount
const int kLargeTableRowCount
Definition: tablefind.cpp:108
tesseract::TableFinder::set_global_median_xheight
void set_global_median_xheight(int xheight)
Definition: tablefind.cpp:756
tesseract::kStrokeWidthFractionalTolerance
const double kStrokeWidthFractionalTolerance
Definition: tablefind.cpp:139
tesseract::TableFinder::InsertFragmentedTextPartition
void InsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:402
TBOX::intersection
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:83
tesseract::TableFinder::left_to_right_language_
bool left_to_right_language_
Definition: tablefind.h:424
tesseract::TableFinder::table_grid_
ColSegmentGrid table_grid_
Definition: tablefind.h:422
tesseract::TableFinder::GridMergeColumnBlocks
void GridMergeColumnBlocks()
Definition: tablefind.cpp:1195
tesseract::BBGrid::InsertBBox
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:486
tesseract::COL_TEXT
Definition: tablefind.h:31
tesseract::BBGrid::MakeWindow
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
BRT_NOISE
Definition: blobbox.h:72
tesseract::kMaxXProjectionGapFactor
const double kMaxXProjectionGapFactor
Definition: tablefind.cpp:135
tesseract::kMinRowsInTable
const int kMinRowsInTable
Definition: tablefind.cpp:111
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::kMaxVerticalSpacing
const int kMaxVerticalSpacing
Definition: tablefind.cpp:37
tesseract::TableFinder::set_global_median_ledding
void set_global_median_ledding(int ledding)
Definition: tablefind.cpp:762
tesseract::kLargeTableProjectionThreshold
const double kLargeTableProjectionThreshold
Definition: tablefind.cpp:106
tesseract::TableFinder::AllowBlob
bool AllowBlob(const BLOBNBOX &blob) const
Definition: tablefind.cpp:502
TBOX::overlap
bool overlap(const TBOX &box) const
Definition: rect.h:350
BLOBNBOX::set_flow
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:297
tesseract::TableFinder::DisplayColPartitionConnections
void DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
Definition: tablefind.cpp:1950
tesseract::GridBase::gridwidth
int gridwidth() const
Definition: bbgrid.h:66
PT_HEADING_IMAGE
Definition: capi.h:118
TBOX::top
int16_t top() const
Definition: rect.h:57
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:330
tesseract::kMinBoxesInTextPartition
const int kMinBoxesInTextPartition
Definition: tablefind.cpp:62
tesseract::BBGrid::Clear
void Clear()
Definition: bbgrid.h:455
TBOX::bounding_union
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:124
tesseract::TableFinder::RecognizeTables
void RecognizeTables()
Definition: tablefind.cpp:1817
TBOX::area
int32_t area() const
Definition: rect.h:121
TBOX::set_top
void set_top(int y)
Definition: rect.h:60
PT_FLOWING_IMAGE
Definition: capi.h:117
tesseract::TableFinder::set_global_median_blob_width
void set_global_median_blob_width(int width)
Definition: tablefind.cpp:759
tesseract::TableFinder::DisplayColSegments
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
Definition: tablefind.cpp:1871
tesseract::TableFinder::HasLeaderAdjacent
bool HasLeaderAdjacent(const ColPartition &part)
Definition: tablefind.cpp:946
tesseract::TableFinder::AdjustTableBoundaries
void AdjustTableBoundaries()
Definition: tablefind.cpp:1486
ScrollView::NONE
Definition: scrollview.h:101
ScrollView::Pen
void Pen(Color color)
Definition: scrollview.cpp:717
tesseract::TableFinder::ConsecutiveBoxes
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2)
Definition: tablefind.cpp:568
tesseract::kMaxBoxesInDataPartition
const int kMaxBoxesInDataPartition
Definition: tablefind.cpp:65
TBOX::major_y_overlap
bool major_y_overlap(const TBOX &box) const
Definition: rect.h:428
PT_TABLE
Definition: capi.h:114
tesseract::TableFinder::InsertLeaderPartition
void InsertLeaderPartition(ColPartition *part)
Definition: tablefind.cpp:410
tesseract::kMinParagraphEndingTextToWhitespaceRatio
const double kMinParagraphEndingTextToWhitespaceRatio
Definition: tablefind.cpp:131
tesseract::kMaxGapInTextPartition
const double kMaxGapInTextPartition
Definition: tablefind.cpp:68
tesseract::ColPartitionGridSearch
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:935
tesseract::TableFinder::SplitAndInsertFragmentedTextPartition
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:436
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
BLOBNBOX
Definition: blobbox.h:142
BTFT_CHAIN
Definition: blobbox.h:117
tesseract::TableFinder::bleft
const ICOORD & bleft() const
Definition: tablefind.cpp:387
ScrollView::BLUE
Definition: scrollview.h:108
BTFT_LEADER
Definition: blobbox.h:120
tesseract::TableFinder::IncludeLeftOutColumnHeaders
void IncludeLeftOutColumnHeaders(TBOX *table_box)
Definition: tablefind.cpp:1664
tesseract::kMaxBlobOverlapFactor
const double kMaxBlobOverlapFactor
Definition: tablefind.cpp:76
tesseract::GridBase::tright
const ICOORD & tright() const
Definition: bbgrid.h:75
TBOX::height
int16_t height() const
Definition: rect.h:107
tesseract::TableFinder::AllowTextPartition
bool AllowTextPartition(const ColPartition &part) const
Definition: tablefind.cpp:489
tesseract::TableFinder::resolution_
int resolution_
Definition: tablefind.h:402
TBOX::set_right
void set_right(int x)
Definition: rect.h:81
tesseract::kAllowBlobArea
const double kAllowBlobArea
Definition: tablefind.cpp:57
ScrollView::DARK_TURQUOISE
Definition: scrollview.h:146
tesseract::TableFinder::tright
const ICOORD & tright() const
Definition: tablefind.cpp:390
tesseract::COL_TABLE
Definition: tablefind.h:32
tesseract::TableFinder::FilterParagraphEndings
void FilterParagraphEndings()
Definition: tablefind.cpp:994
tesseract::ColPartitionGrid::RefinePartitionPartners
void RefinePartitionPartners(bool get_desperate)
Definition: colpartitiongrid.cpp:1287
tesseract::TableFinder::gridheight
int gridheight() const
Definition: tablefind.cpp:384
tesseract::TableFinder::leader_and_ruling_grid_
ColPartitionGrid leader_and_ruling_grid_
Definition: tablefind.h:414
tesseract::kMinMaxGapInTextPartition
const double kMinMaxGapInTextPartition
Definition: tablefind.cpp:72
tesseract::TableFinder::GrowTableToIncludePartials
void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
Definition: tablefind.cpp:1542
ScrollView::KHAKI
Definition: scrollview.h:134
ScrollView::ORANGE
Definition: scrollview.h:136
TBOX::major_x_overlap
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:403
tesseract::TableFinder::InsertRulingPartition
void InsertRulingPartition(ColPartition *part)
Definition: tablefind.cpp:418
tesseract::kSideSpaceMargin
const int kSideSpaceMargin
Definition: tablefind.cpp:101
tesseract::TableFinder::MarkPartitionsUsingLocalInformation
void MarkPartitionsUsingLocalInformation()
Definition: tablefind.cpp:827
tesseract::TableFinder::clean_part_grid_
ColPartitionGrid clean_part_grid_
Definition: tablefind.h:412
ScrollView::UpdateWindow
void UpdateWindow()
Definition: scrollview.cpp:703
tesseract::kAllowTextHeight
const double kAllowTextHeight
Definition: tablefind.cpp:48
TBOX::null_box
bool null_box() const
Definition: rect.h:49
ScrollView::LIGHT_BLUE
Definition: scrollview.h:112
BRT_TEXT
Definition: blobbox.h:79
tesseract::TableFinder::GetTableColumns
void GetTableColumns(ColSegment_LIST *table_columns)
Definition: tablefind.cpp:1273
tesseract::TableFinder::DeleteSingleColumnTables
void DeleteSingleColumnTables()
Definition: tablefind.cpp:1703
tesseract::TableFinder::HasWideOrNoInterWordGap
bool HasWideOrNoInterWordGap(ColPartition *part) const
Definition: tablefind.cpp:857
tesseract::kMaxColumnHeaderDistance
const int kMaxColumnHeaderDistance
Definition: tablefind.cpp:84
TBOX::width
int16_t width() const
Definition: rect.h:114
tesseract::kMaxParagraphEndingLeftSpaceMultiple
const double kMaxParagraphEndingLeftSpaceMultiple
Definition: tablefind.cpp:125
tesseract::kMinOverlapWithTable
const double kMinOverlapWithTable
Definition: tablefind.cpp:96
ScrollView::YELLOW
Definition: scrollview.h:105
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::TableFinder::GridMergeTableRegions
void GridMergeTableRegions()
Definition: tablefind.cpp:1386
tesseract::TableFinder::GrowTableBox
void GrowTableBox(const TBOX &table_box, TBOX *result_box)
Definition: tablefind.cpp:1520
tesseract::TableFinder::GrowTableToIncludeLines
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
Definition: tablefind.cpp:1570
tesseract::TableFinder::col_seg_grid_
ColSegmentGrid col_seg_grid_
Definition: tablefind.h:420
TBOX::overlap_fraction
double overlap_fraction(const TBOX &box) const
Definition: rect.h:381
ScrollView::AQUAMARINE
Definition: scrollview.h:110
tesseract::TableFinder::FilterFalseAlarms
void FilterFalseAlarms()
Definition: tablefind.cpp:988
ScrollView::RED
Definition: scrollview.h:104
PT_UNKNOWN
Definition: capi.h:108
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::TableFinder::HLineBelongsToTable
bool HLineBelongsToTable(const ColPartition &part, const TBOX &table_box)
Definition: tablefind.cpp:1600
tesseract::TableFinder::SetColumnsType
void SetColumnsType(ColSegment_LIST *col_segments)
Definition: tablefind.cpp:1143
tesseract::TableFinder::global_median_blob_width_
int global_median_blob_width_
Definition: tablefind.h:406
tesseract::kSplitPartitionSize
const double kSplitPartitionSize
Definition: tablefind.cpp:43
PT_PULLOUT_IMAGE
Definition: capi.h:119
tesseract::GridBase::gridsize
int gridsize() const
Definition: bbgrid.h:63
tesseract::BBGrid::ClearGridData
void ClearGridData(void(*free_method)(BBC *))
Definition: bbgrid.h:464
ScrollView::LIME_GREEN
Definition: scrollview.h:127
tesseract::TableFinder::MarkTablePartitions
void MarkTablePartitions()
Definition: tablefind.cpp:789
tesseract::BBGrid::DisplayBoxes
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:613
tesseract::ColPartitionGrid::FindPartitionPartners
void FindPartitionPartners()
Definition: colpartitiongrid.cpp:1179
tesseract::BBGrid::Init
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
tesseract::TableFinder::SmoothTablePartitionRuns
void SmoothTablePartitionRuns()
Definition: tablefind.cpp:1108
tesseract::TableFinder::DisplayColPartitions
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
Definition: tablefind.cpp:1916
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
tesseract::TableFinder::SetPartitionSpacings
static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns)
Definition: tablefind.cpp:586
tesseract::kSmallTableProjectionThreshold
const double kSmallTableProjectionThreshold
Definition: tablefind.cpp:105
TBOX::left
int16_t left() const
Definition: rect.h:71
ScrollView::GREEN
Definition: scrollview.h:106
tesseract::TableFinder::SetGlobalSpacings
void SetGlobalSpacings(ColPartitionGrid *grid)
Definition: tablefind.cpp:709
tesseract::TableFinder::global_median_ledding_
int global_median_ledding_
Definition: tablefind.h:408
tesseract::kAllowBlobWidth
const double kAllowBlobWidth
Definition: tablefind.cpp:56
PT_FLOWING_TEXT
Definition: capi.h:109
tesseract::kMaxTableCellXheight
const double kMaxTableCellXheight
Definition: tablefind.cpp:80
tesseract::TableFinder::global_median_xheight_
int global_median_xheight_
Definition: tablefind.h:404
tesseract::kAdjacentLeaderSearchPadding
const int kAdjacentLeaderSearchPadding
Definition: tablefind.cpp:116
BLOBNBOX::region_type
BlobRegionType region_type() const
Definition: blobbox.h:282
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::kAllowBlobHeight
const double kAllowBlobHeight
Definition: tablefind.cpp:55
tesseract::TableFinder::fragmented_text_grid_
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:418
tesseract::TableFinder::InsertTextPartition
void InsertTextPartition(ColPartition *part)
Definition: tablefind.cpp:394
ScrollView::Line
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:531
tesseract::TableFinder::FindNeighbors
void FindNeighbors()
Definition: tablefind.cpp:766
tesseract::kStrokeWidthConstantTolerance
const double kStrokeWidthConstantTolerance
Definition: tablefind.cpp:140
tesseract::kAllowTextArea
const double kAllowTextArea
Definition: tablefind.cpp:50
tesseract::TableFinder::MoveColSegmentsToGrid
void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
Definition: tablefind.cpp:1176
tesseract::TableFinder::MakeWindow
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: tablefind.cpp:518
tesseract::TableFinder::BelongToOneTable
bool BelongToOneTable(const TBOX &box1, const TBOX &box2)
Definition: tablefind.cpp:1444
TBOX::set_bottom
void set_bottom(int y)
Definition: rect.h:67
tesseract::TableFinder::GapInXProjection
bool GapInXProjection(int *xprojection, int length)
Definition: tablefind.cpp:1768
ScrollView::Color
Color
Definition: scrollview.h:100
tesseract::TableFinder::MakeTableBlocks
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback width_cb)
Definition: tablefind.cpp:1997
tesseract::TableFinder::gridsize
int gridsize() const
Definition: tablefind.cpp:378
ScrollView::Rectangle
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:599
tesseract::TableFinder::InitializePartitions
void InitializePartitions(ColPartitionSet **all_columns)
Definition: tablefind.cpp:579
tesseract::TableFinder::GetTableRegions
void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
Definition: tablefind.cpp:1323
tesseract::TableFinder::GetColumnBlocks
void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments)
Definition: tablefind.cpp:523
tesseract::GridBase::gridheight
int gridheight() const
Definition: bbgrid.h:69
tesseract::kAllowTextWidth
const double kAllowTextWidth
Definition: tablefind.cpp:49
tesseract::GridBase::bleft
const ICOORD & bleft() const
Definition: bbgrid.h:72
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
tesseract::kParagraphEndingPreviousLineRatio
const double kParagraphEndingPreviousLineRatio
Definition: tablefind.cpp:121
TBOX
Definition: rect.h:33