tesseract  5.0.0-alpha-619-ge9db
colpartitionset.h
Go to the documentation of this file.
1 // File: colpartitionset.h
3 // Description: Class to hold a list of ColPartitions of the page that
4 // correspond roughly to columns.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_
21 #define TESSERACT_TEXTORD_COLPARTITIONSET_H_
22 
23 #include "colpartition.h" // For ColPartition_LIST.
24 #include <tesseract/genericvector.h> // For GenericVector.
25 #include "rect.h" // For TBOX.
26 #include "tabvector.h" // For BLOBNBOX_CLIST.
27 
28 namespace tesseract {
29 
30 class WorkingPartSet_LIST;
31 class ColSegment_LIST;
32 class ColPartitionSet;
34 
35 // ColPartitionSet is a class that holds a list of ColPartitions.
36 // Its main use is in holding a candidate partitioning of the width of the
37 // image into columns, where each member ColPartition is a single column.
38 // ColPartitionSets are used in building the column layout of a page.
39 class ColPartitionSet : public ELIST_LINK {
40  public:
41  ColPartitionSet() = default;
42  explicit ColPartitionSet(ColPartition_LIST* partitions);
43  explicit ColPartitionSet(ColPartition* partition);
44 
45  ~ColPartitionSet() = default;
46 
47  // Simple accessors.
48  const TBOX& bounding_box() const {
49  return bounding_box_;
50  }
51  bool Empty() const {
52  return parts_.empty();
53  }
54  int ColumnCount() const {
55  return parts_.length();
56  }
57 
58  // Returns the number of columns of good width.
59  int GoodColumnCount() const;
60 
61  // Return an element of the parts_ list from its index.
62  ColPartition* GetColumnByIndex(int index);
63 
64  // Return the ColPartition that contains the given coords, if any, else nullptr.
65  ColPartition* ColumnContaining(int x, int y);
66 
67  // Return the bounding boxes of columns at the given y-range
68  void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
69 
70  // Extract all the parts from the list, relinquishing ownership.
71  void RelinquishParts();
72 
73  // Attempt to improve this by adding partitions or expanding partitions.
75 
76  // If this set is good enough to represent a new partitioning into columns,
77  // add it to the vector of sets, otherwise delete it.
79 
80  // Return true if the partitions in other are all compatible with the columns
81  // in this.
82  bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback cb);
83 
84  // Returns the total width of all blobs in the part_set that do not lie
85  // within an approved column. Used as a cost measure for using this
86  // column set over another that might be compatible.
87  int UnmatchedWidth(ColPartitionSet* part_set);
88 
89  // Return true if this ColPartitionSet makes a legal column candidate by
90  // having legal individual partitions and non-overlapping adjacent pairs.
91  bool LegalColumnCandidate();
92 
93  // Return a copy of this. If good_only will only copy the Good ColPartitions.
94  ColPartitionSet* Copy(bool good_only);
95 
96  // Display the edges of the columns at the given y coords.
97  void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win);
98 
99  // Return the ColumnSpanningType that best explains the columns overlapped
100  // by the given coords(left,right,y), with the given margins.
101  // Also return the first and last column index touched by the coords and
102  // the leftmost spanned column.
103  // Column indices are 2n + 1 for real columns (0 based) and even values
104  // represent the gaps in between columns, with 0 being left of the leftmost.
105  // resolution refers to the ppi resolution of the image. It may be 0 if only
106  // the first_col and last_col are required.
107  ColumnSpanningType SpanningType(int resolution,
108  int left, int right, int height, int y,
109  int left_margin, int right_margin,
110  int* first_col, int* last_col,
111  int* first_spanned_col);
112 
113  // The column_set has changed. Close down all in-progress WorkingPartSets in
114  // columns that do not match and start new ones for the new columns in this.
115  // As ColPartitions are turned into BLOCKs, the used ones are put in
116  // used_parts, as they still need to be referenced in the grid.
117  void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright,
118  int resolution, ColPartition_LIST* used_parts,
119  WorkingPartSet_LIST* working_set);
120 
121  // Accumulate the widths and gaps into the given variables.
122  void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples,
123  int* total_gap, int* gap_samples);
124 
125  // Provide debug output for this ColPartitionSet and all the ColPartitions.
126  void Print();
127 
128  private:
129  // Add the given partition to the list in the appropriate place.
130  void AddPartition(ColPartition* new_part, ColPartition_IT* it);
131 
132  // Compute the coverage and good column count. Coverage is the amount of the
133  // width of the page (in pixels) that is covered by ColPartitions, which are
134  // used to provide candidate column layouts.
135  // Coverage is split into good and bad. Good coverage is provided by
136  // ColPartitions of a frequent width (according to the callback function
137  // provided by TabFinder::WidthCB, which accesses stored statistics on the
138  // widths of ColPartitions) and bad coverage is provided by all other
139  // ColPartitions, even if they have tab vectors at both sides. Thus:
140  // |-----------------------------------------------------------------|
141  // | Double width heading |
142  // |-----------------------------------------------------------------|
143  // |-------------------------------| |-------------------------------|
144  // | Common width ColParition | | Common width ColPartition |
145  // |-------------------------------| |-------------------------------|
146  // the layout with two common-width columns has better coverage than the
147  // double width heading, because the coverage is "good," even though less in
148  // total coverage than the heading, because the heading coverage is "bad."
149  void ComputeCoverage();
150 
151  // Adds the coverage, column count and box for a single partition,
152  // without adding it to the list. (Helper factored from ComputeCoverage.)
153  void AddPartitionCoverageAndBox(const ColPartition& part);
154 
155  // The partitions in this column candidate.
156  ColPartition_LIST parts_;
157  // The number of partitions that have a frequent column width.
158  int good_column_count_;
159  // Total width of all the good ColPartitions.
160  int good_coverage_;
161  // Total width of all the bad ColPartitions.
162  int bad_coverage_;
163  // Bounding box of all partitions in the set.
164  TBOX bounding_box_;
165 };
166 
167 ELISTIZEH(ColPartitionSet)
168 
169 } // namespace tesseract.
170 
171 #endif // TESSERACT_TEXTORD_COLPARTITIONSET_H_
ScrollView
Definition: scrollview.h:97
tesseract::ColPartitionSet::AccumulateColumnWidthsAndGaps
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
Definition: colpartitionset.cpp:571
tesseract::ColPartitionSet::DisplayColumnEdges
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
Definition: colpartitionset.cpp:385
tesseract::ColPartitionSet::GetColumnByIndex
ColPartition * GetColumnByIndex(int index)
Definition: colpartitionset.cpp:59
tesseract::ColPartitionSet::ChangeWorkColumns
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
Definition: colpartitionset.cpp:499
ICOORD
integer coordinate
Definition: points.h:30
tesseract::WidthCallback
std::function< bool(int)> WidthCallback
Definition: tabfind.h:35
tesseract::ColPartitionSet::SpanningType
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
Definition: colpartitionset.cpp:404
colpartition.h
tesseract::ColPartitionSet
Definition: colpartitionset.h:39
tesseract::ColPartitionSet::GoodColumnCount
int GoodColumnCount() const
Definition: colpartitionset.cpp:48
rect.h
tesseract::ColPartitionSet::LegalColumnCandidate
bool LegalColumnCandidate()
Definition: colpartitionset.cpp:330
tesseract::ColPartitionSet::AddToColumnSetsIfUnique
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb)
Definition: colpartitionset.cpp:174
ELISTIZEH
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:907
tesseract::ColPartition
Definition: colpartition.h:67
tesseract::ColPartitionSet::GetColumnBoxes
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)
Definition: colpartitionset.cpp:369
genericvector.h
tesseract::ColPartitionSet::bounding_box
const TBOX & bounding_box() const
Definition: colpartitionset.h:48
tesseract::ColumnSpanningType
ColumnSpanningType
Definition: colpartition.h:47
tesseract::ColPartitionSet::CompatibleColumns
bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb)
Definition: colpartitionset.cpp:222
tesseract::ColPartitionSet::UnmatchedWidth
int UnmatchedWidth(ColPartitionSet *part_set)
Definition: colpartitionset.cpp:305
tesseract
Definition: baseapi.h:65
tesseract::ColPartitionSet::ImproveColumnCandidate
void ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets)
Definition: colpartitionset.cpp:89
GenericVector< ColPartitionSet * >
tesseract::ColPartitionSet::ColumnCount
int ColumnCount() const
Definition: colpartitionset.h:54
tesseract::ColPartitionSet::~ColPartitionSet
~ColPartitionSet()=default
tesseract::ColPartitionSet::Print
void Print()
Definition: colpartitionset.cpp:592
tesseract::ColPartitionSet::Copy
ColPartitionSet * Copy(bool good_only)
Definition: colpartitionset.cpp:353
ELIST_LINK
Definition: elst.h:74
tabvector.h
tesseract::ColPartitionSet::ColumnContaining
ColPartition * ColumnContaining(int x, int y)
Definition: colpartitionset.cpp:69
tesseract::ColPartitionSet::ColPartitionSet
ColPartitionSet()=default
tesseract::ColPartitionSet::Empty
bool Empty() const
Definition: colpartitionset.h:51
tesseract::ColPartitionSet::RelinquishParts
void RelinquishParts()
Definition: colpartitionset.cpp:80
TBOX
Definition: rect.h:33