tesseract
5.0.0-alpha-619-ge9db
|
#include <colpartition.h>
|
| ColPartition ()=default |
|
| ColPartition (BlobRegionType blob_type, const ICOORD &vertical) |
|
| ~ColPartition () |
|
const TBOX & | bounding_box () const |
|
int | left_margin () const |
|
void | set_left_margin (int margin) |
|
int | right_margin () const |
|
void | set_right_margin (int margin) |
|
int | median_top () const |
|
int | median_bottom () const |
|
int | median_left () const |
|
int | median_right () const |
|
int | median_height () const |
|
void | set_median_height (int height) |
|
int | median_width () const |
|
void | set_median_width (int width) |
|
BlobRegionType | blob_type () const |
|
void | set_blob_type (BlobRegionType t) |
|
BlobTextFlowType | flow () const |
|
void | set_flow (BlobTextFlowType f) |
|
int | good_blob_score () const |
|
bool | good_width () const |
|
bool | good_column () const |
|
bool | left_key_tab () const |
|
int | left_key () const |
|
bool | right_key_tab () const |
|
int | right_key () const |
|
PolyBlockType | type () const |
|
void | set_type (PolyBlockType t) |
|
BLOBNBOX_CLIST * | boxes () |
|
int | boxes_count () const |
|
void | set_vertical (const ICOORD &v) |
|
ColPartition_CLIST * | upper_partners () |
|
ColPartition_CLIST * | lower_partners () |
|
void | set_working_set (WorkingPartSet *working_set) |
|
bool | block_owned () const |
|
void | set_block_owned (bool owned) |
|
bool | desperately_merged () const |
|
ColPartitionSet * | column_set () const |
|
void | set_side_step (int step) |
|
int | bottom_spacing () const |
|
void | set_bottom_spacing (int spacing) |
|
int | top_spacing () const |
|
void | set_top_spacing (int spacing) |
|
void | set_table_type () |
|
void | clear_table_type () |
|
bool | inside_table_column () |
|
void | set_inside_table_column (bool val) |
|
ColPartition * | nearest_neighbor_above () const |
|
void | set_nearest_neighbor_above (ColPartition *part) |
|
ColPartition * | nearest_neighbor_below () const |
|
void | set_nearest_neighbor_below (ColPartition *part) |
|
int | space_above () const |
|
void | set_space_above (int space) |
|
int | space_below () const |
|
void | set_space_below (int space) |
|
int | space_to_left () const |
|
void | set_space_to_left (int space) |
|
int | space_to_right () const |
|
void | set_space_to_right (int space) |
|
uint8_t * | color1 () |
|
uint8_t * | color2 () |
|
bool | owns_blobs () const |
|
void | set_owns_blobs (bool owns_blobs) |
|
int | MidY () const |
|
int | MedianY () const |
|
int | MidX () const |
|
int | SortKey (int x, int y) const |
|
int | XAtY (int sort_key, int y) const |
|
int | KeyWidth (int left_key, int right_key) const |
|
int | ColumnWidth () const |
|
int | BoxLeftKey () const |
|
int | BoxRightKey () const |
|
int | LeftAtY (int y) const |
|
int | RightAtY (int y) const |
|
bool | IsLeftOf (const ColPartition &other) const |
|
bool | ColumnContains (int x, int y) const |
|
bool | IsEmpty () const |
|
bool | IsSingleton () const |
|
bool | HOverlaps (const ColPartition &other) const |
|
bool | VOverlaps (const ColPartition &other) const |
|
int | VCoreOverlap (const ColPartition &other) const |
|
int | HCoreOverlap (const ColPartition &other) const |
|
bool | VSignificantCoreOverlap (const ColPartition &other) const |
|
bool | WithinSameMargins (const ColPartition &other) const |
|
bool | TypesMatch (const ColPartition &other) const |
|
bool | IsLineType () const |
|
bool | IsImageType () const |
|
bool | IsTextType () const |
|
bool | IsPulloutType () const |
|
bool | IsVerticalType () const |
|
bool | IsHorizontalType () const |
|
bool | IsUnMergeableType () const |
|
bool | IsVerticalLine () const |
|
bool | IsHorizontalLine () const |
|
void | AddBox (BLOBNBOX *box) |
|
void | RemoveBox (BLOBNBOX *box) |
|
BLOBNBOX * | BiggestBox () |
|
TBOX | BoundsWithoutBox (BLOBNBOX *box) |
|
void | ClaimBoxes () |
|
void | DisownBoxes () |
|
void | DisownBoxesNoAssert () |
|
bool | ReleaseNonLeaderBoxes () |
|
void | DeleteBoxes () |
|
void | ReflectInYAxis () |
|
bool | IsLegal () |
|
bool | MatchingColumns (const ColPartition &other) const |
|
bool | MatchingTextColor (const ColPartition &other) const |
|
bool | MatchingSizes (const ColPartition &other) const |
|
bool | ConfirmNoTabViolation (const ColPartition &other) const |
|
bool | MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const |
|
bool | OKDiacriticMerge (const ColPartition &candidate, bool debug) const |
|
void | SetLeftTab (const TabVector *tab_vector) |
|
void | SetRightTab (const TabVector *tab_vector) |
|
void | CopyLeftTab (const ColPartition &src, bool take_box) |
|
void | CopyRightTab (const ColPartition &src, bool take_box) |
|
int | LeftBlobRule () const |
|
int | RightBlobRule () const |
|
float | SpecialBlobsDensity (const BlobSpecialTextType type) const |
|
int | SpecialBlobsCount (const BlobSpecialTextType type) |
|
void | SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density) |
|
void | ComputeSpecialBlobsDensity () |
|
void | AddPartner (bool upper, ColPartition *partner) |
|
void | RemovePartner (bool upper, ColPartition *partner) |
|
ColPartition * | SingletonPartner (bool upper) |
|
void | Absorb (ColPartition *other, WidthCallback cb) |
|
bool | OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug) |
|
BLOBNBOX * | OverlapSplitBlob (const TBOX &box) |
|
ColPartition * | SplitAtBlob (BLOBNBOX *split_blob) |
|
ColPartition * | SplitAt (int split_x) |
|
void | ComputeLimits () |
|
int | CountOverlappingBoxes (const TBOX &box) |
|
void | SetPartitionType (int resolution, ColPartitionSet *columns) |
|
PolyBlockType | PartitionType (ColumnSpanningType flow) const |
|
void | ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col) |
|
void | SetColumnGoodness (WidthCallback cb) |
|
bool | MarkAsLeaderIfMonospaced () |
|
void | SetRegionAndFlowTypesFromProjectionValue (int value) |
|
void | SetBlobTypes () |
|
bool | HasGoodBaseline () |
|
void | AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set) |
|
TO_ROW * | MakeToRow () |
|
ColPartition * | ShallowCopy () const |
|
ColPartition * | CopyButDontOwnBlobs () |
|
ScrollView::Color | BoxColor () const |
|
void | Print () const |
|
void | PrintColors () |
|
void | SmoothPartnerRun (int working_set_count) |
|
void | RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid) |
|
bool | IsInSameColumnAs (const ColPartition &part) const |
|
void | set_first_column (int column) |
|
void | set_last_column (int column) |
|
| ELIST2_LINK () |
|
| ELIST2_LINK (const ELIST2_LINK &) |
|
void | operator= (const ELIST2_LINK &) |
|
|
static ColPartition * | MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top) |
|
static ColPartition * | FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow) |
|
static ColPartition * | MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list) |
|
static bool | TypesMatch (BlobRegionType type1, BlobRegionType type2) |
|
static bool | TypesSimilar (PolyBlockType type1, PolyBlockType type2) |
|
static void | LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks) |
|
static TO_BLOCK * | MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
|
static TO_BLOCK * | MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts) |
|
static int | SortByBBox (const void *p1, const void *p2) |
|
ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coordinate in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, with each ColPartition representing one column. Although each ColPartition applies only to a given y-coordinate range, a ColPartitionSet of ColPartitions eventually emerges that represents the columns over a wide y-coordinate range.
Definition at line 67 of file colpartition.h.
◆ ColPartition() [1/2]
tesseract::ColPartition::ColPartition |
( |
| ) |
|
|
default |
◆ ColPartition() [2/2]
Parameters:
blob_type | is the blob_region_type_ of the blobs in this partition. |
vertical | is the direction of logical vertical on the possibly skewed image. |
Definition at line 81 of file colpartition.cpp.
82 : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83 median_bottom_(INT32_MAX), median_top_(-INT32_MAX),
84 median_left_(INT32_MAX), median_right_(-INT32_MAX),
87 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
◆ ~ColPartition()
tesseract::ColPartition::~ColPartition |
( |
| ) |
|
Definition at line 133 of file colpartition.cpp.
136 ColPartition_C_IT it(&upper_partners_);
137 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
138 it.data()->RemovePartner(
false,
this);
140 it.set_to_list(&lower_partners_);
141 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
142 it.data()->RemovePartner(
true,
this);
◆ Absorb()
Definition at line 638 of file colpartition.cpp.
645 bounding_box_.
bottom()) ||
647 other->bounding_box_.bottom())) {
654 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
656 unsigned w1 = boxes_.length();
657 unsigned w2 = other->boxes_.length();
658 float new_val = special_blobs_densities_[
type] * w1 +
659 other->special_blobs_densities_[
type] * w2;
662 special_blobs_densities_[
type] = new_val / (w1 + w2);
667 BLOBNBOX_C_IT it(&boxes_);
668 BLOBNBOX_C_IT it2(&other->boxes_);
669 for (; !it2.empty(); it2.forward()) {
672 if (prev_owner != other && prev_owner !=
nullptr) {
676 ASSERT_HOST(prev_owner == other || prev_owner ==
nullptr);
677 if (prev_owner == other)
679 it.add_to_end(bbox2);
681 left_margin_ = std::min(left_margin_, other->left_margin_);
682 right_margin_ = std::max(right_margin_, other->right_margin_);
683 if (other->left_key_ < left_key_) {
684 left_key_ = other->left_key_;
685 left_key_tab_ = other->left_key_tab_;
687 if (other->right_key_ > right_key_) {
688 right_key_ = other->right_key_;
689 right_key_tab_ = other->right_key_tab_;
694 flow_ = other->flow_;
695 blob_type_ = other->blob_type_;
699 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
700 last_add_was_vertical_ =
true;
702 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
703 last_add_was_vertical_ =
false;
708 for (
int upper = 0; upper < 2; ++upper) {
709 ColPartition_CLIST partners;
710 ColPartition_C_IT part_it(&partners);
711 part_it.add_list_after(upper ? &other->upper_partners_
712 : &other->lower_partners_);
713 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
715 partner->RemovePartner(!upper, other);
716 partner->RemovePartner(!upper,
this);
717 partner->AddPartner(!upper,
this);
◆ AddBox()
void tesseract::ColPartition::AddBox |
( |
BLOBNBOX * |
box | ) |
|
Definition at line 169 of file colpartition.cpp.
172 if (boxes_.length() == 0) {
175 bounding_box_ += box;
179 if (!last_add_was_vertical_) {
180 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
181 last_add_was_vertical_ =
true;
183 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
185 if (last_add_was_vertical_) {
186 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
187 last_add_was_vertical_ =
false;
189 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
196 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
198 bounding_box_.
left(), bounding_box_.
right());
◆ AddPartner()
void tesseract::ColPartition::AddPartner |
( |
bool |
upper, |
|
|
ColPartition * |
partner |
|
) |
| |
Definition at line 603 of file colpartition.cpp.
605 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
607 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
609 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
611 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
◆ AddToWorkingSet()
void tesseract::ColPartition::AddToWorkingSet |
( |
const ICOORD & |
bleft, |
|
|
const ICOORD & |
tright, |
|
|
int |
resolution, |
|
|
ColPartition_LIST * |
used_parts, |
|
|
WorkingPartSet_LIST * |
working_set |
|
) |
| |
Definition at line 1347 of file colpartition.cpp.
1353 block_owned_ =
true;
1354 WorkingPartSet_IT it(working_sets);
1357 if (partner !=
nullptr && partner->working_set_ !=
nullptr) {
1358 working_set_ = partner->working_set_;
1363 tprintf(
"Partition with partner has no working set!:");
1368 WorkingPartSet* work_set =
nullptr;
1371 for (it.mark_cycle_pt(); !it.cycled_list() &&
1372 col_index != first_column_;
1373 it.forward(), ++col_index);
1375 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1379 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1382 work_set = it.data();
1385 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1387 BLOCK_LIST completed_blocks;
1388 TO_BLOCK_LIST to_blocks;
1389 for (; !it.cycled_list() && col_index <= last_column_;
1390 it.forward(), ++col_index) {
1391 WorkingPartSet* end_set = it.data();
1392 end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1393 &completed_blocks, &to_blocks);
1395 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1397 working_set_ = work_set;
◆ BiggestBox()
BLOBNBOX * tesseract::ColPartition::BiggestBox |
( |
| ) |
|
Definition at line 215 of file colpartition.cpp.
217 BLOBNBOX_C_IT bb_it(&boxes_);
218 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
221 if (biggest ==
nullptr ||
225 if (biggest ==
nullptr ||
◆ blob_type()
◆ block_owned()
bool tesseract::ColPartition::block_owned |
( |
| ) |
const |
|
inline |
◆ bottom_spacing()
int tesseract::ColPartition::bottom_spacing |
( |
| ) |
const |
|
inline |
◆ bounding_box()
const TBOX& tesseract::ColPartition::bounding_box |
( |
| ) |
const |
|
inline |
◆ BoundsWithoutBox()
TBOX tesseract::ColPartition::BoundsWithoutBox |
( |
BLOBNBOX * |
box | ) |
|
Definition at line 234 of file colpartition.cpp.
236 BLOBNBOX_C_IT bb_it(&boxes_);
237 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
238 if (box != bb_it.data()) {
239 result += bb_it.data()->bounding_box();
◆ BoxColor()
◆ boxes()
BLOBNBOX_CLIST* tesseract::ColPartition::boxes |
( |
| ) |
|
|
inline |
◆ boxes_count()
int tesseract::ColPartition::boxes_count |
( |
| ) |
const |
|
inline |
◆ BoxLeftKey()
int tesseract::ColPartition::BoxLeftKey |
( |
| ) |
const |
|
inline |
◆ BoxRightKey()
int tesseract::ColPartition::BoxRightKey |
( |
| ) |
const |
|
inline |
◆ ClaimBoxes()
void tesseract::ColPartition::ClaimBoxes |
( |
| ) |
|
Definition at line 247 of file colpartition.cpp.
248 BLOBNBOX_C_IT bb_it(&boxes_);
249 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
252 if (other ==
nullptr) {
◆ clear_table_type()
void tesseract::ColPartition::clear_table_type |
( |
| ) |
|
|
inline |
◆ color1()
uint8_t* tesseract::ColPartition::color1 |
( |
| ) |
|
|
inline |
◆ color2()
uint8_t* tesseract::ColPartition::color2 |
( |
| ) |
|
|
inline |
◆ column_set()
◆ ColumnContains()
bool tesseract::ColPartition::ColumnContains |
( |
int |
x, |
|
|
int |
y |
|
) |
| const |
|
inline |
◆ ColumnRange()
void tesseract::ColPartition::ColumnRange |
( |
int |
resolution, |
|
|
ColPartitionSet * |
columns, |
|
|
int * |
first_col, |
|
|
int * |
last_col |
|
) |
| |
Definition at line 1056 of file colpartition.cpp.
1058 int first_spanned_col = -1;
1060 columns->SpanningType(resolution,
1061 bounding_box_.
left(), bounding_box_.
right(),
1062 std::min(bounding_box_.
height(), bounding_box_.
width()),
1063 MidY(), left_margin_, right_margin_,
1064 first_col, last_col,
1065 &first_spanned_col);
◆ ColumnWidth()
int tesseract::ColPartition::ColumnWidth |
( |
| ) |
const |
|
inline |
◆ ComputeLimits()
void tesseract::ColPartition::ComputeLimits |
( |
| ) |
|
Definition at line 861 of file colpartition.cpp.
862 bounding_box_ =
TBOX();
863 BLOBNBOX_C_IT it(&boxes_);
865 int non_leader_count = 0;
867 bounding_box_.
set_left(left_margin_);
872 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
884 tprintf(
"Computed left-illegal partition\n");
890 tprintf(
"Computed right-illegal partition\n");
897 median_top_ = bounding_box_.
top();
898 median_bottom_ = bounding_box_.
bottom();
899 median_height_ = bounding_box_.
height();
900 median_left_ = bounding_box_.
left();
901 median_right_ = bounding_box_.
right();
902 median_width_ = bounding_box_.
width();
905 STATS bottom_stats(bounding_box_.
bottom(), bounding_box_.
top() + 1);
907 STATS left_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
908 STATS right_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
909 STATS width_stats(0, bounding_box_.
width() + 1);
910 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
914 int area = box.
area();
915 top_stats.add(box.
top(), area);
916 bottom_stats.add(box.
bottom(), area);
917 height_stats.add(box.
height(), area);
918 left_stats.add(box.
left(), area);
919 right_stats.add(box.
right(), area);
920 width_stats.add(box.
width(), area);
923 median_top_ = static_cast<int>(top_stats.median() + 0.5);
924 median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
925 median_height_ = static_cast<int>(height_stats.median() + 0.5);
926 median_left_ = static_cast<int>(left_stats.median() + 0.5);
927 median_right_ = static_cast<int>(right_stats.median() + 0.5);
928 median_width_ = static_cast<int>(width_stats.median() + 0.5);
932 tprintf(
"Made partition with bad right coords");
936 tprintf(
"Made partition with bad left coords");
942 for (
int upper = 0; upper < 2; ++upper) {
943 ColPartition_CLIST partners;
944 ColPartition_C_IT part_it(&partners);
945 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
946 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
948 partner->RemovePartner(!upper,
this);
949 partner->AddPartner(!upper,
this);
953 bounding_box_.
bottom())) {
954 tprintf(
"Recomputed box for partition %p\n",
this);
◆ ComputeSpecialBlobsDensity()
void tesseract::ColPartition::ComputeSpecialBlobsDensity |
( |
| ) |
|
Definition at line 582 of file colpartition.cpp.
583 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
584 if (boxes_.empty()) {
588 BLOBNBOX_C_IT blob_it(&boxes_);
589 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
592 special_blobs_densities_[
type]++;
595 for (
float& special_blobs_density : special_blobs_densities_) {
596 special_blobs_density /= boxes_.length();
◆ ConfirmNoTabViolation()
bool tesseract::ColPartition::ConfirmNoTabViolation |
( |
const ColPartition & |
other | ) |
const |
Definition at line 413 of file colpartition.cpp.
414 if (bounding_box_.
right() < other.bounding_box_.left() &&
415 bounding_box_.
right() < other.LeftBlobRule())
417 if (other.bounding_box_.right() < bounding_box_.
left() &&
420 if (bounding_box_.
left() > other.bounding_box_.right() &&
421 bounding_box_.
left() > other.RightBlobRule())
423 if (other.bounding_box_.left() > bounding_box_.
right() &&
◆ CopyButDontOwnBlobs()
ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs |
( |
| ) |
|
Definition at line 1758 of file colpartition.cpp.
1760 copy->set_owns_blobs(
false);
1761 BLOBNBOX_C_IT inserter(copy->boxes());
1762 BLOBNBOX_C_IT traverser(
boxes());
1763 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1764 inserter.add_after_then_move(traverser.data());
◆ CopyLeftTab()
void tesseract::ColPartition::CopyLeftTab |
( |
const ColPartition & |
src, |
|
|
bool |
take_box |
|
) |
| |
Definition at line 519 of file colpartition.cpp.
520 left_key_tab_ = take_box ? false : src.left_key_tab_;
522 left_key_ = src.left_key_;
527 if (left_margin_ > bounding_box_.
left())
528 left_margin_ = src.left_margin_;
◆ CopyRightTab()
void tesseract::ColPartition::CopyRightTab |
( |
const ColPartition & |
src, |
|
|
bool |
take_box |
|
) |
| |
Definition at line 532 of file colpartition.cpp.
533 right_key_tab_ = take_box ? false : src.right_key_tab_;
534 if (right_key_tab_) {
535 right_key_ = src.right_key_;
540 if (right_margin_ < bounding_box_.
right())
541 right_margin_ = src.right_margin_;
◆ CountOverlappingBoxes()
int tesseract::ColPartition::CountOverlappingBoxes |
( |
const TBOX & |
box | ) |
|
Definition at line 960 of file colpartition.cpp.
961 BLOBNBOX_C_IT it(&boxes_);
962 int overlap_count = 0;
963 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
968 return overlap_count;
◆ DeleteBoxes()
void tesseract::ColPartition::DeleteBoxes |
( |
| ) |
|
Definition at line 305 of file colpartition.cpp.
309 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
311 delete bblob->
cblob();
◆ desperately_merged()
bool tesseract::ColPartition::desperately_merged |
( |
| ) |
const |
|
inline |
◆ DisownBoxes()
void tesseract::ColPartition::DisownBoxes |
( |
| ) |
|
Definition at line 263 of file colpartition.cpp.
264 BLOBNBOX_C_IT bb_it(&boxes_);
265 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
◆ DisownBoxesNoAssert()
void tesseract::ColPartition::DisownBoxesNoAssert |
( |
| ) |
|
Definition at line 276 of file colpartition.cpp.
277 BLOBNBOX_C_IT bb_it(&boxes_);
278 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
280 if (bblob->
owner() ==
this)
◆ FakePartition()
Definition at line 95 of file colpartition.cpp.
100 part->set_type(block_type);
101 part->set_flow(
flow);
103 part->set_left_margin(box.
left());
104 part->set_right_margin(box.
right());
105 part->SetBlobTypes();
106 part->ComputeLimits();
◆ flow()
◆ good_blob_score()
int tesseract::ColPartition::good_blob_score |
( |
| ) |
const |
|
inline |
◆ good_column()
bool tesseract::ColPartition::good_column |
( |
| ) |
const |
|
inline |
◆ good_width()
bool tesseract::ColPartition::good_width |
( |
| ) |
const |
|
inline |
◆ HasGoodBaseline()
bool tesseract::ColPartition::HasGoodBaseline |
( |
| ) |
|
Definition at line 1280 of file colpartition.cpp.
1282 DetLineFit linepoints;
1286 int total_height = 0;
1288 int height_count = 0;
1290 BLOBNBOX_C_IT it(&boxes_);
1291 TBOX box(it.data()->bounding_box());
1297 ICOORD first_pt(box.right(), box.bottom());
1300 linepoints.Add(first_pt);
1301 for (it.forward(); !it.at_last(); it.forward()) {
1304 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1305 linepoints.Add(box_pt);
1306 total_height += box.width();
1307 coverage += box.height();
1310 box = it.data()->bounding_box();
1311 ICOORD last_pt(box.right(), box.top());
1312 linepoints.Add(last_pt);
1313 width = last_pt.
y() - first_pt.y();
1317 TBOX box(it.data()->bounding_box());
1320 ICOORD first_pt(box.left(), box.bottom());
1321 linepoints.Add(first_pt);
1322 for (it.forward(); !it.at_last(); it.forward()) {
1325 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1326 linepoints.Add(box_pt);
1327 total_height += box.height();
1328 coverage += box.width();
1331 box = it.data()->bounding_box();
1332 ICOORD last_pt(box.right(), box.bottom());
1333 linepoints.Add(last_pt);
1334 width = last_pt.
x() - first_pt.x();
1337 if (height_count == 0)
1341 double error = linepoints.Fit(&start_pt, &end_pt);
◆ HCoreOverlap()
int tesseract::ColPartition::HCoreOverlap |
( |
const ColPartition & |
other | ) |
const |
|
inline |
Definition at line 384 of file colpartition.h.
385 return std::min(median_right_, other.median_right_) -
386 std::max(median_left_, other.median_left_);
◆ HOverlaps()
bool tesseract::ColPartition::HOverlaps |
( |
const ColPartition & |
other | ) |
const |
|
inline |
◆ inside_table_column()
bool tesseract::ColPartition::inside_table_column |
( |
| ) |
|
|
inline |
◆ IsEmpty()
bool tesseract::ColPartition::IsEmpty |
( |
| ) |
const |
|
inline |
◆ IsHorizontalLine()
bool tesseract::ColPartition::IsHorizontalLine |
( |
| ) |
const |
|
inline |
◆ IsHorizontalType()
bool tesseract::ColPartition::IsHorizontalType |
( |
| ) |
const |
|
inline |
◆ IsImageType()
bool tesseract::ColPartition::IsImageType |
( |
| ) |
const |
|
inline |
◆ IsInSameColumnAs()
bool tesseract::ColPartition::IsInSameColumnAs |
( |
const ColPartition & |
part | ) |
const |
Definition at line 2175 of file colpartition.cpp.
2179 return (last_column_ >= part.first_column_) &&
2180 (first_column_ <= part.last_column_);
◆ IsLeftOf()
bool tesseract::ColPartition::IsLeftOf |
( |
const ColPartition & |
other | ) |
const |
|
inline |
Definition at line 349 of file colpartition.h.
350 return bounding_box_.
right() < other.bounding_box_.right();
◆ IsLegal()
bool tesseract::ColPartition::IsLegal |
( |
| ) |
|
Definition at line 342 of file colpartition.cpp.
343 if (bounding_box_.
left() > bounding_box_.
right()) {
345 tprintf(
"Bounding box invalid\n");
350 if (left_margin_ > bounding_box_.
left() ||
351 right_margin_ < bounding_box_.
right()) {
360 tprintf(
"Key inside box: %d v %d or %d v %d\n",
◆ IsLineType()
bool tesseract::ColPartition::IsLineType |
( |
| ) |
const |
|
inline |
◆ IsPulloutType()
bool tesseract::ColPartition::IsPulloutType |
( |
| ) |
const |
|
inline |
◆ IsSingleton()
bool tesseract::ColPartition::IsSingleton |
( |
| ) |
const |
|
inline |
◆ IsTextType()
bool tesseract::ColPartition::IsTextType |
( |
| ) |
const |
|
inline |
◆ IsUnMergeableType()
bool tesseract::ColPartition::IsUnMergeableType |
( |
| ) |
const |
|
inline |
◆ IsVerticalLine()
bool tesseract::ColPartition::IsVerticalLine |
( |
| ) |
const |
|
inline |
◆ IsVerticalType()
bool tesseract::ColPartition::IsVerticalType |
( |
| ) |
const |
|
inline |
◆ KeyWidth()
int tesseract::ColPartition::KeyWidth |
( |
int |
left_key, |
|
|
int |
right_key |
|
) |
| const |
|
inline |
◆ left_key()
int tesseract::ColPartition::left_key |
( |
| ) |
const |
|
inline |
◆ left_key_tab()
bool tesseract::ColPartition::left_key_tab |
( |
| ) |
const |
|
inline |
◆ left_margin()
int tesseract::ColPartition::left_margin |
( |
| ) |
const |
|
inline |
◆ LeftAtY()
int tesseract::ColPartition::LeftAtY |
( |
int |
y | ) |
const |
|
inline |
◆ LeftBlobRule()
int tesseract::ColPartition::LeftBlobRule |
( |
| ) |
const |
Definition at line 545 of file colpartition.cpp.
546 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
547 return it.data()->left_rule();
◆ LineSpacingBlocks()
void tesseract::ColPartition::LineSpacingBlocks |
( |
const ICOORD & |
bleft, |
|
|
const ICOORD & |
tright, |
|
|
int |
resolution, |
|
|
ColPartition_LIST * |
block_parts, |
|
|
ColPartition_LIST * |
used_parts, |
|
|
BLOCK_LIST * |
completed_blocks, |
|
|
TO_BLOCK_LIST * |
to_blocks |
|
) |
| |
|
static |
Definition at line 1407 of file colpartition.cpp.
1413 int page_height = tright.
y() - bleft.
y();
1415 ColPartition_IT it(block_parts);
1417 int max_line_height = 0;
1423 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1426 STATS side_steps(0, part->bounding_box().height());
1427 if (part->bounding_box().height() > max_line_height)
1428 max_line_height = part->bounding_box().height();
1429 BLOBNBOX_C_IT blob_it(part->boxes());
1430 int prev_bottom = blob_it.data()->bounding_box().bottom();
1431 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1434 int step = bottom - prev_bottom;
1437 side_steps.add(step, 1);
1438 prev_bottom = bottom;
1440 part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1441 if (!it.at_last()) {
1443 part->set_bottom_spacing(part->median_bottom() -
1444 next_part->median_bottom());
1445 part->set_top_spacing(part->median_top() - next_part->median_top());
1447 part->set_bottom_spacing(page_height);
1448 part->set_top_spacing(page_height);
1452 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1453 side_steps.median(), part->top_spacing(), part->bottom_spacing());
1457 if (part_count == 0)
1460 SmoothSpacings(resolution, page_height, block_parts);
1463 BLOCK_IT block_it(completed_blocks);
1464 TO_BLOCK_IT to_block_it(to_blocks);
1465 ColPartition_LIST spacing_parts;
1466 ColPartition_IT sp_block_it(&spacing_parts);
1468 for (it.mark_cycle_pt(); !it.empty();) {
1470 sp_block_it.add_to_end(part);
1472 if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1473 !part->SpacingsEqual(*it.data(), resolution)) {
1476 if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1480 ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1482 tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d,"
1483 " sizes %d %d %d\n",
1484 part->top_spacing(), part->bottom_spacing(),
1485 next_part->top_spacing(), next_part->bottom_spacing(),
1486 part->median_height(), next_part->median_height(),
1487 third_part !=
nullptr ? third_part->median_height() : 0);
1491 if (part->SizesSimilar(*next_part) &&
1493 part->bottom_spacing() &&
1495 part->top_spacing()) {
1498 if (third_part ==
nullptr ||
1499 !next_part->SizesSimilar(*third_part) ||
1501 next_part->bottom_spacing() ||
1503 next_part->top_spacing() ||
1504 next_part->bottom_spacing() > part->bottom_spacing()) {
1506 sp_block_it.add_to_end(it.extract());
1509 tprintf(
"Added line to current block.\n");
1515 if (to_block !=
nullptr) {
1516 to_block_it.add_to_end(to_block);
1517 block_it.add_to_end(to_block->
block);
1519 sp_block_it.set_to_list(&spacing_parts);
1523 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1524 part->top_spacing(), part->bottom_spacing(),
1525 next_part->top_spacing(), next_part->bottom_spacing(),
1526 part->median_height(), next_part->median_height());
◆ lower_partners()
ColPartition_CLIST* tesseract::ColPartition::lower_partners |
( |
| ) |
|
|
inline |
◆ MakeBigPartition()
ColPartition * tesseract::ColPartition::MakeBigPartition |
( |
BLOBNBOX * |
box, |
|
|
ColPartition_LIST * |
big_part_list |
|
) |
| |
|
static |
Definition at line 116 of file colpartition.cpp.
122 single->ComputeLimits();
123 single->ClaimBoxes();
124 single->SetBlobTypes();
125 single->set_block_owned(
true);
126 if (big_part_list !=
nullptr) {
127 ColPartition_IT part_it(big_part_list);
128 part_it.add_to_end(single);
◆ MakeBlock()
TO_BLOCK * tesseract::ColPartition::MakeBlock |
( |
const ICOORD & |
bleft, |
|
|
const ICOORD & |
tright, |
|
|
ColPartition_LIST * |
block_parts, |
|
|
ColPartition_LIST * |
used_parts |
|
) |
| |
|
static |
Definition at line 1623 of file colpartition.cpp.
1626 if (block_parts->empty())
1632 ColPartition_IT it(block_parts);
1640 int line_spacing = part->bottom_spacing();
1642 line_spacing = part->bounding_box().height();
1643 ICOORDELT_LIST vertices;
1644 ICOORDELT_IT vert_it(&vertices);
1646 int min_x = INT32_MAX;
1647 int max_x = -INT32_MAX;
1648 int min_y = INT32_MAX;
1649 int max_y = -INT32_MAX;
1653 ColPartition::LeftEdgeRun(&it, &start, &end);
1655 ColPartition::RightEdgeRun(&it, &start, &end);
1656 ClipCoord(bleft, tright, &start);
1657 ClipCoord(bleft, tright, &end);
1658 vert_it.add_after_then_move(
new ICOORDELT(start));
1659 vert_it.add_after_then_move(
new ICOORDELT(end));
1664 if ((iteration == 0 && it.at_first()) ||
1665 (iteration == 1 && it.at_last())) {
1669 }
while (iteration < 2);
1671 tprintf(
"Making block at (%d,%d)->(%d,%d)\n",
1672 min_x, min_y, max_x, max_y);
1673 auto* block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1675 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
◆ MakeLinePartition()
Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.
Definition at line 148 of file colpartition.cpp.
153 part->bounding_box_ =
TBOX(left, bottom, right, top);
154 part->median_bottom_ = bottom;
155 part->median_top_ = top;
156 part->median_height_ = top - bottom;
157 part->median_left_ = left;
158 part->median_right_ = right;
159 part->median_width_ = right - left;
160 part->left_key_ = part->BoxLeftKey();
161 part->right_key_ = part->BoxRightKey();
◆ MakeToRow()
TO_ROW * tesseract::ColPartition::MakeToRow |
( |
| ) |
|
Definition at line 1706 of file colpartition.cpp.
1707 BLOBNBOX_C_IT blob_it(&boxes_);
1709 int line_size =
IsVerticalType() ? median_width_ : median_height_;
1711 for (; !blob_it.empty(); blob_it.forward()) {
1712 BLOBNBOX* blob = blob_it.extract();
1716 if (row ==
nullptr) {
1717 row =
new TO_ROW(blob, static_cast<float>(top),
1718 static_cast<float>(bottom),
1719 static_cast<float>(line_size));
1721 row->
add_blob(blob, static_cast<float>(top),
1722 static_cast<float>(bottom),
1723 static_cast<float>(line_size));
◆ MakeVerticalTextBlock()
TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock(
    const ICOORD & bleft,
    const ICOORD & tright,
    ColPartition_LIST * block_parts,
    ColPartition_LIST * used_parts)  [static]
Definition at line 1680 of file colpartition.cpp.
1684 if (block_parts->empty())
1686 ColPartition_IT it(block_parts);
1688 TBOX block_box = part->bounding_box();
1689 int line_spacing = block_box.
width();
1691 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1692 block_box += it.data()->bounding_box();
1698 auto* block =
new BLOCK(
"",
true, 0, 0, block_box.
left(), block_box.
bottom(),
1699 block_box.
right(), block_box.
top());
1701 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
◆ MarkAsLeaderIfMonospaced()
bool tesseract::ColPartition::MarkAsLeaderIfMonospaced |
( |
| ) |
|
Definition at line 1083 of file colpartition.cpp.
1084 bool result =
false;
1086 int part_width = bounding_box_.
width();
1087 STATS gap_stats(0, part_width);
1088 STATS width_stats(0, part_width);
1089 BLOBNBOX_C_IT it(&boxes_);
1094 for (it.forward(); !it.at_first(); it.forward()) {
1099 width_stats.add(right - left, 1);
1104 double median_gap = gap_stats.median();
1108 double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1110 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1120 int offset = static_cast<int>(ceil(gap_iqr * 2));
1121 int min_step = static_cast<int>(median_gap +
median_width + 0.5);
1122 int max_step = min_step + offset;
1125 int part_left = bounding_box_.
left() - min_step / 2;
1126 part_width += min_step;
1127 auto* projection =
new DPPoint[part_width];
1128 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1133 for (
int x = left; x < right; ++x) {
1134 projection[left - part_left].AddLocalCost(height);
1139 part_width, projection);
1140 if (best_end !=
nullptr && best_end->total_cost() < blob_count) {
1143 bool modified_blob_list =
false;
1144 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1147 if (it.at_first()) {
1152 modified_blob_list =
true;
1158 it.data_relative(-1)->bounding_box().right();
1161 modified_blob_list =
true;
1172 if (best_end ==
nullptr) {
1175 tprintf(
"Total cost = %d vs allowed %d\n", best_end->total_cost(),
1179 delete [] projection;
◆ MatchingColumns()
bool tesseract::ColPartition::MatchingColumns |
( |
const ColPartition & |
other | ) |
const |
◆ MatchingSizes()
bool tesseract::ColPartition::MatchingSizes |
( |
const ColPartition & |
other | ) |
const |
◆ MatchingStrokeWidth()
bool tesseract::ColPartition::MatchingStrokeWidth(
    const ColPartition & other,
    double fractional_tolerance,
    double constant_tolerance) const
Definition at line 430 of file colpartition.cpp.
434 int nonmatch_count = 0;
435 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
436 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
437 box_it.mark_cycle_pt();
438 other_it.mark_cycle_pt();
439 while (!box_it.cycled_list() && !other_it.cycled_list()) {
440 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
441 fractional_tolerance,
449 return match_count > nonmatch_count;
◆ MatchingTextColor()
bool tesseract::ColPartition::MatchingTextColor |
( |
const ColPartition & |
other | ) |
const |
◆ median_bottom()
int tesseract::ColPartition::median_bottom |
( |
| ) |
const |
|
inline |
◆ median_height()
int tesseract::ColPartition::median_height |
( |
| ) |
const |
|
inline |
◆ median_left()
int tesseract::ColPartition::median_left |
( |
| ) |
const |
|
inline |
◆ median_right()
int tesseract::ColPartition::median_right |
( |
| ) |
const |
|
inline |
◆ median_top()
int tesseract::ColPartition::median_top |
( |
| ) |
const |
|
inline |
◆ median_width()
int tesseract::ColPartition::median_width |
( |
| ) |
const |
|
inline |
◆ MedianY()
int tesseract::ColPartition::MedianY |
( |
| ) |
const |
|
inline |
Definition at line 308 of file colpartition.h.
309 return (median_top_ + median_bottom_) / 2;
◆ MidX()
int tesseract::ColPartition::MidX |
( |
| ) |
const |
|
inline |
◆ MidY()
int tesseract::ColPartition::MidY |
( |
| ) |
const |
|
inline |
◆ nearest_neighbor_above()
ColPartition* tesseract::ColPartition::nearest_neighbor_above |
( |
| ) |
const |
|
inline |
◆ nearest_neighbor_below()
ColPartition* tesseract::ColPartition::nearest_neighbor_below |
( |
| ) |
const |
|
inline |
◆ OKDiacriticMerge()
bool tesseract::ColPartition::OKDiacriticMerge(
    const ColPartition & candidate,
    bool debug) const
Definition at line 458 of file colpartition.cpp.
460 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
461 int min_top = INT32_MAX;
462 int max_bottom = -INT32_MAX;
463 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
467 tprintf(
"Blob is not a diacritic:");
479 bool result = min_top > candidate.median_bottom_ &&
480 max_bottom < candidate.median_top_;
485 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n",
486 max_bottom, min_top, median_bottom_, median_top_);
◆ OKMergeOverlap()
bool tesseract::ColPartition::OKMergeOverlap(
    const ColPartition & merge1,
    const ColPartition & merge2,
    int ok_box_overlap,
    bool debug)
Definition at line 736 of file colpartition.cpp.
740 if (
IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
742 tprintf(
"Vertical partition\n");
746 if (!merge1.VSignificantCoreOverlap(merge2)) {
749 merge1.VCoreOverlap(merge2),
750 merge1.VSignificantCoreOverlap(merge2));
754 TBOX merged_box(merge1.bounding_box());
755 merged_box += merge2.bounding_box();
756 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
757 merged_box.bottom() < bounding_box_.
top() - ok_box_overlap &&
758 merged_box.top() > bounding_box_.
bottom() + ok_box_overlap) {
760 tprintf(
"Excessive box overlap\n");
◆ OverlapSplitBlob()
BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob |
( |
const TBOX & |
box | ) |
|
Definition at line 769 of file colpartition.cpp.
770 if (boxes_.empty() || boxes_.singleton())
772 BLOBNBOX_C_IT it(&boxes_);
773 TBOX left_box(it.data()->bounding_box());
774 for (it.forward(); !it.at_first(); it.forward()) {
777 if (left_box.overlap(box))
◆ owns_blobs()
bool tesseract::ColPartition::owns_blobs |
( |
| ) |
const |
|
inline |
◆ PartitionType()
◆ Print()
void tesseract::ColPartition::Print |
( |
| ) |
const |
Definition at line 1782 of file colpartition.cpp.
1784 tprintf(
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1785 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1786 " ts=%d bs=%d ls=%d rs=%d\n",
1787 boxes_.empty() ?
'E' :
' ',
1788 left_margin_, left_key_tab_ ?
'T' :
'B',
LeftAtY(y),
1789 bounding_box_.
left(), median_left_,
1790 bounding_box_.
bottom(), median_bottom_,
1791 bounding_box_.
right(),
RightAtY(y), right_key_tab_ ?
'T' :
'B',
1792 right_margin_, median_right_, bounding_box_.
top(), median_top_,
1793 good_width_, good_column_, type_,
1794 kBlobTypes[blob_type_], flow_,
1795 first_column_, last_column_, boxes_.length(),
1796 space_above_, space_below_, space_to_left_, space_to_right_);
◆ PrintColors()
void tesseract::ColPartition::PrintColors |
( |
| ) |
|
Definition at line 1800 of file colpartition.cpp.
1801 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1802 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1803 color1_[L_ALPHA_CHANNEL],
1804 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
◆ RefinePartners()
Definition at line 1877 of file colpartition.cpp.
1880 RefinePartnersInternal(
true, get_desperate, grid);
1881 RefinePartnersInternal(
false, get_desperate, grid);
1885 RefinePartnersByType(
true, &upper_partners_);
1886 RefinePartnersByType(
false, &lower_partners_);
1890 if (!upper_partners_.empty() && !upper_partners_.singleton())
1891 RefinePartnersByOverlap(
true, &upper_partners_);
1892 if (!lower_partners_.empty() && !lower_partners_.singleton())
1893 RefinePartnersByOverlap(
false, &lower_partners_);
◆ ReflectInYAxis()
void tesseract::ColPartition::ReflectInYAxis |
( |
| ) |
|
Definition at line 320 of file colpartition.cpp.
321 BLOBNBOX_CLIST reversed_boxes;
322 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
324 BLOBNBOX_C_IT bb_it(&boxes_);
325 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
326 reversed_it.add_before_then_move(bb_it.extract());
328 bb_it.add_list_after(&reversed_boxes);
330 int tmp = left_margin_;
331 left_margin_ = -right_margin_;
332 right_margin_ = -tmp;
◆ ReleaseNonLeaderBoxes()
bool tesseract::ColPartition::ReleaseNonLeaderBoxes |
( |
| ) |
|
Definition at line 289 of file colpartition.cpp.
290 BLOBNBOX_C_IT bb_it(&boxes_);
291 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
298 if (bb_it.empty())
return false;
◆ RemoveBox()
void tesseract::ColPartition::RemoveBox |
( |
BLOBNBOX * |
box | ) |
|
Definition at line 202 of file colpartition.cpp.
203 BLOBNBOX_C_IT bb_it(&boxes_);
204 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
205 if (box == bb_it.data()) {
◆ RemovePartner()
void tesseract::ColPartition::RemovePartner(
    bool upper,
    ColPartition * partner)
Definition at line 618 of file colpartition.cpp.
619 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
620 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
621 if (it.data() == partner) {
◆ right_key()
int tesseract::ColPartition::right_key |
( |
| ) |
const |
|
inline |
◆ right_key_tab()
bool tesseract::ColPartition::right_key_tab |
( |
| ) |
const |
|
inline |
◆ right_margin()
int tesseract::ColPartition::right_margin |
( |
| ) |
const |
|
inline |
◆ RightAtY()
int tesseract::ColPartition::RightAtY |
( |
int |
y | ) |
const |
|
inline |
◆ RightBlobRule()
int tesseract::ColPartition::RightBlobRule |
( |
| ) |
const |
Definition at line 550 of file colpartition.cpp.
551 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
553 return it.data()->right_rule();
◆ set_blob_type()
◆ set_block_owned()
void tesseract::ColPartition::set_block_owned |
( |
bool |
owned | ) |
|
|
inline |
◆ set_bottom_spacing()
void tesseract::ColPartition::set_bottom_spacing |
( |
int |
spacing | ) |
|
|
inline |
◆ set_first_column()
void tesseract::ColPartition::set_first_column |
( |
int |
column | ) |
|
|
inline |
◆ set_flow()
◆ set_inside_table_column()
void tesseract::ColPartition::set_inside_table_column |
( |
bool |
val | ) |
|
|
inline |
◆ set_last_column()
void tesseract::ColPartition::set_last_column |
( |
int |
column | ) |
|
|
inline |
◆ set_left_margin()
void tesseract::ColPartition::set_left_margin |
( |
int |
margin | ) |
|
|
inline |
◆ set_median_height()
void tesseract::ColPartition::set_median_height |
( |
int |
height | ) |
|
|
inline |
◆ set_median_width()
void tesseract::ColPartition::set_median_width |
( |
int |
width | ) |
|
|
inline |
◆ set_nearest_neighbor_above()
void tesseract::ColPartition::set_nearest_neighbor_above |
( |
ColPartition * |
part | ) |
|
|
inline |
◆ set_nearest_neighbor_below()
void tesseract::ColPartition::set_nearest_neighbor_below |
( |
ColPartition * |
part | ) |
|
|
inline |
◆ set_owns_blobs()
void tesseract::ColPartition::set_owns_blobs |
( |
bool |
owns_blobs | ) |
|
|
inline |
◆ set_right_margin()
void tesseract::ColPartition::set_right_margin |
( |
int |
margin | ) |
|
|
inline |
◆ set_side_step()
void tesseract::ColPartition::set_side_step |
( |
int |
step | ) |
|
|
inline |
◆ set_space_above()
void tesseract::ColPartition::set_space_above |
( |
int |
space | ) |
|
|
inline |
◆ set_space_below()
void tesseract::ColPartition::set_space_below |
( |
int |
space | ) |
|
|
inline |
◆ set_space_to_left()
void tesseract::ColPartition::set_space_to_left |
( |
int |
space | ) |
|
|
inline |
◆ set_space_to_right()
void tesseract::ColPartition::set_space_to_right |
( |
int |
space | ) |
|
|
inline |
◆ set_table_type()
void tesseract::ColPartition::set_table_type |
( |
| ) |
|
|
inline |
◆ set_top_spacing()
void tesseract::ColPartition::set_top_spacing |
( |
int |
spacing | ) |
|
|
inline |
◆ set_type()
◆ set_vertical()
void tesseract::ColPartition::set_vertical |
( |
const ICOORD & |
v | ) |
|
|
inline |
◆ set_working_set()
void tesseract::ColPartition::set_working_set |
( |
WorkingPartSet * |
working_set | ) |
|
|
inline |
◆ SetBlobTypes()
void tesseract::ColPartition::SetBlobTypes |
( |
| ) |
|
Definition at line 1265 of file colpartition.cpp.
1268 BLOBNBOX_C_IT it(&boxes_);
1269 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
◆ SetColumnGoodness()
void tesseract::ColPartition::SetColumnGoodness |
( |
WidthCallback |
cb | ) |
|
Definition at line 1070 of file colpartition.cpp.
1073 good_width_ = cb(width);
1074 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
◆ SetLeftTab()
void tesseract::ColPartition::SetLeftTab |
( |
const TabVector * |
tab_vector | ) |
|
Definition at line 494 of file colpartition.cpp.
495 if (tab_vector !=
nullptr) {
496 left_key_ = tab_vector->sort_key();
499 left_key_tab_ =
false;
◆ SetPartitionType()
void tesseract::ColPartition::SetPartitionType(
    int resolution,
    ColPartitionSet * columns)
Definition at line 973 of file colpartition.cpp.
974 int first_spanned_col = -1;
976 columns->SpanningType(resolution,
977 bounding_box_.
left(), bounding_box_.
right(),
978 std::min(bounding_box_.
height(), bounding_box_.
width()),
979 MidY(), left_margin_, right_margin_,
980 &first_column_, &last_column_,
982 column_set_ = columns;
983 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
987 if (first_spanned_col >= 0) {
988 first_column_ = first_spanned_col;
989 last_column_ = first_spanned_col;
991 if ((first_column_ & 1) == 0)
992 last_column_ = first_column_;
993 else if ((last_column_ & 1) == 0)
994 first_column_ = last_column_;
996 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
◆ SetRegionAndFlowTypesFromProjectionValue()
void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue |
( |
int |
value | ) |
|
Definition at line 1191 of file colpartition.cpp.
1193 int good_blob_score_ = 0;
1194 int noisy_count = 0;
1195 int hline_count = 0;
1196 int vline_count = 0;
1197 BLOBNBOX_C_IT it(&boxes_);
1198 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1208 if (hline_count > vline_count) {
1211 }
else if (vline_count > hline_count) {
1214 }
else if (value < -1 || 1 < value) {
1218 long_side = bounding_box_.
width();
1219 short_side = bounding_box_.
height();
1222 long_side = bounding_box_.
height();
1223 short_side = bounding_box_.
width();
1239 if (flow_ ==
BTFT_CHAIN && strong_score == 3)
1247 if (noisy_count >= blob_count) {
1253 bounding_box_.
bottom())) {
1254 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1255 blob_count, noisy_count, good_blob_score_);
1256 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
1257 value, flow_, blob_type_);
◆ SetRightTab()
void tesseract::ColPartition::SetRightTab |
( |
const TabVector * |
tab_vector | ) |
|
Definition at line 506 of file colpartition.cpp.
507 if (tab_vector !=
nullptr) {
508 right_key_ = tab_vector->sort_key();
511 right_key_tab_ =
false;
◆ SetSpecialBlobsDensity()
void tesseract::ColPartition::SetSpecialBlobsDensity(
    const BlobSpecialTextType type,
    const float density)
◆ ShallowCopy()
ColPartition * tesseract::ColPartition::ShallowCopy |
( |
| ) |
const |
Definition at line 1731 of file colpartition.cpp.
1733 part->left_margin_ = left_margin_;
1734 part->right_margin_ = right_margin_;
1735 part->bounding_box_ = bounding_box_;
1736 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1737 sizeof(special_blobs_densities_));
1738 part->median_bottom_ = median_bottom_;
1739 part->median_top_ = median_top_;
1740 part->median_height_ = median_height_;
1741 part->median_left_ = median_left_;
1742 part->median_right_ = median_right_;
1743 part->median_width_ = median_width_;
1744 part->good_width_ = good_width_;
1745 part->good_column_ = good_column_;
1746 part->left_key_tab_ = left_key_tab_;
1747 part->right_key_tab_ = right_key_tab_;
1748 part->type_ = type_;
1749 part->flow_ = flow_;
1750 part->left_key_ = left_key_;
1751 part->right_key_ = right_key_;
1752 part->first_column_ = first_column_;
1753 part->last_column_ = last_column_;
1754 part->owns_blobs_ =
false;
◆ SingletonPartner()
ColPartition * tesseract::ColPartition::SingletonPartner |
( |
bool |
upper | ) |
|
Definition at line 629 of file colpartition.cpp.
630 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
631 if (!partners->singleton())
633 ColPartition_C_IT it(partners);
◆ SmoothPartnerRun()
void tesseract::ColPartition::SmoothPartnerRun |
( |
int |
working_set_count | ) |
|
Definition at line 1808 of file colpartition.cpp.
1809 STATS left_stats(0, working_set_count);
1810 STATS right_stats(0, working_set_count);
1815 if (partner->type_ > max_type)
1816 max_type = partner->type_;
1817 if (column_set_ == partner->column_set_) {
1818 left_stats.add(partner->first_column_, 1);
1819 right_stats.add(partner->last_column_, 1);
1827 first_column_ = left_stats.mode();
1828 last_column_ = right_stats.mode();
1829 if (last_column_ < first_column_)
1830 last_column_ = first_column_;
1835 partner->type_ = max_type;
1836 #if 0 // See TODO above
1837 if (column_set_ == partner->column_set_) {
1838 partner->first_column_ = first_column_;
1839 partner->last_column_ = last_column_;
◆ SortByBBox()
static int tesseract::ColPartition::SortByBBox(
    const void * p1,
    const void * p2)  [inline, static]
Definition at line 714 of file colpartition.h.
715 const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
716 const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
717 int mid_y1 = part1->bounding_box_.y_middle();
718 int mid_y2 = part2->bounding_box_.y_middle();
719 if ((part2->bounding_box_.bottom() <= mid_y1 &&
720 mid_y1 <= part2->bounding_box_.top()) ||
721 (part1->bounding_box_.bottom() <= mid_y2 &&
722 mid_y2 <= part1->bounding_box_.top())) {
724 return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
727 return mid_y2 - mid_y1;
◆ SortKey()
int tesseract::ColPartition::SortKey(
    int x,
    int y) const  [inline]
◆ space_above()
int tesseract::ColPartition::space_above |
( |
| ) |
const |
|
inline |
◆ space_below()
int tesseract::ColPartition::space_below |
( |
| ) |
const |
|
inline |
◆ space_to_left()
int tesseract::ColPartition::space_to_left |
( |
| ) |
const |
|
inline |
◆ space_to_right()
int tesseract::ColPartition::space_to_right |
( |
| ) |
const |
|
inline |
◆ SpecialBlobsCount()
Definition at line 561 of file colpartition.cpp.
563 BLOBNBOX_C_IT blob_it(&boxes_);
565 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
◆ SpecialBlobsDensity()
◆ SplitAt()
ColPartition * tesseract::ColPartition::SplitAt |
( |
int |
split_x | ) |
|
Definition at line 823 of file colpartition.cpp.
824 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right())
828 BLOBNBOX_C_IT it(&boxes_);
829 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
834 if (box.
left() >= split_x) {
835 split_part->AddBox(it.extract());
842 it.add_list_after(&split_part->boxes_);
845 if (split_part->IsEmpty()) {
851 right_key_tab_ =
false;
852 split_part->left_key_tab_ =
false;
853 right_margin_ = split_x;
854 split_part->left_margin_ = split_x;
856 split_part->ComputeLimits();
◆ SplitAtBlob()
Definition at line 787 of file colpartition.cpp.
790 BLOBNBOX_C_IT it(&boxes_);
791 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
795 if (bbox == split_blob || !split_part->boxes_.empty()) {
796 split_part->AddBox(it.extract());
802 if (split_part->IsEmpty()) {
808 right_key_tab_ =
false;
809 split_part->left_key_tab_ =
false;
814 split_part->ComputeLimits();
◆ top_spacing()
int tesseract::ColPartition::top_spacing |
( |
| ) |
const |
|
inline |
◆ type()
◆ TypesMatch() [1/2]
◆ TypesMatch() [2/2]
bool tesseract::ColPartition::TypesMatch |
( |
const ColPartition & |
other | ) |
const |
|
inline |
◆ TypesSimilar()
◆ upper_partners()
ColPartition_CLIST* tesseract::ColPartition::upper_partners |
( |
| ) |
|
|
inline |
◆ VCoreOverlap()
int tesseract::ColPartition::VCoreOverlap |
( |
const ColPartition & |
other | ) |
const |
|
inline |
Definition at line 375 of file colpartition.h.
376 if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
379 return std::min(median_top_, other.median_top_) -
380 std::max(median_bottom_, other.median_bottom_);
◆ VOverlaps()
bool tesseract::ColPartition::VOverlaps |
( |
const ColPartition & |
other | ) |
const |
|
inline |
◆ VSignificantCoreOverlap()
bool tesseract::ColPartition::VSignificantCoreOverlap |
( |
const ColPartition & |
other | ) |
const |
|
inline |
Definition at line 390 of file colpartition.h.
391 if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
395 int height = std::min(median_top_ - median_bottom_,
396 other.median_top_ - other.median_bottom_);
397 return overlap * 3 > height;
◆ WithinSameMargins()
bool tesseract::ColPartition::WithinSameMargins |
( |
const ColPartition & |
other | ) |
const |
|
inline |
Definition at line 401 of file colpartition.h.
402 return left_margin_ <= other.bounding_box_.left() &&
403 bounding_box_.
left() >= other.left_margin_ &&
404 bounding_box_.
right() <= other.right_margin_ &&
405 right_margin_ >= other.bounding_box_.right();
◆ XAtY()
int tesseract::ColPartition::XAtY(
    int sort_key,
    int y) const  [inline]
The documentation for this class was generated from the following files:
colpartition.h
colpartition.cpp
static C_BLOB * FakeBlob(const TBOX &box)
static int SortKey(const ICOORD &vertical, int x, int y)
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
int NoisyNeighbours() const
bool IsHorizontalType() const
const double kMaxLeaderGapFractionOfMin
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
const double kMinBaselineCoverage
const int kMinChainTextValue
const int kMinLeaderCount
BlobTextFlowType flow() const
bool NearlyEqual(T x, T y, T tolerance)
void AddPartition(ColPartition *part)
int base_char_top() const
bool overlap(const TBOX &box) const
const int kColumnWidthFactor
bool IsPulloutType() const
void set_flow(BlobTextFlowType value)
bool PTIsPulloutType(PolyBlockType type)
const double kMaxLeaderGapFractionOfMax
int median_height() const
PolyBlockType type() const
int SortKey(int x, int y) const
int64_t CostWithVariance(const DPPoint *prev)
int16_t x() const
access function
const int kMinStrongTextValue
static bool UnMergeableType(BlobRegionType type)
bool TypesMatch(const ColPartition &other) const
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
int y_gap(const TBOX &box) const
const int kMaxColorDistance
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
static bool IsLineType(BlobRegionType type)
BlobSpecialTextType special_text_type() const
ColPartition * SingletonPartner(bool upper)
PDBLK pdblk
Page Description Block.
const int kHorzStrongTextlineAspect
int base_char_bottom() const
void set_poly_block(POLY_BLOCK *blk)
set the poly block
static bool WithinTestRegion(int detail_level, int x, int y)
const int kHorzStrongTextlineCount
const int kHorzStrongTextlineHeight
BlobRegionType blob_type() const
void set_owner(tesseract::ColPartition *new_owner)
static bool DifferentSizes(int size1, int size2)
bool PTIsImageType(PolyBlockType type)
void set_region_type(BlobRegionType new_type)
const int kMaxRMSColorNoise
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
const TBOX & bounding_box() const
int KeyWidth(int left_key, int right_key) const
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
int XAtY(int sort_key, int y) const
bool x_overlap(const TBOX &box) const
void SetColumnGoodness(WidthCallback cb)
bool PTIsLineType(PolyBlockType type)
int RightBlobRule() const
PolyBlockType PartitionType(ColumnSpanningType flow) const
BlobTextFlowType flow() const
int RightAtY(int y) const
bool PTIsTextType(PolyBlockType type)
bool IsVerticalType() const
BlobRegionType region_type() const
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
DLLSYM void tprintf(const char *format,...)
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
static int SortByBBox(const void *p1, const void *p2)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
tesseract::ColPartition * owner() const
const double kMaxBaselineError
const double kMaxSameBlockLineSpacing
int VCoreOverlap(const ColPartition &other) const
int textord_debug_tabfind
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
ColPartition * ShallowCopy() const
int16_t y() const
access function