tesseract  4.0.0-1-g2a2b
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()=default
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_height () const
 
void set_median_height (int height)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uint8_t * color1 ()
 
uint8_t * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback *cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback *cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 68 of file colpartition.h.

Constructor & Destructor Documentation

◆ ColPartition() [1/2]

tesseract::ColPartition::ColPartition ( )
default

◆ ColPartition() [2/2]

tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD & vertical
)
Parameters
blob_type  is the blob_region_type_ of the blobs in this partition.
vertical  is the direction of logical vertical on the possibly skewed image.

Definition at line 81 of file colpartition.cpp.

82  : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83  median_bottom_(INT32_MAX), median_top_(-INT32_MAX), median_height_(0),
84  median_left_(INT32_MAX), median_right_(-INT32_MAX), median_width_(0),
85  blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
86  good_width_(false), good_column_(false),
87  left_key_tab_(false), right_key_tab_(false),
88  left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical),
89  working_set_(nullptr), last_add_was_vertical_(false), block_owned_(false),
90  desperately_merged_(false),
91  first_column_(-1), last_column_(-1), column_set_(nullptr),
92  side_step_(0), top_spacing_(0), bottom_spacing_(0),
93  type_before_table_(PT_UNKNOWN), inside_table_column_(false),
94  nearest_neighbor_above_(nullptr), nearest_neighbor_below_(nullptr),
95  space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
96  owns_blobs_(true) {
97  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
98 }
BlobRegionType blob_type() const
Definition: colpartition.h:149

◆ ~ColPartition()

tesseract::ColPartition::~ColPartition ( )

Definition at line 143 of file colpartition.cpp.

143  {
144  // Remove this as a partner of all partners, as we don't want them
145  // referring to a deleted object.
146  ColPartition_C_IT it(&upper_partners_);
147  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
148  it.data()->RemovePartner(false, this);
149  }
150  it.set_to_list(&lower_partners_);
151  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
152  it.data()->RemovePartner(true, this);
153  }
154 }

Member Function Documentation

◆ Absorb()

void tesseract::ColPartition::Absorb ( ColPartition * other,
WidthCallback * cb
)

Definition at line 648 of file colpartition.cpp.

648  {
649  // The result has to either own all of the blobs or none of them.
650  // Verify the flag is consistent.
651  ASSERT_HOST(owns_blobs() == other->owns_blobs());
652  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
653  // should always be true when this is called. So there is no issues.
654  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
655  bounding_box_.bottom()) ||
656  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
657  other->bounding_box_.bottom())) {
658  tprintf("Merging:");
659  Print();
660  other->Print();
661  }
662 
663  // Update the special_blobs_densities_.
664  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
665  for (int type = 0; type < BSTT_COUNT; ++type) {
666  unsigned w1 = boxes_.length();
667  unsigned w2 = other->boxes_.length();
668  float new_val = special_blobs_densities_[type] * w1 +
669  other->special_blobs_densities_[type] * w2;
670  if (!w1 || !w2) {
671  ASSERT_HOST((w1 + w2) > 0);
672  special_blobs_densities_[type] = new_val / (w1 + w2);
673  }
674  }
675 
676  // Merge the two sorted lists.
677  BLOBNBOX_C_IT it(&boxes_);
678  BLOBNBOX_C_IT it2(&other->boxes_);
679  for (; !it2.empty(); it2.forward()) {
680  BLOBNBOX* bbox2 = it2.extract();
681  ColPartition* prev_owner = bbox2->owner();
682  if (prev_owner != other && prev_owner != nullptr) {
683  // A blob on other's list is owned by someone else; let them have it.
684  continue;
685  }
686  ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
687  if (prev_owner == other)
688  bbox2->set_owner(this);
689  it.add_to_end(bbox2);
690  }
691  left_margin_ = std::min(left_margin_, other->left_margin_);
692  right_margin_ = std::max(right_margin_, other->right_margin_);
693  if (other->left_key_ < left_key_) {
694  left_key_ = other->left_key_;
695  left_key_tab_ = other->left_key_tab_;
696  }
697  if (other->right_key_ > right_key_) {
698  right_key_ = other->right_key_;
699  right_key_tab_ = other->right_key_tab_;
700  }
701  // Combine the flow and blob_type in a sensible way.
702  // Dominant flows stay.
703  if (!DominatesInMerge(flow_, other->flow_)) {
704  flow_ = other->flow_;
705  blob_type_ = other->blob_type_;
706  }
707  SetBlobTypes();
708  if (IsVerticalType()) {
709  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
710  last_add_was_vertical_ = true;
711  } else {
712  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
713  last_add_was_vertical_ = false;
714  }
715  ComputeLimits();
716  // Fix partner lists. other is going away, so remove it as a
717  // partner of all its partners and add this in its place.
718  for (int upper = 0; upper < 2; ++upper) {
719  ColPartition_CLIST partners;
720  ColPartition_C_IT part_it(&partners);
721  part_it.add_list_after(upper ? &other->upper_partners_
722  : &other->lower_partners_);
723  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
724  ColPartition* partner = part_it.extract();
725  partner->RemovePartner(!upper, other);
726  partner->RemovePartner(!upper, this);
727  partner->AddPartner(!upper, this);
728  }
729  }
730  delete other;
731  if (cb != nullptr) {
732  SetColumnGoodness(cb);
733  }
734 }
static bool WithinTestRegion(int detail_level, int x, int y)
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:130
int16_t left() const
Definition: rect.h:72
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool IsVerticalType() const
Definition: colpartition.h:442
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
void SetColumnGoodness(WidthCallback *cb)
int16_t bottom() const
Definition: rect.h:65
#define ASSERT_HOST(x)
Definition: errcode.h:84
PolyBlockType type() const
Definition: colpartition.h:182

◆ AddBox()

void tesseract::ColPartition::AddBox ( BLOBNBOX * box)

Definition at line 179 of file colpartition.cpp.

179  {
180  TBOX box = bbox->bounding_box();
181  // Update the partition limits.
182  if (boxes_.length() == 0) {
183  bounding_box_ = box;
184  } else {
185  bounding_box_ += box;
186  }
187 
188  if (IsVerticalType()) {
189  if (!last_add_was_vertical_) {
190  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
191  last_add_was_vertical_ = true;
192  }
193  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
194  } else {
195  if (last_add_was_vertical_) {
196  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
197  last_add_was_vertical_ = false;
198  }
199  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
200  }
201  if (!left_key_tab_)
202  left_key_ = BoxLeftKey();
203  if (!right_key_tab_)
204  right_key_ = BoxRightKey();
205  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
206  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
207  box.left(), box.bottom(), box.right(), box.top(),
208  bounding_box_.left(), bounding_box_.right());
209 }
Definition: rect.h:34
static bool WithinTestRegion(int detail_level, int x, int y)
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
const TBOX & bounding_box() const
Definition: blobbox.h:231
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65

◆ AddPartner()

void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition * partner
)

Definition at line 613 of file colpartition.cpp.

613  {
614  if (upper) {
615  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
616  true, this);
617  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
618  } else {
619  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
620  true, this);
621  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
622  }
623 }

◆ AddToWorkingSet()

void tesseract::ColPartition::AddToWorkingSet ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1358 of file colpartition.cpp.

1361  {
1362  if (block_owned_)
1363  return; // Done it already.
1364  block_owned_ = true;
1365  WorkingPartSet_IT it(working_sets);
1366  // If there is an upper partner use its working_set_ directly.
1367  ColPartition* partner = SingletonPartner(true);
1368  if (partner != nullptr && partner->working_set_ != nullptr) {
1369  working_set_ = partner->working_set_;
1370  working_set_->AddPartition(this);
1371  return;
1372  }
1373  if (partner != nullptr && textord_debug_bugs) {
1374  tprintf("Partition with partner has no working set!:");
1375  Print();
1376  partner->Print();
1377  }
1378  // Search for the column that the left edge fits in.
1379  WorkingPartSet* work_set = nullptr;
1380  it.move_to_first();
1381  int col_index = 0;
1382  for (it.mark_cycle_pt(); !it.cycled_list() &&
1383  col_index != first_column_;
1384  it.forward(), ++col_index);
1385  if (textord_debug_tabfind >= 2) {
1386  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1387  Print();
1388  }
1389  if (it.cycled_list() && textord_debug_bugs) {
1390  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1391  }
1392  ASSERT_HOST(!it.cycled_list());
1393  work_set = it.data();
1394  // If last_column_ != first_column, then we need to scoop up all blocks
1395  // between here and the last_column_ and put back in work_set.
1396  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1397  // Find the column that the right edge falls in.
1398  BLOCK_LIST completed_blocks;
1399  TO_BLOCK_LIST to_blocks;
1400  for (; !it.cycled_list() && col_index <= last_column_;
1401  it.forward(), ++col_index) {
1402  WorkingPartSet* end_set = it.data();
1403  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1404  &completed_blocks, &to_blocks);
1405  }
1406  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1407  }
1408  working_set_ = work_set;
1409  work_set->AddPartition(this);
1410 }
ColPartition * SingletonPartner(bool upper)
void AddPartition(ColPartition *part)
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool IsPulloutType() const
Definition: colpartition.h:438
int textord_debug_bugs
Definition: alignedblob.cpp:29
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ BiggestBox()

BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 225 of file colpartition.cpp.

225  {
226  BLOBNBOX* biggest = nullptr;
227  BLOBNBOX_C_IT bb_it(&boxes_);
228  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
229  BLOBNBOX* bbox = bb_it.data();
230  if (IsVerticalType()) {
231  if (biggest == nullptr ||
232  bbox->bounding_box().width() > biggest->bounding_box().width())
233  biggest = bbox;
234  } else {
235  if (biggest == nullptr ||
236  bbox->bounding_box().height() > biggest->bounding_box().height())
237  biggest = bbox;
238  }
239  }
240  return biggest;
241 }
int16_t width() const
Definition: rect.h:115
const TBOX & bounding_box() const
Definition: blobbox.h:231
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t height() const
Definition: rect.h:108

◆ blob_type()

BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 149 of file colpartition.h.

149  {
150  return blob_type_;
151  }

◆ block_owned()

bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 206 of file colpartition.h.

206  {
207  return block_owned_;
208  }

◆ bottom_spacing()

int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 221 of file colpartition.h.

221  {
222  return bottom_spacing_;
223  }

◆ bounding_box()

const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 110 of file colpartition.h.

110  {
111  return bounding_box_;
112  }

◆ BoundsWithoutBox()

TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box)

Definition at line 244 of file colpartition.cpp.

244  {
245  TBOX result;
246  BLOBNBOX_C_IT bb_it(&boxes_);
247  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
248  if (box != bb_it.data()) {
249  result += bb_it.data()->bounding_box();
250  }
251  }
252  return result;
253 }
Definition: rect.h:34

◆ BoxColor()

ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1782 of file colpartition.cpp.

1782  {
1783  if (type_ == PT_UNKNOWN)
1784  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1785  return POLY_BLOCK::ColorForPolyBlockType(type_);
1786 }
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:392
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:445

◆ boxes()

BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 188 of file colpartition.h.

188  {
189  return &boxes_;
190  }

◆ boxes_count()

int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 191 of file colpartition.h.

191  {
192  return boxes_.length();
193  }

◆ BoxLeftKey()

int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 333 of file colpartition.h.

333  {
334  return SortKey(bounding_box_.left(), MidY());
335  }
int SortKey(int x, int y) const
Definition: colpartition.h:317
int16_t left() const
Definition: rect.h:72

◆ BoxRightKey()

int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 337 of file colpartition.h.

337  {
338  return SortKey(bounding_box_.right(), MidY());
339  }
int SortKey(int x, int y) const
Definition: colpartition.h:317
int16_t right() const
Definition: rect.h:79

◆ ClaimBoxes()

void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 257 of file colpartition.cpp.

257  {
258  BLOBNBOX_C_IT bb_it(&boxes_);
259  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
260  BLOBNBOX* bblob = bb_it.data();
261  ColPartition* other = bblob->owner();
262  if (other == nullptr) {
263  // Normal case: ownership is available.
264  bblob->set_owner(this);
265  } else {
266  ASSERT_HOST(other == this);
267  }
268  }
269 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ clear_table_type()

void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 240 of file colpartition.h.

240  {
241  if (type_ == PT_TABLE)
242  type_ = type_before_table_;
243  }
Definition: capi.h:100

◆ color1()

uint8_t* tesseract::ColPartition::color1 ( )
inline

Definition at line 286 of file colpartition.h.

286  {
287  return color1_;
288  }

◆ color2()

uint8_t* tesseract::ColPartition::color2 ( )
inline

Definition at line 289 of file colpartition.h.

289  {
290  return color2_;
291  }

◆ column_set()

ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 215 of file colpartition.h.

215  {
216  return column_set_;
217  }

◆ ColumnContains()

bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 354 of file colpartition.h.

354  {
355  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
356  }
int RightAtY(int y) const
Definition: colpartition.h:345
int LeftAtY(int y) const
Definition: colpartition.h:341

◆ ColumnRange()

void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet * columns,
int *  first_col,
int *  last_col 
)

Definition at line 1066 of file colpartition.cpp.

1067  {
1068  int first_spanned_col = -1;
1069  ColumnSpanningType span_type =
1070  columns->SpanningType(resolution,
1071  bounding_box_.left(), bounding_box_.right(),
1072  std::min(bounding_box_.height(), bounding_box_.width()),
1073  MidY(), left_margin_, right_margin_,
1074  first_col, last_col,
1075  &first_spanned_col);
1076  type_ = PartitionType(span_type);
1077 }
PolyBlockType PartitionType(ColumnSpanningType flow) const
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79
int16_t height() const
Definition: rect.h:108

◆ ColumnWidth()

int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 329 of file colpartition.h.

329  {
330  return KeyWidth(left_key_, right_key_);
331  }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:325

◆ ComputeLimits()

void tesseract::ColPartition::ComputeLimits ( )

Definition at line 871 of file colpartition.cpp.

871  {
872  bounding_box_ = TBOX(); // Clear it
873  BLOBNBOX_C_IT it(&boxes_);
874  BLOBNBOX* bbox = nullptr;
875  int non_leader_count = 0;
876  if (it.empty()) {
877  bounding_box_.set_left(left_margin_);
878  bounding_box_.set_right(right_margin_);
879  bounding_box_.set_bottom(0);
880  bounding_box_.set_top(0);
881  } else {
882  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
883  bbox = it.data();
884  bounding_box_ += bbox->bounding_box();
885  if (bbox->flow() != BTFT_LEADER)
886  ++non_leader_count;
887  }
888  }
889  if (!left_key_tab_)
890  left_key_ = BoxLeftKey();
891  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
892  // TODO(rays) investigate the causes of these error messages, to find
893  // out if they are genuinely harmful, or just indicative of junk input.
894  tprintf("Computed left-illegal partition\n");
895  Print();
896  }
897  if (!right_key_tab_)
898  right_key_ = BoxRightKey();
899  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
900  tprintf("Computed right-illegal partition\n");
901  Print();
902  }
903  if (it.empty())
904  return;
905  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
906  blob_type() == BRT_POLYIMAGE) {
907  median_top_ = bounding_box_.top();
908  median_bottom_ = bounding_box_.bottom();
909  median_height_ = bounding_box_.height();
910  median_left_ = bounding_box_.left();
911  median_right_ = bounding_box_.right();
912  median_width_ = bounding_box_.width();
913  } else {
914  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
915  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
916  STATS height_stats(0, bounding_box_.height() + 1);
917  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
918  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
919  STATS width_stats(0, bounding_box_.width() + 1);
920  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
921  bbox = it.data();
922  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
923  const TBOX& box = bbox->bounding_box();
924  int area = box.area();
925  top_stats.add(box.top(), area);
926  bottom_stats.add(box.bottom(), area);
927  height_stats.add(box.height(), area);
928  left_stats.add(box.left(), area);
929  right_stats.add(box.right(), area);
930  width_stats.add(box.width(), area);
931  }
932  }
933  median_top_ = static_cast<int>(top_stats.median() + 0.5);
934  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
935  median_height_ = static_cast<int>(height_stats.median() + 0.5);
936  median_left_ = static_cast<int>(left_stats.median() + 0.5);
937  median_right_ = static_cast<int>(right_stats.median() + 0.5);
938  median_width_ = static_cast<int>(width_stats.median() + 0.5);
939  }
940 
941  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
942  tprintf("Made partition with bad right coords");
943  Print();
944  }
945  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
946  tprintf("Made partition with bad left coords");
947  Print();
948  }
949  // Fix partner lists. The bounding box has changed and partners are stored
950  // in bounding box order, so remove and reinsert this as a partner
951  // of all its partners.
952  for (int upper = 0; upper < 2; ++upper) {
953  ColPartition_CLIST partners;
954  ColPartition_C_IT part_it(&partners);
955  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
956  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
957  ColPartition* partner = part_it.extract();
958  partner->RemovePartner(!upper, this);
959  partner->AddPartner(!upper, this);
960  }
961  }
962  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
963  bounding_box_.bottom())) {
964  tprintf("Recomputed box for partition %p\n", this);
965  Print();
966  }
967 }
void set_top(int y)
Definition: rect.h:61
void set_bottom(int y)
Definition: rect.h:68
BlobRegionType blob_type() const
Definition: colpartition.h:149
Definition: rect.h:34
BlobTextFlowType flow() const
Definition: blobbox.h:296
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: statistc.h:33
void set_right(int x)
Definition: rect.h:82
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int32_t area() const
Definition: rect.h:122
void set_left(int x)
Definition: rect.h:75
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
bool IsImageType() const
Definition: colpartition.h:430
int textord_debug_bugs
Definition: alignedblob.cpp:29
int16_t bottom() const
Definition: rect.h:65
int16_t height() const
Definition: rect.h:108

◆ ComputeSpecialBlobsDensity()

void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 592 of file colpartition.cpp.

592  {
593  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
594  if (boxes_.empty()) {
595  return;
596  }
597 
598  BLOBNBOX_C_IT blob_it(&boxes_);
599  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
600  BLOBNBOX* blob = blob_it.data();
601  BlobSpecialTextType type = blob->special_text_type();
602  special_blobs_densities_[type]++;
603  }
604 
605  for (int type = 0; type < BSTT_COUNT; ++type) {
606  special_blobs_densities_[type] /= boxes_.length();
607  }
608 }
BlobSpecialTextType
Definition: blobbox.h:97
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:290
PolyBlockType type() const
Definition: colpartition.h:182

◆ ConfirmNoTabViolation()

bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other) const

Definition at line 423 of file colpartition.cpp.

423  {
424  if (bounding_box_.right() < other.bounding_box_.left() &&
425  bounding_box_.right() < other.LeftBlobRule())
426  return false;
427  if (other.bounding_box_.right() < bounding_box_.left() &&
428  other.bounding_box_.right() < LeftBlobRule())
429  return false;
430  if (bounding_box_.left() > other.bounding_box_.right() &&
431  bounding_box_.left() > other.RightBlobRule())
432  return false;
433  if (other.bounding_box_.left() > bounding_box_.right() &&
434  other.bounding_box_.left() > RightBlobRule())
435  return false;
436  return true;
437 }
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79

◆ CopyButDontOwnBlobs()

ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1769 of file colpartition.cpp.

1769  {
1770  ColPartition* copy = ShallowCopy();
1771  copy->set_owns_blobs(false);
1772  BLOBNBOX_C_IT inserter(copy->boxes());
1773  BLOBNBOX_C_IT traverser(boxes());
1774  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1775  inserter.add_after_then_move(traverser.data());
1776  return copy;
1777 }
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:188
ColPartition * ShallowCopy() const

◆ CopyLeftTab()

void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 529 of file colpartition.cpp.

529  {
530  left_key_tab_ = take_box ? false : src.left_key_tab_;
531  if (left_key_tab_) {
532  left_key_ = src.left_key_;
533  } else {
534  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
535  left_key_ = BoxLeftKey();
536  }
537  if (left_margin_ > bounding_box_.left())
538  left_margin_ = src.left_margin_;
539 }
int16_t left() const
Definition: rect.h:72
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
void set_left(int x)
Definition: rect.h:75

◆ CopyRightTab()

void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 542 of file colpartition.cpp.

542  {
543  right_key_tab_ = take_box ? false : src.right_key_tab_;
544  if (right_key_tab_) {
545  right_key_ = src.right_key_;
546  } else {
547  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
548  right_key_ = BoxRightKey();
549  }
550  if (right_margin_ < bounding_box_.right())
551  right_margin_ = src.right_margin_;
552 }
void set_right(int x)
Definition: rect.h:82
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
int16_t right() const
Definition: rect.h:79

◆ CountOverlappingBoxes()

int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 970 of file colpartition.cpp.

970  {
971  BLOBNBOX_C_IT it(&boxes_);
972  int overlap_count = 0;
973  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
974  BLOBNBOX* bbox = it.data();
975  if (box.overlap(bbox->bounding_box()))
976  ++overlap_count;
977  }
978  return overlap_count;
979 }
bool overlap(const TBOX &box) const
Definition: rect.h:355
const TBOX & bounding_box() const
Definition: blobbox.h:231

◆ DeleteBoxes()

void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 315 of file colpartition.cpp.

315  {
316  // Although the boxes_ list is a C_LIST, in some cases it owns the
317  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
318  // and the BLOBNBOXes own the underlying C_BLOBs.
319  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
320  BLOBNBOX* bblob = bb_it.extract();
321  delete bblob->cblob();
322  delete bblob;
323  }
324 }
C_BLOB * cblob() const
Definition: blobbox.h:269

◆ desperately_merged()

bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 212 of file colpartition.h.

212  {
213  return desperately_merged_;
214  }

◆ DisownBoxes()

void tesseract::ColPartition::DisownBoxes ( )

Definition at line 273 of file colpartition.cpp.

273  {
274  BLOBNBOX_C_IT bb_it(&boxes_);
275  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
276  BLOBNBOX* bblob = bb_it.data();
277  ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
278  bblob->set_owner(nullptr);
279  }
280 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ DisownBoxesNoAssert()

void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 286 of file colpartition.cpp.

286  {
287  BLOBNBOX_C_IT bb_it(&boxes_);
288  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
289  BLOBNBOX* bblob = bb_it.data();
290  if (bblob->owner() == this)
291  bblob->set_owner(nullptr);
292  }
293 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
tesseract::ColPartition * owner() const
Definition: blobbox.h:353

◆ FakePartition()

ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 105 of file colpartition.cpp.

108  {
109  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
110  part->set_type(block_type);
111  part->set_flow(flow);
112  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
113  part->set_left_margin(box.left());
114  part->set_right_margin(box.right());
115  part->SetBlobTypes();
116  part->ComputeLimits();
117  part->ClaimBoxes();
118  return part;
119 }
BlobRegionType blob_type() const
Definition: colpartition.h:149
int16_t left() const
Definition: rect.h:72
integer coordinate
Definition: points.h:32
BlobTextFlowType flow() const
Definition: colpartition.h:155
int16_t right() const
Definition: rect.h:79
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:243

◆ flow()

BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 155 of file colpartition.h.

155  {
156  return flow_;
157  }

◆ good_blob_score()

int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 161 of file colpartition.h.

161  {
162  return good_blob_score_;
163  }

◆ good_column()

bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 167 of file colpartition.h.

167  {
168  return good_column_;
169  }

◆ good_width()

bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 164 of file colpartition.h.

164  {
165  return good_width_;
166  }

◆ HasGoodBaseline()

bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1291 of file colpartition.cpp.

1291  {
1292  // Approximation of the baseline.
1293  DetLineFit linepoints;
1294  // Calculation of the mean height on this line segment. Note that these
1295  // variable names apply to the context of a horizontal line, and work
1296  // analogously, rather than literally in the case of a vertical line.
1297  int total_height = 0;
1298  int coverage = 0;
1299  int height_count = 0;
1300  int width = 0;
1301  BLOBNBOX_C_IT it(&boxes_);
1302  TBOX box(it.data()->bounding_box());
1303  // Accumulate points representing the baseline at the middle of each blob,
1304  // but add an additional point for each end of the line. This makes it
1305  // harder to fit a severe skew angle, as it is most likely not right.
1306  if (IsVerticalType()) {
1307  // For a vertical line, use the right side as the baseline.
1308  ICOORD first_pt(box.right(), box.bottom());
1309  // Use the bottom-right of the first (bottom) box, the top-right of the
1310  // last, and the middle-right of all others.
1311  linepoints.Add(first_pt);
1312  for (it.forward(); !it.at_last(); it.forward()) {
1313  BLOBNBOX* blob = it.data();
1314  box = blob->bounding_box();
1315  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1316  linepoints.Add(box_pt);
1317  total_height += box.width();
1318  coverage += box.height();
1319  ++height_count;
1320  }
1321  box = it.data()->bounding_box();
1322  ICOORD last_pt(box.right(), box.top());
1323  linepoints.Add(last_pt);
1324  width = last_pt.y() - first_pt.y();
1325 
1326  } else {
1327  // Horizontal lines use the bottom as the baseline.
1328  TBOX box(it.data()->bounding_box());
1329  // Use the bottom-left of the first box, the bottom-right of the last,
1330  // and the middle of all others.
1331  ICOORD first_pt(box.left(), box.bottom());
1332  linepoints.Add(first_pt);
1333  for (it.forward(); !it.at_last(); it.forward()) {
1334  BLOBNBOX* blob = it.data();
1335  box = blob->bounding_box();
1336  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1337  linepoints.Add(box_pt);
1338  total_height += box.height();
1339  coverage += box.width();
1340  ++height_count;
1341  }
1342  box = it.data()->bounding_box();
1343  ICOORD last_pt(box.right(), box.bottom());
1344  linepoints.Add(last_pt);
1345  width = last_pt.x() - first_pt.x();
1346  }
1347  // Maximum median error allowed to be a good text line.
1348  if (height_count == 0)
1349  return false;
1350  double max_error = kMaxBaselineError * total_height / height_count;
1351  ICOORD start_pt, end_pt;
1352  double error = linepoints.Fit(&start_pt, &end_pt);
1353  return error < max_error && coverage >= kMinBaselineCoverage * width;
1354 }
const double kMaxBaselineError
int16_t y() const
access_function
Definition: points.h:57
Definition: rect.h:34
const double kMinBaselineCoverage
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
const TBOX & bounding_box() const
Definition: blobbox.h:231
bool IsVerticalType() const
Definition: colpartition.h:442

◆ HCoreOverlap()

int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 385 of file colpartition.h.

385  {
386  return std::min(median_right_, other.median_right_) -
387  std::max(median_left_, other.median_left_);
388  }

◆ HOverlaps()

bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 366 of file colpartition.h.

366  {
367  return bounding_box_.x_overlap(other.bounding_box_);
368  }
bool x_overlap(const TBOX &box) const
Definition: rect.h:401

◆ inside_table_column()

bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 244 of file colpartition.h.

244  {
245  return inside_table_column_;
246  }

◆ IsEmpty()

bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 358 of file colpartition.h.

358  {
359  return boxes_.empty();
360  }

◆ IsHorizontalLine()

bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 460 of file colpartition.h.

460  {
461  return IsHorizontalType() && IsLineType();
462  }
bool IsHorizontalType() const
Definition: colpartition.h:446

◆ IsHorizontalType()

bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 446 of file colpartition.h.

446  {
447  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
448  }

◆ IsImageType()

bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 430 of file colpartition.h.

430  {
431  return PTIsImageType(type_);
432  }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:77

◆ IsInSameColumnAs()

bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2186 of file colpartition.cpp.

2186  {
2187  // Overlap does not occur when last < part.first or first > part.last.
2188  // In other words, one is completely to the side of the other.
2189  // This is just DeMorgan's law applied to that so the function returns true.
2190  return (last_column_ >= part.first_column_) &&
2191  (first_column_ <= part.last_column_);
2192 }

◆ IsLeftOf()

bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 350 of file colpartition.h.

350  {
351  return bounding_box_.right() < other.bounding_box_.right();
352  }
int16_t right() const
Definition: rect.h:79

◆ IsLegal()

bool tesseract::ColPartition::IsLegal ( )

Definition at line 352 of file colpartition.cpp.

352  {
353  if (bounding_box_.left() > bounding_box_.right()) {
354  if (textord_debug_bugs) {
355  tprintf("Bounding box invalid\n");
356  Print();
357  }
358  return false; // Bounding box invalid.
359  }
360  if (left_margin_ > bounding_box_.left() ||
361  right_margin_ < bounding_box_.right()) {
362  if (textord_debug_bugs) {
363  tprintf("Margins invalid\n");
364  Print();
365  }
366  return false; // Margins invalid.
367  }
368  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
369  if (textord_debug_bugs) {
370  tprintf("Key inside box: %d v %d or %d v %d\n",
371  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
372  Print();
373  }
374  return false; // Keys inside the box.
375  }
376  return true;
377 }
int16_t left() const
Definition: rect.h:72
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int16_t right() const
Definition: rect.h:79
int textord_debug_bugs
Definition: alignedblob.cpp:29

◆ IsLineType()

bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 426 of file colpartition.h.

426  {
427  return PTIsLineType(type_);
428  }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:73

◆ IsPulloutType()

bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 438 of file colpartition.h.

438  {
439  return PTIsPulloutType(type_);
440  }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:89

◆ IsSingleton()

bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 362 of file colpartition.h.

362  {
363  return boxes_.singleton();
364  }

◆ IsTextType()

bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 434 of file colpartition.h.

434  {
435  return PTIsTextType(type_);
436  }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82

◆ IsUnMergeableType()

bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 450 of file colpartition.h.

450  {
451  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
452  }
Definition: capi.h:101
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:431

◆ IsVerticalLine()

bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 455 of file colpartition.h.

455  {
456  return IsVerticalType() && IsLineType();
457  }
bool IsVerticalType() const
Definition: colpartition.h:442

◆ IsVerticalType()

bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 442 of file colpartition.h.

442  {
443  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
444  }

◆ KeyWidth()

int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 325 of file colpartition.h.

325  {
326  return (right_key - left_key) / vertical_.y();
327  }
int16_t y() const
access_function
Definition: points.h:57

◆ left_key()

int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 173 of file colpartition.h.

173  {
174  return left_key_;
175  }

◆ left_key_tab()

bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 170 of file colpartition.h.

170  {
171  return left_key_tab_;
172  }

◆ left_margin()

int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 113 of file colpartition.h.

113  {
114  return left_margin_;
115  }

◆ LeftAtY()

int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 341 of file colpartition.h.

341  {
342  return XAtY(left_key_, y);
343  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321

◆ LeftBlobRule()

int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 555 of file colpartition.cpp.

555  {
556  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
557  return it.data()->left_rule();
558 }

◆ LineSpacingBlocks()

void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1418 of file colpartition.cpp.

1423  {
1424  int page_height = tright.y() - bleft.y();
1425  // Compute the initial spacing stats.
1426  ColPartition_IT it(block_parts);
1427  int part_count = 0;
1428  int max_line_height = 0;
1429 
1430  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1431  // because their line spacing with their neighbors maybe smaller and their
1432  // height may be slightly larger.
1433 
1434  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1435  ColPartition* part = it.data();
1436  ASSERT_HOST(!part->boxes()->empty());
1437  STATS side_steps(0, part->bounding_box().height());
1438  if (part->bounding_box().height() > max_line_height)
1439  max_line_height = part->bounding_box().height();
1440  BLOBNBOX_C_IT blob_it(part->boxes());
1441  int prev_bottom = blob_it.data()->bounding_box().bottom();
1442  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1443  BLOBNBOX* blob = blob_it.data();
1444  int bottom = blob->bounding_box().bottom();
1445  int step = bottom - prev_bottom;
1446  if (step < 0)
1447  step = -step;
1448  side_steps.add(step, 1);
1449  prev_bottom = bottom;
1450  }
1451  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1452  if (!it.at_last()) {
1453  ColPartition* next_part = it.data_relative(1);
1454  part->set_bottom_spacing(part->median_bottom() -
1455  next_part->median_bottom());
1456  part->set_top_spacing(part->median_top() - next_part->median_top());
1457  } else {
1458  part->set_bottom_spacing(page_height);
1459  part->set_top_spacing(page_height);
1460  }
1461  if (textord_debug_tabfind) {
1462  part->Print();
1463  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1464  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1465  }
1466  ++part_count;
1467  }
1468  if (part_count == 0)
1469  return;
1470 
1471  SmoothSpacings(resolution, page_height, block_parts);
1472 
1473  // Move the partitions into individual block lists and make the blocks.
1474  BLOCK_IT block_it(completed_blocks);
1475  TO_BLOCK_IT to_block_it(to_blocks);
1476  ColPartition_LIST spacing_parts;
1477  ColPartition_IT sp_block_it(&spacing_parts);
1478  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1479  for (it.mark_cycle_pt(); !it.empty();) {
1480  ColPartition* part = it.extract();
1481  sp_block_it.add_to_end(part);
1482  it.forward();
1483  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1484  !part->SpacingsEqual(*it.data(), resolution)) {
1485  // There is a spacing boundary. Check to see if it.data() belongs
1486  // better in the current block or the next one.
1487  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1488  ColPartition* next_part = it.data();
1489  // If there is a size match one-way, then the middle line goes with
1490  // its matched size, otherwise it goes with the smallest spacing.
1491  ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1492  if (textord_debug_tabfind) {
1493  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1494  " sizes %d %d %d\n",
1495  part->top_spacing(), part->bottom_spacing(),
1496  next_part->top_spacing(), next_part->bottom_spacing(),
1497  part->median_height(), next_part->median_height(),
1498  third_part != nullptr ? third_part->median_height() : 0);
1499  }
1500  // We can only consider adding the next line to the block if the sizes
1501  // match and the lines are close enough for their size.
1502  if (part->SizesSimilar(*next_part) &&
1503  next_part->median_height() * kMaxSameBlockLineSpacing >
1504  part->bottom_spacing() &&
1505  part->median_height() * kMaxSameBlockLineSpacing >
1506  part->top_spacing()) {
1507  // Even now, we can only add it as long as the third line doesn't
1508  // match in the same way and have a smaller bottom spacing.
1509  if (third_part == nullptr ||
1510  !next_part->SizesSimilar(*third_part) ||
1511  third_part->median_height() * kMaxSameBlockLineSpacing <=
1512  next_part->bottom_spacing() ||
1513  next_part->median_height() * kMaxSameBlockLineSpacing <=
1514  next_part->top_spacing() ||
1515  next_part->bottom_spacing() > part->bottom_spacing()) {
1516  // Add to the current block.
1517  sp_block_it.add_to_end(it.extract());
1518  it.forward();
1519  if (textord_debug_tabfind) {
1520  tprintf("Added line to current block.\n");
1521  }
1522  }
1523  }
1524  }
1525  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1526  if (to_block != nullptr) {
1527  to_block_it.add_to_end(to_block);
1528  block_it.add_to_end(to_block->block);
1529  }
1530  sp_block_it.set_to_list(&spacing_parts);
1531  } else {
1532  if (textord_debug_tabfind && !it.empty()) {
1533  ColPartition* next_part = it.data();
1534  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1535  part->top_spacing(), part->bottom_spacing(),
1536  next_part->top_spacing(), next_part->bottom_spacing(),
1537  part->median_height(), next_part->median_height());
1538  }
1539  }
1540  }
1541 }
int16_t y() const
access_function
Definition: points.h:57
Definition: statistc.h:33
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
BLOCK * block
Definition: blobbox.h:790
const double kMaxSameBlockLineSpacing
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t bottom() const
Definition: rect.h:65
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ lower_partners()

ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 200 of file colpartition.h.

200  {
201  return &lower_partners_;
202  }

◆ MakeBigPartition()

ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 126 of file colpartition.cpp.

127  {
128  box->set_owner(nullptr);
129  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
130  single->set_flow(BTFT_NONE);
131  single->AddBox(box);
132  single->ComputeLimits();
133  single->ClaimBoxes();
134  single->SetBlobTypes();
135  single->set_block_owned(true);
136  if (big_part_list != nullptr) {
137  ColPartition_IT part_it(big_part_list);
138  part_it.add_to_end(single);
139  }
140  return single;
141 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
integer coordinate
Definition: points.h:32

◆ MakeBlock()

TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1634 of file colpartition.cpp.

1636  {
1637  if (block_parts->empty())
1638  return nullptr; // Nothing to do.
1639  // If the block_parts are not in reading order, then it will make an invalid
1640  // block polygon and bounding_box, so sort by bounding box now just to make
1641  // sure.
1642  block_parts->sort(&ColPartition::SortByBBox);
1643  ColPartition_IT it(block_parts);
1644  ColPartition* part = it.data();
1645  PolyBlockType type = part->type();
1646  if (type == PT_VERTICAL_TEXT)
1647  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1648  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1649  // put the average spacing in each partition, so we can just take the
1650  // linespacing from the first partition.
1651  int line_spacing = part->bottom_spacing();
1652  if (line_spacing < part->median_height())
1653  line_spacing = part->bounding_box().height();
1654  ICOORDELT_LIST vertices;
1655  ICOORDELT_IT vert_it(&vertices);
1656  ICOORD start, end;
1657  int min_x = INT32_MAX;
1658  int max_x = -INT32_MAX;
1659  int min_y = INT32_MAX;
1660  int max_y = -INT32_MAX;
1661  int iteration = 0;
1662  do {
1663  if (iteration == 0)
1664  ColPartition::LeftEdgeRun(&it, &start, &end);
1665  else
1666  ColPartition::RightEdgeRun(&it, &start, &end);
1667  ClipCoord(bleft, tright, &start);
1668  ClipCoord(bleft, tright, &end);
1669  vert_it.add_after_then_move(new ICOORDELT(start));
1670  vert_it.add_after_then_move(new ICOORDELT(end));
1671  UpdateRange(start.x(), &min_x, &max_x);
1672  UpdateRange(end.x(), &min_x, &max_x);
1673  UpdateRange(start.y(), &min_y, &max_y);
1674  UpdateRange(end.y(), &min_y, &max_y);
1675  if ((iteration == 0 && it.at_first()) ||
1676  (iteration == 1 && it.at_last())) {
1677  ++iteration;
1678  it.move_to_last();
1679  }
1680  } while (iteration < 2);
1681  if (textord_debug_tabfind)
1682  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1683  min_x, min_y, max_x, max_y);
1684  BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1685  block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1686  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1687 }
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:58
int16_t y() const
access_function
Definition: points.h:57
PolyBlockType
Definition: publictypes.h:53
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
int textord_debug_tabfind
Definition: alignedblob.cpp:28
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:715
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: ocrblock.h:30
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
PDBLK pdblk
Definition: ocrblock.h:192
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:121
PolyBlockType type() const
Definition: colpartition.h:182

◆ MakeLinePartition()

ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 158 of file colpartition.cpp.

161  {
162  ColPartition* part = new ColPartition(blob_type, vertical);
163  part->bounding_box_ = TBOX(left, bottom, right, top);
164  part->median_bottom_ = bottom;
165  part->median_top_ = top;
166  part->median_height_ = top - bottom;
167  part->median_left_ = left;
168  part->median_right_ = right;
169  part->median_width_ = right - left;
170  part->left_key_ = part->BoxLeftKey();
171  part->right_key_ = part->BoxRightKey();
172  return part;
173 }
BlobRegionType blob_type() const
Definition: colpartition.h:149
Definition: rect.h:34

◆ MakeToRow()

TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1717 of file colpartition.cpp.

1717  {
1718  BLOBNBOX_C_IT blob_it(&boxes_);
1719  TO_ROW* row = nullptr;
1720  int line_size = IsVerticalType() ? median_width_ : median_height_;
1721  // Add all the blobs to a single TO_ROW.
1722  for (; !blob_it.empty(); blob_it.forward()) {
1723  BLOBNBOX* blob = blob_it.extract();
1724 // blob->compute_bounding_box();
1725  int top = blob->bounding_box().top();
1726  int bottom = blob->bounding_box().bottom();
1727  if (row == nullptr) {
1728  row = new TO_ROW(blob, static_cast<float>(top),
1729  static_cast<float>(bottom),
1730  static_cast<float>(line_size));
1731  } else {
1732  row->add_blob(blob, static_cast<float>(top),
1733  static_cast<float>(bottom),
1734  static_cast<float>(line_size));
1735  }
1736  }
1737  return row;
1738 }
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:734
int16_t top() const
Definition: rect.h:58
const TBOX & bounding_box() const
Definition: blobbox.h:231
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t bottom() const
Definition: rect.h:65

◆ MakeVerticalTextBlock()

TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1691 of file colpartition.cpp.

1694  {
1695  if (block_parts->empty())
1696  return nullptr; // Nothing to do.
1697  ColPartition_IT it(block_parts);
1698  ColPartition* part = it.data();
1699  TBOX block_box = part->bounding_box();
1700  int line_spacing = block_box.width();
1701  PolyBlockType type = it.data()->type();
1702  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1703  block_box += it.data()->bounding_box();
1704  }
1705  if (textord_debug_tabfind) {
1706  tprintf("Making block at:");
1707  block_box.print();
1708  }
1709  BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1710  block_box.right(), block_box.top());
1711  block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1712  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1713 }
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:58
void print() const
Definition: rect.h:278
Definition: rect.h:34
PolyBlockType
Definition: publictypes.h:53
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
Definition: ocrblock.h:30
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65
PDBLK pdblk
Definition: ocrblock.h:192
PolyBlockType type() const
Definition: colpartition.h:182

◆ MarkAsLeaderIfMonospaced()

bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1093 of file colpartition.cpp.

1093  {
1094  bool result = false;
1095  // Gather statistics on the gaps between blobs and the widths of the blobs.
1096  int part_width = bounding_box_.width();
1097  STATS gap_stats(0, part_width);
1098  STATS width_stats(0, part_width);
1099  BLOBNBOX_C_IT it(&boxes_);
1100  BLOBNBOX* prev_blob = it.data();
1101  prev_blob->set_flow(BTFT_NEIGHBOURS);
1102  width_stats.add(prev_blob->bounding_box().width(), 1);
1103  int blob_count = 1;
1104  for (it.forward(); !it.at_first(); it.forward()) {
1105  BLOBNBOX* blob = it.data();
1106  int left = blob->bounding_box().left();
1107  int right = blob->bounding_box().right();
1108  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1109  width_stats.add(right - left, 1);
1110  blob->set_flow(BTFT_NEIGHBOURS);
1111  prev_blob = blob;
1112  ++blob_count;
1113  }
1114  double median_gap = gap_stats.median();
1115  double median_width = width_stats.median();
1116  double max_width = std::max(median_gap, median_width);
1117  double min_width = std::min(median_gap, median_width);
1118  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1119  if (textord_debug_tabfind >= 4) {
1120  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1121  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1122  min_width * kMaxLeaderGapFractionOfMin);
1123  }
1124  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1125  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1126  blob_count >= kMinLeaderCount) {
1127  // This is stable enough to be called a leader, so check the widths.
1128  // Since leader dashes can join, run a dp cutting algorithm and go
1129  // on the cost.
1130  int offset = static_cast<int>(ceil(gap_iqr * 2));
1131  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1132  int max_step = min_step + offset;
1133  min_step -= offset;
1134  // Pad the buffer with min_step/2 on each end.
1135  int part_left = bounding_box_.left() - min_step / 2;
1136  part_width += min_step;
1137  DPPoint* projection = new DPPoint[part_width];
1138  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1139  BLOBNBOX* blob = it.data();
1140  int left = blob->bounding_box().left();
1141  int right = blob->bounding_box().right();
1142  int height = blob->bounding_box().height();
1143  for (int x = left; x < right; ++x) {
1144  projection[left - part_left].AddLocalCost(height);
1145  }
1146  }
1147  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1148  &DPPoint::CostWithVariance,
1149  part_width, projection);
1150  if (best_end != nullptr && best_end->total_cost() < blob_count) {
1151  // Good enough. Call it a leader.
1152  result = true;
1153  bool modified_blob_list = false;
1154  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1155  BLOBNBOX* blob = it.data();
1156  TBOX box = blob->bounding_box();
1157  // If the first or last blob is spaced too much, don't mark it.
1158  if (it.at_first()) {
1159  int gap = it.data_relative(1)->bounding_box().left() -
1160  blob->bounding_box().right();
1161  if (blob->bounding_box().width() + gap > max_step) {
1162  it.extract();
1163  modified_blob_list = true;
1164  continue;
1165  }
1166  }
1167  if (it.at_last()) {
1168  int gap = blob->bounding_box().left() -
1169  it.data_relative(-1)->bounding_box().right();
1170  if (blob->bounding_box().width() + gap > max_step) {
1171  it.extract();
1172  modified_blob_list = true;
1173  break;
1174  }
1175  }
1176  blob->set_region_type(BRT_TEXT);
1177  blob->set_flow(BTFT_LEADER);
1178  }
1179  if (modified_blob_list) ComputeLimits();
1180  blob_type_ = BRT_TEXT;
1181  flow_ = BTFT_LEADER;
1182  } else if (textord_debug_tabfind) {
1183  if (best_end == nullptr) {
1184  tprintf("No path\n");
1185  } else {
1186  tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1187  blob_count);
1188  }
1189  }
1190  delete [] projection;
1191  }
1192  return result;
1193 }
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:69
const double kMaxLeaderGapFractionOfMax
const double kMaxLeaderGapFractionOfMin
Definition: rect.h:34
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:31
Definition: statistc.h:33
int16_t width() const
Definition: rect.h:115
const int kMinLeaderCount
int16_t left() const
Definition: rect.h:72
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:287
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:299
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
int16_t height() const
Definition: rect.h:108

◆ MatchingColumns()

bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 380 of file colpartition.cpp.

380  {
381  int y = (MidY() + other.MidY()) / 2;
382  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
383  LeftAtY(y) / kColumnWidthFactor, 1))
384  return false;
385  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
386  RightAtY(y) / kColumnWidthFactor, 1))
387  return false;
388  return true;
389 }
const int kColumnWidthFactor
Definition: tabfind.h:42
int RightAtY(int y) const
Definition: colpartition.h:345
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:60
int LeftAtY(int y) const
Definition: colpartition.h:341

◆ MatchingSizes()

bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 415 of file colpartition.cpp.

415  {
416  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
417  return !TabFind::DifferentSizes(median_width_, other.median_width_);
418  else
419  return !TabFind::DifferentSizes(median_height_, other.median_height_);
420 }
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:408

◆ MatchingStrokeWidth()

bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 440 of file colpartition.cpp.

442  {
443  int match_count = 0;
444  int nonmatch_count = 0;
445  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
446  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
447  box_it.mark_cycle_pt();
448  other_it.mark_cycle_pt();
449  while (!box_it.cycled_list() && !other_it.cycled_list()) {
450  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
451  fractional_tolerance,
452  constant_tolerance))
453  ++match_count;
454  else
455  ++nonmatch_count;
456  box_it.forward();
457  other_it.forward();
458  }
459  return match_count > nonmatch_count;
460 }

◆ MatchingTextColor()

bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 392 of file colpartition.cpp.

392  {
393  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
394  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
395  return false; // Too noisy.
396 
397  // Colors must match for other to count.
398  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
399  other.color2_,
400  color1_);
401  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
402  other.color2_,
403  color2_);
404  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
405  other.color1_);
406  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
407  other.color2_);
408 // All 4 distances must be small enough.
409  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
410  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
411 }
const int kMaxColorDistance
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:356
const int kMaxRMSColorNoise

◆ median_bottom()

int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 128 of file colpartition.h.

128  {
129  return median_bottom_;
130  }

◆ median_height()

int tesseract::ColPartition::median_height ( ) const
inline

Definition at line 137 of file colpartition.h.

137  {
138  return median_height_;
139  }

◆ median_left()

int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 131 of file colpartition.h.

131  {
132  return median_left_;
133  }

◆ median_right()

int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 134 of file colpartition.h.

134  {
135  return median_right_;
136  }

◆ median_top()

int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 125 of file colpartition.h.

125  {
126  return median_top_;
127  }

◆ median_width()

int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 143 of file colpartition.h.

143  {
144  return median_width_;
145  }

◆ MedianY()

int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 309 of file colpartition.h.

309  {
310  return (median_top_ + median_bottom_) / 2;
311  }

◆ MidX()

int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 313 of file colpartition.h.

313  {
314  return (bounding_box_.left() + bounding_box_.right()) / 2;
315  }
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79

◆ MidY()

int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 305 of file colpartition.h.

305  {
306  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
307  }
int16_t top() const
Definition: rect.h:58
int16_t bottom() const
Definition: rect.h:65

◆ nearest_neighbor_above()

ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 250 of file colpartition.h.

250  {
251  return nearest_neighbor_above_;
252  }

◆ nearest_neighbor_below()

ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 256 of file colpartition.h.

256  {
257  return nearest_neighbor_below_;
258  }

◆ OKDiacriticMerge()

bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 468 of file colpartition.cpp.

469  {
470  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
471  int min_top = INT32_MAX;
472  int max_bottom = -INT32_MAX;
473  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
474  BLOBNBOX* blob = it.data();
475  if (!blob->IsDiacritic()) {
476  if (debug) {
477  tprintf("Blob is not a diacritic:");
478  blob->bounding_box().print();
479  }
480  return false; // All blobs must have diacritic bases.
481  }
482  if (blob->base_char_top() < min_top)
483  min_top = blob->base_char_top();
484  if (blob->base_char_bottom() > max_bottom)
485  max_bottom = blob->base_char_bottom();
486  }
487  // If the intersection of all vertical ranges of all base characters
488  // overlaps the median range of this, then it is OK.
489  bool result = min_top > candidate.median_bottom_ &&
490  max_bottom < candidate.median_top_;
491  if (debug) {
492  if (result)
493  tprintf("OKDiacritic!\n");
494  else
495  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
496  max_bottom, min_top, median_bottom_, median_top_);
497  }
498  return result;
499 }
void print() const
Definition: rect.h:278
bool IsDiacritic() const
Definition: blobbox.h:381
int base_char_top() const
Definition: blobbox.h:384
int base_char_bottom() const
Definition: blobbox.h:387
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
const TBOX & bounding_box() const
Definition: blobbox.h:231

◆ OKMergeOverlap()

bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 746 of file colpartition.cpp.

748  {
749  // Vertical partitions are not allowed to be involved.
750  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
751  if (debug)
752  tprintf("Vertical partition\n");
753  return false;
754  }
755  // The merging partitions must strongly overlap each other.
756  if (!merge1.VSignificantCoreOverlap(merge2)) {
757  if (debug)
758  tprintf("Voverlap %d (%d)\n",
759  merge1.VCoreOverlap(merge2),
760  merge1.VSignificantCoreOverlap(merge2));
761  return false;
762  }
763  // The merged box must not overlap the median bounds of this.
764  TBOX merged_box(merge1.bounding_box());
765  merged_box += merge2.bounding_box();
766  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
767  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
768  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
769  if (debug)
770  tprintf("Excessive box overlap\n");
771  return false;
772  }
773  // Looks OK!
774  return true;
775 }
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
bool IsVerticalType() const
Definition: colpartition.h:442
int16_t bottom() const
Definition: rect.h:65

◆ OverlapSplitBlob()

BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 779 of file colpartition.cpp.

779  {
780  if (boxes_.empty() || boxes_.singleton())
781  return nullptr;
782  BLOBNBOX_C_IT it(&boxes_);
783  TBOX left_box(it.data()->bounding_box());
784  for (it.forward(); !it.at_first(); it.forward()) {
785  BLOBNBOX* bbox = it.data();
786  left_box += bbox->bounding_box();
787  if (left_box.overlap(box))
788  return bbox;
789  }
790  return nullptr;
791 }
Definition: rect.h:34
const TBOX & bounding_box() const
Definition: blobbox.h:231

◆ owns_blobs()

bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 292 of file colpartition.h.

292  {
293  return owns_blobs_;
294  }

◆ PartitionType()

PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1016 of file colpartition.cpp.

1016  {
1017  if (flow == CST_NOISE) {
1018  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1019  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1020  return PT_NOISE;
1021  flow = CST_FLOWING;
1022  }
1023 
1024  switch (blob_type_) {
1025  case BRT_NOISE:
1026  return PT_NOISE;
1027  case BRT_HLINE:
1028  return PT_HORZ_LINE;
1029  case BRT_VLINE:
1030  return PT_VERT_LINE;
1031  case BRT_RECTIMAGE:
1032  case BRT_POLYIMAGE:
1033  switch (flow) {
1034  case CST_FLOWING:
1035  return PT_FLOWING_IMAGE;
1036  case CST_HEADING:
1037  return PT_HEADING_IMAGE;
1038  case CST_PULLOUT:
1039  return PT_PULLOUT_IMAGE;
1040  default:
1041  ASSERT_HOST(!"Undefined flow type for image!");
1042  }
1043  break;
1044  case BRT_VERT_TEXT:
1045  return PT_VERTICAL_TEXT;
1046  case BRT_TEXT:
1047  case BRT_UNKNOWN:
1048  default:
1049  switch (flow) {
1050  case CST_FLOWING:
1051  return PT_FLOWING_TEXT;
1052  case CST_HEADING:
1053  return PT_HEADING_TEXT;
1054  case CST_PULLOUT:
1055  return PT_PULLOUT_TEXT;
1056  default:
1057  ASSERT_HOST(!"Undefined flow type for text!");
1058  }
1059  }
1060  ASSERT_HOST(!"Should never get here!");
1061  return PT_NOISE;
1062 }
Definition: capi.h:101
BlobTextFlowType flow() const
Definition: colpartition.h:155
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ Print()

void tesseract::ColPartition::Print ( ) const

Definition at line 1793 of file colpartition.cpp.

1793  {
1794  int y = MidY();
1795  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1796  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1797  " ts=%d bs=%d ls=%d rs=%d\n",
1798  boxes_.empty() ? 'E' : ' ',
1799  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1800  bounding_box_.left(), median_left_,
1801  bounding_box_.bottom(), median_bottom_,
1802  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1803  right_margin_, median_right_, bounding_box_.top(), median_top_,
1804  good_width_, good_column_, type_,
1805  kBlobTypes[blob_type_], flow_,
1806  first_column_, last_column_, boxes_.length(),
1807  space_above_, space_below_, space_to_left_, space_to_right_);
1808 }
int RightAtY(int y) const
Definition: colpartition.h:345
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
int LeftAtY(int y) const
Definition: colpartition.h:341
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int16_t right() const
Definition: rect.h:79
int16_t bottom() const
Definition: rect.h:65

◆ PrintColors()

void tesseract::ColPartition::PrintColors ( )

Definition at line 1811 of file colpartition.cpp.

1811  {
1812  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1813  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1814  color1_[L_ALPHA_CHANNEL],
1815  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1816 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ RefinePartners()

void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid * grid 
)

Definition at line 1888 of file colpartition.cpp.

1889  {
1890  if (TypesSimilar(type_, type)) {
1891  RefinePartnersInternal(true, get_desperate, grid);
1892  RefinePartnersInternal(false, get_desperate, grid);
1893  } else if (type == PT_COUNT) {
1894  // This is the final pass. Make sure only the correctly typed
1895  // partners survive, however many there are.
1896  RefinePartnersByType(true, &upper_partners_);
1897  RefinePartnersByType(false, &lower_partners_);
1898  // It is possible for a merge to have given a partition multiple
1899  // partners again, so the last resort is to use overlap which is
1900  // guaranteed to leave at most one partner left.
1901  if (!upper_partners_.empty() && !upper_partners_.singleton())
1902  RefinePartnersByOverlap(true, &upper_partners_);
1903  if (!lower_partners_.empty() && !lower_partners_.singleton())
1904  RefinePartnersByOverlap(false, &lower_partners_);
1905  }
1906 }
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:419
Definition: capi.h:101
PolyBlockType type() const
Definition: colpartition.h:182

◆ ReflectInYAxis()

void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 330 of file colpartition.cpp.

330  {
331  BLOBNBOX_CLIST reversed_boxes;
332  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
333  // Reverse the order of the boxes_.
334  BLOBNBOX_C_IT bb_it(&boxes_);
335  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
336  reversed_it.add_before_then_move(bb_it.extract());
337  }
338  bb_it.add_list_after(&reversed_boxes);
339  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
340  int tmp = left_margin_;
341  left_margin_ = -right_margin_;
342  right_margin_ = -tmp;
343  ComputeLimits();
344 }
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReleaseNonLeaderBoxes()

bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 299 of file colpartition.cpp.

299  {
300  BLOBNBOX_C_IT bb_it(&boxes_);
301  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
302  BLOBNBOX* bblob = bb_it.data();
303  if (bblob->flow() != BTFT_LEADER) {
304  if (bblob->owner() == this) bblob->set_owner(nullptr);
305  bb_it.extract();
306  }
307  }
308  if (bb_it.empty()) return false;
309  flow_ = BTFT_LEADER;
310  ComputeLimits();
311  return true;
312 }
BlobTextFlowType flow() const
Definition: blobbox.h:296
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
tesseract::ColPartition * owner() const
Definition: blobbox.h:353

◆ RemoveBox()

void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 212 of file colpartition.cpp.

212  {
213  BLOBNBOX_C_IT bb_it(&boxes_);
214  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
215  if (box == bb_it.data()) {
216  bb_it.extract();
217  ComputeLimits();
218  return;
219  }
220  }
221 }

◆ RemovePartner()

void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 628 of file colpartition.cpp.

628  {
629  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
630  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631  if (it.data() == partner) {
632  it.extract();
633  break;
634  }
635  }
636 }

◆ right_key()

int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 179 of file colpartition.h.

179  {
180  return right_key_;
181  }

◆ right_key_tab()

bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 176 of file colpartition.h.

176  {
177  return right_key_tab_;
178  }

◆ right_margin()

int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 119 of file colpartition.h.

119  {
120  return right_margin_;
121  }

◆ RightAtY()

int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 345 of file colpartition.h.

345  {
346  return XAtY(right_key_, y);
347  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321

◆ RightBlobRule()

int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 560 of file colpartition.cpp.

560  {
561  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
562  it.move_to_last();
563  return it.data()->right_rule();
564 }

◆ set_blob_type()

void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 152 of file colpartition.h.

152  {
153  blob_type_ = t;
154  }

◆ set_block_owned()

void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 209 of file colpartition.h.

209  {
210  block_owned_ = owned;
211  }

◆ set_bottom_spacing()

void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 224 of file colpartition.h.

224  {
225  bottom_spacing_ = spacing;
226  }

◆ set_first_column()

void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 732 of file colpartition.h.

732  {
733  first_column_ = column;
734  }

◆ set_flow()

void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 158 of file colpartition.h.

158  {
159  flow_ = f;
160  }

◆ set_inside_table_column()

void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 247 of file colpartition.h.

247  {
248  inside_table_column_ = val;
249  }

◆ set_last_column()

void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 735 of file colpartition.h.

735  {
736  last_column_ = column;
737  }

◆ set_left_margin()

void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 116 of file colpartition.h.

116  {
117  left_margin_ = margin;
118  }

◆ set_median_height()

void tesseract::ColPartition::set_median_height ( int  height)
inline

Definition at line 140 of file colpartition.h.

140  {
141  median_height_ = height;
142  }

◆ set_median_width()

void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 146 of file colpartition.h.

146  {
147  median_width_ = width;
148  }

◆ set_nearest_neighbor_above()

void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 253 of file colpartition.h.

253  {
254  nearest_neighbor_above_ = part;
255  }

◆ set_nearest_neighbor_below()

void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 259 of file colpartition.h.

259  {
260  nearest_neighbor_below_ = part;
261  }

◆ set_owns_blobs()

void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 295 of file colpartition.h.

295  {
296  // Do NOT change ownership flag when there are blobs in the list.
297  // Immediately set the ownership flag when creating copies.
298  ASSERT_HOST(boxes_.empty());
299  owns_blobs_ = owns_blobs;
300  }
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ set_right_margin()

void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 122 of file colpartition.h.

122  {
123  right_margin_ = margin;
124  }

◆ set_side_step()

void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 218 of file colpartition.h.

218  {
219  side_step_ = step;
220  }

◆ set_space_above()

void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 265 of file colpartition.h.

265  {
266  space_above_ = space;
267  }

◆ set_space_below()

void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 271 of file colpartition.h.

271  {
272  space_below_ = space;
273  }

◆ set_space_to_left()

void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 277 of file colpartition.h.

277  {
278  space_to_left_ = space;
279  }

◆ set_space_to_right()

void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 283 of file colpartition.h.

283  {
284  space_to_right_ = space;
285  }

◆ set_table_type()

void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 234 of file colpartition.h.

234  {
235  if (type_ != PT_TABLE) {
236  type_before_table_ = type_;
237  type_ = PT_TABLE;
238  }
239  }
Definition: capi.h:100

◆ set_top_spacing()

void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 230 of file colpartition.h.

230  {
231  top_spacing_ = spacing;
232  }

◆ set_type()

void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 185 of file colpartition.h.

185  {
186  type_ = t;
187  }

◆ set_vertical()

void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 194 of file colpartition.h.

194  {
195  vertical_ = v;
196  }

◆ set_working_set()

void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 203 of file colpartition.h.

203  {
204  working_set_ = working_set;
205  }

◆ SetBlobTypes()

void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1276 of file colpartition.cpp.

1276  {
1277  if (!owns_blobs())
1278  return;
1279  BLOBNBOX_C_IT it(&boxes_);
1280  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1281  BLOBNBOX* blob = it.data();
1282  if (blob->flow() != BTFT_LEADER)
1283  blob->set_flow(flow_);
1284  blob->set_region_type(blob_type_);
1285  ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1286  }
1287 }
BlobTextFlowType flow() const
Definition: blobbox.h:296
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:287
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:299
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ SetColumnGoodness()

void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1080 of file colpartition.cpp.

1080  {
1081  int y = MidY();
1082  int width = RightAtY(y) - LeftAtY(y);
1083  good_width_ = cb->Run(width);
1084  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1085 }
int RightAtY(int y) const
Definition: colpartition.h:345
int LeftAtY(int y) const
Definition: colpartition.h:341

◆ SetLeftTab()

void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 504 of file colpartition.cpp.

504  {
505  if (tab_vector != nullptr) {
506  left_key_ = tab_vector->sort_key();
507  left_key_tab_ = left_key_ <= BoxLeftKey();
508  } else {
509  left_key_tab_ = false;
510  }
511  if (!left_key_tab_)
512  left_key_ = BoxLeftKey();
513 }

◆ SetPartitionType()

void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 983 of file colpartition.cpp.

983  {
984  int first_spanned_col = -1;
985  ColumnSpanningType span_type =
986  columns->SpanningType(resolution,
987  bounding_box_.left(), bounding_box_.right(),
988  std::min(bounding_box_.height(), bounding_box_.width()),
989  MidY(), left_margin_, right_margin_,
990  &first_column_, &last_column_,
991  &first_spanned_col);
992  column_set_ = columns;
993  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
994  !IsLineType()) {
995  // Unequal columns may indicate that the pullout spans one of the columns
996  // it lies in, so force it to be allocated to just that column.
997  if (first_spanned_col >= 0) {
998  first_column_ = first_spanned_col;
999  last_column_ = first_spanned_col;
1000  } else {
1001  if ((first_column_ & 1) == 0)
1002  last_column_ = first_column_;
1003  else if ((last_column_ & 1) == 0)
1004  first_column_ = last_column_;
1005  else
1006  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1007  }
1008  }
1009  type_ = PartitionType(span_type);
1010 }
PolyBlockType PartitionType(ColumnSpanningType flow) const
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79
int16_t height() const
Definition: rect.h:108

◆ SetRegionAndFlowTypesFromProjectionValue()

void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1202 of file colpartition.cpp.

1202  {
1203  int blob_count = 0; // Total # blobs.
1204  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1205  int noisy_count = 0; // Total # neighbours marked as noise.
1206  int hline_count = 0;
1207  int vline_count = 0;
1208  BLOBNBOX_C_IT it(&boxes_);
1209  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1210  BLOBNBOX* blob = it.data();
1211  ++blob_count;
1212  noisy_count += blob->NoisyNeighbours();
1213  good_blob_score_ += blob->GoodTextBlob();
1214  if (blob->region_type() == BRT_HLINE) ++hline_count;
1215  if (blob->region_type() == BRT_VLINE) ++vline_count;
1216  }
1217  flow_ = BTFT_NEIGHBOURS;
1218  blob_type_ = BRT_UNKNOWN;
1219  if (hline_count > vline_count) {
1220  flow_ = BTFT_NONE;
1221  blob_type_ = BRT_HLINE;
1222  } else if (vline_count > hline_count) {
1223  flow_ = BTFT_NONE;
1224  blob_type_ = BRT_VLINE;
1225  } else if (value < -1 || 1 < value) {
1226  int long_side;
1227  int short_side;
1228  if (value > 0) {
1229  long_side = bounding_box_.width();
1230  short_side = bounding_box_.height();
1231  blob_type_ = BRT_TEXT;
1232  } else {
1233  long_side = bounding_box_.height();
1234  short_side = bounding_box_.width();
1235  blob_type_ = BRT_VERT_TEXT;
1236  }
1237  // We will combine the old metrics using aspect ratio and blob counts
1238  // with the input value by allowing a strong indication to flip the
1239  // STRONG_CHAIN/CHAIN flow values.
1240  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1241  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1242  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1243  if (abs(value) >= kMinStrongTextValue)
1244  flow_ = BTFT_STRONG_CHAIN;
1245  else if (abs(value) >= kMinChainTextValue)
1246  flow_ = BTFT_CHAIN;
1247  else
1248  flow_ = BTFT_NEIGHBOURS;
1249  // Upgrade chain to strong chain if the other indicators are good
1250  if (flow_ == BTFT_CHAIN && strong_score == 3)
1251  flow_ = BTFT_STRONG_CHAIN;
1252  // Downgrade strong vertical text to chain if the indicators are bad.
1253  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1254  flow_ = BTFT_CHAIN;
1255  }
1256  if (flow_ == BTFT_NEIGHBOURS) {
1257  // Check for noisy neighbours.
1258  if (noisy_count >= blob_count) {
1259  flow_ = BTFT_NONTEXT;
1260  blob_type_= BRT_NOISE;
1261  }
1262  }
1263  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1264  bounding_box_.bottom())) {
1265  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1266  blob_count, noisy_count, good_blob_score_);
1267  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1268  value, flow_, blob_type_);
1269  Print();
1270  }
1271  SetBlobTypes();
1272 }
int GoodTextBlob() const
Definition: blobbox.cpp:227
const int kMinChainTextValue
int NoisyNeighbours() const
Definition: blobbox.cpp:238
const int kHorzStrongTextlineHeight
const int kHorzStrongTextlineAspect
static bool WithinTestRegion(int detail_level, int x, int y)
int16_t width() const
Definition: rect.h:115
const int kHorzStrongTextlineCount
int16_t left() const
Definition: rect.h:72
const int kMinStrongTextValue
BlobRegionType region_type() const
Definition: blobbox.h:284
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int16_t bottom() const
Definition: rect.h:65
int16_t height() const
Definition: rect.h:108

◆ SetRightTab()

void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 516 of file colpartition.cpp.

516  {
517  if (tab_vector != nullptr) {
518  right_key_ = tab_vector->sort_key();
519  right_key_tab_ = right_key_ >= BoxRightKey();
520  } else {
521  right_key_tab_ = false;
522  }
523  if (!right_key_tab_)
524  right_key_ = BoxRightKey();
525 }

◆ SetSpecialBlobsDensity()

void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 586 of file colpartition.cpp.

587  {
588  ASSERT_HOST(type < BSTT_COUNT);
589  special_blobs_densities_[type] = density;
590 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
PolyBlockType type() const
Definition: colpartition.h:182

◆ ShallowCopy()

ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1742 of file colpartition.cpp.

1742  {
1743  ColPartition* part = new ColPartition(blob_type_, vertical_);
1744  part->left_margin_ = left_margin_;
1745  part->right_margin_ = right_margin_;
1746  part->bounding_box_ = bounding_box_;
1747  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1748  sizeof(special_blobs_densities_));
1749  part->median_bottom_ = median_bottom_;
1750  part->median_top_ = median_top_;
1751  part->median_height_ = median_height_;
1752  part->median_left_ = median_left_;
1753  part->median_right_ = median_right_;
1754  part->median_width_ = median_width_;
1755  part->good_width_ = good_width_;
1756  part->good_column_ = good_column_;
1757  part->left_key_tab_ = left_key_tab_;
1758  part->right_key_tab_ = right_key_tab_;
1759  part->type_ = type_;
1760  part->flow_ = flow_;
1761  part->left_key_ = left_key_;
1762  part->right_key_ = right_key_;
1763  part->first_column_ = first_column_;
1764  part->last_column_ = last_column_;
1765  part->owns_blobs_ = false;
1766  return part;
1767 }

◆ SingletonPartner()

ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 639 of file colpartition.cpp.

639  {
640  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
641  if (!partners->singleton())
642  return nullptr;
643  ColPartition_C_IT it(partners);
644  return it.data();
645 }

◆ SmoothPartnerRun()

void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1819 of file colpartition.cpp.

1819  {
1820  STATS left_stats(0, working_set_count);
1821  STATS right_stats(0, working_set_count);
1822  PolyBlockType max_type = type_;
1823  ColPartition* partner;
1824  for (partner = SingletonPartner(false); partner != nullptr;
1825  partner = partner->SingletonPartner(false)) {
1826  if (partner->type_ > max_type)
1827  max_type = partner->type_;
1828  if (column_set_ == partner->column_set_) {
1829  left_stats.add(partner->first_column_, 1);
1830  right_stats.add(partner->last_column_, 1);
1831  }
1832  }
1833  type_ = max_type;
1834  // TODO(rays) Either establish that it isn't necessary to set the columns,
1835  // or find a way to do it that does not cause an assert failure in
1836  // AddToWorkingSet.
1837 #if 0
1838  first_column_ = left_stats.mode();
1839  last_column_ = right_stats.mode();
1840  if (last_column_ < first_column_)
1841  last_column_ = first_column_;
1842 #endif
1843 
1844  for (partner = SingletonPartner(false); partner != nullptr;
1845  partner = partner->SingletonPartner(false)) {
1846  partner->type_ = max_type;
1847 #if 0 // See TODO above
1848  if (column_set_ == partner->column_set_) {
1849  partner->first_column_ = first_column_;
1850  partner->last_column_ = last_column_;
1851  }
1852 #endif
1853  }
1854 }
PolyBlockType
Definition: publictypes.h:53
ColPartition * SingletonPartner(bool upper)
Definition: statistc.h:33

◆ SortByBBox()

static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inlinestatic

Definition at line 715 of file colpartition.h.

// Comparison callback: p1 and p2 each point at a const ColPartition*.
// If the vertical midpoint of either bounding box lies within the other
// box's [bottom, top] range, the result is the difference of the boxes'
// x_middle() values (increasing x). Otherwise the result is
// mid_y2 - mid_y1 (decreasing y).
// NOTE(review): presumably used as a qsort-style comparator; confirm
// against callers.
715  {
716  const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
717  const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
718  int mid_y1 = part1->bounding_box_.y_middle();
719  int mid_y2 = part2->bounding_box_.y_middle();
720  if ((part2->bounding_box_.bottom() <= mid_y1 &&
721  mid_y1 <= part2->bounding_box_.top()) ||
722  (part1->bounding_box_.bottom() <= mid_y2 &&
723  mid_y2 <= part1->bounding_box_.top())) {
724  // Sort by increasing x.
725  return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
726  }
727  // Sort by decreasing y.
728  return mid_y2 - mid_y1;
729  }

◆ SortKey()

int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 317 of file colpartition.h.

// Forwards (x, y) to the static TabVector::SortKey together with this
// partition's vertical_ member and returns its result.
317  {
318  return TabVector::SortKey(vertical_, x, y);
319  }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280

◆ space_above()

int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 262 of file colpartition.h.

// Accessor: returns the space_above_ member.
262  {
263  return space_above_;
264  }

◆ space_below()

int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 268 of file colpartition.h.

// Accessor: returns the space_below_ member.
268  {
269  return space_below_;
270  }

◆ space_to_left()

int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 274 of file colpartition.h.

// Accessor: returns the space_to_left_ member.
274  {
275  return space_to_left_;
276  }

◆ space_to_right()

int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 280 of file colpartition.h.

// Accessor: returns the space_to_right_ member.
280  {
281  return space_to_right_;
282  }

◆ SpecialBlobsCount()

int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 571 of file colpartition.cpp.

// Counts the blobs in boxes_ whose special_text_type() equals type.
// ASSERT_HOST requires type to be less than BSTT_COUNT.
571  {
572  ASSERT_HOST(type < BSTT_COUNT);
573  BLOBNBOX_C_IT blob_it(&boxes_);
574  int count = 0;
575  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
576  BLOBNBOX* blob = blob_it.data();
577  BlobSpecialTextType blob_type = blob->special_text_type();
578  if (blob_type == type) {
579  count++;
580  }
581  }
582 
583  return count;
584 }
int count(LIST var_list)
Definition: oldlist.cpp:98
BlobRegionType blob_type() const
Definition: colpartition.h:149
BlobSpecialTextType
Definition: blobbox.h:97
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:290
#define ASSERT_HOST(x)
Definition: errcode.h:84
PolyBlockType type() const
Definition: colpartition.h:182

◆ SpecialBlobsDensity()

float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 566 of file colpartition.cpp.

// Returns the entry of special_blobs_densities_ indexed by type.
// ASSERT_HOST requires type to be less than BSTT_COUNT before indexing.
566  {
567  ASSERT_HOST(type < BSTT_COUNT);
568  return special_blobs_densities_[type];
569 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
PolyBlockType type() const
Definition: colpartition.h:182

◆ SplitAt()

ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 833 of file colpartition.cpp.

// Splits this partition at split_x. Blobs whose bounding box left() is
// >= split_x are moved into a new partition obtained from ShallowCopy(),
// which is returned. Returns nullptr without changes when split_x is not
// strictly between bounding_box_.left() and bounding_box_.right(), and
// also returns nullptr (after deleting the new partition) when the new
// partition ends up empty. On success the key-tab flags on the shared
// edge are cleared, both margins on that edge are set to split_x, and
// ComputeLimits() is called on both halves.
833  {
834  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
835  return nullptr; // There will be no change.
836  ColPartition* split_part = ShallowCopy();
// Give the new partition the same blob-ownership flag as this one.
837  split_part->set_owns_blobs(owns_blobs());
838  BLOBNBOX_C_IT it(&boxes_);
839  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
840  BLOBNBOX* bbox = it.data();
841  ColPartition* prev_owner = bbox->owner();
// When this partition owns its blobs, each blob must be owned by this
// partition or by nobody.
842  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
843  const TBOX& box = bbox->bounding_box();
844  if (box.left() >= split_x) {
845  split_part->AddBox(it.extract());
// Only blobs that previously had an owner are re-pointed at the new part.
846  if (owns_blobs() && prev_owner != nullptr)
847  bbox->set_owner(split_part);
848  }
849  }
850  if (it.empty()) {
851  // Possible if split-x passes through the first blob.
// NOTE(review): add_list_after presumably moves the blobs back into this
// partition, leaving split_part empty so the check below returns nullptr.
852  it.add_list_after(&split_part->boxes_);
853  }
854  ASSERT_HOST(!it.empty());
855  if (split_part->IsEmpty()) {
856  // Split part ended up with nothing. Possible if split_x passes
857  // through the last blob.
858  delete split_part;
859  return nullptr;
860  }
861  right_key_tab_ = false;
862  split_part->left_key_tab_ = false;
863  right_margin_ = split_x;
864  split_part->left_margin_ = split_x;
865  ComputeLimits();
866  split_part->ComputeLimits();
867  return split_part;
868 }
TBOX
Definition: rect.h:34
int16_t left() const
Definition: rect.h:72
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
const TBOX & bounding_box() const
Definition: blobbox.h:231
int16_t right() const
Definition: rect.h:79
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
ColPartition * ShallowCopy() const
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ SplitAtBlob()

ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 797 of file colpartition.cpp.

// Splits this partition at split_blob. Iterating boxes_ in list order,
// split_blob and every blob visited after it are moved into a new
// partition obtained from ShallowCopy(), which is returned. Returns
// nullptr (after deleting the new partition) when nothing was moved.
// On success the key-tab flags on the shared edge are cleared and
// ComputeLimits() is called on both halves; unlike SplitAt, the margins
// are not modified here.
797  {
798  ColPartition* split_part = ShallowCopy();
// Give the new partition the same blob-ownership flag as this one.
799  split_part->set_owns_blobs(owns_blobs());
800  BLOBNBOX_C_IT it(&boxes_);
801  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
802  BLOBNBOX* bbox = it.data();
803  ColPartition* prev_owner = bbox->owner();
804  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
// Once the first blob has moved, split_part->boxes_ is non-empty, so every
// later blob moves too.
805  if (bbox == split_blob || !split_part->boxes_.empty()) {
806  split_part->AddBox(it.extract());
807  if (owns_blobs() && prev_owner != nullptr)
808  bbox->set_owner(split_part);
809  }
810  }
// NOTE(review): this partition must keep at least one blob, so passing the
// first blob in the list as split_blob appears to trip this assert.
811  ASSERT_HOST(!it.empty());
812  if (split_part->IsEmpty()) {
813  // Split part ended up with nothing. Possible if split_blob is not
814  // in the list of blobs.
815  delete split_part;
816  return nullptr;
817  }
818  right_key_tab_ = false;
819  split_part->left_key_tab_ = false;
820  ComputeLimits();
821  // TODO(nbeato) Merge Ray's CL like this:
822  // if (owns_blobs())
823  // SetBlobTextlineGoodness();
824  split_part->ComputeLimits();
825  // TODO(nbeato) Merge Ray's CL like this:
826  // if (split_part->owns_blobs())
827  // split_part->SetBlobTextlineGoodness();
828  return split_part;
829 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:356
tesseract::ColPartition * owner() const
Definition: blobbox.h:353
ColPartition * ShallowCopy() const
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ top_spacing()

int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 227 of file colpartition.h.

// Accessor: returns the top_spacing_ member.
227  {
228  return top_spacing_;
229  }

◆ type()

PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 182 of file colpartition.h.

// Accessor: returns the type_ member.
182  {
183  return type_;
184  }

◆ TypesMatch() [1/2]

bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 410 of file colpartition.h.

// Delegates to the static two-argument TypesMatch, passing this
// partition's blob_type_ and other's blob_type_.
410  {
411  return TypesMatch(blob_type_, other.blob_type_);
412  }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:410

◆ TypesMatch() [2/2]

static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inlinestatic

Definition at line 413 of file colpartition.h.

// True when the two types are equal or either one is BRT_UNKNOWN, and
// BLOBNBOX::IsLineType() is false for both of them.
413  {
414  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
415  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
416  }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:427

◆ TypesSimilar()

static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inlinestatic

Definition at line 419 of file colpartition.h.

// True when the two types are equal, or when one is PT_FLOWING_TEXT and
// the other is PT_INLINE_EQUATION (in either order).
419  {
420  return (type1 == type2 ||
421  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
422  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
423  }

◆ upper_partners()

ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 197 of file colpartition.h.

// Returns a pointer to the upper_partners_ list member (not a copy), so
// callers can read or modify the list in place.
197  {
198  return &upper_partners_;
199  }

◆ VCoreOverlap()

int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 376 of file colpartition.h.

// Returns the smaller of the two median_top_ values minus the larger of
// the two median_bottom_ values. The result is not clamped, so it is
// negative when the two median ranges do not overlap. Returns 0 when
// either median_bottom_ equals INT32_MAX.
// NOTE(review): INT32_MAX presumably marks medians that have not been
// computed; confirm where median_bottom_ is initialised.
376  {
377  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
378  return 0;
379  }
380  return std::min(median_top_, other.median_top_) -
381  std::max(median_bottom_, other.median_bottom_);
382  }

◆ VOverlaps()

bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 371 of file colpartition.h.

// True when bounding_box_.y_gap(other.bounding_box_) is negative.
371  {
372  return bounding_box_.y_gap(other.bounding_box_) < 0;
373  }
int y_gap(const TBOX &box) const
Definition: rect.h:233

◆ VSignificantCoreOverlap()

bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 391 of file colpartition.h.

// True when VCoreOverlap(other) is more than one third of the smaller of
// the two median heights (median_top_ - median_bottom_). Returns false
// when either median_bottom_ equals INT32_MAX.
391  {
392  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
393  return false;
394  }
395  int overlap = VCoreOverlap(other);
396  int height = std::min(median_top_ - median_bottom_,
397  other.median_top_ - other.median_bottom_);
// Integer form of overlap > height / 3, avoiding division.
398  return overlap * 3 > height;
399  }
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:376

◆ WithinSameMargins()

bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 402 of file colpartition.h.

// Symmetric containment test: true when each partition's bounding box
// left() is at or right of the other's left margin, and each partition's
// bounding box right() is at or left of the other's right margin.
402  {
403  return left_margin_ <= other.bounding_box_.left() &&
404  bounding_box_.left() >= other.left_margin_ &&
405  bounding_box_.right() <= other.right_margin_ &&
406  right_margin_ >= other.bounding_box_.right();
407  }
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79

◆ XAtY()

int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 321 of file colpartition.h.

// Forwards (sort_key, y) to TabVector::XAtY together with this
// partition's vertical_ member and returns its result.
321  {
322  return TabVector::XAtY(vertical_, sort_key, y);
323  }
int XAtY(int y) const
Definition: tabvector.h:189

The documentation for this class was generated from the following files: