tesseract  5.0.0-alpha-619-ge9db
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()=default
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_height () const
 
void set_median_height (int height)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uint8_t * color1 ()
 
uint8_t * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.

Constructor & Destructor Documentation

◆ ColPartition() [1/2]

tesseract::ColPartition::ColPartition ( )
default

◆ ColPartition() [2/2]

tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD & vertical 
)
Parameters
blob_type: the blob_region_type_ of the blobs in this partition.
vertical: the direction of logical vertical on the possibly skewed image.

Definition at line 81 of file colpartition.cpp.

82  : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83  median_bottom_(INT32_MAX), median_top_(-INT32_MAX),
84  median_left_(INT32_MAX), median_right_(-INT32_MAX),
85  blob_type_(blob_type),
86  vertical_(vertical) {
87  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
88 }

◆ ~ColPartition()

tesseract::ColPartition::~ColPartition ( )

Definition at line 133 of file colpartition.cpp.

133  {
134  // Remove this as a partner of all partners, as we don't want them
135  // referring to a deleted object.
136  ColPartition_C_IT it(&upper_partners_);
137  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
138  it.data()->RemovePartner(false, this);
139  }
140  it.set_to_list(&lower_partners_);
141  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
142  it.data()->RemovePartner(true, this);
143  }
144 }

Member Function Documentation

◆ Absorb()

void tesseract::ColPartition::Absorb ( ColPartition * other,
WidthCallback  cb 
)

Definition at line 638 of file colpartition.cpp.

638  {
639  // The result has to either own all of the blobs or none of them.
640  // Verify the flag is consistent.
641  ASSERT_HOST(owns_blobs() == other->owns_blobs());
642  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
643  // should always be true when this is called. So there are no issues.
644  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
645  bounding_box_.bottom()) ||
646  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
647  other->bounding_box_.bottom())) {
648  tprintf("Merging:");
649  Print();
650  other->Print();
651  }
652 
653  // Update the special_blobs_densities_.
654  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
655  for (int type = 0; type < BSTT_COUNT; ++type) {
656  unsigned w1 = boxes_.length();
657  unsigned w2 = other->boxes_.length();
658  float new_val = special_blobs_densities_[type] * w1 +
659  other->special_blobs_densities_[type] * w2;
660  if (!w1 || !w2) {
661  ASSERT_HOST((w1 + w2) > 0);
662  special_blobs_densities_[type] = new_val / (w1 + w2);
663  }
664  }
665 
666  // Merge the two sorted lists.
667  BLOBNBOX_C_IT it(&boxes_);
668  BLOBNBOX_C_IT it2(&other->boxes_);
669  for (; !it2.empty(); it2.forward()) {
670  BLOBNBOX* bbox2 = it2.extract();
671  ColPartition* prev_owner = bbox2->owner();
672  if (prev_owner != other && prev_owner != nullptr) {
673  // A blob on other's list is owned by someone else; let them have it.
674  continue;
675  }
676  ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
677  if (prev_owner == other)
678  bbox2->set_owner(this);
679  it.add_to_end(bbox2);
680  }
681  left_margin_ = std::min(left_margin_, other->left_margin_);
682  right_margin_ = std::max(right_margin_, other->right_margin_);
683  if (other->left_key_ < left_key_) {
684  left_key_ = other->left_key_;
685  left_key_tab_ = other->left_key_tab_;
686  }
687  if (other->right_key_ > right_key_) {
688  right_key_ = other->right_key_;
689  right_key_tab_ = other->right_key_tab_;
690  }
691  // Combine the flow and blob_type in a sensible way.
692  // Dominant flows stay.
693  if (!DominatesInMerge(flow_, other->flow_)) {
694  flow_ = other->flow_;
695  blob_type_ = other->blob_type_;
696  }
697  SetBlobTypes();
698  if (IsVerticalType()) {
699  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
700  last_add_was_vertical_ = true;
701  } else {
702  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
703  last_add_was_vertical_ = false;
704  }
705  ComputeLimits();
706  // Fix partner lists. other is going away, so remove it as a
707  // partner of all its partners and add this in its place.
708  for (int upper = 0; upper < 2; ++upper) {
709  ColPartition_CLIST partners;
710  ColPartition_C_IT part_it(&partners);
711  part_it.add_list_after(upper ? &other->upper_partners_
712  : &other->lower_partners_);
713  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
714  ColPartition* partner = part_it.extract();
715  partner->RemovePartner(!upper, other);
716  partner->RemovePartner(!upper, this);
717  partner->AddPartner(!upper, this);
718  }
719  }
720  delete other;
721  if (cb != nullptr) {
722  SetColumnGoodness(cb);
723  }
724 }

◆ AddBox()

void tesseract::ColPartition::AddBox ( BLOBNBOX * box)

Definition at line 169 of file colpartition.cpp.

169  {
170  TBOX box = bbox->bounding_box();
171  // Update the partition limits.
172  if (boxes_.length() == 0) {
173  bounding_box_ = box;
174  } else {
175  bounding_box_ += box;
176  }
177 
178  if (IsVerticalType()) {
179  if (!last_add_was_vertical_) {
180  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
181  last_add_was_vertical_ = true;
182  }
183  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
184  } else {
185  if (last_add_was_vertical_) {
186  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
187  last_add_was_vertical_ = false;
188  }
189  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
190  }
191  if (!left_key_tab_)
192  left_key_ = BoxLeftKey();
193  if (!right_key_tab_)
194  right_key_ = BoxRightKey();
195  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
196  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
197  box.left(), box.bottom(), box.right(), box.top(),
198  bounding_box_.left(), bounding_box_.right());
199 }

◆ AddPartner()

void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition * partner 
)

Definition at line 603 of file colpartition.cpp.

603  {
604  if (upper) {
605  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
606  true, this);
607  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
608  } else {
609  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
610  true, this);
611  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
612  }
613 }

◆ AddToWorkingSet()

void tesseract::ColPartition::AddToWorkingSet ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1347 of file colpartition.cpp.

1350  {
1351  if (block_owned_)
1352  return; // Done it already.
1353  block_owned_ = true;
1354  WorkingPartSet_IT it(working_sets);
1355  // If there is an upper partner use its working_set_ directly.
1356  ColPartition* partner = SingletonPartner(true);
1357  if (partner != nullptr && partner->working_set_ != nullptr) {
1358  working_set_ = partner->working_set_;
1359  working_set_->AddPartition(this);
1360  return;
1361  }
1362  if (partner != nullptr && textord_debug_bugs) {
1363  tprintf("Partition with partner has no working set!:");
1364  Print();
1365  partner->Print();
1366  }
1367  // Search for the column that the left edge fits in.
1368  WorkingPartSet* work_set = nullptr;
1369  it.move_to_first();
1370  int col_index = 0;
1371  for (it.mark_cycle_pt(); !it.cycled_list() &&
1372  col_index != first_column_;
1373  it.forward(), ++col_index);
1374  if (textord_debug_tabfind >= 2) {
1375  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1376  Print();
1377  }
1378  if (it.cycled_list() && textord_debug_bugs) {
1379  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1380  }
1381  ASSERT_HOST(!it.cycled_list());
1382  work_set = it.data();
1383  // If last_column_ != first_column, then we need to scoop up all blocks
1384  // between here and the last_column_ and put back in work_set.
1385  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1386  // Find the column that the right edge falls in.
1387  BLOCK_LIST completed_blocks;
1388  TO_BLOCK_LIST to_blocks;
1389  for (; !it.cycled_list() && col_index <= last_column_;
1390  it.forward(), ++col_index) {
1391  WorkingPartSet* end_set = it.data();
1392  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1393  &completed_blocks, &to_blocks);
1394  }
1395  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1396  }
1397  working_set_ = work_set;
1398  work_set->AddPartition(this);
1399 }

◆ BiggestBox()

BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 215 of file colpartition.cpp.

215  {
216  BLOBNBOX* biggest = nullptr;
217  BLOBNBOX_C_IT bb_it(&boxes_);
218  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
219  BLOBNBOX* bbox = bb_it.data();
220  if (IsVerticalType()) {
221  if (biggest == nullptr ||
222  bbox->bounding_box().width() > biggest->bounding_box().width())
223  biggest = bbox;
224  } else {
225  if (biggest == nullptr ||
226  bbox->bounding_box().height() > biggest->bounding_box().height())
227  biggest = bbox;
228  }
229  }
230  return biggest;
231 }

◆ blob_type()

BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 148 of file colpartition.h.

148  {
149  return blob_type_;
150  }

◆ block_owned()

bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 205 of file colpartition.h.

205  {
206  return block_owned_;
207  }

◆ bottom_spacing()

int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 220 of file colpartition.h.

220  {
221  return bottom_spacing_;
222  }

◆ bounding_box()

const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 109 of file colpartition.h.

109  {
110  return bounding_box_;
111  }

◆ BoundsWithoutBox()

TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box)

Definition at line 234 of file colpartition.cpp.

234  {
235  TBOX result;
236  BLOBNBOX_C_IT bb_it(&boxes_);
237  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
238  if (box != bb_it.data()) {
239  result += bb_it.data()->bounding_box();
240  }
241  }
242  return result;
243 }

◆ BoxColor()

ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1771 of file colpartition.cpp.

1771  {
1772  if (type_ == PT_UNKNOWN)
1773  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1774  return POLY_BLOCK::ColorForPolyBlockType(type_);
1775 }

◆ boxes()

BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 187 of file colpartition.h.

187  {
188  return &boxes_;
189  }

◆ boxes_count()

int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 190 of file colpartition.h.

190  {
191  return boxes_.length();
192  }

◆ BoxLeftKey()

int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 332 of file colpartition.h.

332  {
333  return SortKey(bounding_box_.left(), MidY());
334  }

◆ BoxRightKey()

int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 336 of file colpartition.h.

336  {
337  return SortKey(bounding_box_.right(), MidY());
338  }

◆ ClaimBoxes()

void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 247 of file colpartition.cpp.

247  {
248  BLOBNBOX_C_IT bb_it(&boxes_);
249  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
250  BLOBNBOX* bblob = bb_it.data();
251  ColPartition* other = bblob->owner();
252  if (other == nullptr) {
253  // Normal case: ownership is available.
254  bblob->set_owner(this);
255  } else {
256  ASSERT_HOST(other == this);
257  }
258  }
259 }

◆ clear_table_type()

void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 239 of file colpartition.h.

239  {
240  if (type_ == PT_TABLE)
241  type_ = type_before_table_;
242  }

◆ color1()

uint8_t* tesseract::ColPartition::color1 ( )
inline

Definition at line 285 of file colpartition.h.

285  {
286  return color1_;
287  }

◆ color2()

uint8_t* tesseract::ColPartition::color2 ( )
inline

Definition at line 288 of file colpartition.h.

288  {
289  return color2_;
290  }

◆ column_set()

ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 214 of file colpartition.h.

214  {
215  return column_set_;
216  }

◆ ColumnContains()

bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 353 of file colpartition.h.

353  {
354  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355  }

◆ ColumnRange()

void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet * columns,
int *  first_col,
int *  last_col 
)

Definition at line 1056 of file colpartition.cpp.

1057  {
1058  int first_spanned_col = -1;
1059  ColumnSpanningType span_type =
1060  columns->SpanningType(resolution,
1061  bounding_box_.left(), bounding_box_.right(),
1062  std::min(bounding_box_.height(), bounding_box_.width()),
1063  MidY(), left_margin_, right_margin_,
1064  first_col, last_col,
1065  &first_spanned_col);
1066  type_ = PartitionType(span_type);
1067 }

◆ ColumnWidth()

int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 328 of file colpartition.h.

328  {
329  return KeyWidth(left_key_, right_key_);
330  }

◆ ComputeLimits()

void tesseract::ColPartition::ComputeLimits ( )

Definition at line 861 of file colpartition.cpp.

861  {
862  bounding_box_ = TBOX(); // Clear it
863  BLOBNBOX_C_IT it(&boxes_);
864  BLOBNBOX* bbox = nullptr;
865  int non_leader_count = 0;
866  if (it.empty()) {
867  bounding_box_.set_left(left_margin_);
868  bounding_box_.set_right(right_margin_);
869  bounding_box_.set_bottom(0);
870  bounding_box_.set_top(0);
871  } else {
872  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
873  bbox = it.data();
874  bounding_box_ += bbox->bounding_box();
875  if (bbox->flow() != BTFT_LEADER)
876  ++non_leader_count;
877  }
878  }
879  if (!left_key_tab_)
880  left_key_ = BoxLeftKey();
881  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
882  // TODO(rays) investigate the causes of these error messages, to find
883  // out if they are genuinely harmful, or just indicative of junk input.
884  tprintf("Computed left-illegal partition\n");
885  Print();
886  }
887  if (!right_key_tab_)
888  right_key_ = BoxRightKey();
889  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
890  tprintf("Computed right-illegal partition\n");
891  Print();
892  }
893  if (it.empty())
894  return;
895  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
896  blob_type() == BRT_POLYIMAGE) {
897  median_top_ = bounding_box_.top();
898  median_bottom_ = bounding_box_.bottom();
899  median_height_ = bounding_box_.height();
900  median_left_ = bounding_box_.left();
901  median_right_ = bounding_box_.right();
902  median_width_ = bounding_box_.width();
903  } else {
904  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
905  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
906  STATS height_stats(0, bounding_box_.height() + 1);
907  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
908  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
909  STATS width_stats(0, bounding_box_.width() + 1);
910  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
911  bbox = it.data();
912  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
913  const TBOX& box = bbox->bounding_box();
914  int area = box.area();
915  top_stats.add(box.top(), area);
916  bottom_stats.add(box.bottom(), area);
917  height_stats.add(box.height(), area);
918  left_stats.add(box.left(), area);
919  right_stats.add(box.right(), area);
920  width_stats.add(box.width(), area);
921  }
922  }
923  median_top_ = static_cast<int>(top_stats.median() + 0.5);
924  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
925  median_height_ = static_cast<int>(height_stats.median() + 0.5);
926  median_left_ = static_cast<int>(left_stats.median() + 0.5);
927  median_right_ = static_cast<int>(right_stats.median() + 0.5);
928  median_width_ = static_cast<int>(width_stats.median() + 0.5);
929  }
930 
931  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
932  tprintf("Made partition with bad right coords");
933  Print();
934  }
935  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
936  tprintf("Made partition with bad left coords");
937  Print();
938  }
939  // Fix partner lists. The bounding box has changed and partners are stored
940  // in bounding box order, so remove and reinsert this as a partner
941  // of all its partners.
942  for (int upper = 0; upper < 2; ++upper) {
943  ColPartition_CLIST partners;
944  ColPartition_C_IT part_it(&partners);
945  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
946  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
947  ColPartition* partner = part_it.extract();
948  partner->RemovePartner(!upper, this);
949  partner->AddPartner(!upper, this);
950  }
951  }
952  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
953  bounding_box_.bottom())) {
954  tprintf("Recomputed box for partition %p\n", this);
955  Print();
956  }
957 }

◆ ComputeSpecialBlobsDensity()

void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 582 of file colpartition.cpp.

582  {
583  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
584  if (boxes_.empty()) {
585  return;
586  }
587 
588  BLOBNBOX_C_IT blob_it(&boxes_);
589  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
590  BLOBNBOX* blob = blob_it.data();
591  BlobSpecialTextType type = blob->special_text_type();
592  special_blobs_densities_[type]++;
593  }
594 
595  for (float& special_blobs_density : special_blobs_densities_) {
596  special_blobs_density /= boxes_.length();
597  }
598 }

◆ ConfirmNoTabViolation()

bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other) const

Definition at line 413 of file colpartition.cpp.

413  {
414  if (bounding_box_.right() < other.bounding_box_.left() &&
415  bounding_box_.right() < other.LeftBlobRule())
416  return false;
417  if (other.bounding_box_.right() < bounding_box_.left() &&
418  other.bounding_box_.right() < LeftBlobRule())
419  return false;
420  if (bounding_box_.left() > other.bounding_box_.right() &&
421  bounding_box_.left() > other.RightBlobRule())
422  return false;
423  if (other.bounding_box_.left() > bounding_box_.right() &&
424  other.bounding_box_.left() > RightBlobRule())
425  return false;
426  return true;
427 }

◆ CopyButDontOwnBlobs()

ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1758 of file colpartition.cpp.

1758  {
1759  ColPartition* copy = ShallowCopy();
1760  copy->set_owns_blobs(false);
1761  BLOBNBOX_C_IT inserter(copy->boxes());
1762  BLOBNBOX_C_IT traverser(boxes());
1763  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1764  inserter.add_after_then_move(traverser.data());
1765  return copy;
1766 }

◆ CopyLeftTab()

void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 519 of file colpartition.cpp.

519  {
520  left_key_tab_ = take_box ? false : src.left_key_tab_;
521  if (left_key_tab_) {
522  left_key_ = src.left_key_;
523  } else {
524  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
525  left_key_ = BoxLeftKey();
526  }
527  if (left_margin_ > bounding_box_.left())
528  left_margin_ = src.left_margin_;
529 }

◆ CopyRightTab()

void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 532 of file colpartition.cpp.

532  {
533  right_key_tab_ = take_box ? false : src.right_key_tab_;
534  if (right_key_tab_) {
535  right_key_ = src.right_key_;
536  } else {
537  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
538  right_key_ = BoxRightKey();
539  }
540  if (right_margin_ < bounding_box_.right())
541  right_margin_ = src.right_margin_;
542 }

◆ CountOverlappingBoxes()

int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 960 of file colpartition.cpp.

960  {
961  BLOBNBOX_C_IT it(&boxes_);
962  int overlap_count = 0;
963  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
964  BLOBNBOX* bbox = it.data();
965  if (box.overlap(bbox->bounding_box()))
966  ++overlap_count;
967  }
968  return overlap_count;
969 }

◆ DeleteBoxes()

void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 305 of file colpartition.cpp.

305  {
306  // Although the boxes_ list is a C_LIST, in some cases it owns the
307  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
308  // and the BLOBNBOXes own the underlying C_BLOBs.
309  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
310  BLOBNBOX* bblob = bb_it.extract();
311  delete bblob->cblob();
312  delete bblob;
313  }
314 }

◆ desperately_merged()

bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 211 of file colpartition.h.

211  {
212  return desperately_merged_;
213  }

◆ DisownBoxes()

void tesseract::ColPartition::DisownBoxes ( )

Definition at line 263 of file colpartition.cpp.

263  {
264  BLOBNBOX_C_IT bb_it(&boxes_);
265  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
266  BLOBNBOX* bblob = bb_it.data();
267  ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
268  bblob->set_owner(nullptr);
269  }
270 }

◆ DisownBoxesNoAssert()

void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 276 of file colpartition.cpp.

276  {
277  BLOBNBOX_C_IT bb_it(&boxes_);
278  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
279  BLOBNBOX* bblob = bb_it.data();
280  if (bblob->owner() == this)
281  bblob->set_owner(nullptr);
282  }
283 }

◆ FakePartition()

ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 95 of file colpartition.cpp.

98  {
99  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
100  part->set_type(block_type);
101  part->set_flow(flow);
102  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
103  part->set_left_margin(box.left());
104  part->set_right_margin(box.right());
105  part->SetBlobTypes();
106  part->ComputeLimits();
107  part->ClaimBoxes();
108  return part;
109 }

◆ flow()

BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 154 of file colpartition.h.

154  {
155  return flow_;
156  }

◆ good_blob_score()

int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 160 of file colpartition.h.

160  {
161  return good_blob_score_;
162  }

◆ good_column()

bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 166 of file colpartition.h.

166  {
167  return good_column_;
168  }

◆ good_width()

bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 163 of file colpartition.h.

163  {
164  return good_width_;
165  }

◆ HasGoodBaseline()

bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1280 of file colpartition.cpp.

1280  {
1281  // Approximation of the baseline.
1282  DetLineFit linepoints;
1283  // Calculation of the mean height on this line segment. Note that these
1284  // variable names apply to the context of a horizontal line, and work
1285  // analogously, rather than literally in the case of a vertical line.
1286  int total_height = 0;
1287  int coverage = 0;
1288  int height_count = 0;
1289  int width = 0;
1290  BLOBNBOX_C_IT it(&boxes_);
1291  TBOX box(it.data()->bounding_box());
1292  // Accumulate points representing the baseline at the middle of each blob,
1293  // but add an additional point for each end of the line. This makes it
1294  // harder to fit a severe skew angle, as it is most likely not right.
1295  if (IsVerticalType()) {
1296  // For a vertical line, use the right side as the baseline.
1297  ICOORD first_pt(box.right(), box.bottom());
1298  // Use the bottom-right of the first (bottom) box, the top-right of the
1299  // last, and the middle-right of all others.
1300  linepoints.Add(first_pt);
1301  for (it.forward(); !it.at_last(); it.forward()) {
1302  BLOBNBOX* blob = it.data();
1303  box = blob->bounding_box();
1304  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1305  linepoints.Add(box_pt);
1306  total_height += box.width();
1307  coverage += box.height();
1308  ++height_count;
1309  }
1310  box = it.data()->bounding_box();
1311  ICOORD last_pt(box.right(), box.top());
1312  linepoints.Add(last_pt);
1313  width = last_pt.y() - first_pt.y();
1314 
1315  } else {
1316  // Horizontal lines use the bottom as the baseline.
1317  TBOX box(it.data()->bounding_box());
1318  // Use the bottom-left of the first box, the bottom-right of the last,
1319  // and the middle of all others.
1320  ICOORD first_pt(box.left(), box.bottom());
1321  linepoints.Add(first_pt);
1322  for (it.forward(); !it.at_last(); it.forward()) {
1323  BLOBNBOX* blob = it.data();
1324  box = blob->bounding_box();
1325  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1326  linepoints.Add(box_pt);
1327  total_height += box.height();
1328  coverage += box.width();
1329  ++height_count;
1330  }
1331  box = it.data()->bounding_box();
1332  ICOORD last_pt(box.right(), box.bottom());
1333  linepoints.Add(last_pt);
1334  width = last_pt.x() - first_pt.x();
1335  }
1336  // Maximum median error allowed to be a good text line.
1337  if (height_count == 0)
1338  return false;
1339  double max_error = kMaxBaselineError * total_height / height_count;
1340  ICOORD start_pt, end_pt;
1341  double error = linepoints.Fit(&start_pt, &end_pt);
1342  return error < max_error && coverage >= kMinBaselineCoverage * width;
1343 }

◆ HCoreOverlap()

int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 384 of file colpartition.h.

384  {
385  return std::min(median_right_, other.median_right_) -
386  std::max(median_left_, other.median_left_);
387  }

◆ HOverlaps()

bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 365 of file colpartition.h.

365  {
366  return bounding_box_.x_overlap(other.bounding_box_);
367  }

◆ inside_table_column()

bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 243 of file colpartition.h.

243  {
244  return inside_table_column_;
245  }

◆ IsEmpty()

bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 357 of file colpartition.h.

357  {
358  return boxes_.empty();
359  }

◆ IsHorizontalLine()

bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 459 of file colpartition.h.

459  {
460  return IsHorizontalType() && IsLineType();
461  }

◆ IsHorizontalType()

bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 445 of file colpartition.h.

445  {
446  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
447  }

◆ IsImageType()

bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 429 of file colpartition.h.

429  {
430  return PTIsImageType(type_);
431  }

◆ IsInSameColumnAs()

bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2175 of file colpartition.cpp.

2175  {
2176  // Overlap does not occur when last < part.first or first > part.last.
2177  // In other words, one is completely to the side of the other.
2178  // This is just DeMorgan's law applied to that so the function returns true.
2179  return (last_column_ >= part.first_column_) &&
2180  (first_column_ <= part.last_column_);
2181 }

◆ IsLeftOf()

bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 349 of file colpartition.h.

349  {
350  return bounding_box_.right() < other.bounding_box_.right();
351  }

◆ IsLegal()

bool tesseract::ColPartition::IsLegal ( )

Definition at line 342 of file colpartition.cpp.

342  {
343  if (bounding_box_.left() > bounding_box_.right()) {
344  if (textord_debug_bugs) {
345  tprintf("Bounding box invalid\n");
346  Print();
347  }
348  return false; // Bounding box invalid.
349  }
350  if (left_margin_ > bounding_box_.left() ||
351  right_margin_ < bounding_box_.right()) {
352  if (textord_debug_bugs) {
353  tprintf("Margins invalid\n");
354  Print();
355  }
356  return false; // Margins invalid.
357  }
358  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
359  if (textord_debug_bugs) {
360  tprintf("Key inside box: %d v %d or %d v %d\n",
361  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
362  Print();
363  }
364  return false; // Keys inside the box.
365  }
366  return true;
367 }

◆ IsLineType()

bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 425 of file colpartition.h.

425  {
426  return PTIsLineType(type_);
427  }

◆ IsPulloutType()

bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 437 of file colpartition.h.

437  {
438  return PTIsPulloutType(type_);
439  }

◆ IsSingleton()

bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 361 of file colpartition.h.

361  {
362  return boxes_.singleton();
363  }

◆ IsTextType()

bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 433 of file colpartition.h.

433  {
434  return PTIsTextType(type_);
435  }

◆ IsUnMergeableType()

bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 449 of file colpartition.h.

449  {
450  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
451  }

◆ IsVerticalLine()

bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 454 of file colpartition.h.

454  {
455  return IsVerticalType() && IsLineType();
456  }

◆ IsVerticalType()

bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 441 of file colpartition.h.

441  {
442  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
443  }

◆ KeyWidth()

int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 324 of file colpartition.h.

324  {
325  return (right_key - left_key) / vertical_.y();
326  }

◆ left_key()

int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 172 of file colpartition.h.

172  {
173  return left_key_;
174  }

◆ left_key_tab()

bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 169 of file colpartition.h.

169  {
170  return left_key_tab_;
171  }

◆ left_margin()

int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 112 of file colpartition.h.

112  {
113  return left_margin_;
114  }

◆ LeftAtY()

int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 340 of file colpartition.h.

340  {
341  return XAtY(left_key_, y);
342  }

◆ LeftBlobRule()

int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 545 of file colpartition.cpp.

545  {
546  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
547  return it.data()->left_rule();
548 }

◆ LineSpacingBlocks()

void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1407 of file colpartition.cpp.

1412  {
1413  int page_height = tright.y() - bleft.y();
1414  // Compute the initial spacing stats.
1415  ColPartition_IT it(block_parts);
1416  int part_count = 0;
1417  int max_line_height = 0;
1418 
1419  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1420  // because their line spacing with their neighbors may be smaller and their
1421  // height may be slightly larger.
1422 
1423  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1424  ColPartition* part = it.data();
1425  ASSERT_HOST(!part->boxes()->empty());
1426  STATS side_steps(0, part->bounding_box().height());
1427  if (part->bounding_box().height() > max_line_height)
1428  max_line_height = part->bounding_box().height();
1429  BLOBNBOX_C_IT blob_it(part->boxes());
1430  int prev_bottom = blob_it.data()->bounding_box().bottom();
1431  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1432  BLOBNBOX* blob = blob_it.data();
1433  int bottom = blob->bounding_box().bottom();
1434  int step = bottom - prev_bottom;
1435  if (step < 0)
1436  step = -step;
1437  side_steps.add(step, 1);
1438  prev_bottom = bottom;
1439  }
1440  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1441  if (!it.at_last()) {
1442  ColPartition* next_part = it.data_relative(1);
1443  part->set_bottom_spacing(part->median_bottom() -
1444  next_part->median_bottom());
1445  part->set_top_spacing(part->median_top() - next_part->median_top());
1446  } else {
1447  part->set_bottom_spacing(page_height);
1448  part->set_top_spacing(page_height);
1449  }
1450  if (textord_debug_tabfind) {
1451  part->Print();
1452  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1453  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1454  }
1455  ++part_count;
1456  }
1457  if (part_count == 0)
1458  return;
1459 
1460  SmoothSpacings(resolution, page_height, block_parts);
1461 
1462  // Move the partitions into individual block lists and make the blocks.
1463  BLOCK_IT block_it(completed_blocks);
1464  TO_BLOCK_IT to_block_it(to_blocks);
1465  ColPartition_LIST spacing_parts;
1466  ColPartition_IT sp_block_it(&spacing_parts);
1467  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1468  for (it.mark_cycle_pt(); !it.empty();) {
1469  ColPartition* part = it.extract();
1470  sp_block_it.add_to_end(part);
1471  it.forward();
1472  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1473  !part->SpacingsEqual(*it.data(), resolution)) {
1474  // There is a spacing boundary. Check to see if it.data() belongs
1475  // better in the current block or the next one.
1476  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1477  ColPartition* next_part = it.data();
1478  // If there is a size match one-way, then the middle line goes with
1479  // its matched size, otherwise it goes with the smallest spacing.
1480  ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1481  if (textord_debug_tabfind) {
1482  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1483  " sizes %d %d %d\n",
1484  part->top_spacing(), part->bottom_spacing(),
1485  next_part->top_spacing(), next_part->bottom_spacing(),
1486  part->median_height(), next_part->median_height(),
1487  third_part != nullptr ? third_part->median_height() : 0);
1488  }
1489  // We can only consider adding the next line to the block if the sizes
1490  // match and the lines are close enough for their size.
1491  if (part->SizesSimilar(*next_part) &&
1492  next_part->median_height() * kMaxSameBlockLineSpacing >
1493  part->bottom_spacing() &&
1494  part->median_height() * kMaxSameBlockLineSpacing >
1495  part->top_spacing()) {
1496  // Even now, we can only add it as long as the third line doesn't
1497  // match in the same way and have a smaller bottom spacing.
1498  if (third_part == nullptr ||
1499  !next_part->SizesSimilar(*third_part) ||
1500  third_part->median_height() * kMaxSameBlockLineSpacing <=
1501  next_part->bottom_spacing() ||
1502  next_part->median_height() * kMaxSameBlockLineSpacing <=
1503  next_part->top_spacing() ||
1504  next_part->bottom_spacing() > part->bottom_spacing()) {
1505  // Add to the current block.
1506  sp_block_it.add_to_end(it.extract());
1507  it.forward();
1508  if (textord_debug_tabfind) {
1509  tprintf("Added line to current block.\n");
1510  }
1511  }
1512  }
1513  }
1514  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1515  if (to_block != nullptr) {
1516  to_block_it.add_to_end(to_block);
1517  block_it.add_to_end(to_block->block);
1518  }
1519  sp_block_it.set_to_list(&spacing_parts);
1520  } else {
1521  if (textord_debug_tabfind && !it.empty()) {
1522  ColPartition* next_part = it.data();
1523  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1524  part->top_spacing(), part->bottom_spacing(),
1525  next_part->top_spacing(), next_part->bottom_spacing(),
1526  part->median_height(), next_part->median_height());
1527  }
1528  }
1529  }
1530 }

◆ lower_partners()

ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 199 of file colpartition.h.

199  {
200  return &lower_partners_;
201  }

◆ MakeBigPartition()

ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 116 of file colpartition.cpp.

117  {
118  box->set_owner(nullptr);
119  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
120  single->set_flow(BTFT_NONE);
121  single->AddBox(box);
122  single->ComputeLimits();
123  single->ClaimBoxes();
124  single->SetBlobTypes();
125  single->set_block_owned(true);
126  if (big_part_list != nullptr) {
127  ColPartition_IT part_it(big_part_list);
128  part_it.add_to_end(single);
129  }
130  return single;
131 }

◆ MakeBlock()

TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1623 of file colpartition.cpp.

1625  {
1626  if (block_parts->empty())
1627  return nullptr; // Nothing to do.
1628  // If the block_parts are not in reading order, then it will make an invalid
1629  // block polygon and bounding_box, so sort by bounding box now just to make
1630  // sure.
1631  block_parts->sort(&ColPartition::SortByBBox);
1632  ColPartition_IT it(block_parts);
1633  ColPartition* part = it.data();
1634  PolyBlockType type = part->type();
1635  if (type == PT_VERTICAL_TEXT)
1636  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1637  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1638  // put the average spacing in each partition, so we can just take the
1639  // linespacing from the first partition.
1640  int line_spacing = part->bottom_spacing();
1641  if (line_spacing < part->median_height())
1642  line_spacing = part->bounding_box().height();
1643  ICOORDELT_LIST vertices;
1644  ICOORDELT_IT vert_it(&vertices);
1645  ICOORD start, end;
1646  int min_x = INT32_MAX;
1647  int max_x = -INT32_MAX;
1648  int min_y = INT32_MAX;
1649  int max_y = -INT32_MAX;
1650  int iteration = 0;
1651  do {
1652  if (iteration == 0)
1653  ColPartition::LeftEdgeRun(&it, &start, &end);
1654  else
1655  ColPartition::RightEdgeRun(&it, &start, &end);
1656  ClipCoord(bleft, tright, &start);
1657  ClipCoord(bleft, tright, &end);
1658  vert_it.add_after_then_move(new ICOORDELT(start));
1659  vert_it.add_after_then_move(new ICOORDELT(end));
1660  UpdateRange(start.x(), &min_x, &max_x);
1661  UpdateRange(end.x(), &min_x, &max_x);
1662  UpdateRange(start.y(), &min_y, &max_y);
1663  UpdateRange(end.y(), &min_y, &max_y);
1664  if ((iteration == 0 && it.at_first()) ||
1665  (iteration == 1 && it.at_last())) {
1666  ++iteration;
1667  it.move_to_last();
1668  }
1669  } while (iteration < 2);
1670  if (textord_debug_tabfind)
1671  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1672  min_x, min_y, max_x, max_y);
1673  auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1674  block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1675  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1676 }

◆ MakeLinePartition()

ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 148 of file colpartition.cpp.

151  {
152  auto* part = new ColPartition(blob_type, vertical);
153  part->bounding_box_ = TBOX(left, bottom, right, top);
154  part->median_bottom_ = bottom;
155  part->median_top_ = top;
156  part->median_height_ = top - bottom;
157  part->median_left_ = left;
158  part->median_right_ = right;
159  part->median_width_ = right - left;
160  part->left_key_ = part->BoxLeftKey();
161  part->right_key_ = part->BoxRightKey();
162  return part;
163 }

◆ MakeToRow()

TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1706 of file colpartition.cpp.

1706  {
1707  BLOBNBOX_C_IT blob_it(&boxes_);
1708  TO_ROW* row = nullptr;
1709  int line_size = IsVerticalType() ? median_width_ : median_height_;
1710  // Add all the blobs to a single TO_ROW.
1711  for (; !blob_it.empty(); blob_it.forward()) {
1712  BLOBNBOX* blob = blob_it.extract();
1713 // blob->compute_bounding_box();
1714  int top = blob->bounding_box().top();
1715  int bottom = blob->bounding_box().bottom();
1716  if (row == nullptr) {
1717  row = new TO_ROW(blob, static_cast<float>(top),
1718  static_cast<float>(bottom),
1719  static_cast<float>(line_size));
1720  } else {
1721  row->add_blob(blob, static_cast<float>(top),
1722  static_cast<float>(bottom),
1723  static_cast<float>(line_size));
1724  }
1725  }
1726  return row;
1727 }

◆ MakeVerticalTextBlock()

TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1680 of file colpartition.cpp.

1683  {
1684  if (block_parts->empty())
1685  return nullptr; // Nothing to do.
1686  ColPartition_IT it(block_parts);
1687  ColPartition* part = it.data();
1688  TBOX block_box = part->bounding_box();
1689  int line_spacing = block_box.width();
1690  PolyBlockType type = it.data()->type();
1691  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1692  block_box += it.data()->bounding_box();
1693  }
1694  if (textord_debug_tabfind) {
1695  tprintf("Making block at:");
1696  block_box.print();
1697  }
1698  auto* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1699  block_box.right(), block_box.top());
1700  block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1701  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1702 }

◆ MarkAsLeaderIfMonospaced()

bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1083 of file colpartition.cpp.

1083  {
1084  bool result = false;
1085  // Gather statistics on the gaps between blobs and the widths of the blobs.
1086  int part_width = bounding_box_.width();
1087  STATS gap_stats(0, part_width);
1088  STATS width_stats(0, part_width);
1089  BLOBNBOX_C_IT it(&boxes_);
1090  BLOBNBOX* prev_blob = it.data();
1091  prev_blob->set_flow(BTFT_NEIGHBOURS);
1092  width_stats.add(prev_blob->bounding_box().width(), 1);
1093  int blob_count = 1;
1094  for (it.forward(); !it.at_first(); it.forward()) {
1095  BLOBNBOX* blob = it.data();
1096  int left = blob->bounding_box().left();
1097  int right = blob->bounding_box().right();
1098  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1099  width_stats.add(right - left, 1);
1100  blob->set_flow(BTFT_NEIGHBOURS);
1101  prev_blob = blob;
1102  ++blob_count;
1103  }
1104  double median_gap = gap_stats.median();
1105  double median_width = width_stats.median();
1106  double max_width = std::max(median_gap, median_width);
1107  double min_width = std::min(median_gap, median_width);
1108  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1109  if (textord_debug_tabfind >= 4) {
1110  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1111  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1112  min_width * kMaxLeaderGapFractionOfMin);
1113  }
1114  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1115  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1116  blob_count >= kMinLeaderCount) {
1117  // This is stable enough to be called a leader, so check the widths.
1118  // Since leader dashes can join, run a dp cutting algorithm and go
1119  // on the cost.
1120  int offset = static_cast<int>(ceil(gap_iqr * 2));
1121  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1122  int max_step = min_step + offset;
1123  min_step -= offset;
1124  // Pad the buffer with min_step/2 on each end.
1125  int part_left = bounding_box_.left() - min_step / 2;
1126  part_width += min_step;
1127  auto* projection = new DPPoint[part_width];
1128  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1129  BLOBNBOX* blob = it.data();
1130  int left = blob->bounding_box().left();
1131  int right = blob->bounding_box().right();
1132  int height = blob->bounding_box().height();
1133  for (int x = left; x < right; ++x) {
1134  projection[left - part_left].AddLocalCost(height);
1135  }
1136  }
1137  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1138  &DPPoint::CostWithVariance,
1139  part_width, projection);
1140  if (best_end != nullptr && best_end->total_cost() < blob_count) {
1141  // Good enough. Call it a leader.
1142  result = true;
1143  bool modified_blob_list = false;
1144  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1145  BLOBNBOX* blob = it.data();
1146  // If the first or last blob is spaced too much, don't mark it.
1147  if (it.at_first()) {
1148  int gap = it.data_relative(1)->bounding_box().left() -
1149  blob->bounding_box().right();
1150  if (blob->bounding_box().width() + gap > max_step) {
1151  it.extract();
1152  modified_blob_list = true;
1153  continue;
1154  }
1155  }
1156  if (it.at_last()) {
1157  int gap = blob->bounding_box().left() -
1158  it.data_relative(-1)->bounding_box().right();
1159  if (blob->bounding_box().width() + gap > max_step) {
1160  it.extract();
1161  modified_blob_list = true;
1162  break;
1163  }
1164  }
1165  blob->set_region_type(BRT_TEXT);
1166  blob->set_flow(BTFT_LEADER);
1167  }
1168  if (modified_blob_list) ComputeLimits();
1169  blob_type_ = BRT_TEXT;
1170  flow_ = BTFT_LEADER;
1171  } else if (textord_debug_tabfind) {
1172  if (best_end == nullptr) {
1173  tprintf("No path\n");
1174  } else {
1175  tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1176  blob_count);
1177  }
1178  }
1179  delete [] projection;
1180  }
1181  return result;
1182 }

◆ MatchingColumns()

bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 370 of file colpartition.cpp.

370  {
371  int y = (MidY() + other.MidY()) / 2;
372  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
373  LeftAtY(y) / kColumnWidthFactor, 1))
374  return false;
375  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
376  RightAtY(y) / kColumnWidthFactor, 1))
377  return false;
378  return true;
379 }

◆ MatchingSizes()

bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 405 of file colpartition.cpp.

405  {
406  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
407  return !TabFind::DifferentSizes(median_width_, other.median_width_);
408  else
409  return !TabFind::DifferentSizes(median_height_, other.median_height_);
410 }

◆ MatchingStrokeWidth()

bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 430 of file colpartition.cpp.

432  {
433  int match_count = 0;
434  int nonmatch_count = 0;
435  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
436  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
437  box_it.mark_cycle_pt();
438  other_it.mark_cycle_pt();
439  while (!box_it.cycled_list() && !other_it.cycled_list()) {
440  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
441  fractional_tolerance,
442  constant_tolerance))
443  ++match_count;
444  else
445  ++nonmatch_count;
446  box_it.forward();
447  other_it.forward();
448  }
449  return match_count > nonmatch_count;
450 }

◆ MatchingTextColor()

bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 382 of file colpartition.cpp.

382  {
383  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
384  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
385  return false; // Too noisy.
386 
387  // Colors must match for other to count.
388  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
389  other.color2_,
390  color1_);
391  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
392  other.color2_,
393  color2_);
394  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
395  other.color1_);
396  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
397  other.color2_);
398 // All 4 distances must be small enough.
399  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
400  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
401 }

◆ median_bottom()

int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 127 of file colpartition.h.

127  {
128  return median_bottom_;
129  }

◆ median_height()

int tesseract::ColPartition::median_height ( ) const
inline

Definition at line 136 of file colpartition.h.

136  {
137  return median_height_;
138  }

◆ median_left()

int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 130 of file colpartition.h.

130  {
131  return median_left_;
132  }

◆ median_right()

int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 133 of file colpartition.h.

133  {
134  return median_right_;
135  }

◆ median_top()

int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 124 of file colpartition.h.

124  {
125  return median_top_;
126  }

◆ median_width()

int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 142 of file colpartition.h.

142  {
143  return median_width_;
144  }

◆ MedianY()

int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 308 of file colpartition.h.

308  {
309  return (median_top_ + median_bottom_) / 2;
310  }

◆ MidX()

int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 312 of file colpartition.h.

312  {
313  return (bounding_box_.left() + bounding_box_.right()) / 2;
314  }

◆ MidY()

int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 304 of file colpartition.h.

304  {
305  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306  }

◆ nearest_neighbor_above()

ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 249 of file colpartition.h.

249  {
250  return nearest_neighbor_above_;
251  }

◆ nearest_neighbor_below()

ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 255 of file colpartition.h.

255  {
256  return nearest_neighbor_below_;
257  }

◆ OKDiacriticMerge()

bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 458 of file colpartition.cpp.

459  {
460  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
461  int min_top = INT32_MAX;
462  int max_bottom = -INT32_MAX;
463  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
464  BLOBNBOX* blob = it.data();
465  if (!blob->IsDiacritic()) {
466  if (debug) {
467  tprintf("Blob is not a diacritic:");
468  blob->bounding_box().print();
469  }
470  return false; // All blobs must have diacritic bases.
471  }
472  if (blob->base_char_top() < min_top)
473  min_top = blob->base_char_top();
474  if (blob->base_char_bottom() > max_bottom)
475  max_bottom = blob->base_char_bottom();
476  }
477  // If the intersection of all vertical ranges of all base characters
478  // overlaps the median range of this, then it is OK.
479  bool result = min_top > candidate.median_bottom_ &&
480  max_bottom < candidate.median_top_;
481  if (debug) {
482  if (result)
483  tprintf("OKDiacritic!\n");
484  else
485  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
486  max_bottom, min_top, median_bottom_, median_top_);
487  }
488  return result;
489 }

◆ OKMergeOverlap()

bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 736 of file colpartition.cpp.

738  {
739  // Vertical partitions are not allowed to be involved.
740  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
741  if (debug)
742  tprintf("Vertical partition\n");
743  return false;
744  }
745  // The merging partitions must strongly overlap each other.
746  if (!merge1.VSignificantCoreOverlap(merge2)) {
747  if (debug)
748  tprintf("Voverlap %d (%d)\n",
749  merge1.VCoreOverlap(merge2),
750  merge1.VSignificantCoreOverlap(merge2));
751  return false;
752  }
753  // The merged box must not overlap the median bounds of this.
754  TBOX merged_box(merge1.bounding_box());
755  merged_box += merge2.bounding_box();
756  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
757  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
758  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
759  if (debug)
760  tprintf("Excessive box overlap\n");
761  return false;
762  }
763  // Looks OK!
764  return true;
765 }

◆ OverlapSplitBlob()

BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 769 of file colpartition.cpp.

769  {
770  if (boxes_.empty() || boxes_.singleton())
771  return nullptr;
772  BLOBNBOX_C_IT it(&boxes_);
773  TBOX left_box(it.data()->bounding_box());
774  for (it.forward(); !it.at_first(); it.forward()) {
775  BLOBNBOX* bbox = it.data();
776  left_box += bbox->bounding_box();
777  if (left_box.overlap(box))
778  return bbox;
779  }
780  return nullptr;
781 }

◆ owns_blobs()

bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 291 of file colpartition.h.

291  {
292  return owns_blobs_;
293  }

◆ PartitionType()

PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1006 of file colpartition.cpp.

1006  {
1007  if (flow == CST_NOISE) {
1008  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1009  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1010  return PT_NOISE;
1011  flow = CST_FLOWING;
1012  }
1013 
1014  switch (blob_type_) {
1015  case BRT_NOISE:
1016  return PT_NOISE;
1017  case BRT_HLINE:
1018  return PT_HORZ_LINE;
1019  case BRT_VLINE:
1020  return PT_VERT_LINE;
1021  case BRT_RECTIMAGE:
1022  case BRT_POLYIMAGE:
1023  switch (flow) {
1024  case CST_FLOWING:
1025  return PT_FLOWING_IMAGE;
1026  case CST_HEADING:
1027  return PT_HEADING_IMAGE;
1028  case CST_PULLOUT:
1029  return PT_PULLOUT_IMAGE;
1030  default:
1031  ASSERT_HOST(!"Undefined flow type for image!");
1032  }
1033  break;
1034  case BRT_VERT_TEXT:
1035  return PT_VERTICAL_TEXT;
1036  case BRT_TEXT:
1037  case BRT_UNKNOWN:
1038  default:
1039  switch (flow) {
1040  case CST_FLOWING:
1041  return PT_FLOWING_TEXT;
1042  case CST_HEADING:
1043  return PT_HEADING_TEXT;
1044  case CST_PULLOUT:
1045  return PT_PULLOUT_TEXT;
1046  default:
1047  ASSERT_HOST(!"Undefined flow type for text!");
1048  }
1049  }
1050  ASSERT_HOST(!"Should never get here!");
1051  return PT_NOISE;
1052 }

◆ Print()

void tesseract::ColPartition::Print ( ) const

Definition at line 1782 of file colpartition.cpp.

1782  {
1783  int y = MidY();
1784  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1785  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1786  " ts=%d bs=%d ls=%d rs=%d\n",
1787  boxes_.empty() ? 'E' : ' ',
1788  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1789  bounding_box_.left(), median_left_,
1790  bounding_box_.bottom(), median_bottom_,
1791  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1792  right_margin_, median_right_, bounding_box_.top(), median_top_,
1793  good_width_, good_column_, type_,
1794  kBlobTypes[blob_type_], flow_,
1795  first_column_, last_column_, boxes_.length(),
1796  space_above_, space_below_, space_to_left_, space_to_right_);
1797 }

◆ PrintColors()

void tesseract::ColPartition::PrintColors ( )

Definition at line 1800 of file colpartition.cpp.

1800  {
1801  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1802  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1803  color1_[L_ALPHA_CHANNEL],
1804  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1805 }

◆ RefinePartners()

void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid * grid 
)

Definition at line 1877 of file colpartition.cpp.

1878  {
1879  if (TypesSimilar(type_, type)) {
1880  RefinePartnersInternal(true, get_desperate, grid);
1881  RefinePartnersInternal(false, get_desperate, grid);
1882  } else if (type == PT_COUNT) {
1883  // This is the final pass. Make sure only the correctly typed
1884  // partners survive, however many there are.
1885  RefinePartnersByType(true, &upper_partners_);
1886  RefinePartnersByType(false, &lower_partners_);
1887  // It is possible for a merge to have given a partition multiple
1888  // partners again, so the last resort is to use overlap which is
1889  // guaranteed to leave at most one partner left.
1890  if (!upper_partners_.empty() && !upper_partners_.singleton())
1891  RefinePartnersByOverlap(true, &upper_partners_);
1892  if (!lower_partners_.empty() && !lower_partners_.singleton())
1893  RefinePartnersByOverlap(false, &lower_partners_);
1894  }
1895 }

◆ ReflectInYAxis()

void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 320 of file colpartition.cpp.

320  {
321  BLOBNBOX_CLIST reversed_boxes;
322  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
323  // Reverse the order of the boxes_.
324  BLOBNBOX_C_IT bb_it(&boxes_);
325  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
326  reversed_it.add_before_then_move(bb_it.extract());
327  }
328  bb_it.add_list_after(&reversed_boxes);
329  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
330  int tmp = left_margin_;
331  left_margin_ = -right_margin_;
332  right_margin_ = -tmp;
333  ComputeLimits();
334 }

◆ ReleaseNonLeaderBoxes()

bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 289 of file colpartition.cpp.

289  {
290  BLOBNBOX_C_IT bb_it(&boxes_);
291  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
292  BLOBNBOX* bblob = bb_it.data();
293  if (bblob->flow() != BTFT_LEADER) {
294  if (bblob->owner() == this) bblob->set_owner(nullptr);
295  bb_it.extract();
296  }
297  }
298  if (bb_it.empty()) return false;
299  flow_ = BTFT_LEADER;
300  ComputeLimits();
301  return true;
302 }

◆ RemoveBox()

void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 202 of file colpartition.cpp.

202  {
203  BLOBNBOX_C_IT bb_it(&boxes_);
204  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
205  if (box == bb_it.data()) {
206  bb_it.extract();
207  ComputeLimits();
208  return;
209  }
210  }
211 }

◆ RemovePartner()

void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 618 of file colpartition.cpp.

618  {
619  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
620  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
621  if (it.data() == partner) {
622  it.extract();
623  break;
624  }
625  }
626 }

◆ right_key()

int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 178 of file colpartition.h.

178  {
179  return right_key_;
180  }

◆ right_key_tab()

bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 175 of file colpartition.h.

175  {
176  return right_key_tab_;
177  }

◆ right_margin()

int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 118 of file colpartition.h.

118  {
119  return right_margin_;
120  }

◆ RightAtY()

int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 344 of file colpartition.h.

344  {
345  return XAtY(right_key_, y);
346  }

◆ RightBlobRule()

int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 550 of file colpartition.cpp.

550  {
551  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
552  it.move_to_last();
553  return it.data()->right_rule();
554 }

◆ set_blob_type()

void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 151 of file colpartition.h.

151  {
152  blob_type_ = t;
153  }

◆ set_block_owned()

void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 208 of file colpartition.h.

208  {
209  block_owned_ = owned;
210  }

◆ set_bottom_spacing()

void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 223 of file colpartition.h.

223  {
224  bottom_spacing_ = spacing;
225  }

◆ set_first_column()

void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 731 of file colpartition.h.

731  {
732  first_column_ = column;
733  }

◆ set_flow()

void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 157 of file colpartition.h.

157  {
158  flow_ = f;
159  }

◆ set_inside_table_column()

void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 246 of file colpartition.h.

246  {
247  inside_table_column_ = val;
248  }

◆ set_last_column()

void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 734 of file colpartition.h.

734  {
735  last_column_ = column;
736  }

◆ set_left_margin()

void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 115 of file colpartition.h.

115  {
116  left_margin_ = margin;
117  }

◆ set_median_height()

void tesseract::ColPartition::set_median_height ( int  height)
inline

Definition at line 139 of file colpartition.h.

139  {
140  median_height_ = height;
141  }

◆ set_median_width()

void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 145 of file colpartition.h.

145  {
146  median_width_ = width;
147  }

◆ set_nearest_neighbor_above()

void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 252 of file colpartition.h.

252  {
253  nearest_neighbor_above_ = part;
254  }

◆ set_nearest_neighbor_below()

void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 258 of file colpartition.h.

258  {
259  nearest_neighbor_below_ = part;
260  }

◆ set_owns_blobs()

void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 294 of file colpartition.h.

294  {
295  // Do NOT change ownership flag when there are blobs in the list.
296  // Immediately set the ownership flag when creating copies.
297  ASSERT_HOST(boxes_.empty());
298  owns_blobs_ = owns_blobs;
299  }

◆ set_right_margin()

void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 121 of file colpartition.h.

121  {
122  right_margin_ = margin;
123  }

◆ set_side_step()

void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 217 of file colpartition.h.

217  {
218  side_step_ = step;
219  }

◆ set_space_above()

void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 264 of file colpartition.h.

264  {
265  space_above_ = space;
266  }

◆ set_space_below()

void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 270 of file colpartition.h.

270  {
271  space_below_ = space;
272  }

◆ set_space_to_left()

void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 276 of file colpartition.h.

276  {
277  space_to_left_ = space;
278  }

◆ set_space_to_right()

void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 282 of file colpartition.h.

282  {
283  space_to_right_ = space;
284  }

◆ set_table_type()

void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 233 of file colpartition.h.

233  {
234  if (type_ != PT_TABLE) {
235  type_before_table_ = type_;
236  type_ = PT_TABLE;
237  }
238  }

◆ set_top_spacing()

void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 229 of file colpartition.h.

229  {
230  top_spacing_ = spacing;
231  }

◆ set_type()

void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 184 of file colpartition.h.

184  {
185  type_ = t;
186  }

◆ set_vertical()

void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 193 of file colpartition.h.

193  {
194  vertical_ = v;
195  }

◆ set_working_set()

void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 202 of file colpartition.h.

202  {
203  working_set_ = working_set;
204  }

◆ SetBlobTypes()

void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1265 of file colpartition.cpp.

1265  {
1266  if (!owns_blobs())
1267  return;
1268  BLOBNBOX_C_IT it(&boxes_);
1269  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1270  BLOBNBOX* blob = it.data();
1271  if (blob->flow() != BTFT_LEADER)
1272  blob->set_flow(flow_);
1273  blob->set_region_type(blob_type_);
1274  ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1275  }
1276 }

◆ SetColumnGoodness()

void tesseract::ColPartition::SetColumnGoodness ( WidthCallback  cb)

Definition at line 1070 of file colpartition.cpp.

1070  {
1071  int y = MidY();
1072  int width = RightAtY(y) - LeftAtY(y);
1073  good_width_ = cb(width);
1074  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1075 }

◆ SetLeftTab()

void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 494 of file colpartition.cpp.

494  {
495  if (tab_vector != nullptr) {
496  left_key_ = tab_vector->sort_key();
497  left_key_tab_ = left_key_ <= BoxLeftKey();
498  } else {
499  left_key_tab_ = false;
500  }
501  if (!left_key_tab_)
502  left_key_ = BoxLeftKey();
503 }

◆ SetPartitionType()

void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 973 of file colpartition.cpp.

973  {
974  int first_spanned_col = -1;
975  ColumnSpanningType span_type =
976  columns->SpanningType(resolution,
977  bounding_box_.left(), bounding_box_.right(),
978  std::min(bounding_box_.height(), bounding_box_.width()),
979  MidY(), left_margin_, right_margin_,
980  &first_column_, &last_column_,
981  &first_spanned_col);
982  column_set_ = columns;
983  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
984  !IsLineType()) {
985  // Unequal columns may indicate that the pullout spans one of the columns
986  // it lies in, so force it to be allocated to just that column.
987  if (first_spanned_col >= 0) {
988  first_column_ = first_spanned_col;
989  last_column_ = first_spanned_col;
990  } else {
991  if ((first_column_ & 1) == 0)
992  last_column_ = first_column_;
993  else if ((last_column_ & 1) == 0)
994  first_column_ = last_column_;
995  else
996  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
997  }
998  }
999  type_ = PartitionType(span_type);
1000 }

◆ SetRegionAndFlowTypesFromProjectionValue()

void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1191 of file colpartition.cpp.

1191  {
1192  int blob_count = 0; // Total # blobs.
1193  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1194  int noisy_count = 0; // Total # neighbours marked as noise.
1195  int hline_count = 0;
1196  int vline_count = 0;
1197  BLOBNBOX_C_IT it(&boxes_);
1198  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1199  BLOBNBOX* blob = it.data();
1200  ++blob_count;
1201  noisy_count += blob->NoisyNeighbours();
1202  good_blob_score_ += blob->GoodTextBlob();
1203  if (blob->region_type() == BRT_HLINE) ++hline_count;
1204  if (blob->region_type() == BRT_VLINE) ++vline_count;
1205  }
1206  flow_ = BTFT_NEIGHBOURS;
1207  blob_type_ = BRT_UNKNOWN;
1208  if (hline_count > vline_count) {
1209  flow_ = BTFT_NONE;
1210  blob_type_ = BRT_HLINE;
1211  } else if (vline_count > hline_count) {
1212  flow_ = BTFT_NONE;
1213  blob_type_ = BRT_VLINE;
1214  } else if (value < -1 || 1 < value) {
1215  int long_side;
1216  int short_side;
1217  if (value > 0) {
1218  long_side = bounding_box_.width();
1219  short_side = bounding_box_.height();
1220  blob_type_ = BRT_TEXT;
1221  } else {
1222  long_side = bounding_box_.height();
1223  short_side = bounding_box_.width();
1224  blob_type_ = BRT_VERT_TEXT;
1225  }
1226  // We will combine the old metrics using aspect ratio and blob counts
1227  // with the input value by allowing a strong indication to flip the
1228  // STRONG_CHAIN/CHAIN flow values.
1229  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1230  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1231  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1232  if (abs(value) >= kMinStrongTextValue)
1233  flow_ = BTFT_STRONG_CHAIN;
1234  else if (abs(value) >= kMinChainTextValue)
1235  flow_ = BTFT_CHAIN;
1236  else
1237  flow_ = BTFT_NEIGHBOURS;
1238  // Upgrade chain to strong chain if the other indicators are good
1239  if (flow_ == BTFT_CHAIN && strong_score == 3)
1240  flow_ = BTFT_STRONG_CHAIN;
1241  // Downgrade strong vertical text to chain if the indicators are bad.
1242  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1243  flow_ = BTFT_CHAIN;
1244  }
1245  if (flow_ == BTFT_NEIGHBOURS) {
1246  // Check for noisy neighbours.
1247  if (noisy_count >= blob_count) {
1248  flow_ = BTFT_NONTEXT;
1249  blob_type_= BRT_NOISE;
1250  }
1251  }
1252  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1253  bounding_box_.bottom())) {
1254  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1255  blob_count, noisy_count, good_blob_score_);
1256  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1257  value, flow_, blob_type_);
1258  Print();
1259  }
1260  SetBlobTypes();
1261 }

◆ SetRightTab()

void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 506 of file colpartition.cpp.

506  {
507  if (tab_vector != nullptr) {
508  right_key_ = tab_vector->sort_key();
509  right_key_tab_ = right_key_ >= BoxRightKey();
510  } else {
511  right_key_tab_ = false;
512  }
513  if (!right_key_tab_)
514  right_key_ = BoxRightKey();
515 }

◆ SetSpecialBlobsDensity()

void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 576 of file colpartition.cpp.

577  {
579  special_blobs_densities_[type] = density;
580 }

◆ ShallowCopy()

ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1731 of file colpartition.cpp.

1731  {
1732  auto* part = new ColPartition(blob_type_, vertical_);
1733  part->left_margin_ = left_margin_;
1734  part->right_margin_ = right_margin_;
1735  part->bounding_box_ = bounding_box_;
1736  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1737  sizeof(special_blobs_densities_));
1738  part->median_bottom_ = median_bottom_;
1739  part->median_top_ = median_top_;
1740  part->median_height_ = median_height_;
1741  part->median_left_ = median_left_;
1742  part->median_right_ = median_right_;
1743  part->median_width_ = median_width_;
1744  part->good_width_ = good_width_;
1745  part->good_column_ = good_column_;
1746  part->left_key_tab_ = left_key_tab_;
1747  part->right_key_tab_ = right_key_tab_;
1748  part->type_ = type_;
1749  part->flow_ = flow_;
1750  part->left_key_ = left_key_;
1751  part->right_key_ = right_key_;
1752  part->first_column_ = first_column_;
1753  part->last_column_ = last_column_;
1754  part->owns_blobs_ = false;
1755  return part;
1756 }

◆ SingletonPartner()

ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 629 of file colpartition.cpp.

629  {
630  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
631  if (!partners->singleton())
632  return nullptr;
633  ColPartition_C_IT it(partners);
634  return it.data();
635 }

◆ SmoothPartnerRun()

void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1808 of file colpartition.cpp.

1808  {
1809  STATS left_stats(0, working_set_count);
1810  STATS right_stats(0, working_set_count);
1811  PolyBlockType max_type = type_;
1812  ColPartition* partner;
1813  for (partner = SingletonPartner(false); partner != nullptr;
1814  partner = partner->SingletonPartner(false)) {
1815  if (partner->type_ > max_type)
1816  max_type = partner->type_;
1817  if (column_set_ == partner->column_set_) {
1818  left_stats.add(partner->first_column_, 1);
1819  right_stats.add(partner->last_column_, 1);
1820  }
1821  }
1822  type_ = max_type;
1823  // TODO(rays) Either establish that it isn't necessary to set the columns,
1824  // or find a way to do it that does not cause an assert failure in
1825  // AddToWorkingSet.
1826 #if 0
1827  first_column_ = left_stats.mode();
1828  last_column_ = right_stats.mode();
1829  if (last_column_ < first_column_)
1830  last_column_ = first_column_;
1831 #endif
1832 
1833  for (partner = SingletonPartner(false); partner != nullptr;
1834  partner = partner->SingletonPartner(false)) {
1835  partner->type_ = max_type;
1836 #if 0 // See TODO above
1837  if (column_set_ == partner->column_set_) {
1838  partner->first_column_ = first_column_;
1839  partner->last_column_ = last_column_;
1840  }
1841 #endif
1842  }
1843 }

◆ SortByBBox()

static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inline static

Definition at line 714 of file colpartition.h.

714  {
715  const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
716  const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
717  int mid_y1 = part1->bounding_box_.y_middle();
718  int mid_y2 = part2->bounding_box_.y_middle();
719  if ((part2->bounding_box_.bottom() <= mid_y1 &&
720  mid_y1 <= part2->bounding_box_.top()) ||
721  (part1->bounding_box_.bottom() <= mid_y2 &&
722  mid_y2 <= part1->bounding_box_.top())) {
723  // Sort by increasing x.
724  return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
725  }
726  // Sort by decreasing y.
727  return mid_y2 - mid_y1;
728  }

◆ SortKey()

int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 316 of file colpartition.h.

316  {
317  return TabVector::SortKey(vertical_, x, y);
318  }

◆ space_above()

int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 261 of file colpartition.h.

261  {
262  return space_above_;
263  }

◆ space_below()

int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 267 of file colpartition.h.

267  {
268  return space_below_;
269  }

◆ space_to_left()

int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 273 of file colpartition.h.

273  {
274  return space_to_left_;
275  }

◆ space_to_right()

int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 279 of file colpartition.h.

279  {
280  return space_to_right_;
281  }

◆ SpecialBlobsCount()

int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 561 of file colpartition.cpp.

561  {
563  BLOBNBOX_C_IT blob_it(&boxes_);
564  int count = 0;
565  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
566  BLOBNBOX* blob = blob_it.data();
567  BlobSpecialTextType blob_type = blob->special_text_type();
568  if (blob_type == type) {
569  count++;
570  }
571  }
572 
573  return count;
574 }

◆ SpecialBlobsDensity()

float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 556 of file colpartition.cpp.

556  {
558  return special_blobs_densities_[type];
559 }

◆ SplitAt()

ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 823 of file colpartition.cpp.

823  {
824  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
825  return nullptr; // There will be no change.
826  ColPartition* split_part = ShallowCopy();
827  split_part->set_owns_blobs(owns_blobs());
828  BLOBNBOX_C_IT it(&boxes_);
829  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
830  BLOBNBOX* bbox = it.data();
831  ColPartition* prev_owner = bbox->owner();
832  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
833  const TBOX& box = bbox->bounding_box();
834  if (box.left() >= split_x) {
835  split_part->AddBox(it.extract());
836  if (owns_blobs() && prev_owner != nullptr)
837  bbox->set_owner(split_part);
838  }
839  }
840  if (it.empty()) {
841  // Possible if split-x passes through the first blob.
842  it.add_list_after(&split_part->boxes_);
843  }
844  ASSERT_HOST(!it.empty());
845  if (split_part->IsEmpty()) {
846  // Split part ended up with nothing. Possible if split_x passes
847  // through the last blob.
848  delete split_part;
849  return nullptr;
850  }
851  right_key_tab_ = false;
852  split_part->left_key_tab_ = false;
853  right_margin_ = split_x;
854  split_part->left_margin_ = split_x;
855  ComputeLimits();
856  split_part->ComputeLimits();
857  return split_part;
858 }

◆ SplitAtBlob()

ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 787 of file colpartition.cpp.

787  {
788  ColPartition* split_part = ShallowCopy();
789  split_part->set_owns_blobs(owns_blobs());
790  BLOBNBOX_C_IT it(&boxes_);
791  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
792  BLOBNBOX* bbox = it.data();
793  ColPartition* prev_owner = bbox->owner();
794  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
795  if (bbox == split_blob || !split_part->boxes_.empty()) {
796  split_part->AddBox(it.extract());
797  if (owns_blobs() && prev_owner != nullptr)
798  bbox->set_owner(split_part);
799  }
800  }
801  ASSERT_HOST(!it.empty());
802  if (split_part->IsEmpty()) {
803  // Split part ended up with nothing. Possible if split_blob is not
804  // in the list of blobs.
805  delete split_part;
806  return nullptr;
807  }
808  right_key_tab_ = false;
809  split_part->left_key_tab_ = false;
810  ComputeLimits();
811  // TODO(nbeato) Merge Ray's CL like this:
812  // if (owns_blobs())
813  // SetBlobTextlineGoodness();
814  split_part->ComputeLimits();
815  // TODO(nbeato) Merge Ray's CL like this:
816  // if (split_part->owns_blobs())
817  // split_part->SetBlobTextlineGoodness();
818  return split_part;
819 }

◆ top_spacing()

int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 226 of file colpartition.h.

226  {
227  return top_spacing_;
228  }

◆ type()

PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 181 of file colpartition.h.

181  {
182  return type_;
183  }

◆ TypesMatch() [1/2]

static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 412 of file colpartition.h.

412  {
413  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
414  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
415  }

◆ TypesMatch() [2/2]

bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 409 of file colpartition.h.

409  {
410  return TypesMatch(blob_type_, other.blob_type_);
411  }

◆ TypesSimilar()

static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 418 of file colpartition.h.

418  {
419  return (type1 == type2 ||
420  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
421  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
422  }

◆ upper_partners()

ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 196 of file colpartition.h.

196  {
197  return &upper_partners_;
198  }

◆ VCoreOverlap()

int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 375 of file colpartition.h.

375  {
376  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
377  return 0;
378  }
379  return std::min(median_top_, other.median_top_) -
380  std::max(median_bottom_, other.median_bottom_);
381  }

◆ VOverlaps()

bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 370 of file colpartition.h.

370  {
371  return bounding_box_.y_gap(other.bounding_box_) < 0;
372  }

◆ VSignificantCoreOverlap()

bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 390 of file colpartition.h.

390  {
391  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
392  return false;
393  }
394  int overlap = VCoreOverlap(other);
395  int height = std::min(median_top_ - median_bottom_,
396  other.median_top_ - other.median_bottom_);
397  return overlap * 3 > height;
398  }

◆ WithinSameMargins()

bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 401 of file colpartition.h.

401  {
402  return left_margin_ <= other.bounding_box_.left() &&
403  bounding_box_.left() >= other.left_margin_ &&
404  bounding_box_.right() <= other.right_margin_ &&
405  right_margin_ >= other.bounding_box_.right();
406  }

◆ XAtY()

int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 320 of file colpartition.h.

320  {
321  return TabVector::XAtY(vertical_, sort_key, y);
322  }

The documentation for this class was generated from the following files:
TBOX
Definition: cleanapi_test.cc:19
C_BLOB::FakeBlob
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:236
tesseract::CST_FLOWING
Definition: colpartition.h:49
tesseract::CST_PULLOUT
Definition: colpartition.h:51
tesseract::ColPartition::Print
void Print() const
Definition: colpartition.cpp:1782
tesseract::TabVector::SortKey
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279
BTFT_NONE
Definition: blobbox.h:114
DominatesInMerge
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:128
BLOBNBOX::NoisyNeighbours
int NoisyNeighbours() const
Definition: blobbox.cpp:235
tesseract::ColPartition::IsHorizontalType
bool IsHorizontalType() const
Definition: colpartition.h:445
tesseract::ColPartition::IsLineType
bool IsLineType() const
Definition: colpartition.h:425
BTFT_STRONG_CHAIN
Definition: blobbox.h:118
tesseract::ColPartition::BoxLeftKey
int BoxLeftKey() const
Definition: colpartition.h:332
tesseract::kMaxLeaderGapFractionOfMin
const double kMaxLeaderGapFractionOfMin
Definition: colpartition.cpp:55
tesseract::ColPartition::TypesSimilar
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:418
BRT_NOISE
Definition: blobbox.h:72
tesseract::kMinBaselineCoverage
const double kMinBaselineCoverage
Definition: colpartition.cpp:72
tesseract::kMinChainTextValue
const int kMinChainTextValue
Definition: colpartition.cpp:61
tesseract::kMinLeaderCount
const int kMinLeaderCount
Definition: colpartition.cpp:57
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::ColPartition::median_width
int median_width() const
Definition: colpartition.h:142
tesseract::ColPartition::flow
BlobTextFlowType flow() const
Definition: colpartition.h:154
NearlyEqual
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:36
tesseract::WorkingPartSet::AddPartition
void AddPartition(ColPartition *part)
Definition: workingpartset.cpp:31
BLOBNBOX::base_char_top
int base_char_top() const
Definition: blobbox.h:382
TBOX::overlap
bool overlap(const TBOX &box) const
Definition: rect.h:350
tesseract::kColumnWidthFactor
const int kColumnWidthFactor
Definition: tabfind.h:41
BRT_UNKNOWN
Definition: blobbox.h:77
tesseract::ColPartition::IsPulloutType
bool IsPulloutType() const
Definition: colpartition.h:437
ICOORD
integer coordinate
Definition: points.h:30
BLOBNBOX::set_flow
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:297
BlobSpecialTextType
BlobSpecialTextType
Definition: blobbox.h:95
TBOX::print
void print() const
Definition: rect.h:277
PT_HEADING_IMAGE
Definition: capi.h:118
PTIsPulloutType
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:88
tesseract::kMaxLeaderGapFractionOfMax
const double kMaxLeaderGapFractionOfMax
Definition: colpartition.cpp:53
tesseract::ColPartition::median_height
int median_height() const
Definition: colpartition.h:136
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::ColPartition::type
PolyBlockType type() const
Definition: colpartition.h:181
PT_NOISE
Definition: capi.h:122
TBOX::area
int32_t area() const
Definition: rect.h:121
TO_BLOCK
Definition: blobbox.h:691
BRT_VERT_TEXT
Definition: blobbox.h:78
TBOX::set_top
void set_top(int y)
Definition: rect.h:60
PT_FLOWING_IMAGE
Definition: capi.h:117
PT_INLINE_EQUATION
Definition: capi.h:113
tesseract::ColPartition::SortKey
int SortKey(int x, int y) const
Definition: colpartition.h:316
PT_TABLE
Definition: capi.h:114
tesseract::CST_HEADING
Definition: colpartition.h:50
tesseract::DPPoint::CostWithVariance
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:85
BRT_RECTIMAGE
Definition: blobbox.h:75
tesseract::ColPartition::boxes
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
tesseract::kMinStrongTextValue
const int kMinStrongTextValue
Definition: colpartition.cpp:59
BLOBNBOX
Definition: blobbox.h:142
BTFT_CHAIN
Definition: blobbox.h:117
BRT_HLINE
Definition: blobbox.h:73
BTFT_LEADER
Definition: blobbox.h:120
BRT_POLYIMAGE
Definition: blobbox.h:76
PT_VERTICAL_TEXT
Definition: capi.h:115
BLOBNBOX::UnMergeableType
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:429
PT_COUNT
Definition: capi.h:123
tesseract::ColPartition::TypesMatch
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:409
TBOX::height
int16_t height() const
Definition: rect.h:107
BLOBNBOX::TextlineColor
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:442
TBOX::y_gap
int y_gap(const TBOX &box) const
Definition: rect.h:232
BTFT_NONTEXT
Definition: blobbox.h:115
BLOBNBOX::GoodTextBlob
int GoodTextBlob() const
Definition: blobbox.cpp:224
tesseract::ColPartition::BoxRightKey
int BoxRightKey() const
Definition: colpartition.h:336
tesseract::kMaxColorDistance
const int kMaxColorDistance
Definition: colpartition.cpp:77
tesseract::DPPoint::Solve
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:47
BLOBNBOX::IsLineType
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:425
BLOBNBOX::special_text_type
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:288
textord_debug_bugs
int textord_debug_bugs
Definition: alignedblob.cpp:28
TBOX::set_right
void set_right(int x)
Definition: rect.h:81
tesseract::ColPartition::SingletonPartner
ColPartition * SingletonPartner(bool upper)
Definition: colpartition.cpp:629
tesseract::ColumnSpanningType
ColumnSpanningType
Definition: colpartition.h:47
BLOCK
Definition: ocrblock.h:28
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
tesseract::kHorzStrongTextlineAspect
const int kHorzStrongTextlineAspect
Definition: colpartition.cpp:67
BLOBNBOX::base_char_bottom
int base_char_bottom() const
Definition: blobbox.h:385
PDBLK::set_poly_block
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:56
tesseract::ColPartition::IsImageType
bool IsImageType() const
Definition: colpartition.h:429
TO_BLOCK::block
BLOCK * block
Definition: blobbox.h:776
tesseract::AlignedBlob::WithinTestRegion
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: alignedblob.cpp:150
PT_HEADING_TEXT
Definition: capi.h:110
BRT_TEXT
Definition: blobbox.h:79
tesseract::ColPartition::left_key
int left_key() const
Definition: colpartition.h:172
tesseract::kHorzStrongTextlineCount
const int kHorzStrongTextlineCount
Definition: colpartition.cpp:63
tesseract::kHorzStrongTextlineHeight
const int kHorzStrongTextlineHeight
Definition: colpartition.cpp:65
tesseract::ColPartition::blob_type
BlobRegionType blob_type() const
Definition: colpartition.h:148
tesseract::ColPartition::MidY
int MidY() const
Definition: colpartition.h:304
TBOX::width
int16_t width() const
Definition: rect.h:114
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
BLOBNBOX::set_owner
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:354
tesseract::TabFind::DifferentSizes
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407
BLOBNBOX::IsDiacritic
bool IsDiacritic() const
Definition: blobbox.h:379
tesseract::TabVector::XAtY
int XAtY(int y) const
Definition: tabvector.h:188
PTIsImageType
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:76
tesseract::ColPartition::owns_blobs
bool owns_blobs() const
Definition: colpartition.h:291
tesseract::ColPartition::right_key
int right_key() const
Definition: colpartition.h:178
BLOBNBOX::set_region_type
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:285
tesseract::kMaxRMSColorNoise
const int kMaxRMSColorNoise
Definition: colpartition.cpp:74
PT_VERT_LINE
Definition: capi.h:121
TO_ROW::add_blob
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:723
PT_UNKNOWN
Definition: capi.h:108
STATS
Definition: statistc.h:30
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::ColPartition::KeyWidth
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324
tesseract::ColPartition::MakeBlock
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
Definition: colpartition.cpp:1623
tesseract::ColPartition::XAtY
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
tesseract::ColPartition::SetBlobTypes
void SetBlobTypes()
Definition: colpartition.cpp:1265
TBOX::x_overlap
bool x_overlap(const TBOX &box) const
Definition: rect.h:393
PT_PULLOUT_IMAGE
Definition: capi.h:119
tesseract::ColPartition::SetColumnGoodness
void SetColumnGoodness(WidthCallback cb)
Definition: colpartition.cpp:1070
PTIsLineType
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:72
tesseract::ColPartition::RightBlobRule
int RightBlobRule() const
Definition: colpartition.cpp:550
tesseract::ColPartition::PartitionType
PolyBlockType PartitionType(ColumnSpanningType flow) const
Definition: colpartition.cpp:1006
count
int count(LIST var_list)
Definition: oldlist.cpp:79
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
tesseract::ColPartition::RightAtY
int RightAtY(int y) const
Definition: colpartition.h:344
tesseract::ColPartition::ComputeLimits
void ComputeLimits()
Definition: colpartition.cpp:861
PTIsTextType
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:81
BRT_VLINE
Definition: blobbox.h:74
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::ColPartition::IsVerticalType
bool IsVerticalType() const
Definition: colpartition.h:441
PT_FLOWING_TEXT
Definition: capi.h:109
BLOBNBOX::region_type
BlobRegionType region_type() const
Definition: blobbox.h:282
TBOX::right
int16_t right() const
Definition: rect.h:78
POLY_BLOCK::ColorForPolyBlockType
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:392
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
POLY_BLOCK
Definition: polyblk.h:26
TO_ROW
Definition: blobbox.h:543
tesseract::ImageFind::ColorDistanceFromLine
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:355
tesseract::ColPartition::SortByBBox
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:714
PT_PULLOUT_TEXT
Definition: capi.h:111
TBOX::set_bottom
void set_bottom(int y)
Definition: rect.h:67
UpdateRange
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:118
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
BLOBNBOX::owner
tesseract::ColPartition * owner() const
Definition: blobbox.h:351
PolyBlockType
PolyBlockType
Definition: publictypes.h:52
tesseract::kMaxBaselineError
const double kMaxBaselineError
Definition: colpartition.cpp:70
tesseract::ColPartition::ColPartition
ColPartition()=default
tesseract::kMaxSameBlockLineSpacing
const double kMaxSameBlockLineSpacing
Definition: colpartition.cpp:49
ICOORDELT
Definition: points.h:160
tesseract::ColPartition::LeftAtY
int LeftAtY(int y) const
Definition: colpartition.h:340
PT_HORZ_LINE
Definition: capi.h:120
tesseract::ColPartition::VCoreOverlap
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375
textord_debug_tabfind
int textord_debug_tabfind
Definition: alignedblob.cpp:27
tesseract::ColPartition::MakeVerticalTextBlock
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
Definition: colpartition.cpp:1680
BTFT_NEIGHBOURS
Definition: blobbox.h:116
tesseract::ColPartition::ShallowCopy
ColPartition * ShallowCopy() const
Definition: colpartition.cpp:1731
tesseract::ColPartition::LeftBlobRule
int LeftBlobRule() const
Definition: colpartition.cpp:545
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
ICOORD::y
int16_t y() const
access function
Definition: points.h:55
tesseract::CST_NOISE
Definition: colpartition.h:48
BSTT_COUNT
Definition: blobbox.h:102
TBOX
Definition: rect.h:33