tesseract  5.0.0-alpha-619-ge9db
colpartitionset.cpp
Go to the documentation of this file.
1 // File: colpartitionset.cpp
3 // Description: Class to hold a list of ColPartitions of the page that
4 // correspond roughly to columns.
5 // Author: Ray Smith
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config_auto.h"
22 #endif
23 
24 #include "colpartitionset.h"
25 #include "workingpartset.h"
26 #include "tablefind.h"
27 
28 namespace tesseract {
29 
30 // Minimum width of a column to be interesting as a multiple of resolution.
// Used by SpanningType: a span whose first and last column index coincide and
// whose width is below kMinColumnWidth * resolution is classified CST_NOISE.
31 const double kMinColumnWidth = 2.0 / 3;
32 
34 
35 ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) {
36  ColPartition_IT it(&parts_);
37  it.add_list_after(partitions);
38  ComputeCoverage();
39 }
40 
// NOTE(review): the line that opens this definition (listing line 41) is
// missing from this listing. Presumably it is a ColPartitionSet constructor
// taking a single `ColPartition* part` - confirm against the original file.
// The body adds `part` to parts_ and then recomputes the coverage statistics.
42  ColPartition_IT it(&parts_);
43  it.add_after_then_move(part);
44  ComputeCoverage();
45 }
46 
47 // Returns the number of columns of good width.
49  int num_good_cols = 0;
50  // This is a read-only iteration of the list.
51  ColPartition_IT it(const_cast<ColPartition_LIST*>(&parts_));
52  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
53  if (it.data()->good_width()) ++num_good_cols;
54  }
55  return num_good_cols;
56 }
57 
58 // Return an element of the parts_ list from its index.
60  ColPartition_IT it(&parts_);
61  it.mark_cycle_pt();
62  for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward());
63  if (it.cycled_list())
64  return nullptr;
65  return it.data();
66 }
67 
68 // Return the ColPartition that contains the given coords, if any, else nullptr.
70  ColPartition_IT it(&parts_);
71  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
72  ColPartition* part = it.data();
73  if (part->ColumnContains(x, y))
74  return part;
75  }
76  return nullptr;
77 }
78 
79 // Extract all the parts from the list, relinquishing ownership.
81  ColPartition_IT it(&parts_);
82  while (!it.empty()) {
83  it.extract();
84  it.forward();
85  }
86 }
87 
88 // Attempt to improve this by adding partitions or expanding partitions.
90  PartSetVector* src_sets) {
91  int set_size = src_sets->size();
92  // Iterate over the provided column sets, as each one may have something
93  // to improve this.
94  for (int i = 0; i < set_size; ++i) {
95  ColPartitionSet* column_set = src_sets->get(i);
96  if (column_set == nullptr)
97  continue;
98  // Iterate over the parts in this and column_set, adding bigger or
99  // new parts in column_set to this.
100  ColPartition_IT part_it(&parts_);
101  ASSERT_HOST(!part_it.empty());
102  int prev_right = INT32_MIN;
103  part_it.mark_cycle_pt();
104  ColPartition_IT col_it(&column_set->parts_);
105  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
106  ColPartition* col_part = col_it.data();
107  if (col_part->blob_type() < BRT_UNKNOWN)
108  continue; // Ignore image partitions.
109  int col_left = col_part->left_key();
110  int col_right = col_part->right_key();
111  // Sync-up part_it (in this) so it matches the col_part in column_set.
112  ColPartition* part = part_it.data();
113  while (!part_it.at_last() && part->right_key() < col_left) {
114  prev_right = part->right_key();
115  part_it.forward();
116  part = part_it.data();
117  }
118  int part_left = part->left_key();
119  int part_right = part->right_key();
120  if (part_right < col_left || col_right < part_left) {
121  // There is no overlap so this is a new partition.
122  AddPartition(col_part->ShallowCopy(), &part_it);
123  continue;
124  }
125  // Check the edges of col_part to see if they can improve part.
126  bool part_width_ok = cb(part->KeyWidth(part_left, part_right));
127  if (col_left < part_left && col_left > prev_right) {
128  // The left edge of the column is better and it doesn't overlap,
129  // so we can potentially expand it.
130  int col_box_left = col_part->BoxLeftKey();
131  bool tab_width_ok = cb(part->KeyWidth(col_left, part_right));
132  bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right));
133  if (tab_width_ok || (!part_width_ok)) {
134  // The tab is leaving the good column metric at least as good as
135  // it was before, so use the tab.
136  part->CopyLeftTab(*col_part, false);
137  part->SetColumnGoodness(cb);
138  } else if (col_box_left < part_left &&
139  (box_width_ok || !part_width_ok)) {
140  // The box is leaving the good column metric at least as good as
141  // it was before, so use the box.
142  part->CopyLeftTab(*col_part, true);
143  part->SetColumnGoodness(cb);
144  }
145  part_left = part->left_key();
146  }
147  if (col_right > part_right &&
148  (part_it.at_last() ||
149  part_it.data_relative(1)->left_key() > col_right)) {
150  // The right edge is better, so we can possibly expand it.
151  int col_box_right = col_part->BoxRightKey();
152  bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
153  bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right));
154  if (tab_width_ok || (!part_width_ok)) {
155  // The tab is leaving the good column metric at least as good as
156  // it was before, so use the tab.
157  part->CopyRightTab(*col_part, false);
158  part->SetColumnGoodness(cb);
159  } else if (col_box_right > part_right &&
160  (box_width_ok || !part_width_ok)) {
161  // The box is leaving the good column metric at least as good as
162  // it was before, so use the box.
163  part->CopyRightTab(*col_part, true);
164  part->SetColumnGoodness(cb);
165  }
166  }
167  }
168  }
169  ComputeCoverage();
170 }
171 
172 // If this set is good enough to represent a new partitioning into columns,
173 // add it to the vector of sets, otherwise delete it.
175  WidthCallback cb) {
176  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
177  bounding_box_.bottom());
178  if (debug) {
179  tprintf("Considering new column candidate:\n");
180  Print();
181  }
182  if (!LegalColumnCandidate()) {
183  if (debug) {
184  tprintf("Not a legal column candidate:\n");
185  Print();
186  }
187  delete this;
188  return;
189  }
190  for (int i = 0; i < column_sets->size(); ++i) {
191  ColPartitionSet* columns = column_sets->get(i);
192  // In ordering the column set candidates, good_coverage_ is king,
193  // followed by good_column_count_ and then bad_coverage_.
194  bool better = good_coverage_ > columns->good_coverage_;
195  if (good_coverage_ == columns->good_coverage_) {
196  better = good_column_count_ > columns->good_column_count_;
197  if (good_column_count_ == columns->good_column_count_) {
198  better = bad_coverage_ > columns->bad_coverage_;
199  }
200  }
201  if (better) {
202  // The new one is better so add it.
203  if (debug)
204  tprintf("Good one\n");
205  column_sets->insert(this, i);
206  return;
207  }
208  if (columns->CompatibleColumns(false, this, cb)) {
209  if (debug)
210  tprintf("Duplicate\n");
211  delete this;
212  return; // It is not unique.
213  }
214  }
215  if (debug)
216  tprintf("Added to end\n");
217  column_sets->push_back(this);
218 }
219 
220 // Return true if the partitions in other are all compatible with the columns
221 // in this.
223  WidthCallback cb) {
224  if (debug) {
225  tprintf("CompatibleColumns testing compatibility\n");
226  Print();
227  other->Print();
228  }
229  if (other->parts_.empty()) {
230  if (debug)
231  tprintf("CompatibleColumns true due to empty other\n");
232  return true;
233  }
234  ColPartition_IT it(&other->parts_);
235  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
236  ColPartition* part = it.data();
237  if (part->blob_type() < BRT_UNKNOWN) {
238  if (debug) {
239  tprintf("CompatibleColumns ignoring image partition\n");
240  part->Print();
241  }
242  continue; // Image partitions are irrelevant to column compatibility.
243  }
244  int y = part->MidY();
245  int left = part->bounding_box().left();
246  int right = part->bounding_box().right();
247  ColPartition* left_col = ColumnContaining(left, y);
248  ColPartition* right_col = ColumnContaining(right, y);
249  if (right_col == nullptr || left_col == nullptr) {
250  if (debug) {
251  tprintf("CompatibleColumns false due to partition edge outside\n");
252  part->Print();
253  }
254  return false; // A partition edge lies outside of all columns
255  }
256  if (right_col != left_col && cb(right - left)) {
257  if (debug) {
258  tprintf("CompatibleColumns false due to good width in multiple cols\n");
259  part->Print();
260  }
261  return false; // Partition with a good width must be in a single column.
262  }
263 
264  ColPartition_IT it2= it;
265  while (!it2.at_last()) {
266  it2.forward();
267  ColPartition* next_part = it2.data();
268  if (!BLOBNBOX::IsTextType(next_part->blob_type()))
269  continue; // Non-text partitions are irrelevant.
270  int next_left = next_part->bounding_box().left();
271  if (next_left == right) {
272  break; // They share the same edge, so one must be a pull-out.
273  }
274  // Search to see if right and next_left fall within a single column.
275  ColPartition* next_left_col = ColumnContaining(next_left, y);
276  if (right_col == next_left_col) {
277  // There is a column break in this column.
278  // This can be due to a figure caption within a column, a pull-out
279  // block, or a simple broken textline that remains to be merged:
280  // all allowed, or a change in column layout: not allowed.
281  // If both partitions are of good width, then it is likely
282  // a change in column layout, otherwise probably an allowed situation.
283  if (part->good_width() && next_part->good_width()) {
284  if (debug) {
285  int next_right = next_part->bounding_box().right();
286  tprintf("CompatibleColumns false due to 2 parts of good width\n");
287  tprintf("part1 %d-%d, part2 %d-%d\n",
288  left, right, next_left, next_right);
289  right_col->Print();
290  }
291  return false;
292  }
293  }
294  break;
295  }
296  }
297  if (debug)
298  tprintf("CompatibleColumns true!\n");
299  return true;
300 }
301 
302 // Returns the total width of all blobs in the part_set that do not lie
303 // within an approved column. Used as a cost measure for using this
304 // column set over another that might be compatible.
306  int total_width = 0;
307  ColPartition_IT it(&part_set->parts_);
308  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
309  ColPartition* part = it.data();
310  if (!BLOBNBOX::IsTextType(part->blob_type())) {
311  continue; // Non-text partitions are irrelevant to column compatibility.
312  }
313  int y = part->MidY();
314  BLOBNBOX_C_IT box_it(part->boxes());
315  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
316  const TBOX& box = it.data()->bounding_box();
317  // Assume that the whole blob is outside any column iff its x-middle
318  // is outside.
319  int x = (box.left() + box.right()) / 2;
320  ColPartition* col = ColumnContaining(x, y);
321  if (col == nullptr)
322  total_width += box.width();
323  }
324  }
325  return total_width;
326 }
327 
328 // Return true if this ColPartitionSet makes a legal column candidate by
329 // having legal individual partitions and non-overlapping adjacent pairs.
331  ColPartition_IT it(&parts_);
332  if (it.empty())
333  return false;
334  bool any_text_parts = false;
335  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
336  ColPartition* part = it.data();
337  if (BLOBNBOX::IsTextType(part->blob_type())) {
338  if (!part->IsLegal())
339  return false; // Individual partition is illegal.
340  any_text_parts = true;
341  }
342  if (!it.at_last()) {
343  ColPartition* next_part = it.data_relative(1);
344  if (next_part->left_key() < part->right_key()) {
345  return false;
346  }
347  }
348  }
349  return any_text_parts;
350 }
351 
352 // Return a copy of this. If good_only will only copy the Good ColPartitions.
354  ColPartition_LIST copy_parts;
355  ColPartition_IT src_it(&parts_);
356  ColPartition_IT dest_it(&copy_parts);
357  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
358  ColPartition* part = src_it.data();
359  if (BLOBNBOX::IsTextType(part->blob_type()) &&
360  (!good_only || part->good_width() || part->good_column()))
361  dest_it.add_after_then_move(part->ShallowCopy());
362  }
363  if (dest_it.empty())
364  return nullptr;
365  return new ColPartitionSet(&copy_parts);
366 }
367 
368 // Return the bounding boxes of columns at the given y-range
369 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
370  ColSegment_LIST *segments) {
371  ColPartition_IT it(&parts_);
372  ColSegment_IT col_it(segments);
373  col_it.move_to_last();
374  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
375  ColPartition* part = it.data();
376  ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
377  ICOORD top_right(part->RightAtY(y_bottom), y_top);
378  auto *col_seg = new ColSegment();
379  col_seg->InsertBox(TBOX(bot_left, top_right));
380  col_it.add_after_then_move(col_seg);
381  }
382 }
383 
384 // Display the edges of the columns at the given y coords.
385 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
386  ScrollView* win) {
387 #ifndef GRAPHICS_DISABLED
388  ColPartition_IT it(&parts_);
389  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
390  ColPartition* part = it.data();
391  win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
392  win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
393  }
394 #endif // GRAPHICS_DISABLED
395 }
396 
397 // Return the ColumnSpanningType that best explains the columns overlapped
398 // by the given coords(left,right,y), with the given margins.
399 // Also return the first and last column index touched by the coords and
400 // the leftmost spanned column.
401 // Column indices are 2n + 1 for real columns (0 based) and even values
402 // represent the gaps in between columns, with 0 being left of the leftmost.
403 // resolution refers to the ppi resolution of the image.
405  int left, int right,
406  int height, int y,
407  int left_margin,
408  int right_margin,
409  int* first_col,
410  int* last_col,
411  int* first_spanned_col) {
412  *first_col = -1;
413  *last_col = -1;
414  *first_spanned_col = -1;
415  int margin_columns = 0;
416  ColPartition_IT it(&parts_);
417  int col_index = 1;
418  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
419  ColPartition* part = it.data();
420  if (part->ColumnContains(left, y) ||
421  (it.at_first() && part->ColumnContains(left + height, y))) {
422  // In the default case, first_col is set, but columns_spanned remains
423  // zero, so first_col will get reset in the first column genuinely
424  // spanned, but we can tell the difference from a noise partition
425  // that touches no column.
426  *first_col = col_index;
427  if (part->ColumnContains(right, y) ||
428  (it.at_last() && part->ColumnContains(right - height, y))) {
429  // Both within a single column.
430  *last_col = col_index;
431  return CST_FLOWING;
432  }
433  if (left_margin <= part->LeftAtY(y)) {
434  // It completely spans this column.
435  *first_spanned_col = col_index;
436  margin_columns = 1;
437  }
438  } else if (part->ColumnContains(right, y) ||
439  (it.at_last() && part->ColumnContains(right - height, y))) {
440  if (*first_col < 0) {
441  // It started in-between.
442  *first_col = col_index - 1;
443  }
444  if (right_margin >= part->RightAtY(y)) {
445  // It completely spans this column.
446  if (margin_columns == 0)
447  *first_spanned_col = col_index;
448  ++margin_columns;
449  }
450  *last_col = col_index;
451  break;
452  } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
453  // Neither left nor right are contained within, so it spans this
454  // column.
455  if (*first_col < 0) {
456  // It started in between the previous column and the current column.
457  *first_col = col_index - 1;
458  }
459  if (margin_columns == 0)
460  *first_spanned_col = col_index;
461  *last_col = col_index;
462  } else if (right < part->LeftAtY(y)) {
463  // We have gone past the end.
464  *last_col = col_index - 1;
465  if (*first_col < 0) {
466  // It must lie completely between columns =>noise.
467  *first_col = col_index - 1;
468  }
469  break;
470  }
471  }
472  if (*first_col < 0)
473  *first_col = col_index - 1; // The last in-between.
474  if (*last_col < 0)
475  *last_col = col_index - 1; // The last in-between.
476  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
477  ASSERT_HOST(*first_col <= *last_col);
478  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
479  // Neither end was in a column, and it didn't span any, so it lies
480  // entirely between columns, therefore noise.
481  return CST_NOISE;
482  } else if (margin_columns <= 1) {
483  // An exception for headings that stick outside of single-column text.
484  if (margin_columns == 1 && parts_.singleton()) {
485  return CST_HEADING;
486  }
487  // It is a pullout, as left and right were not in the same column, but
488  // it doesn't go to the edge of its start and end.
489  return CST_PULLOUT;
490  }
491  // Its margins went to the edges of first and last columns => heading.
492  return CST_HEADING;
493 }
494 
495 // The column_set has changed. Close down all in-progress WorkingPartSets in
496 // columns that do not match and start new ones for the new columns in this.
497 // As ColPartitions are turned into BLOCKs, the used ones are put in
498 // used_parts, as they still need to be referenced in the grid.
500  const ICOORD& tright,
501  int resolution,
502  ColPartition_LIST* used_parts,
503  WorkingPartSet_LIST* working_set_list) {
504  // Move the input list to a temporary location so we can delete its elements
505  // as we add them to the output working_set.
506  WorkingPartSet_LIST work_src;
507  WorkingPartSet_IT src_it(&work_src);
508  src_it.add_list_after(working_set_list);
509  src_it.move_to_first();
510  WorkingPartSet_IT dest_it(working_set_list);
511  // Completed blocks and to_blocks are accumulated and given to the first new
512  // one whenever we keep a column, or at the end.
513  BLOCK_LIST completed_blocks;
514  TO_BLOCK_LIST to_blocks;
515  WorkingPartSet* first_new_set = nullptr;
516  WorkingPartSet* working_set = nullptr;
517  ColPartition_IT col_it(&parts_);
518  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
519  ColPartition* column = col_it.data();
520  // Any existing column to the left of column is completed.
521  while (!src_it.empty() &&
522  ((working_set = src_it.data())->column() == nullptr ||
523  working_set->column()->right_key() <= column->left_key())) {
524  src_it.extract();
525  working_set->ExtractCompletedBlocks(bleft, tright, resolution,
526  used_parts, &completed_blocks,
527  &to_blocks);
528  delete working_set;
529  src_it.forward();
530  }
531  // Make a new between-column WorkingSet for before the current column.
532  working_set = new WorkingPartSet(nullptr);
533  dest_it.add_after_then_move(working_set);
534  if (first_new_set == nullptr)
535  first_new_set = working_set;
536  // A matching column gets to stay, and first_new_set gets all the
537  // completed_sets.
538  working_set = src_it.empty() ? nullptr : src_it.data();
539  if (working_set != nullptr &&
540  working_set->column()->MatchingColumns(*column)) {
541  working_set->set_column(column);
542  dest_it.add_after_then_move(src_it.extract());
543  src_it.forward();
544  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
545  first_new_set = nullptr;
546  } else {
547  // Just make a new working set for the current column.
548  working_set = new WorkingPartSet(column);
549  dest_it.add_after_then_move(working_set);
550  }
551  }
552  // Complete any remaining src working sets.
553  while (!src_it.empty()) {
554  working_set = src_it.extract();
555  working_set->ExtractCompletedBlocks(bleft, tright, resolution,
556  used_parts, &completed_blocks,
557  &to_blocks);
558  delete working_set;
559  src_it.forward();
560  }
561  // Make a new between-column WorkingSet for after the last column.
562  working_set = new WorkingPartSet(nullptr);
563  dest_it.add_after_then_move(working_set);
564  if (first_new_set == nullptr)
565  first_new_set = working_set;
566  // The first_new_set now gets any accumulated completed_parts/blocks.
567  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
568 }
569 
570 // Accumulate the widths and gaps into the given variables.
572  int* width_samples,
573  int* total_gap,
574  int* gap_samples) {
575  ColPartition_IT it(&parts_);
576  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
577  ColPartition* part = it.data();
578  *total_width += part->ColumnWidth();
579  ++*width_samples;
580  if (!it.at_last()) {
581  ColPartition* next_part = it.data_relative(1);
582  int part_left = part->right_key();
583  int part_right = next_part->left_key();
584  int gap = part->KeyWidth(part_left, part_right);
585  *total_gap += gap;
586  ++*gap_samples;
587  }
588  }
589 }
590 
591 // Provide debug output for this ColPartitionSet and all the ColPartitions.
593  ColPartition_IT it(&parts_);
594  tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
595  " (%d,%d)->(%d,%d)\n",
596  it.length(), good_column_count_, good_coverage_, bad_coverage_,
597  bounding_box_.left(), bounding_box_.bottom(),
598  bounding_box_.right(), bounding_box_.top());
599  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
600  ColPartition* part = it.data();
601  part->Print();
602  }
603 }
604 
605 // PRIVATE CODE.
606 
607 // Add the given partition to the list in the appropriate place.
608 void ColPartitionSet::AddPartition(ColPartition* new_part,
609  ColPartition_IT* it) {
610  AddPartitionCoverageAndBox(*new_part);
611  int new_right = new_part->right_key();
612  if (it->data()->left_key() >= new_right)
613  it->add_before_stay_put(new_part);
614  else
615  it->add_after_stay_put(new_part);
616 }
617 
618 // Compute the coverage and good column count. Coverage is the amount of the
619 // width of the page (in pixels) that is covered by ColPartitions, which are
620 // used to provide candidate column layouts.
621 // Coverage is split into good and bad. Good coverage is provided by
622 // ColPartitions of a frequent width (according to the callback function
623 // provided by TabFinder::WidthCB, which accesses stored statistics on the
624 // widths of ColPartitions) and bad coverage is provided by all other
625 // ColPartitions, even if they have tab vectors at both sides. Thus:
626 //   |-----------------------------------------------------------------|
627 //   |                      Double width heading                       |
628 //   |-----------------------------------------------------------------|
629 //   |-------------------------------| |-------------------------------|
630 //   |   Common width ColPartition   | |   Common width ColPartition   |
631 //   |-------------------------------| |-------------------------------|
632 // the layout with two common-width columns has better coverage than the
633 // double width heading, because the coverage is "good," even though less in
634 // total coverage than the heading, because the heading coverage is "bad."
635 void ColPartitionSet::ComputeCoverage() {
636  // Count the number of good columns and sum their width.
637  ColPartition_IT it(&parts_);
638  good_column_count_ = 0;
639  good_coverage_ = 0;
640  bad_coverage_ = 0;
641  bounding_box_ = TBOX();
642  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
643  ColPartition* part = it.data();
644  AddPartitionCoverageAndBox(*part);
645  }
646 }
647 
648 // Adds the coverage, column count and box for a single partition,
649 // without adding it to the list. (Helper factored from ComputeCoverage.)
650 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
651  bounding_box_ += part.bounding_box();
652  int coverage = part.ColumnWidth();
653  if (part.good_width()) {
654  good_coverage_ += coverage;
655  good_column_count_ += 2;
656  } else {
657  if (part.blob_type() < BRT_UNKNOWN)
658  coverage /= 2;
659  if (part.good_column())
660  ++good_column_count_;
661  bad_coverage_ += coverage;
662  }
663 }
664 
665 } // namespace tesseract.
TBOX
Definition: cleanapi_test.cc:19
ScrollView
Definition: scrollview.h:97
tesseract::CST_FLOWING
Definition: colpartition.h:49
tesseract::CST_PULLOUT
Definition: colpartition.h:51
tesseract::ColPartition::Print
void Print() const
Definition: colpartition.cpp:1782
tesseract::ColPartitionSet::AccumulateColumnWidthsAndGaps
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
Definition: colpartitionset.cpp:571
tesseract::ColPartitionSet::DisplayColumnEdges
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
Definition: colpartitionset.cpp:385
tesseract::ColPartition::BoxLeftKey
int BoxLeftKey() const
Definition: colpartition.h:332
tesseract::ColPartitionSet::GetColumnByIndex
ColPartition * GetColumnByIndex(int index)
Definition: colpartitionset.cpp:59
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::ColSegment
Definition: tablefind.h:45
GenericVector::insert
void insert(const T &t, int index)
Definition: genericvector.h:750
tesseract::ColPartitionSet::ChangeWorkColumns
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
Definition: colpartitionset.cpp:499
BRT_UNKNOWN
Definition: blobbox.h:77
tesseract::ColPartition::IsLegal
bool IsLegal()
Definition: colpartition.cpp:342
ICOORD
integer coordinate
Definition: points.h:30
tesseract::WidthCallback
std::function< bool(int)> WidthCallback
Definition: tabfind.h:35
TBOX::top
int16_t top() const
Definition: rect.h:57
tesseract::WorkingPartSet::ExtractCompletedBlocks
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: workingpartset.cpp:55
tesseract::ColPartitionSet::SpanningType
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
Definition: colpartitionset.cpp:404
colpartitionset.h
tesseract::ColPartitionSet
Definition: colpartitionset.h:39
tesseract::CST_HEADING
Definition: colpartition.h:50
tesseract::ColPartitionSet::GoodColumnCount
int GoodColumnCount() const
Definition: colpartitionset.cpp:48
tesseract::ColPartitionSet::LegalColumnCandidate
bool LegalColumnCandidate()
Definition: colpartitionset.cpp:330
tesseract::ColPartition::boxes
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
tesseract::ColPartition::CopyLeftTab
void CopyLeftTab(const ColPartition &src, bool take_box)
Definition: colpartition.cpp:519
tesseract::ColPartitionSet::AddToColumnSetsIfUnique
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback cb)
Definition: colpartitionset.cpp:174
tesseract::ColPartition::ColumnWidth
int ColumnWidth() const
Definition: colpartition.h:328
tesseract::ColPartition
Definition: colpartition.h:67
tesseract::ColPartition::BoxRightKey
int BoxRightKey() const
Definition: colpartition.h:336
tesseract::ColPartitionSet::GetColumnBoxes
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)
Definition: colpartitionset.cpp:369
tesseract::ColumnSpanningType
ColumnSpanningType
Definition: colpartition.h:47
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
tesseract::ColPartitionSet::CompatibleColumns
bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback cb)
Definition: colpartitionset.cpp:222
tesseract::kMinColumnWidth
const double kMinColumnWidth
Definition: colpartitionset.cpp:31
tesseract::AlignedBlob::WithinTestRegion
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: alignedblob.cpp:150
tesseract::ColPartition::left_key
int left_key() const
Definition: colpartition.h:172
tesseract::ColPartition::blob_type
BlobRegionType blob_type() const
Definition: colpartition.h:148
tesseract::ColPartition::MidY
int MidY() const
Definition: colpartition.h:304
TBOX::width
int16_t width() const
Definition: rect.h:114
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::ColPartition::MatchingColumns
bool MatchingColumns(const ColPartition &other) const
Definition: colpartition.cpp:370
tesseract::ColPartition::good_column
bool good_column() const
Definition: colpartition.h:166
tesseract::ColPartitionSet::UnmatchedWidth
int UnmatchedWidth(ColPartitionSet *part_set)
Definition: colpartitionset.cpp:305
tesseract::ColPartition::right_key
int right_key() const
Definition: colpartition.h:178
tesseract
Definition: baseapi.h:65
tesseract::ColPartitionSet::ImproveColumnCandidate
void ImproveColumnCandidate(WidthCallback cb, PartSetVector *src_sets)
Definition: colpartitionset.cpp:89
GenericVector< ColPartitionSet * >
tesseract::ColPartition::KeyWidth
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324
tesseract::WorkingPartSet
Definition: workingpartset.h:32
tesseract::ColPartition::SetColumnGoodness
void SetColumnGoodness(WidthCallback cb)
Definition: colpartition.cpp:1070
tesseract::WorkingPartSet::column
ColPartition * column() const
Definition: workingpartset.h:39
tesseract::ColPartition::bounding_box
const TBOX & bounding_box() const
Definition: colpartition.h:109
tesseract::WorkingPartSet::set_column
void set_column(ColPartition *col)
Definition: workingpartset.h:42
tesseract::ColPartition::RightAtY
int RightAtY(int y) const
Definition: colpartition.h:344
TBOX::left
int16_t left() const
Definition: rect.h:71
GenericVector::get
T & get(int index) const
Definition: genericvector.h:716
tesseract::ColPartitionSet::Print
void Print()
Definition: colpartitionset.cpp:592
TBOX::right
int16_t right() const
Definition: rect.h:78
ScrollView::Line
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:531
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ColPartitionSet::Copy
ColPartitionSet * Copy(bool good_only)
Definition: colpartitionset.cpp:353
workingpartset.h
tesseract::ColPartition::good_width
bool good_width() const
Definition: colpartition.h:163
tesseract::ColPartition::ColumnContains
bool ColumnContains(int x, int y) const
Definition: colpartition.h:353
tesseract::ColPartition::LeftAtY
int LeftAtY(int y) const
Definition: colpartition.h:340
GenericVector::size
int size() const
Definition: genericvector.h:71
BLOBNBOX::IsTextType
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:417
tesseract::ColPartitionSet::ColumnContaining
ColPartition * ColumnContaining(int x, int y)
Definition: colpartitionset.cpp:69
tesseract::ColPartitionSet::ColPartitionSet
ColPartitionSet()=default
tablefind.h
tesseract::ColPartition::ShallowCopy
ColPartition * ShallowCopy() const
Definition: colpartition.cpp:1731
tesseract::ColPartition::CopyRightTab
void CopyRightTab(const ColPartition &src, bool take_box)
Definition: colpartition.cpp:532
ELISTIZE
#define ELISTIZE(CLASSNAME)
Definition: elst.h:919
tesseract::ColPartitionSet::RelinquishParts
void RelinquishParts()
Definition: colpartitionset.cpp:80
tesseract::WorkingPartSet::InsertCompletedBlocks
void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: workingpartset.cpp:72
tesseract::CST_NOISE
Definition: colpartition.h:48
TBOX
Definition: rect.h:33