tesseract  5.0.0-alpha-619-ge9db
colpartitiongrid.cpp
Go to the documentation of this file.
1 // File: colpartitiongrid.cpp
3 // Description: Class collecting code that acts on a BBGrid of ColPartitions.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2009, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifdef HAVE_CONFIG_H
20 #include "config_auto.h"
21 #endif
22 
23 #include "colpartitiongrid.h"
24 #include "colpartitionset.h"
25 #include "imagefind.h"
26 
27 #include <algorithm>
28 
29 namespace tesseract {
30 
31 // Max pad factor used to search the neighbourhood of a partition to smooth
32 // partition types.
33 const int kMaxPadFactor = 6;
34 // Max multiple of size (min(height, width)) for the distance of the nearest
35 // neighbour for the change of type to be used.
// NOTE(review): the declaration that the comment above describes is absent
// from this listing (the embedded numbering skips a line here). Restore it
// from the original file.
37 // Maximum number of lines in a credible figure caption.
38 const int kMaxCaptionLines = 7;
39 // Min ratio between biggest and smallest gap to bound a caption.
40 const double kMinCaptionGapRatio = 2.0;
41 // Min ratio between biggest gap and mean line height to bound a caption.
42 const double kMinCaptionGapHeightRatio = 0.5;
43 // Min fraction of ColPartition height to be overlapping for margin purposes.
44 const double kMarginOverlapFraction = 0.25;
45 // Size ratio required to consider an unmerged overlapping partition to be big.
46 const double kBigPartSizeRatio = 1.75;
47 // Fraction of gridsize to allow arbitrary overlap between partitions.
// NOTE(review): the declaration for the comment above is also absent from
// this listing. Code further down multiplies kTinyEnoughTextlineOverlapFraction
// by gridsize(), so that is presumably the missing constant -- confirm and
// restore it from the original file.
49 // Max vertical distance of neighbouring ColPartition as a multiple of
50 // partition height for it to be a partner.
51 // TODO(rays) fix the problem that causes a larger number to not work well.
52 // The value needs to be larger as sparse text blocks in a page that gets
53 // marked as single column will not find adjacent lines as partners, and
54 // will merge horizontally distant, but aligned lines. See rep.4B3 p5.
55 // The value needs to be small because double-spaced legal docs written
56 // in a single column, but justified courier have widely spaced lines
57 // that need to get merged before they partner-up with the lines above
58 // and below. See legal.3B5 p13/17. Neither of these should depend on
59 // the value of kMaxPartitionSpacing to be successful, and ColPartition
60 // merging needs attention to fix this problem.
61 const double kMaxPartitionSpacing = 1.75;
62 // Margin by which text has to beat image or vice-versa to make a firm
63 // decision in GridSmoothNeighbour.
64 const int kSmoothDecisionMargin = 4;
65 
// Constructor: forwards gridsize, bleft and tright unchanged to the
// BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT> base class and
// has an empty body.
// NOTE(review): the opening line of this definition (the qualified
// constructor name and the gridsize parameter) is missing from this listing;
// restore it from the original file before compiling.
67  const ICOORD& bleft, const ICOORD& tright)
68  : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
69  bleft, tright) {
70 }
71 
72 // Handles a click event in a display window.
73 void ColPartitionGrid::HandleClick(int x, int y) {
75  ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
76  // Run a radial search for partitions that overlap.
77  ColPartitionGridSearch radsearch(this);
78  radsearch.SetUniqueMode(true);
79  radsearch.StartRadSearch(x, y, 1);
80  ColPartition* neighbour;
81  FCOORD click(x, y);
82  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
83  const TBOX& nbox = neighbour->bounding_box();
84  if (nbox.contains(click)) {
85  tprintf("Block box:");
86  neighbour->bounding_box().print();
87  neighbour->Print();
88  }
89  }
90 }
91 
92 // Merges ColPartitions in the grid that look like they belong in the same
93 // textline.
94 // For all partitions in the grid, calls the box_cb callback
95 // to compute the search box, searches the box, and if a candidate is found,
96 // calls the confirm_cb to check any more rules. If the confirm_cb returns
97 // true, then the partitions are merged.
98 // Both callbacks are std::function objects taken by value; nothing is
// deleted by this function.
// NOTE(review): the opening line of this definition (return type and
// qualified name) is missing from this listing; restore it from the original
// file before compiling.
100  std::function<bool(ColPartition*, TBOX*)> box_cb,
101  std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb) {
102  // Iterate the ColPartitions in the grid.
103  ColPartitionGridSearch gsearch(this);
104  gsearch.StartFullSearch();
105  ColPartition* part;
106  while ((part = gsearch.NextFullSearch()) != nullptr) {
// A successful merge removes and re-inserts grid entries, so the full
// search is repositioned before continuing.
107  if (MergePart(box_cb, confirm_cb, part))
108  gsearch.RepositionIterator();
109  }
110 }
111 
112 // For the given partition, calls the box_cb permanent callback
113 // to compute the search box, searches the box, and if a candidate is found,
114 // calls the confirm_cb to check any more rules. If the confirm_cb returns
115 // true, then the partitions are merged.
116 // Returns true if the partition is consumed by one or more merges.
118  std::function<bool(ColPartition*, TBOX*)> box_cb,
119  std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb,
120  ColPartition* part) {
121  if (part->IsUnMergeableType())
122  return false;
123  bool any_done = false;
124  // Repeatedly merge part while we find a best merge candidate that works.
125  bool merge_done = false;
126  do {
127  merge_done = false;
128  TBOX box = part->bounding_box();
129  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
130  if (debug) {
131  tprintf("Merge candidate:");
132  box.print();
133  }
134  // Set up a rectangle search bounded by the part.
135  if (!box_cb(part, &box))
136  continue;
137  // Create a list of merge candidates.
138  ColPartition_CLIST merge_candidates;
139  FindMergeCandidates(part, box, debug, &merge_candidates);
140  // Find the best merge candidate based on minimal overlap increase.
141  int overlap_increase;
142  ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
143  confirm_cb,
144  &overlap_increase);
145  if (neighbour != nullptr && overlap_increase <= 0) {
146  if (debug) {
147  tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
148  part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
149  overlap_increase);
150  }
151  // Looks like a good candidate so merge it.
152  RemoveBBox(neighbour);
153  // We will modify the box of part, so remove it from the grid, merge
154  // it and then re-insert it into the grid.
155  RemoveBBox(part);
156  part->Absorb(neighbour, nullptr);
157  InsertBBox(true, true, part);
158  merge_done = true;
159  any_done = true;
160  } else if (neighbour != nullptr) {
161  if (debug) {
162  tprintf("Overlapped when merged with increase %d: ", overlap_increase);
163  neighbour->bounding_box().print();
164  }
165  } else if (debug) {
166  tprintf("No candidate neighbour returned\n");
167  }
168  } while (merge_done);
169  return any_done;
170 }
171 
172 // Returns true if the given part and merge candidate might believably
173 // be part of a single text line according to the default rules.
174 // In general we only want to merge partitions that look like they
175 // are on the same text line, ie their median limits overlap, but we have
176 // to make exceptions for diacritics and stray punctuation.
177 static bool OKMergeCandidate(const ColPartition* part,
178  const ColPartition* candidate,
179  bool debug) {
180  const TBOX& part_box = part->bounding_box();
181  if (candidate == part)
182  return false; // Ignore itself.
183  if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType())
184  return false; // Don't mix inappropriate types.
185 
186  const TBOX& c_box = candidate->bounding_box();
187  if (debug) {
188  tprintf("Examining merge candidate:");
189  c_box.print();
190  }
191  // Candidates must be within a reasonable distance.
192  if (candidate->IsVerticalType() || part->IsVerticalType()) {
193  int h_dist = -part->HCoreOverlap(*candidate);
194  if (h_dist >= std::max(part_box.width(), c_box.width()) / 2) {
195  if (debug)
196  tprintf("Too far away: h_dist = %d\n", h_dist);
197  return false;
198  }
199  } else {
200  // Coarse filter by vertical distance between partitions.
201  int v_dist = -part->VCoreOverlap(*candidate);
202  if (v_dist >= std::max(part_box.height(), c_box.height()) / 2) {
203  if (debug)
204  tprintf("Too far away: v_dist = %d\n", v_dist);
205  return false;
206  }
207  // Candidates must either overlap in median y,
208  // or part or candidate must be an acceptable diacritic.
209  if (!part->VSignificantCoreOverlap(*candidate) &&
210  !part->OKDiacriticMerge(*candidate, debug) &&
211  !candidate->OKDiacriticMerge(*part, debug)) {
212  if (debug)
213  tprintf("Candidate fails overlap and diacritic tests!\n");
214  return false;
215  }
216  }
217  return true;
218 }
219 
220 // Helper function to compute the increase in overlap of the parts list of
221 // Colpartitions with the combination of merge1 and merge2, compared to
222 // the overlap with them uncombined.
223 // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap
224 // as the pixel overlap limit. merge1 and merge2 must both be non-nullptr.
225 static int IncreaseInOverlap(const ColPartition* merge1,
226  const ColPartition* merge2,
227  int ok_overlap,
228  ColPartition_CLIST* parts) {
229  ASSERT_HOST(merge1 != nullptr && merge2 != nullptr);
230  int total_area = 0;
231  ColPartition_C_IT it(parts);
232  TBOX merged_box(merge1->bounding_box());
233  merged_box += merge2->bounding_box();
234  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
235  ColPartition* part = it.data();
236  if (part == merge1 || part == merge2)
237  continue;
238  TBOX part_box = part->bounding_box();
239  // Compute the overlap of the merged box with part.
240  int overlap_area = part_box.intersection(merged_box).area();
241  if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2,
242  ok_overlap, false)) {
243  total_area += overlap_area;
244  // Subtract the overlap of merge1 and merge2 individually.
245  overlap_area = part_box.intersection(merge1->bounding_box()).area();
246  if (overlap_area > 0)
247  total_area -= overlap_area;
248  TBOX intersection_box = part_box.intersection(merge2->bounding_box());
249  overlap_area = intersection_box.area();
250  if (overlap_area > 0) {
251  total_area -= overlap_area;
252  // Add back the 3-way area.
253  intersection_box &= merge1->bounding_box(); // In-place intersection.
254  overlap_area = intersection_box.area();
255  if (overlap_area > 0)
256  total_area += overlap_area;
257  }
258  }
259  }
260  return total_area;
261 }
262 
263 // Helper function to test that each partition in candidates is either a
264 // good diacritic merge with part or an OK merge candidate with all others
265 // in the candidates list.
266 // ASCII Art Scenario:
267 // We sometimes get text such as "join-this" where the - is actually a long
268 // dash culled from a standard set of extra characters that don't match the
269 // font of the text. This makes its strokewidth not match and forms a broken
270 // set of 3 partitions for "join", "-" and "this" and the dash may slightly
271 // overlap BOTH words.
272 // ------- -------
273 // | ==== |
274 // ------- -------
275 // The standard merge rule: "you can merge 2 partitions as long as there is
276 // no increase in overlap elsewhere" fails miserably here. Merge any pair
277 // of partitions and the combined box overlaps more with the third than
278 // before. To allow the merge, we need to consider whether it is safe to
279 // merge everything, without merging separate text lines. For that we need
280 // everything to be an OKMergeCandidate (which is supposed to prevent
281 // separate text lines merging), but this is hard for diacritics to satisfy,
282 // so an alternative to being OKMergeCandidate with everything is to be an
283 // OKDiacriticMerge with part as the base character.
284 static bool TestCompatibleCandidates(const ColPartition& part, bool debug,
285  ColPartition_CLIST* candidates) {
286  ColPartition_C_IT it(candidates);
287  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
288  ColPartition* candidate = it.data();
289  if (!candidate->OKDiacriticMerge(part, false)) {
290  ColPartition_C_IT it2(it);
291  for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
292  ColPartition* candidate2 = it2.data();
293  if (candidate2 != candidate &&
294  !OKMergeCandidate(candidate, candidate2, false)) {
295  if (debug) {
296  tprintf("NC overlap failed:Candidate:");
297  candidate2->bounding_box().print();
298  tprintf("fails to be a good merge with:");
299  candidate->bounding_box().print();
300  }
301  return false;
302  }
303  }
304  }
305  }
306  return true;
307 }
308 
309 // Computes and returns the total overlap of all partitions in the grid.
310 // If overlap_grid is non-null, it is filled with a grid that holds empty
311 // partitions representing the union of all overlapped partitions.
// The grid pointed to by overlap_grid is allocated here, with this grid's
// gridsize and corners, the first time an overlap is found while
// *overlap_grid is still nullptr. What is inserted into it are ShallowCopy()
// results of the overlapping partitions.
// NOTE(review): the opening line of this definition (return type, qualified
// name and the overlap_grid parameter) is missing from this listing; restore
// it from the original file before compiling.
313  int total_overlap = 0;
314  // Iterate the ColPartitions in the grid.
315  ColPartitionGridSearch gsearch(this);
316  gsearch.StartFullSearch();
317  ColPartition* part;
318  while ((part = gsearch.NextFullSearch()) != nullptr) {
319  ColPartition_CLIST neighbors;
320  const TBOX& part_box = part->bounding_box();
321  FindOverlappingPartitions(part_box, part, &neighbors);
322  ColPartition_C_IT n_it(&neighbors);
323  bool any_part_overlap = false;
324  for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
325  const TBOX& n_box = n_it.data()->bounding_box();
326  int overlap = n_box.intersection(part_box).area();
327  if (overlap > 0 && overlap_grid != nullptr) {
328  if (*overlap_grid == nullptr) {
329  *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
330  }
331  (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
// Insert the copy of part itself at most once per part.
332  if (!any_part_overlap) {
333  (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
334  }
335  }
// NOTE(review): this flag is set for every neighbour visited, even when
// overlap is 0 or overlap_grid is nullptr. If the first neighbour has zero
// overlap, the copy of part is never inserted for later overlapping
// neighbours -- confirm whether that is intended.
336  any_part_overlap = true;
337  total_overlap += overlap;
338  }
339  }
340  return total_overlap;
341 }
342 
343 // Finds all the ColPartitions in the grid that overlap with the given
344 // box and returns them SortByBoxLeft(ed) and uniqued in the given list.
345 // Any partition equal to not_this (may be nullptr) is excluded.
347  const ColPartition* not_this,
348  ColPartition_CLIST* parts) {
349  ColPartitionGridSearch rsearch(this);
350  rsearch.StartRectSearch(box);
351  ColPartition* part;
352  while ((part = rsearch.NextRectSearch()) != nullptr) {
353  if (part != not_this)
354  parts->add_sorted(SortByBoxLeft<ColPartition>, true, part);
355  }
356 }
357 
358 // Finds and returns the best candidate ColPartition to merge with part,
359 // selected from the candidates list, based on the minimum increase in
360 // pairwise overlap among all the partitions overlapped by the combined box.
361 // If overlap_increase is not nullptr then it returns the increase in overlap
362 // that would result from the merge.
363 // confirm_cb is a permanent callback that (if non-null) will be used to
364 // confirm the validity of a proposed merge candidate before selecting it.
365 //
366 // ======HOW MERGING WORKS======
367 // The problem:
368 // We want to merge all the parts of a textline together, but avoid merging
369 // separate textlines. Diacritics, i dots, punctuation, and broken characters
370 // are examples of small bits that need merging with the main textline.
371 // Drop-caps and descenders in one line that touch ascenders in the one below
372 // are examples of cases where we don't want to merge.
373 //
374 // The solution:
375 // Merges that increase overlap among other partitions are generally bad.
376 // Those that don't increase overlap (much) and minimize the total area
377 // seem to be good.
378 //
379 // Ascii art example:
380 // The text:
381 // groggy descenders
382 // minimum ascenders
383 // The boxes: The === represents a small box near or overlapping the lower box.
384 // -----------------
385 // | |
386 // -----------------
387 // -===-------------
388 // | |
389 // -----------------
390 // In considering what to do with the small === box, we find the 2 larger
391 // boxes as neighbours and possible merge candidates, but merging with the
392 // upper box increases overlap with the lower box, whereas merging with the
393 // lower box does not increase overlap.
394 // If the small === box didn't overlap either to start with, total area
395 // would be minimized by merging with the nearer (lower) box.
396 //
397 // This is a simple example. In reality, we have to allow some increase
398 // in overlap, or tightly spaced text would end up in bits.
// Parameters:
//   part:             the partition looking for something to merge with.
//   candidates:       list of possible merge partners; returns nullptr at
//                     once if it is empty.
//   debug:            when true, progress is printed with tprintf.
//   confirm_cb:       if non-null, a candidate is skipped when
//                     confirm_cb(part, candidate) returns false.
//   overlap_increase: if non-null, receives the selected increase in overlap
//                     (0 when the candidate list is empty).
// Returns the chosen candidate, or nullptr if none was confirmed.
// NOTE(review): the opening line of this definition (return type and
// qualified name) is missing from this listing, and so is the declaration of
// kTinyEnoughTextlineOverlapFraction used below; restore both from the
// original file before compiling.
400  const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
401  std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb,
402  int* overlap_increase) {
403  if (overlap_increase != nullptr)
404  *overlap_increase = 0;
405  if (candidates->empty())
406  return nullptr;
// Pixel overlap limit handed to IncreaseInOverlap: a fraction of the grid
// size, rounded to nearest.
407  int ok_overlap =
408  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
409  // The best neighbour to merge with is the one that causes least
410  // total pairwise overlap among all the neighbours.
411  // If more than one offers the same total overlap, choose the one
412  // with the least total area.
413  const TBOX& part_box = part->bounding_box();
414  ColPartition_C_IT it(candidates);
415  ColPartition* best_candidate = nullptr;
416  // Find the total combined box of all candidates and the original.
417  TBOX full_box(part_box);
418  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
419  ColPartition* candidate = it.data();
420  full_box += candidate->bounding_box();
421  }
422  // Keep valid neighbours in a list.
423  ColPartition_CLIST neighbours;
424  // Now run a rect search of the merged box for overlapping neighbours, as
425  // we need anything that might be overlapped by the merged box.
426  FindOverlappingPartitions(full_box, part, &neighbours);
427  if (debug) {
428  tprintf("Finding best merge candidate from %d, %d neighbours for box:",
429  candidates->length(), neighbours.length());
430  part_box.print();
431  }
432  // If the best increase in overlap is positive, then we also check the
433  // worst non-candidate overlap. This catches the case of multiple good
434  // candidates that overlap each other when merged. If the worst
435  // non-candidate overlap is better than the best overlap, then return
436  // the worst non-candidate overlap instead.
437  ColPartition_CLIST non_candidate_neighbours;
438  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
439  &neighbours, candidates);
440  int worst_nc_increase = 0;
441  int best_increase = INT32_MAX;
442  int best_area = 0;
443  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
444  ColPartition* candidate = it.data();
445  if (confirm_cb != nullptr && !confirm_cb(part, candidate)) {
446  if (debug) {
447  tprintf("Candidate not confirmed:");
448  candidate->bounding_box().print();
449  }
450  continue;
451  }
452  int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
453  const TBOX& cand_box = candidate->bounding_box();
454  if (best_candidate == nullptr || increase < best_increase) {
455  best_candidate = candidate;
456  best_increase = increase;
// Tie-break measure: area of the union box of candidate and part, minus
// the candidate's own area.
457  best_area = cand_box.bounding_union(part_box).area() - cand_box.area();
458  if (debug) {
459  tprintf("New best merge candidate has increase %d, area %d, over box:",
460  increase, best_area);
461  full_box.print();
462  candidate->Print();
463  }
464  } else if (increase == best_increase) {
465  int area = cand_box.bounding_union(part_box).area() - cand_box.area();
466  if (area < best_area) {
467  best_area = area;
468  best_candidate = candidate;
469  }
470  }
// Track the largest increase against neighbours that are not candidates.
471  increase = IncreaseInOverlap(part, candidate, ok_overlap,
472  &non_candidate_neighbours);
473  if (increase > worst_nc_increase)
474  worst_nc_increase = increase;
475  }
// NOTE(review): if no candidate was confirmed, best_increase is still
// INT32_MAX here and may be replaced by worst_nc_increase below, while the
// returned candidate is nullptr.
476  if (best_increase > 0) {
477  // If the worst non-candidate increase is less than the best increase
478  // including the candidates, then all the candidates can merge together
479  // and the increase in outside overlap would be less, so use that result,
480  // but only if each candidate is either a good diacritic merge with part,
481  // or an ok merge candidate with all the others.
482  // See TestCompatibleCandidates for more explanation and a picture.
483  if (worst_nc_increase < best_increase &&
484  TestCompatibleCandidates(*part, debug, candidates)) {
485  best_increase = worst_nc_increase;
486  }
487  }
488  if (overlap_increase != nullptr)
489  *overlap_increase = best_increase;
490  return best_candidate;
491 }
492 
493 // Helper to remove the given box from the given partition, put it in its
494 // own partition, and add to the partition list.
495 static void RemoveBadBox(BLOBNBOX* box, ColPartition* part,
496  ColPartition_LIST* part_list) {
497  part->RemoveBox(box);
498  ColPartition::MakeBigPartition(box, part_list);
499 }
500 
501 
502 // Split partitions where it reduces overlap between their bounding boxes.
503 // ColPartitions are after all supposed to be a partitioning of the blobs
504 // AND of the space on the page!
505 // Blobs that cause overlaps get removed, put in individual partitions
506 // and added to the big_parts list. They are most likely characters on
507 // 2 textlines that touch, or something big like a dropcap.
// Parameter big_parts receives the partitions made from removed blobs, and
// any singleton partition that is given up on.
// NOTE(review): the opening line of this definition (return type and
// qualified name) is missing from this listing, and so is the declaration of
// kTinyEnoughTextlineOverlapFraction used below; restore both from the
// original file before compiling.
509  ColPartition_LIST* big_parts) {
// Pixel overlap limit passed to OKMergeOverlap: a fraction of the grid
// size, rounded to nearest.
510  int ok_overlap =
511  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
512  // Iterate the ColPartitions in the grid.
513  ColPartitionGridSearch gsearch(this);
514  gsearch.StartFullSearch();
515  ColPartition* part;
516  while ((part = gsearch.NextFullSearch()) != nullptr) {
517  // Set up a rectangle search bounded by the part.
518  const TBOX& box = part->bounding_box();
519  ColPartitionGridSearch rsearch(this);
520  rsearch.SetUniqueMode(true);
521  rsearch.StartRectSearch(box);
// Counts neighbours wholly contained in a singleton part's box.
522  int unresolved_overlaps = 0;
523 
524  ColPartition* neighbour;
525  while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
526  if (neighbour == part)
527  continue;
528  const TBOX& neighbour_box = neighbour->bounding_box();
529  if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
530  part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
531  continue; // The overlap is OK both ways.
532 
533  // If removal of the biggest box from either partition eliminates the
534  // overlap, and it is much bigger than the box left behind, then
535  // it is either a drop-cap, an inter-line join, or some junk that
536  // we don't want anyway, so put it in the big_parts list.
537  if (!part->IsSingleton()) {
538  BLOBNBOX* excluded = part->BiggestBox();
539  TBOX shrunken = part->BoundsWithoutBox(excluded);
540  if (!shrunken.overlap(neighbour_box) &&
541  excluded->bounding_box().height() >
542  kBigPartSizeRatio * shrunken.height()) {
543  // Removing the biggest box fixes the overlap, so do it!
544  gsearch.RemoveBBox();
545  RemoveBadBox(excluded, part, big_parts);
546  InsertBBox(true, true, part);
547  gsearch.RepositionIterator();
548  break;
549  }
550  } else if (box.contains(neighbour_box)) {
551  ++unresolved_overlaps;
552  continue; // No amount of splitting will fix it.
553  }
// Same biggest-box test, applied to the neighbour instead of part.
554  if (!neighbour->IsSingleton()) {
555  BLOBNBOX* excluded = neighbour->BiggestBox();
556  TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
557  if (!shrunken.overlap(box) &&
558  excluded->bounding_box().height() >
559  kBigPartSizeRatio * shrunken.height()) {
560  // Removing the biggest box fixes the overlap, so do it!
561  rsearch.RemoveBBox();
562  RemoveBadBox(excluded, neighbour, big_parts);
563  InsertBBox(true, true, neighbour);
564  gsearch.RepositionIterator();
565  break;
566  }
567  }
// Choose which side to split: the neighbour when it has no more boxes
// overlapping than part has, or when part is a singleton; otherwise part.
568  int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
569  int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
570  ColPartition* right_part = nullptr;
571  if (neighbour_overlap_count <= part_overlap_count ||
572  part->IsSingleton()) {
573  // Try to split the neighbour to reduce overlap.
574  BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
575  if (split_blob != nullptr) {
576  rsearch.RemoveBBox();
577  right_part = neighbour->SplitAtBlob(split_blob);
578  InsertBBox(true, true, neighbour);
579  ASSERT_HOST(right_part != nullptr);
580  }
581  } else {
582  // Try to split part to reduce overlap.
583  BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
584  if (split_blob != nullptr) {
585  gsearch.RemoveBBox();
586  right_part = part->SplitAtBlob(split_blob);
587  InsertBBox(true, true, part);
588  ASSERT_HOST(right_part != nullptr);
589  }
590  }
// A split happened: insert the new piece, reposition both searches and
// stop examining neighbours of this part.
591  if (right_part != nullptr) {
592  InsertBBox(true, true, right_part);
593  gsearch.RepositionIterator();
594  rsearch.RepositionIterator();
595  break;
596  }
597  }
598  if (unresolved_overlaps > 2 && part->IsSingleton()) {
599  // This part is no good so just add to big_parts.
600  RemoveBBox(part);
601  ColPartition_IT big_it(big_parts);
602  part->set_block_owned(true);
603  big_it.add_to_end(part);
604  gsearch.RepositionIterator();
605  }
606  }
607 }
608 
609 // Filters partitions of source_type by looking at local neighbours.
610 // Where a majority of neighbours have a text type, the partitions are
611 // changed to text, where the neighbours have image type, they are changed
612 // to image, and partitions that have no definite neighbourhood type are
613 // left unchanged.
614 // im_box and rotation are used to map blob coordinates onto the
615 // nontext_map, which is used to prevent the spread of text neighbourhoods
616 // into images.
617 // Returns true if anything was changed.
// The per-partition decision is delegated to SmoothRegionType; this function
// only selects the partitions to examine.
// NOTE(review): the opening line of this definition (return type, qualified
// name and the source_type parameter) is missing from this listing; restore
// it from the original file before compiling.
619  Pix* nontext_map,
620  const TBOX& im_box,
621  const FCOORD& rotation) {
622  // Iterate the ColPartitions in the grid.
623  ColPartitionGridSearch gsearch(this);
624  gsearch.StartFullSearch();
625  ColPartition* part;
626  bool any_changed = false;
627  while ((part = gsearch.NextFullSearch()) != nullptr) {
// Skip partitions whose flow is not source_type, and line-type blobs.
628  if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
629  continue;
630  const TBOX& box = part->bounding_box();
631  bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
632  if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
633  any_changed = true;
634  }
635  return any_changed;
636 }
637 
638 // Reflects the grid and its colpartitions in the y-axis, assuming that
639 // all blob boxes have already been done.
// NOTE(review): the opening line of this definition (return type, qualified
// name and parameter list) is missing from this listing; restore it from the
// original file before compiling.
641  ColPartition_LIST parts;
642  ColPartition_IT part_it(&parts);
643  // Iterate the ColPartitions in the grid to extract them.
644  ColPartitionGridSearch gsearch(this);
645  gsearch.StartFullSearch();
646  ColPartition* part;
647  while ((part = gsearch.NextFullSearch()) != nullptr) {
648  part_it.add_after_then_move(part);
649  }
// New corners: x coordinates are negated and swapped, y is unchanged.
650  ICOORD bot_left(-tright().x(), bleft().y());
651  ICOORD top_right(-bleft().x(), tright().y());
652  // Reinitializing the grid with reflected coords also clears all the
653  // pointers, so parts will now own the ColPartitions. (Briefly).
654  Init(gridsize(), bot_left, top_right);
// Take each partition back off the temporary list, reflect it and put it
// into the re-initialized grid.
655  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
656  part = part_it.extract();
657  part->ReflectInYAxis();
658  InsertBBox(true, true, part);
659  }
660 }
661 
662 // Transforms the grid of partitions to the output blocks, putting each
663 // partition into a separate block. We don't really care about the order,
664 // as we just want to get as much text as possible without trying to organize
665 // it into proper blocks or columns.
666 // TODO(rays) some kind of sort function would be useful and probably better
667 // than the default here, which is to sort by order of the grid search.
// One BLOCK is appended to blocks and one TO_BLOCK to to_blocks for each
// partition that is kept. The grid is cleared before returning.
// NOTE(review): the opening line of this definition (return type, qualified
// name and the blocks parameter) is missing from this listing; restore it
// from the original file before compiling.
669  TO_BLOCK_LIST* to_blocks) {
670  TO_BLOCK_IT to_block_it(to_blocks);
671  BLOCK_IT block_it(blocks);
672  // All partitions will be put on this list and deleted on return.
673  ColPartition_LIST parts;
674  ColPartition_IT part_it(&parts);
675  // Iterate the ColPartitions in the grid to extract them.
676  ColPartitionGridSearch gsearch(this);
677  gsearch.StartFullSearch();
678  ColPartition* part;
679  while ((part = gsearch.NextFullSearch()) != nullptr) {
680  part_it.add_after_then_move(part);
681  // The partition has to be at least vaguely like text.
682  BlobRegionType blob_type = part->blob_type();
683  if (BLOBNBOX::IsTextType(blob_type) ||
684  (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) {
// NOTE(review): the first line of the statement that declares `type` (used
// for the POLY_BLOCK below) is missing from this listing; only the tail of
// its conditional expression survives on the next line. Restore it from the
// original file.
686  : PT_FLOWING_TEXT;
687  // Get metrics from the row that will be used for the block.
688  TBOX box = part->bounding_box();
689  int median_width = part->median_width();
690  int median_height = part->median_height();
691  // Turn the partition into a TO_ROW.
692  TO_ROW* row = part->MakeToRow();
693  if (row == nullptr) {
694  // This partition is dead.
695  part->DeleteBoxes();
696  continue;
697  }
// The block's bounds are the partition's bounding box.
698  auto* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
699  box.right(), box.top());
700  block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
701  auto* to_block = new TO_BLOCK(block);
702  TO_ROW_IT row_it(to_block->get_rows());
703  row_it.add_after_then_move(row);
704  // We haven't differentially rotated vertical and horizontal text at
705  // this point, so use width or height as appropriate.
706  if (blob_type == BRT_VERT_TEXT) {
707  to_block->line_size = static_cast<float>(median_width);
708  to_block->line_spacing = static_cast<float>(box.width());
709  to_block->max_blob_size = static_cast<float>(box.width() + 1);
710  } else {
711  to_block->line_size = static_cast<float>(median_height);
712  to_block->line_spacing = static_cast<float>(box.height());
713  to_block->max_blob_size = static_cast<float>(box.height() + 1);
714  }
// Never leave line_size at zero.
715  if (to_block->line_size == 0) to_block->line_size = 1;
716  block_it.add_to_end(block);
717  to_block_it.add_to_end(to_block);
718  } else {
719  // This partition is dead.
720  part->DeleteBoxes();
721  }
722  }
723  Clear();
724  // Now it is safe to delete the ColPartitions as parts goes out of scope.
725 }
726 
727 // Rotates the grid and its colpartitions by the given angle, assuming that
728 // all blob boxes have already been done.
729 void ColPartitionGrid::Deskew(const FCOORD& deskew) {
730  ColPartition_LIST parts;
731  ColPartition_IT part_it(&parts);
732  // Iterate the ColPartitions in the grid to extract them.
733  ColPartitionGridSearch gsearch(this);
734  gsearch.StartFullSearch();
735  ColPartition* part;
736  while ((part = gsearch.NextFullSearch()) != nullptr) {
737  part_it.add_after_then_move(part);
738  }
739  // Rebuild the grid to the new size.
740  TBOX grid_box(bleft_, tright_);
741  grid_box.rotate_large(deskew);
742  Init(gridsize(), grid_box.botleft(), grid_box.topright());
743  // Reinitializing the grid with rotated coords also clears all the
744  // pointers, so parts will now own the ColPartitions. (Briefly).
745  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
746  part = part_it.extract();
747  part->ComputeLimits();
748  InsertBBox(true, true, part);
749  }
750 }
751 
752 // Sets the left and right tabs of the partitions in the grid.
// For each side the tab grid is first queried with its second argument true;
// if the vector returned is not a tab of that side, it is queried again with
// false. A tab is only set on the partition when the final vector is of the
// right kind. SetColumnGoodness is then called with the tab grid's WidthCB().
// NOTE(review): the opening line of this definition (return type, qualified
// name and the tabgrid parameter) is missing from this listing; restore it
// from the original file before compiling.
754  // Iterate the ColPartitions in the grid.
755  ColPartitionGridSearch gsearch(this);
756  gsearch.StartFullSearch();
757  ColPartition* part;
758  while ((part = gsearch.NextFullSearch()) != nullptr) {
759  const TBOX& part_box = part->bounding_box();
760  TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false);
761  // If the overlapping line is not a left tab, try for non-overlapping.
762  if (left_line != nullptr && !left_line->IsLeftTab())
763  left_line = tabgrid->LeftTabForBox(part_box, false, false);
764  if (left_line != nullptr && left_line->IsLeftTab())
765  part->SetLeftTab(left_line);
766  TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false);
// Same fallback as for the left side.
767  if (right_line != nullptr && !right_line->IsRightTab())
768  right_line = tabgrid->RightTabForBox(part_box, false, false);
769  if (right_line != nullptr && right_line->IsRightTab())
770  part->SetRightTab(right_line);
771  part->SetColumnGoodness(tabgrid->WidthCB());
772  }
773 }
774 
775 // Makes the ColPartSets and puts them in the PartSetVector ready
776 // for finding column bounds. Returns false if no partitions were found.
// When at least one partition qualifies, part_sets receives exactly
// gridheight() entries, one per grid row, with nullptr for rows that have no
// qualifying partition. When none qualifies, nothing is pushed to part_sets.
// NOTE(review): listing line 777 (the function signature) is absent from this
// extract; the body uses a `part_sets` argument -- confirm the signature
// against the upstream file.
 // One temporary list per grid row, released with delete [] before returning.
778  auto* part_lists = new ColPartition_LIST[gridheight()];
779  part_sets->reserve(gridheight());
780  // Iterate the ColPartitions in the grid to get parts onto lists for the
781  // y bottom of each.
782  ColPartitionGridSearch gsearch(this);
783  gsearch.StartFullSearch();
784  ColPartition* part;
785  bool any_parts_found = false;
786  while ((part = gsearch.NextFullSearch()) != nullptr) {
787  BlobRegionType blob_type = part->blob_type();
 // Skip noise, and skip BRT_UNKNOWN partitions that hold a single box.
788  if (blob_type != BRT_NOISE &&
789  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
790  int grid_x, grid_y;
791  const TBOX& part_box = part->bounding_box();
 // The row is chosen from the bottom-left corner of the bounding box.
792  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
793  ColPartition_IT part_it(&part_lists[grid_y]);
794  part_it.add_to_end(part);
795  any_parts_found = true;
796  }
797  }
798  if (any_parts_found) {
799  for (int grid_y = 0; grid_y < gridheight(); ++grid_y) {
 // Empty rows still get an entry (nullptr) so indices match grid rows.
800  ColPartitionSet* line_set = nullptr;
801  if (!part_lists[grid_y].empty()) {
802  line_set = new ColPartitionSet(&part_lists[grid_y]);
803  }
804  part_sets->push_back(line_set);
805  }
806  }
807  delete [] part_lists;
808  return any_parts_found;
809 }
810 
811 // Makes a single ColPartitionSet consisting of a single ColPartition that
812 // represents the total horizontal extent of the significant content on the
813 // page. Used for the single column setting in place of automatic detection.
814 // Returns nullptr if the page is empty of significant content.
// The returned set is allocated with new from a shallow copy of the first
// qualifying partition, widened by the tabs of later qualifying partitions.
// NOTE(review): listing line 815 (the function signature) is absent from this
// extract; the body uses a `cb` argument that is passed to SetColumnGoodness
// -- confirm the signature against the upstream file.
816  ColPartition* single_column_part = nullptr;
817  // Iterate the ColPartitions in the grid to get parts onto lists for the
818  // y bottom of each.
819  ColPartitionGridSearch gsearch(this);
820  gsearch.StartFullSearch();
821  ColPartition* part;
822  while ((part = gsearch.NextFullSearch()) != nullptr) {
823  BlobRegionType blob_type = part->blob_type();
 // Skip noise, and skip BRT_UNKNOWN partitions that hold a single box.
824  if (blob_type != BRT_NOISE &&
825  (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) {
826  // Consider for single column.
827  BlobTextFlowType flow = part->flow();
 // Qualifying content: text with one of the listed flow types, or an
 // image partition (rectangular or polygonal).
828  if ((blob_type == BRT_TEXT &&
829  (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
830  flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) ||
831  blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) {
832  if (single_column_part == nullptr) {
 // First qualifying partition seeds the result and is forced to text.
833  single_column_part = part->ShallowCopy();
834  single_column_part->set_blob_type(BRT_TEXT);
835  // Copy the tabs from itself to properly setup the margins.
836  single_column_part->CopyLeftTab(*single_column_part, false);
837  single_column_part->CopyRightTab(*single_column_part, false);
838  } else {
 // Later partitions only extend the result: take the left tab of any
 // part with a smaller left_key and the right tab of any part with a
 // larger right_key.
839  if (part->left_key() < single_column_part->left_key())
840  single_column_part->CopyLeftTab(*part, false);
841  if (part->right_key() > single_column_part->right_key())
842  single_column_part->CopyRightTab(*part, false);
843  }
844  }
845  }
846  }
847  if (single_column_part != nullptr) {
848  // Make a ColPartitionSet out of the single_column_part as a candidate
849  // for the single column case.
850  single_column_part->SetColumnGoodness(cb);
851  return new ColPartitionSet(single_column_part);
852  }
853  return nullptr;
854 }
855 
856 // Mark the BLOBNBOXes in each partition as being owned by that partition.
// Implemented by calling ClaimBoxes() on every partition found by a full
// search of the grid.
// NOTE(review): listing line 857 (the function signature) is absent from this
// extract; confirm it against the upstream file.
858  // Iterate the ColPartitions in the grid.
859  ColPartitionGridSearch gsearch(this);
860  gsearch.StartFullSearch();
861  ColPartition* part;
862  while ((part = gsearch.NextFullSearch()) != nullptr) {
863  part->ClaimBoxes();
864  }
865 }
866 
867 // Retypes all the blobs referenced by the partitions in the grid.
868 // Image blobs are found and returned in the im_blobs list, as they are not
869 // owned by the block.
// Per partition, in order:
//  * image partitions (BRT_POLYIMAGE / BRT_RECTIMAGE) have their blobs added
//    to im_blobs;
//  * other non-noise partitions push their own blob_type and flow onto their
//    blobs (blobs whose flow is BTFT_LEADER keep that flow), and blobs whose
//    region type is BRT_NOISE are disowned and extracted from the partition;
//  * noise partitions, and partitions left with no boxes, are removed from
//    the grid and collected on a local list;
//  * partitions that lost some blobs but are not dead are re-limited and
//    reinserted in the grid.
870 void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) {
871  BLOBNBOX_IT im_blob_it(im_blobs);
 // Partitions removed from the grid are kept on this local list.
872  ColPartition_LIST dead_parts;
873  ColPartition_IT dead_part_it(&dead_parts);
874  // Iterate the ColPartitions in the grid.
875  ColPartitionGridSearch gsearch(this);
876  gsearch.StartFullSearch();
877  ColPartition* part;
878  while ((part = gsearch.NextFullSearch()) != nullptr) {
879  BlobRegionType blob_type = part->blob_type();
880  BlobTextFlowType flow = part->flow();
 // Set when a blob is extracted from the partition, so its limits need
 // recomputing below.
881  bool any_blobs_moved = false;
882  if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
 // Image partition: hand every blob to the caller's im_blobs list.
883  BLOBNBOX_C_IT blob_it(part->boxes());
884  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
885  BLOBNBOX* blob = blob_it.data();
886  im_blob_it.add_after_then_move(blob);
887  }
888  } else if (blob_type != BRT_NOISE) {
889  // Make sure the blobs are marked with the correct type and flow.
890  BLOBNBOX_C_IT blob_it(part->boxes());
891  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
892  BLOBNBOX* blob = blob_it.data();
893  if (blob->region_type() == BRT_NOISE) {
894  // TODO(rays) Deprecated. Change this section to an assert to verify
895  // and then delete.
 // A noise blob inside a non-noise partition must have non-zero area;
 // it is disowned and dropped from the partition's box list.
896  ASSERT_HOST(blob->cblob()->area() != 0);
897  blob->set_owner(nullptr);
898  blob_it.extract();
899  any_blobs_moved = true;
900  } else {
901  blob->set_region_type(blob_type);
 // Leader flow on a blob is preserved; everything else takes the
 // partition's flow.
902  if (blob->flow() != BTFT_LEADER)
903  blob->set_flow(flow);
904  }
905  }
906  }
907  if (blob_type == BRT_NOISE || part->boxes()->empty()) {
 // Dead partition: disown its boxes, park it on dead_parts and take it
 // out of the grid. blob_it is created before DisownBoxes() and is used
 // afterwards to walk the same box list.
908  BLOBNBOX_C_IT blob_it(part->boxes());
909  part->DisownBoxes();
910  dead_part_it.add_to_end(part);
911  gsearch.RemoveBBox();
912  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
913  BLOBNBOX* blob = blob_it.data();
914  if (blob->cblob()->area() == 0) {
915  // Any blob with zero area is a fake image blob and should be deleted.
916  delete blob->cblob();
917  delete blob;
918  }
919  }
920  } else if (any_blobs_moved) {
 // The box list changed: remove, recompute limits, reinsert, and then
 // reposition the search iterator after the grid modification.
921  gsearch.RemoveBBox();
922  part->ComputeLimits();
923  InsertBBox(true, true, part);
924  gsearch.RepositionIterator();
925  }
926  }
927 }
928 
929 // The boxes within the partitions have changed (by deskew) so recompute
930 // the bounds of all the partitions and reinsert them into the grid.
// Each partition also has its vertical direction set to `vertical` before
// its limits are recomputed.
// NOTE(review): two listing lines are absent from this extract: line 931
// (the start of the function signature) and line 945 (the statement that
// follows the "Reinitialize grid" comment). Confirm both against the upstream
// file before relying on this text.
932  const ICOORD& bleft,
933  const ICOORD& tright,
934  const ICOORD& vertical) {
 // Temporary holder for the partitions while the grid is rebuilt.
935  ColPartition_LIST saved_parts;
936  ColPartition_IT part_it(&saved_parts);
937  // Iterate the ColPartitions in the grid to get parts onto a list.
938  ColPartitionGridSearch gsearch(this);
939  gsearch.StartFullSearch();
940  ColPartition* part;
941  while ((part = gsearch.NextFullSearch()) != nullptr) {
942  part_it.add_to_end(part);
943  }
944  // Reinitialize grid to the new size.
946  // Recompute the bounds of the parts and put them back in the new grid.
 // Each part is extracted from saved_parts, so the list is empty at the end.
947  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
948  part = part_it.extract();
949  part->set_vertical(vertical);
950  part->ComputeLimits();
951  InsertBBox(true, true, part);
952  }
953 }
954 
955 // Improves the margins of the ColPartitions in the grid by calling
956 // FindPartitionMargins on each.
957 // best_columns, which may be nullptr, is an array of pointers indicating the
958 // column set at each y-coordinate in the grid.
959 // best_columns is usually the best_columns_ member of ColumnFinder.
// NOTE(review): listing line 960 (the function signature) is absent from this
// extract; confirm it against the upstream file.
961  // Iterate the ColPartitions in the grid.
962  ColPartitionGridSearch gsearch(this);
963  gsearch.StartFullSearch();
964  ColPartition* part;
965  while ((part = gsearch.NextFullSearch()) != nullptr) {
966  // Set up a rectangle search x-bounded by the column and y by the part.
 // The column set is indexed by the search's current grid row; nullptr is
 // passed on when no best_columns array was supplied.
967  ColPartitionSet* columns = best_columns != nullptr
968  ? best_columns[gsearch.GridY()]
969  : nullptr;
970  FindPartitionMargins(columns, part);
971  const TBOX& box = part->bounding_box();
 // Debug output only for parts inside the configured test region.
972  if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) {
973  tprintf("Computed margins for part:");
974  part->Print();
975  }
976  }
977 }
978 
979 // Improves the margins of the ColPartitions in the list by calling
980 // FindPartitionMargins on each.
981 // best_columns, which may be nullptr, is an array of pointers indicating the
982 // column set at each y-coordinate in the grid.
983 // best_columns is usually the best_columns_ member of ColumnFinder.
// Unlike the grid variant, the partitions come from the `parts` list, so the
// grid row is derived from each part's bounding box.
// NOTE(review): listing line 984 (the start of the function signature) is
// absent from this extract; confirm it against the upstream file.
985  ColPartition_LIST* parts) {
986  ColPartition_IT part_it(parts);
987  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
988  ColPartition* part = part_it.data();
989  ColPartitionSet* columns = nullptr;
990  if (best_columns != nullptr) {
991  const TBOX& part_box = part->bounding_box();
992  // Get the columns from the y grid coord.
 // The row comes from the bottom-left corner of the box; grid_x is unused.
993  int grid_x, grid_y;
994  GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
995  columns = best_columns[grid_y];
996  }
997  FindPartitionMargins(columns, part);
998  }
999 }
1000 
1001 // Deletes all the partitions in the grid after disowning all the blobs.
// The partitions are moved to a function-local list and the grid is cleared;
// the actual deletion happens when that list goes out of scope.
// NOTE(review): listing line 1002 (the function signature) is absent from
// this extract; confirm it against the upstream file.
1003  ColPartition_LIST dead_parts;
1004  ColPartition_IT dead_it(&dead_parts);
1005  ColPartitionGridSearch gsearch(this);
1006  gsearch.StartFullSearch();
1007  ColPartition* part;
1008  while ((part = gsearch.NextFullSearch()) != nullptr) {
1009  part->DisownBoxes();
1010  dead_it.add_to_end(part); // Parts will be deleted on return.
1011  }
 // Drop the grid's pointers to the partitions now held by dead_parts.
1012  Clear();
1013 }
1014 
1015 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
1016 // all the blobs in them.
// The blobs are not deleted directly: each partition marks its blobs as
// noise and disowns them, and block->DeleteUnownedNoise() is called once at
// the end to sweep them up.
// NOTE(review): listing line 1017 (the function signature) is absent from
// this extract; the body uses a `block` argument -- confirm the signature
// against the upstream file.
1018  ColPartitionGridSearch gsearch(this);
1019  gsearch.StartFullSearch();
1020  ColPartition* part;
1021  while ((part = gsearch.NextFullSearch()) != nullptr) {
1022  if (part->blob_type() == BRT_UNKNOWN) {
 // Take the partition out of the grid before it is deleted.
1023  gsearch.RemoveBBox();
1024  // Once marked, the blobs will be swept up by DeleteUnownedNoise.
1025  part->set_flow(BTFT_NONTEXT);
1026  part->set_blob_type(BRT_NOISE);
 // Propagate the new type/flow from the partition to its blobs.
1027  part->SetBlobTypes();
1028  part->DisownBoxes();
1029  delete part;
1030  }
1031  }
1032  block->DeleteUnownedNoise();
1033 }
1034 
1035 // Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER.
// A non-leader partition is first removed from the grid and asked to
// ReleaseNonLeaderBoxes(). If that returns true the partition is kept and
// reinserted; otherwise it is deleted.
// NOTE(review): listing line 1036 (the function signature) is absent from
// this extract; confirm it against the upstream file.
1037  ColPartitionGridSearch gsearch(this);
1038  gsearch.StartFullSearch();
1039  ColPartition* part;
1040  while ((part = gsearch.NextFullSearch()) != nullptr) {
1041  if (part->flow() != BTFT_LEADER) {
1042  gsearch.RemoveBBox();
1043  if (part->ReleaseNonLeaderBoxes()) {
 // The partition survives: put it back and reposition the search
 // iterator after the grid modification.
1044  InsertBBox(true, true, part);
1045  gsearch.RepositionIterator();
1046  } else {
1047  delete part;
1048  }
1049  }
1050  }
1051 }
1052 
1053 // Finds and marks text partitions that represent figure captions.
// For every image partition:
//  1. in each direction (lower partners, then upper partners) that has no
//     image partner, pick the nearest non-table text partner whose box lies
//     horizontally within the image's box;
//  2. follow the chain of singleton partners from that candidate, counting
//     lines and tracking the biggest and smallest vertical gaps;
//  3. if the chain has at most kMaxCaptionLines lines, mark the partitions
//     from the candidate up to (not including) the end partner as
//     PT_CAPTION_TEXT.
// NOTE(review): listing line 1054 (the function signature) is absent from
// this extract; confirm it against the upstream file.
1055  // For each image region find its best candidate text caption region,
1056  // if any and mark it as such.
1057  ColPartitionGridSearch gsearch(this);
1058  gsearch.StartFullSearch();
1059  ColPartition* part;
1060  while ((part = gsearch.NextFullSearch()) != nullptr) {
1061  if (part->IsImageType()) {
1062  const TBOX& part_box = part->bounding_box();
1063  bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
1064  part_box.bottom());
1065  ColPartition* best_caption = nullptr;
1066  int best_dist = 0; // Distance to best_caption.
1067  int best_upper = 0; // Direction of best_caption.
1068  // Handle both lower and upper directions.
1069  for (int upper = 0; upper < 2; ++upper) {
1070  ColPartition_C_IT partner_it(upper ? part->upper_partners()
1071  : part->lower_partners());
1072  // If there are no image partners, then this direction is ok.
1073  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1074  partner_it.forward()) {
1075  ColPartition* partner = partner_it.data();
1076  if (partner->IsImageType()) {
1077  break;
1078  }
1079  }
 // The loop above only stops early on an image partner, so an unfinished
 // cycle means this direction is rejected.
1080  if (!partner_it.cycled_list()) continue;
1081  // Find the nearest totally overlapping text partner.
1082  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
1083  partner_it.forward()) {
1084  ColPartition* partner = partner_it.data();
1085  if (!partner->IsTextType() || partner->type() == PT_TABLE) continue;
1086  const TBOX& partner_box = partner->bounding_box();
1087  if (debug) {
1088  tprintf("Finding figure captions for image part:");
1089  part_box.print();
1090  tprintf("Considering partner:");
1091  partner_box.print();
1092  }
 // The partner must sit horizontally inside the image's box.
1093  if (partner_box.left() >= part_box.left() &&
1094  partner_box.right() <= part_box.right()) {
1095  int dist = partner_box.y_gap(part_box);
 // Keep the candidate with the smallest vertical gap across both
 // directions; ties keep the earlier candidate.
1096  if (best_caption == nullptr || dist < best_dist) {
1097  best_dist = dist;
1098  best_caption = partner;
1099  best_upper = upper;
1100  }
1101  }
1102  }
1103  }
1104  if (best_caption != nullptr) {
1105  if (debug) {
1106  tprintf("Best caption candidate:");
1107  best_caption->bounding_box().print();
1108  }
1109  // We have a candidate caption. Qualify it as being separable from
1110  // any body text. We are looking for either a small number of lines
1111  // or a big gap that indicates a separation from the body text.
1112  int line_count = 0;
1113  int biggest_gap = 0;
1114  int smallest_gap = INT16_MAX;
1115  int total_height = 0;
1116  int mean_height = 0;
 // end_partner is the first partition that is NOT part of the caption;
 // next_partner is the next link in the singleton-partner chain.
1117  ColPartition* end_partner = nullptr;
1118  ColPartition* next_partner = nullptr;
 // Walk the chain away from the image, stopping after more than
 // kMaxCaptionLines lines, at a non-text partition, or at a decisive gap.
1119  for (ColPartition* partner = best_caption; partner != nullptr &&
1120  line_count <= kMaxCaptionLines;
1121  partner = next_partner) {
1122  if (!partner->IsTextType()) {
1123  end_partner = partner;
1124  break;
1125  }
1126  ++line_count;
1127  total_height += partner->bounding_box().height();
1128  next_partner = partner->SingletonPartner(best_upper);
1129  if (next_partner != nullptr) {
1130  int gap = partner->bounding_box().y_gap(
1131  next_partner->bounding_box());
 // A new biggest gap moves the caption end to the partner beyond it
 // and fixes mean_height at the mean of the lines seen so far. A gap
 // only updates smallest_gap when it is not a new biggest gap.
1132  if (gap > biggest_gap) {
1133  biggest_gap = gap;
1134  end_partner = next_partner;
1135  mean_height = total_height / line_count;
1136  } else if (gap < smallest_gap) {
1137  smallest_gap = gap;
1138  }
1139  // If the gap looks big compared to the text size and the smallest
1140  // gap seen so far, then we can stop.
1141  if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
1142  biggest_gap > smallest_gap * kMinCaptionGapRatio)
1143  break;
1144  }
1145  }
1146  if (debug) {
1147  tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
1148  line_count, biggest_gap, smallest_gap, mean_height);
1149  if (end_partner != nullptr) {
1150  tprintf("End partner:");
1151  end_partner->bounding_box().print();
1152  }
1153  }
 // The chain ran out with few enough lines: the whole chain is the
 // caption, so there is no end partner to stop at.
1154  if (next_partner == nullptr && line_count <= kMaxCaptionLines)
1155  end_partner = nullptr; // No gap, but line count is small.
1156  if (line_count <= kMaxCaptionLines) {
1157  // This is a qualified caption. Mark the text as caption.
1158  for (ColPartition* partner = best_caption; partner != nullptr &&
1159  partner != end_partner;
1160  partner = next_partner) {
1161  partner->set_type(PT_CAPTION_TEXT);
1162  partner->SetBlobTypes();
1163  if (debug) {
1164  tprintf("Set caption type for partition:");
1165  partner->bounding_box().print();
1166  }
1167  next_partner = partner->SingletonPartner(best_upper);
1168  }
1169  }
1170  }
1171  }
1172  }
1173 }
1174 
1177 
1178 // For every ColPartition in the grid, finds its upper and lower neighbours.
// Vertical-type partitions are handled by FindVPartitionPartners, all others
// by the two-argument FindPartitionPartners; each is called once with true
// and once with false to cover both directions.
// NOTE(review): listing line 1179 (the function signature) is absent from
// this extract; confirm it against the upstream file.
1180  ColPartitionGridSearch gsearch(this);
1181  gsearch.StartFullSearch();
1182  ColPartition* part;
1183  while ((part = gsearch.NextFullSearch()) != nullptr) {
1184  if (part->IsVerticalType()) {
1185  FindVPartitionPartners(true, part);
1186  FindVPartitionPartners(false, part);
1187  } else {
1188  FindPartitionPartners(true, part);
1189  FindPartitionPartners(false, part);
1190  }
1191  }
1192 }
1193 
1194 // Finds the best partner in the given direction for the given partition.
1195 // Stores the result with AddPartner.
// `upper` selects the direction: true looks for a partner above `part`,
// false for one below. Noise partitions never partner, in either role.
// NOTE(review): listing line 1196 (the function signature) is absent from
// this extract; the body uses `upper` and `part` -- confirm the signature
// against the upstream file.
1197  if (part->type() == PT_NOISE)
1198  return; // Noise is not allowed to partner anything.
1199  const TBOX& box = part->bounding_box();
 // Median top/bottom are used for the height and vertical centre.
1200  int top = part->median_top();
1201  int bottom = part->median_bottom();
1202  int height = top - bottom;
1203  int mid_y = (bottom + top) / 2;
1204  ColPartitionGridSearch vsearch(this);
1205  // Search vertically from the part's MidY; `upper` picks the direction.
1206  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
1207  ColPartition* neighbour;
1208  ColPartition* best_neighbour = nullptr;
1209  int best_dist = INT32_MAX;
1210  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) {
1211  if (neighbour == part || neighbour->type() == PT_NOISE)
1212  continue; // Noise is not allowed to partner anything.
1213  int neighbour_bottom = neighbour->median_bottom();
1214  int neighbour_top = neighbour->median_top();
1215  int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
 // Ignore neighbours whose centre is on the wrong side of the part's.
1216  if (upper != (neighbour_y > mid_y))
1217  continue;
 // The neighbour must overlap horizontally or share the part's margins.
1218  if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
1219  continue;
1220  if (!part->TypesMatch(*neighbour)) {
 // A type-mismatched neighbour is only a fallback: it is taken when
 // nothing has been chosen yet, and best_dist is left untouched so any
 // later type-matched neighbour within range replaces it.
1221  if (best_neighbour == nullptr)
1222  best_neighbour = neighbour;
1223  continue;
1224  }
 // Vertical gap between the facing median edges.
1225  int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
1226  if (dist <= kMaxPartitionSpacing * height) {
1227  if (dist < best_dist) {
1228  best_dist = dist;
1229  best_neighbour = neighbour;
1230  }
1231  } else {
 // First type-matched neighbour beyond the spacing limit ends the search.
1232  break;
1233  }
1234  }
1235  if (best_neighbour != nullptr)
1236  part->AddPartner(upper, best_neighbour);
1237 }
1238 
1239 // Finds the best partner in the given direction for the given partition.
1240 // Stores the result with AddPartner.
// This is the variant for vertical partitions: `to_the_left` selects a
// sideways search direction, and only type-matched neighbours are accepted.
// NOTE(review): listing line 1241 (the start of the function signature) is
// absent from this extract; the body uses `to_the_left` -- confirm the
// signature against the upstream file.
1242  ColPartition* part) {
1243  if (part->type() == PT_NOISE)
1244  return; // Noise is not allowed to partner anything.
1245  const TBOX& box = part->bounding_box();
1246  int left = part->median_left();
1247  int right = part->median_right();
 // width is -1 when the median edges are inverted (right < left).
1248  int width = right >= left ? right - left : -1;
1249  int mid_x = (left + right) / 2;
1250  ColPartitionGridSearch hsearch(this);
1251  // Search sideways from mid_x; `to_the_left` picks the direction.
1252  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
1253  ColPartition* neighbour;
1254  ColPartition* best_neighbour = nullptr;
1255  int best_dist = INT32_MAX;
1256  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) {
1257  if (neighbour == part || neighbour->type() == PT_NOISE)
1258  continue; // Noise is not allowed to partner anything.
1259  int neighbour_left = neighbour->median_left();
1260  int neighbour_right = neighbour->median_right();
1261  int neighbour_x = (neighbour_left + neighbour_right) / 2;
 // Ignore neighbours whose centre is on the wrong side of the part's.
1262  if (to_the_left != (neighbour_x < mid_x))
1263  continue;
1264  if (!part->VOverlaps(*neighbour))
1265  continue;
1266  if (!part->TypesMatch(*neighbour))
1267  continue; // Only match to other vertical text.
 // Horizontal gap between the facing median edges.
1268  int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
1269  if (dist <= kMaxPartitionSpacing * width) {
1270  if (dist < best_dist || best_neighbour == nullptr) {
1271  best_dist = dist;
1272  best_neighbour = neighbour;
1273  }
1274  } else {
 // First accepted neighbour beyond the spacing limit ends the search.
1275  break;
1276  }
1277  }
1278  // For vertical partitions, the upper partner is to the left, and lower is
1279  // to the right.
1280  if (best_neighbour != nullptr)
1281  part->AddPartner(to_the_left, best_neighbour);
1282 }
1283 
1284 // For every ColPartition with multiple partners in the grid, reduces the
1285 // number of partners to 0 or 1. If get_desperate is true, goes to more
1286 // desperate merge methods to merge flowing text before breaking partnerships.
// The grid is swept once per type value, and every partition is handed to
// ColPartition::RefinePartners with that type on each sweep.
// NOTE(review): listing line 1287 (the function signature) is absent from
// this extract; confirm it against the upstream file.
1288  ColPartitionGridSearch gsearch(this);
1289  // Refine in type order so that chasing multiple partners can be done
1290  // before eliminating type mis-matching partners.
 // NOTE(review): the bound is inclusive, so the last sweep passes PT_COUNT
 // itself as the type -- presumably a deliberate final pass; confirm against
 // ColPartition::RefinePartners.
1291  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
1292  // Iterate the ColPartitions in the grid.
1293  gsearch.StartFullSearch();
1294  ColPartition* part;
1295  while ((part = gsearch.NextFullSearch()) != nullptr) {
1296  part->RefinePartners(static_cast<PolyBlockType>(type),
1297  get_desperate, this);
1298  // Iterator may have been messed up by a merge.
1299  gsearch.RepositionIterator();
1300  }
1301  }
1302 }
1303 
1304 
1305 // ========================== PRIVATE CODE ========================
1306 
1307 // Finds and returns a list of candidate ColPartitions to merge with part.
1308 // The candidates must overlap search_box, and when merged must not
1309 // overlap any other partitions that are not overlapped by each individually.
1310 void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
1311  const TBOX& search_box, bool debug,
1312  ColPartition_CLIST* candidates) {
1313  int ok_overlap =
1314  static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
1315  const TBOX& part_box = part->bounding_box();
1316  // Now run the rect search.
1317  ColPartitionGridSearch rsearch(this);
1318  rsearch.SetUniqueMode(true);
1319  rsearch.StartRectSearch(search_box);
1320  ColPartition* candidate;
1321  while ((candidate = rsearch.NextRectSearch()) != nullptr) {
1322  if (!OKMergeCandidate(part, candidate, debug))
1323  continue;
1324  const TBOX& c_box = candidate->bounding_box();
1325  // Candidate seems to be a potential merge with part. If one contains
1326  // the other, then the merge is a no-brainer. Otherwise, search the
1327  // combined box to see if anything else is inappropriately overlapped.
1328  if (!part_box.contains(c_box) && !c_box.contains(part_box)) {
1329  // Search the combined rectangle to see if anything new is overlapped.
1330  // This is a preliminary test designed to quickly weed-out poor
1331  // merge candidates that would create a big list of overlapped objects
1332  // for the squared-order overlap analysis. Eg. vertical and horizontal
1333  // line-like objects that overlap real text when merged:
1334  // || ==========================
1335  // ||
1336  // || r e a l t e x t
1337  // ||
1338  // ||
1339  TBOX merged_box(part_box);
1340  merged_box += c_box;
1341  ColPartitionGridSearch msearch(this);
1342  msearch.SetUniqueMode(true);
1343  msearch.StartRectSearch(merged_box);
1344  ColPartition* neighbour;
1345  while ((neighbour = msearch.NextRectSearch()) != nullptr) {
1346  if (neighbour == part || neighbour == candidate)
1347  continue; // Ignore itself.
1348  if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false))
1349  continue; // This kind of merge overlap is OK.
1350  TBOX n_box = neighbour->bounding_box();
1351  // The overlap is OK if:
1352  // * the n_box already overlapped the part or the candidate OR
1353  // * the n_box is a suitable merge with either part or candidate
1354  if (!n_box.overlap(part_box) && !n_box.overlap(c_box) &&
1355  !OKMergeCandidate(part, neighbour, false) &&
1356  !OKMergeCandidate(candidate, neighbour, false))
1357  break;
1358  }
1359  if (neighbour != nullptr) {
1360  if (debug) {
1361  tprintf("Combined box overlaps another that is not OK despite"
1362  " allowance of %d:", ok_overlap);
1363  neighbour->bounding_box().print();
1364  tprintf("Reason:");
1365  OKMergeCandidate(part, neighbour, true);
1366  tprintf("...and:");
1367  OKMergeCandidate(candidate, neighbour, true);
1368  tprintf("Overlap:");
1369  neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true);
1370  }
1371  continue;
1372  }
1373  }
1374  if (debug) {
1375  tprintf("Adding candidate:");
1376  candidate->bounding_box().print();
1377  }
1378  // Unique elements as they arrive.
1379  candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate);
1380  }
1381 }
1382 
1383 // Smoothes the region type/flow type of the given part by looking at local
1384 // neighbours and the given image mask. Searches a padded rectangle with the
1385 // padding truncated on one size of the part's box in turn for each side,
1386 // using the result (if any) that has the least distance to all neighbours
1387 // that contribute to the decision. This biases in favor of rectangular
1388 // regions without completely enforcing them.
1389 // If a good decision cannot be reached, the part is left unchanged.
1390 // im_box and rerotation are used to map blob coordinates onto the
1391 // nontext_map, which is used to prevent the spread of text neighbourhoods
1392 // into images.
1393 // Returns true if the partition was changed.
1394 bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
1395  const TBOX& im_box,
1396  const FCOORD& rerotation,
1397  bool debug,
1398  ColPartition* part) {
1399  const TBOX& part_box = part->bounding_box();
1400  if (debug) {
1401  tprintf("Smooothing part at:");
1402  part_box.print();
1403  }
1404  BlobRegionType best_type = BRT_UNKNOWN;
1405  int best_dist = INT32_MAX;
1406  int max_dist = std::min(part_box.width(), part_box.height());
1407  max_dist = std::max(max_dist * kMaxNeighbourDistFactor, gridsize() * 2);
1408  // Search with the pad truncated on each side of the box in turn.
1409  bool any_image = false;
1410  bool all_image = true;
1411  for (int d = 0; d < BND_COUNT; ++d) {
1412  int dist;
1413  auto dir = static_cast<BlobNeighbourDir>(d);
1414  BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
1415  rerotation, debug, *part,
1416  &dist);
1417  if (debug) {
1418  tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
1419  }
1420  if (type != BRT_UNKNOWN && dist < best_dist) {
1421  best_dist = dist;
1422  best_type = type;
1423  }
1424  if (type == BRT_POLYIMAGE)
1425  any_image = true;
1426  else
1427  all_image = false;
1428  }
1429  if (best_dist > max_dist)
1430  return false; // Too far away to set the type with it.
1431  if (part->flow() == BTFT_STRONG_CHAIN && !all_image) {
1432  return false; // We are not modifying it.
1433  }
1434  BlobRegionType new_type = part->blob_type();
1435  BlobTextFlowType new_flow = part->flow();
1436  if (best_type == BRT_TEXT && !any_image) {
1437  new_flow = BTFT_STRONG_CHAIN;
1438  new_type = BRT_TEXT;
1439  } else if (best_type == BRT_VERT_TEXT && !any_image) {
1440  new_flow = BTFT_STRONG_CHAIN;
1441  new_type = BRT_VERT_TEXT;
1442  } else if (best_type == BRT_POLYIMAGE) {
1443  new_flow = BTFT_NONTEXT;
1444  new_type = BRT_UNKNOWN;
1445  }
1446  if (new_type != part->blob_type() || new_flow != part->flow()) {
1447  part->set_flow(new_flow);
1448  part->set_blob_type(new_type);
1449  part->SetBlobTypes();
1450  if (debug) {
1451  tprintf("Modified part:");
1452  part->Print();
1453  }
1454  return true;
1455  } else {
1456  return false;
1457  }
1458 }
1459 
1460 // Sets up a search box based on the part_box, padded in all directions
1461 // except direction. Also setup dist_scaling to weight x,y distances according
1462 // to the given direction.
1463 static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
1464  const TBOX& part_box,
1465  int min_padding,
1466  TBOX* search_box,
1467  ICOORD* dist_scaling) {
1468  *search_box = part_box;
1469  // Generate a pad value based on the min dimension of part_box, but at least
1470  // min_padding and then scaled by kMaxPadFactor.
1471  int padding = std::min(part_box.height(), part_box.width());
1472  padding = std::max(padding, min_padding);
1473  padding *= kMaxPadFactor;
1474  search_box->pad(padding, padding);
1475  // Truncate the box in the appropriate direction and make the distance
1476  // metric slightly biased in the truncated direction.
1477  switch (direction) {
1478  case BND_LEFT:
1479  search_box->set_left(part_box.left());
1480  *dist_scaling = ICOORD(2, 1);
1481  break;
1482  case BND_BELOW:
1483  search_box->set_bottom(part_box.bottom());
1484  *dist_scaling = ICOORD(1, 2);
1485  break;
1486  case BND_RIGHT:
1487  search_box->set_right(part_box.right());
1488  *dist_scaling = ICOORD(2, 1);
1489  break;
1490  case BND_ABOVE:
1491  search_box->set_top(part_box.top());
1492  *dist_scaling = ICOORD(1, 2);
1493  break;
1494  default:
1495  ASSERT_HOST(false);
1496  }
1497 }
1498 
1499 // Local enum used by SmoothInOneDirection and AccumulatePartDistances
1500 // for the different types of partition neighbour.
// The values index the per-type `counts` and `dists` arrays used by
// SmoothInOneDirection, with NPT_COUNT as the array size.
// NOTE(review): listing line 1501 (the `enum ... {` line) is absent from this
// extract; confirm it against the upstream file.
1502  NPT_HTEXT, // Definite horizontal text.
1503  NPT_VTEXT, // Definite vertical text.
1504  NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
1505  // image for image and VTEXT.
1506  NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
1507  // image for image and HTEXT.
1508  NPT_IMAGE, // Definite non-text.
1509  NPT_COUNT // Number of array elements.
1510 };
1511 
1512 // Executes the search for SmoothRegionType in a single direction.
1513 // Creates a bounding box that is padded in all directions except direction,
1514 // and searches it for other partitions. Finds the nearest collection of
1515 // partitions that makes a decisive result (if any) and returns the type
1516 // and the distance of the collection. If there are any pixels in the
1517 // nontext_map, then the decision is biased towards image.
// Returns BRT_POLYIMAGE, BRT_TEXT or BRT_VERT_TEXT when a decision is
// reached, otherwise BRT_UNKNOWN. *best_distance is always written: on a
// decision it is the nearest distance among the deciding type(s); with no
// decision it is left at INT32_MAX.
// NOTE(review): listing line 1531 is absent from this extract; it presumably
// declares the `dists` array (one vector per NeighbourPartitionType) used
// below -- confirm against the upstream file.
1518 BlobRegionType ColPartitionGrid::SmoothInOneDirection(
1519  BlobNeighbourDir direction, Pix* nontext_map,
1520  const TBOX& im_box, const FCOORD& rerotation,
1521  bool debug, const ColPartition& part, int* best_distance) {
1522  // Set up a rectangle search bounded by the part.
1523  const TBOX& part_box = part.bounding_box();
1524  TBOX search_box;
1525  ICOORD dist_scaling;
1526  ComputeSearchBoxAndScaling(direction, part_box, gridsize(),
1527  &search_box, &dist_scaling);
 // True when the nontext_map has at least one pixel inside the search box.
1528  bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box,
1529  rerotation,
1530  nontext_map) > 0;
 // Fill dists with the per-type neighbour distances.
1532  AccumulatePartDistances(part, dist_scaling, search_box,
1533  nontext_map, im_box, rerotation, debug, dists);
1534  // By iteratively including the next smallest distance across the vectors,
1535  // (as in a merge sort) we can use the vector indices as counts of each type
1536  // and find the nearest set of objects that give us a definite decision.
1537  int counts[NPT_COUNT];
1538  memset(counts, 0, sizeof(counts[0]) * NPT_COUNT);
1539  // If there is image in the search box, tip the balance in image's favor.
1540  int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0;
1541  BlobRegionType text_dir = part.blob_type();
1542  BlobTextFlowType flow_type = part.flow();
1543  int min_dist = 0;
1544  do {
1545  // Find the minimum new entry across the vectors
1546  min_dist = INT32_MAX;
1547  for (int i = 0; i < NPT_COUNT; ++i) {
1548  if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist)
1549  min_dist = dists[i][counts[i]];
1550  }
1551  // Step all the indices/counts forward to include min_dist.
1552  for (int i = 0; i < NPT_COUNT; ++i) {
1553  while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist)
1554  ++counts[i];
1555  }
 // Provisional value; overwritten below if a decision is reached.
1556  *best_distance = min_dist;
1557  if (debug) {
1558  tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n",
1559  counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT],
1560  counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT],
1561  counts[NPT_IMAGE], image_bias, min_dist);
1562  }
1563  // See if we have a decision yet.
 // Each text score counts its own strong and weak neighbours for, and
 // image plus the opposite orientation's weak neighbours against.
1564  int image_count = counts[NPT_IMAGE];
1565  int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
1566  (image_count + counts[NPT_WEAK_VTEXT]);
1567  int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
1568  (image_count + counts[NPT_WEAK_HTEXT]);
 // Image wins when at least one image neighbour is included and the bias
 // beats both text scores by the decision margin.
1569  if (image_count > 0 &&
1570  image_bias - htext_score >= kSmoothDecisionMargin &&
1571  image_bias - vtext_score >= kSmoothDecisionMargin) {
 // Report the nearest of the image and weak-text neighbours.
1572  *best_distance = dists[NPT_IMAGE][0];
1573  if (!dists[NPT_WEAK_VTEXT].empty() &&
1574  *best_distance > dists[NPT_WEAK_VTEXT][0])
1575  *best_distance = dists[NPT_WEAK_VTEXT][0];
1576  if (!dists[NPT_WEAK_HTEXT].empty() &&
1577  *best_distance > dists[NPT_WEAK_HTEXT][0])
1578  *best_distance = dists[NPT_WEAK_HTEXT][0];
1579  return BRT_POLYIMAGE;
1580  }
 // Horizontal text wins on margin, unless the part is itself a vertical
 // BTFT_CHAIN; the vertical case below mirrors this.
1581  if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) &&
1582  counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) {
1583  *best_distance = dists[NPT_HTEXT][0];
1584  return BRT_TEXT;
1585  } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) &&
1586  counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) {
1587  *best_distance = dists[NPT_VTEXT][0];
1588  return BRT_VERT_TEXT;
1589  }
 // min_dist stays at INT32_MAX once every vector is exhausted, which ends
 // the loop without a decision.
1590  } while (min_dist < INT32_MAX);
1591  return BRT_UNKNOWN;
1592 }
1593 
1594 // Counts the partitions in the given search_box by appending the gap
1595 // distance (scaled by dist_scaling) of the part from the base_part to the
1596 // vector of the appropriate type for the partition. Prior to return, the
1597 // vectors in the dists array are sorted in increasing order.
1598 // The nontext_map (+im_box, rerotation) is used to make text invisible if
1599 // there is non-text in between.
1600 // dists must be an array of GenericVectors of size NPT_COUNT.
1601 void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part,
1602  const ICOORD& dist_scaling,
1603  const TBOX& search_box,
1604  Pix* nontext_map,
1605  const TBOX& im_box,
1606  const FCOORD& rerotation,
1607  bool debug,
1608  GenericVector<int>* dists) {
1609  const TBOX& part_box = base_part.bounding_box();
1610  ColPartitionGridSearch rsearch(this);
1611  rsearch.SetUniqueMode(true);
1612  rsearch.StartRectSearch(search_box);
1613  ColPartition* neighbour;
1614  // Search for compatible neighbours with a similar strokewidth, but not
1615  // on the other side of a tab vector.
1616  while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
1617  if (neighbour->IsUnMergeableType() ||
1618  !base_part.ConfirmNoTabViolation(*neighbour) ||
1619  neighbour == &base_part)
1620  continue;
1621  TBOX nbox = neighbour->bounding_box();
1622  BlobRegionType n_type = neighbour->blob_type();
1623  if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
1624  !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
1625  nontext_map))
1626  continue; // Text not visible the other side of image.
1627  if (BLOBNBOX::IsLineType(n_type))
1628  continue; // Don't use horizontal lines as neighbours.
1629  int x_gap = std::max(part_box.x_gap(nbox), 0);
1630  int y_gap = std::max(part_box.y_gap(nbox), 0);
1631  int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y();
1632  if (debug) {
1633  tprintf("Part has x-gap=%d, y=%d, dist=%d at:",
1634  x_gap, y_gap, n_dist);
1635  nbox.print();
1636  }
1637  // Truncate the number of boxes, so text doesn't get too much advantage.
1638  int n_boxes = std::min(neighbour->boxes_count(), kSmoothDecisionMargin);
1639  BlobTextFlowType n_flow = neighbour->flow();
1640  GenericVector<int>* count_vector = nullptr;
1641  if (n_flow == BTFT_STRONG_CHAIN) {
1642  if (n_type == BRT_TEXT)
1643  count_vector = &dists[NPT_HTEXT];
1644  else
1645  count_vector = &dists[NPT_VTEXT];
1646  if (debug) {
1647  tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes);
1648  }
1649  } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) &&
1650  (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) {
1651  // Medium text counts as weak, and all else counts as image.
1652  if (n_type == BRT_TEXT)
1653  count_vector = &dists[NPT_WEAK_HTEXT];
1654  else
1655  count_vector = &dists[NPT_WEAK_VTEXT];
1656  if (debug) tprintf("Weak %d\n", n_boxes);
1657  } else {
1658  count_vector = &dists[NPT_IMAGE];
1659  if (debug) tprintf("Image %d\n", n_boxes);
1660  }
1661  if (count_vector != nullptr) {
1662  for (int i = 0; i < n_boxes; ++i)
1663  count_vector->push_back(n_dist);
1664  }
1665  if (debug) {
1666  neighbour->Print();
1667  }
1668  }
1669  for (int i = 0; i < NPT_COUNT; ++i)
1670  dists[i].sort();
1671 }
1672 
1673 // Improves the margins of the part ColPartition by searching for
1674 // neighbours that vertically overlap significantly.
1675 // columns may be nullptr, and indicates the assigned column structure this
1676 // is applicable to part.
1677 void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
1678  ColPartition* part) {
1679  // Set up a rectangle search x-bounded by the column and y by the part.
1680  TBOX box = part->bounding_box();
1681  int y = part->MidY();
1682  // Initial left margin is based on the column, if there is one.
1683  int left_margin = bleft().x();
1684  int right_margin = tright().x();
1685  if (columns != nullptr) {
1686  ColPartition* column = columns->ColumnContaining(box.left(), y);
1687  if (column != nullptr)
1688  left_margin = column->LeftAtY(y);
1689  column = columns->ColumnContaining(box.right(), y);
1690  if (column != nullptr)
1691  right_margin = column->RightAtY(y);
1692  }
1693  left_margin -= kColumnWidthFactor;
1694  right_margin += kColumnWidthFactor;
1695  // Search for ColPartitions that reduce the margin.
1696  left_margin = FindMargin(box.left() + box.height(), true, left_margin,
1697  box.bottom(), box.top(), part);
1698  part->set_left_margin(left_margin);
1699  // Search for ColPartitions that reduce the margin.
1700  right_margin = FindMargin(box.right() - box.height(), false, right_margin,
1701  box.bottom(), box.top(), part);
1702  part->set_right_margin(right_margin);
1703 }
1704 
1705 // Starting at x, and going in the specified direction, up to x_limit, finds
1706 // the margin for the given y range by searching sideways,
1707 // and ignoring not_this.
1708 int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
1709  int y_bottom, int y_top,
1710  const ColPartition* not_this) {
1711  int height = y_top - y_bottom;
1712  // Iterate the ColPartitions in the grid.
1713  ColPartitionGridSearch side_search(this);
1714  side_search.SetUniqueMode(true);
1715  side_search.StartSideSearch(x, y_bottom, y_top);
1716  ColPartition* part;
1717  while ((part = side_search.NextSideSearch(right_to_left)) != nullptr) {
1718  // Ignore itself.
1719  if (part == not_this) // || part->IsLineType())
1720  continue;
1721  // Must overlap by enough, based on the min of the heights, so
1722  // large partitions can't smash through small ones.
1723  TBOX box = part->bounding_box();
1724  int min_overlap = std::min(height, static_cast<int>(box.height()));
1725  min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5);
1726  int y_overlap = std::min(y_top, static_cast<int>(box.top())) - std::max(y_bottom, static_cast<int>(box.bottom()));
1727  if (y_overlap < min_overlap)
1728  continue;
1729  // Must be going the right way.
1730  int x_edge = right_to_left ? box.right() : box.left();
1731  if ((x_edge < x) != right_to_left)
1732  continue;
1733  // If we have gone past x_limit, then x_limit will do.
1734  if ((x_edge < x_limit) == right_to_left)
1735  break;
1736  // It reduces x limit, so save the new one.
1737  x_limit = x_edge;
1738  }
1739  return x_limit;
1740 }
1741 
1742 
1743 } // namespace tesseract.
tesseract::ColPartition::set_block_owned
void set_block_owned(bool owned)
Definition: colpartition.h:208
BlobTextFlowType
BlobTextFlowType
Definition: blobbox.h:113
tesseract::NPT_WEAK_HTEXT
Definition: colpartitiongrid.cpp:1504
tesseract::ImageFind::CountPixelsInRotatedBox
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
Definition: imagefind.cpp:597
tesseract::GridSearch::StartRectSearch
void StartRectSearch(const TBOX &rect)
Definition: bbgrid.h:830
TBOX::rotate_large
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:69
tesseract::ColPartitionGrid::MakeSingleColumnSet
ColPartitionSet * MakeSingleColumnSet(WidthCallback cb)
Definition: colpartitiongrid.cpp:815
tesseract::ColPartition::Print
void Print() const
Definition: colpartition.cpp:1782
tesseract::TabVector
Definition: tabvector.h:111
BND_RIGHT
Definition: blobbox.h:89
tesseract::kSmoothDecisionMargin
const int kSmoothDecisionMargin
Definition: colpartitiongrid.cpp:64
tesseract::ColPartitionGrid::GridFindMargins
void GridFindMargins(ColPartitionSet **best_columns)
Definition: colpartitiongrid.cpp:960
tesseract::GridSearch::StartSideSearch
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:746
TBOX::intersection
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:83
tesseract::GridSearch::RepositionIterator
void RepositionIterator()
Definition: bbgrid.h:892
BTFT_STRONG_CHAIN
Definition: blobbox.h:118
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >::InsertBBox
void InsertBBox(bool h_spread, bool v_spread, ColPartition *bbox)
Definition: bbgrid.h:486
tesseract::ColPartition::OverlapSplitBlob
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
Definition: colpartition.cpp:769
tesseract::ColPartitionGrid::DeleteUnknownParts
void DeleteUnknownParts(TO_BLOCK *block)
Definition: colpartitiongrid.cpp:1017
tesseract::ColPartitionGrid::BestMergeCandidate
ColPartition * BestMergeCandidate(const ColPartition *part, ColPartition_CLIST *candidates, bool debug, std::function< bool(const ColPartition *, const ColPartition *)> confirm_cb, int *overlap_increase)
Definition: colpartitiongrid.cpp:399
BRT_NOISE
Definition: blobbox.h:72
tesseract::ColPartition::IsUnMergeableType
bool IsUnMergeableType() const
Definition: colpartition.h:449
tesseract::ColPartition::HCoreOverlap
int HCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:384
tesseract::GridSearch::StartVerticalSearch
void StartVerticalSearch(int xmin, int xmax, int y)
Definition: bbgrid.h:788
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
tesseract::ColPartition::median_width
int median_width() const
Definition: colpartition.h:142
BND_BELOW
Definition: blobbox.h:88
tesseract::ColPartition::flow
BlobTextFlowType flow() const
Definition: colpartition.h:154
tesseract::GridSearch::NextRectSearch
BBC * NextRectSearch()
Definition: bbgrid.h:842
tesseract::GridBase::tright_
ICOORD tright_
Definition: bbgrid.h:91
tesseract::ColPartition::ReleaseNonLeaderBoxes
bool ReleaseNonLeaderBoxes()
Definition: colpartition.cpp:289
TBOX::overlap
bool overlap(const TBOX &box) const
Definition: rect.h:350
tesseract::kColumnWidthFactor
const int kColumnWidthFactor
Definition: tabfind.h:41
BRT_UNKNOWN
Definition: blobbox.h:77
ICOORD
integer coordinate
Definition: points.h:30
tesseract::ColPartitionGrid::DeleteParts
void DeleteParts()
Definition: colpartitiongrid.cpp:1002
tesseract::ColPartitionGrid::FindFigureCaptions
void FindFigureCaptions()
Definition: colpartitiongrid.cpp:1054
tesseract::WidthCallback
std::function< bool(int)> WidthCallback
Definition: tabfind.h:35
BLOBNBOX::set_flow
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:297
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >::RemoveBBox
void RemoveBBox(ColPartition *bbox)
Definition: bbgrid.h:533
TBOX::print
void print() const
Definition: rect.h:277
tesseract::ColPartitionGrid::SplitOverlappingPartitions
void SplitOverlappingPartitions(ColPartition_LIST *big_parts)
Definition: colpartitiongrid.cpp:508
tesseract::GridSearch::StartFullSearch
void StartFullSearch()
Definition: bbgrid.h:665
TO_BLOCK::DeleteUnownedNoise
void DeleteUnownedNoise()
Definition: blobbox.cpp:1020
tesseract::ColPartition::median_height
int median_height() const
Definition: colpartition.h:136
TBOX::top
int16_t top() const
Definition: rect.h:57
TBOX::contains
bool contains(const FCOORD pt) const
Definition: rect.h:330
tesseract::ColPartition::type
PolyBlockType type() const
Definition: colpartition.h:181
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >::Clear
void Clear()
Definition: bbgrid.h:455
TBOX::bounding_union
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:124
PT_NOISE
Definition: capi.h:122
TBOX::area
int32_t area() const
Definition: rect.h:121
TO_BLOCK
Definition: blobbox.h:691
BRT_VERT_TEXT
Definition: blobbox.h:78
TBOX::set_top
void set_top(int y)
Definition: rect.h:60
colpartitionset.h
tesseract::ColPartition::median_top
int median_top() const
Definition: colpartition.h:124
tesseract::ColPartitionSet
Definition: colpartitionset.h:39
tesseract::ColPartition::median_bottom
int median_bottom() const
Definition: colpartition.h:127
PT_TABLE
Definition: capi.h:114
PT_CAPTION_TEXT
Definition: capi.h:116
tesseract::kMaxPadFactor
const int kMaxPadFactor
Definition: colpartitiongrid.cpp:33
tesseract::ColPartitionGridSearch
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:935
BRT_RECTIMAGE
Definition: blobbox.h:75
tesseract::GridSearch::StartRadSearch
void StartRadSearch(int x, int y, int max_radius)
Definition: bbgrid.h:698
tesseract::ColPartition::IsSingleton
bool IsSingleton() const
Definition: colpartition.h:361
tesseract::kMaxPartitionSpacing
const double kMaxPartitionSpacing
Definition: colpartitiongrid.cpp:61
tesseract::kTinyEnoughTextlineOverlapFraction
const double kTinyEnoughTextlineOverlapFraction
Definition: colpartitiongrid.cpp:48
tesseract::ColPartition::boxes
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
ICOORD::x
int16_t x() const
access function
Definition: points.h:51
FCOORD
Definition: points.h:187
tesseract::ColPartition::lower_partners
ColPartition_CLIST * lower_partners()
Definition: colpartition.h:199
BLOBNBOX
Definition: blobbox.h:142
tesseract::ColPartition::CopyLeftTab
void CopyLeftTab(const ColPartition &src, bool take_box)
Definition: colpartition.cpp:519
BND_ABOVE
Definition: blobbox.h:90
BTFT_CHAIN
Definition: blobbox.h:117
BTFT_LEADER
Definition: blobbox.h:120
BRT_POLYIMAGE
Definition: blobbox.h:76
PT_VERTICAL_TEXT
Definition: capi.h:115
tesseract::TabVector::IsLeftTab
bool IsLeftTab() const
Definition: tabvector.h:212
C_BLOB::area
int32_t area()
Definition: stepblob.cpp:266
PT_COUNT
Definition: capi.h:123
tesseract::NPT_COUNT
Definition: colpartitiongrid.cpp:1509
tesseract::ColPartitionGrid::ListFindMargins
void ListFindMargins(ColPartitionSet **best_columns, ColPartition_LIST *parts)
Definition: colpartitiongrid.cpp:984
tesseract::ColPartition
Definition: colpartition.h:67
tesseract::ColPartition::TypesMatch
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:409
tesseract::ColPartitionGrid::HandleClick
void HandleClick(int x, int y) override
Definition: colpartitiongrid.cpp:73
tesseract::GridBase::tright
const ICOORD & tright() const
Definition: bbgrid.h:75
TBOX::height
int16_t height() const
Definition: rect.h:107
tesseract::ColPartitionGrid::DeleteNonLeaderParts
void DeleteNonLeaderParts()
Definition: colpartitiongrid.cpp:1036
TBOX::y_gap
int y_gap(const TBOX &box) const
Definition: rect.h:232
BTFT_NONTEXT
Definition: blobbox.h:115
tesseract::kMaxNeighbourDistFactor
const int kMaxNeighbourDistFactor
Definition: colpartitiongrid.cpp:36
tesseract::ColPartitionGrid::ComputeTotalOverlap
int ComputeTotalOverlap(ColPartitionGrid **overlap_grid)
Definition: colpartitiongrid.cpp:312
tesseract::kBigPartSizeRatio
const double kBigPartSizeRatio
Definition: colpartitiongrid.cpp:46
tesseract::ColPartition::boxes_count
int boxes_count() const
Definition: colpartition.h:190
BLOBNBOX::IsLineType
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:425
tesseract::ColPartition::set_blob_type
void set_blob_type(BlobRegionType t)
Definition: colpartition.h:151
tesseract::GridSearch::NextRadSearch
BBC * NextRadSearch()
Definition: bbgrid.h:713
TBOX::set_right
void set_right(int x)
Definition: rect.h:81
tesseract::ColPartition::SingletonPartner
ColPartition * SingletonPartner(bool upper)
Definition: colpartition.cpp:629
tesseract::ColPartitionGrid::Deskew
void Deskew(const FCOORD &deskew)
Definition: colpartitiongrid.cpp:729
tesseract::kMaxCaptionLines
const int kMaxCaptionLines
Definition: colpartitiongrid.cpp:38
tesseract::GridSearch::GridY
int GridY() const
Definition: bbgrid.h:245
tesseract::NeighbourPartitionType
NeighbourPartitionType
Definition: colpartitiongrid.cpp:1501
GenericVector::push_back
int push_back(T object)
Definition: genericvector.h:799
BLOCK
Definition: ocrblock.h:28
tesseract::GridSearch::NextSideSearch
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:761
BLOCK::pdblk
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:189
tesseract::ColPartition::set_vertical
void set_vertical(const ICOORD &v)
Definition: colpartition.h:193
tesseract::ColPartitionGrid::RefinePartitionPartners
void RefinePartitionPartners(bool get_desperate)
Definition: colpartitiongrid.cpp:1287
tesseract::ColPartitionGrid::FindOverlappingPartitions
void FindOverlappingPartitions(const TBOX &box, const ColPartition *not_this, ColPartition_CLIST *parts)
Definition: colpartitiongrid.cpp:346
PDBLK::set_poly_block
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:56
BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:119
BlobRegionType
BlobRegionType
Definition: blobbox.h:71
tesseract::ColPartition::IsImageType
bool IsImageType() const
Definition: colpartition.h:429
tesseract::ColPartition::MakeBigPartition
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
Definition: colpartition.cpp:116
tesseract::AlignedBlob::WithinTestRegion
static bool WithinTestRegion(int detail_level, int x, int y)
Definition: alignedblob.cpp:150
tesseract::kMarginOverlapFraction
const double kMarginOverlapFraction
Definition: colpartitiongrid.cpp:44
BRT_TEXT
Definition: blobbox.h:79
tesseract::ColPartition::left_key
int left_key() const
Definition: colpartition.h:172
tesseract::ColPartition::MakeToRow
TO_ROW * MakeToRow()
Definition: colpartition.cpp:1706
tesseract::ColPartition::HOverlaps
bool HOverlaps(const ColPartition &other) const
Definition: colpartition.h:365
tesseract::ColPartition::blob_type
BlobRegionType blob_type() const
Definition: colpartition.h:148
tesseract::ColPartition::MidY
int MidY() const
Definition: colpartition.h:304
tesseract::NPT_WEAK_VTEXT
Definition: colpartitiongrid.cpp:1506
tesseract::ColPartition::OKDiacriticMerge
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
Definition: colpartition.cpp:458
tesseract::ColPartitionGrid::FindVPartitionPartners
void FindVPartitionPartners(bool to_the_left, ColPartition *part)
Definition: colpartitiongrid.cpp:1241
TBOX::width
int16_t width() const
Definition: rect.h:114
tesseract::NPT_HTEXT
Definition: colpartitiongrid.cpp:1502
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
tesseract::ColPartition::median_left
int median_left() const
Definition: colpartition.h:130
BLOBNBOX::set_owner
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:354
tesseract::BBGrid
Definition: bbgrid.h:158
tesseract::NPT_IMAGE
Definition: colpartitiongrid.cpp:1508
TBOX::topright
const ICOORD & topright() const
Definition: rect.h:103
tesseract::GridBase::bleft_
ICOORD bleft_
Definition: bbgrid.h:90
tesseract::ColPartitionGrid::MergePart
bool MergePart(std::function< bool(ColPartition *, TBOX *)> box_cb, std::function< bool(const ColPartition *, const ColPartition *)> confirm_cb, ColPartition *part)
Definition: colpartitiongrid.cpp:117
tesseract::GridSearch
Definition: bbgrid.h:48
tesseract::ColPartition::right_key
int right_key() const
Definition: colpartition.h:178
tesseract
Definition: baseapi.h:65
BLOBNBOX::set_region_type
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:285
tesseract::ColPartition::ReflectInYAxis
void ReflectInYAxis()
Definition: colpartition.cpp:320
BND_LEFT
Definition: blobbox.h:87
tesseract::ColPartitionGrid::ReflectInYAxis
void ReflectInYAxis()
Definition: colpartitiongrid.cpp:640
tesseract::ColPartition::median_right
int median_right() const
Definition: colpartition.h:133
TBOX::botleft
const ICOORD & botleft() const
Definition: rect.h:91
PT_UNKNOWN
Definition: capi.h:108
BLOBNBOX::bounding_box
const TBOX & bounding_box() const
Definition: blobbox.h:229
tesseract::TabFind::RightTabForBox
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:304
tesseract::ImageFind::BlankImageInBetween
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
Definition: imagefind.cpp:576
tesseract::GridSearch::NextVerticalSearch
BBC * NextVerticalSearch(bool top_to_bottom)
Definition: bbgrid.h:802
tesseract::ColPartition::set_flow
void set_flow(BlobTextFlowType f)
Definition: colpartition.h:157
GenericVector< ColPartitionSet * >
tesseract::ColPartition::AddPartner
void AddPartner(bool upper, ColPartition *partner)
Definition: colpartition.cpp:603
GenericVector::reserve
void reserve(int size)
Definition: genericvector.h:679
tesseract::NPT_VTEXT
Definition: colpartitiongrid.cpp:1503
tesseract::TabVector::IsRightTab
bool IsRightTab() const
Definition: tabvector.h:216
tesseract::ColPartition::SetBlobTypes
void SetBlobTypes()
Definition: colpartition.cpp:1265
tesseract::ColPartition::SplitAtBlob
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
Definition: colpartition.cpp:787
tesseract::GridBase::gridsize
int gridsize() const
Definition: bbgrid.h:63
tesseract::ColPartition::set_type
void set_type(PolyBlockType t)
Definition: colpartition.h:184
tesseract::ColPartition::SetColumnGoodness
void SetColumnGoodness(WidthCallback cb)
Definition: colpartition.cpp:1070
tesseract::ColPartition::BoundsWithoutBox
TBOX BoundsWithoutBox(BLOBNBOX *box)
Definition: colpartition.cpp:234
tesseract::ColPartitionGrid::RecomputeBounds
void RecomputeBounds(int gridsize, const ICOORD &bleft, const ICOORD &tright, const ICOORD &vertical)
Definition: colpartitiongrid.cpp:931
tesseract::ColPartition::bounding_box
const TBOX & bounding_box() const
Definition: colpartition.h:109
tesseract::ColPartitionGrid::Merges
void Merges(std::function< bool(ColPartition *, TBOX *)> box_cb, std::function< bool(const ColPartition *, const ColPartition *)> confirm_cb)
Definition: colpartitiongrid.cpp:99
tesseract::TabFind::LeftTabForBox
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:348
tesseract::kMinCaptionGapHeightRatio
const double kMinCaptionGapHeightRatio
Definition: colpartitiongrid.cpp:42
TBOX::pad
void pad(int xpad, int ypad)
Definition: rect.h:130
tesseract::ColPartitionGrid::FindPartitionPartners
void FindPartitionPartners()
Definition: colpartitiongrid.cpp:1179
tesseract::BBGrid< ColPartition, ColPartition_CLIST, ColPartition_C_IT >::Init
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
tesseract::TabFind::WidthCB
WidthCallback WidthCB()
Definition: tabfind.h:157
BLOBNBOX::flow
BlobTextFlowType flow() const
Definition: blobbox.h:294
tesseract::ColPartition::ComputeLimits
void ComputeLimits()
Definition: colpartition.cpp:861
tesseract::ColPartitionGrid::ReTypeBlobs
void ReTypeBlobs(BLOBNBOX_LIST *im_blobs)
Definition: colpartitiongrid.cpp:870
tesseract::ColPartition::RefinePartners
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
Definition: colpartition.cpp:1877
imagefind.h
tesseract::TabFind
Definition: tabfind.h:52
TBOX::left
int16_t left() const
Definition: rect.h:71
tesseract::ColPartitionGrid
Definition: colpartitiongrid.h:32
BND_COUNT
Definition: blobbox.h:91
tesseract::ColPartitionGrid::SetTabStops
void SetTabStops(TabFind *tabgrid)
Definition: colpartitiongrid.cpp:753
tesseract::ColPartition::IsVerticalType
bool IsVerticalType() const
Definition: colpartition.h:441
PT_FLOWING_TEXT
Definition: capi.h:109
BLOBNBOX::region_type
BlobRegionType region_type() const
Definition: blobbox.h:282
tesseract::ColPartitionGrid::ClaimBoxes
void ClaimBoxes()
Definition: colpartitiongrid.cpp:857
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::ColPartition::DeleteBoxes
void DeleteBoxes()
Definition: colpartition.cpp:305
tesseract::ColPartition::Absorb
void Absorb(ColPartition *other, WidthCallback cb)
Definition: colpartition.cpp:638
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
tesseract::ColPartition::CountOverlappingBoxes
int CountOverlappingBoxes(const TBOX &box)
Definition: colpartition.cpp:960
tesstrain_utils.type
type
Definition: tesstrain_utils.py:141
POLY_BLOCK
Definition: polyblk.h:26
tesseract::ColPartition::upper_partners
ColPartition_CLIST * upper_partners()
Definition: colpartition.h:196
TO_ROW
Definition: blobbox.h:543
tesseract::GridSearch::SetUniqueMode
void SetUniqueMode(bool mode)
Definition: bbgrid.h:253
tesseract::ColPartition::ClaimBoxes
void ClaimBoxes()
Definition: colpartition.cpp:247
tesseract::kMinCaptionGapRatio
const double kMinCaptionGapRatio
Definition: colpartitiongrid.cpp:40
BlobNeighbourDir
BlobNeighbourDir
Definition: blobbox.h:86
tesseract::ColPartitionGrid::GridSmoothNeighbours
bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix *nontext_map, const TBOX &im_box, const FCOORD &rerotation)
Definition: colpartitiongrid.cpp:618
tesseract::ColPartition::SetLeftTab
void SetLeftTab(const TabVector *tab_vector)
Definition: colpartition.cpp:494
tesseract::ColPartition::VSignificantCoreOverlap
bool VSignificantCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:390
TBOX::set_bottom
void set_bottom(int y)
Definition: rect.h:67
tesseract::ColPartition::RemoveBox
void RemoveBox(BLOBNBOX *box)
Definition: colpartition.cpp:202
BLOBNBOX::cblob
C_BLOB * cblob() const
Definition: blobbox.h:267
PolyBlockType
PolyBlockType
Definition: publictypes.h:52
tesseract::ColPartitionGrid::ColPartitionGrid
ColPartitionGrid()=default
tesseract::ColPartition::DisownBoxes
void DisownBoxes()
Definition: colpartition.cpp:263
tesseract::ColPartitionGrid::MakeColPartSets
bool MakeColPartSets(PartSetVector *part_sets)
Definition: colpartitiongrid.cpp:777
BLOBNBOX::IsTextType
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:417
tesseract::ColPartition::VCoreOverlap
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375
tesseract::ColPartition::IsTextType
bool IsTextType() const
Definition: colpartition.h:433
tesseract::ColPartition::BiggestBox
BLOBNBOX * BiggestBox()
Definition: colpartition.cpp:215
tesseract::GridBase::gridheight
int gridheight() const
Definition: bbgrid.h:69
tesseract::GridSearch::RemoveBBox
void RemoveBBox()
Definition: bbgrid.h:866
tesseract::GridBase::bleft
const ICOORD & bleft() const
Definition: bbgrid.h:72
tesseract::ColPartition::VOverlaps
bool VOverlaps(const ColPartition &other) const
Definition: colpartition.h:370
colpartitiongrid.h
tesseract::ColPartitionGrid::ExtractPartitionsAsBlocks
void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: colpartitiongrid.cpp:668
BTFT_NEIGHBOURS
Definition: blobbox.h:116
tesseract::ColPartition::OKMergeOverlap
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
Definition: colpartition.cpp:736
tesseract::ColPartition::ShallowCopy
ColPartition * ShallowCopy() const
Definition: colpartition.cpp:1731
tesseract::ColPartition::CopyRightTab
void CopyRightTab(const ColPartition &src, bool take_box)
Definition: colpartition.cpp:532
tesseract::GridBase::GridCoords
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:52
TBOX::set_left
void set_left(int x)
Definition: rect.h:74
TBOX::x_gap
int x_gap(const TBOX &box) const
Definition: rect.h:224
ICOORD::y
int16_t y() const
access_function
Definition: points.h:55
tesseract::ColPartition::WithinSameMargins
bool WithinSameMargins(const ColPartition &other) const
Definition: colpartition.h:401
tesseract::GridSearch::NextFullSearch
BBC * NextFullSearch()
Definition: bbgrid.h:675
TBOX
Definition: rect.h:33
tesseract::ColPartition::SetRightTab
void SetRightTab(const TabVector *tab_vector)
Definition: colpartition.cpp:506