21 #include "config_auto.h"
30 #include "allheaders.h"
// Debug parameter read throughout this file. When it is non-zero and a
// non-null pixa_debug is supplied, intermediate images are added with
// pixa_debug->AddPix(); values greater than 1 additionally enable tprintf
// tracing in ExpandImageIntoParts.
// NOTE(review): declared through INT_VAR but defaulted with the bool literal
// `false`; presumably this converts to 0 -- confirm against the INT_VAR macro.
34 static INT_VAR(textord_tabfind_show_images,
false,
"Show image blobs");
66 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
69 Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
70 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
71 pixa_debug->
AddPix(pixr,
"CascadeReduced");
81 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
85 Pixa* pixadb = (textord_tabfind_show_images && pixa_debug !=
nullptr)
88 Pix* pixht2 = pixGenerateHalftoneMask(pixr,
nullptr, &ht_found, pixadb);
90 Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
91 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
92 pixa_debug->
AddPix(pixdb,
"HalftoneMask");
97 if (!ht_found && pixht2 !=
nullptr)
99 if (pixht2 ==
nullptr)
100 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
103 Pix *pixht = pixExpandReplicate(pixht2, 2);
104 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
105 pixa_debug->
AddPix(pixht,
"HalftoneReplicated");
109 Pix *pixt = pixSeedfillBinary(
nullptr, pixht, pix, 8);
110 pixOr(pixht, pixht, pixt);
114 Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
115 pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
116 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
117 pixa_debug->
AddPix(pixfinemask,
"FineMask");
118 Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
119 Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
120 pixDestroy(&pixreduced);
121 pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
122 Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
123 pixDestroy(&pixreduced2);
124 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
125 pixa_debug->
AddPix(pixcoarsemask,
"CoarseMask");
127 pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
128 pixDestroy(&pixfinemask);
130 pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
131 Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
132 pixDestroy(&pixcoarsemask);
133 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
134 pixa_debug->
AddPix(pixmask,
"MaskDilated");
136 pixAnd(pixht, pixht, pixmask);
137 pixDestroy(&pixmask);
138 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
139 pixa_debug->
AddPix(pixht,
"FinalMask");
141 Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
142 pixOr(result, result, pixht);
155 Boxa** boxa, Pixa** pixa) {
159 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
160 pixa_debug->
AddPix(pix,
"Conncompimage");
162 *boxa = pixConnComp(pix, pixa, 8);
167 if (*boxa !=
nullptr && *pixa !=
nullptr) npixes = pixaGetCount(*pixa);
168 for (
int i = 0; i < npixes; ++i) {
169 int x_start, x_end, y_start, y_end;
170 Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
171 if (textord_tabfind_show_images && pixa_debug !=
nullptr)
172 pixa_debug->
AddPix(img_pix,
"A component");
176 &x_start, &y_start, &x_end, &y_end)) {
177 Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
178 pixSetAll(simple_pix);
179 pixDestroy(&img_pix);
181 pixaReplacePix(*pixa, i, simple_pix,
nullptr);
182 img_pix = pixaGetPix(*pixa, i, L_CLONE);
184 l_int32 x, y, width, height;
185 boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
186 Box* simple_box = boxCreate(x + x_start, y + y_start,
187 x_end - x_start, y_end - y_start);
188 boxaReplaceBox(*boxa, i, simple_box);
190 pixDestroy(&img_pix);
// Row-wise scan helper over bit-packed image data.
// Rows are visited from *y_start in steps of y_step; the loop condition is
// the inequality y != y_end, so row y_end itself is never examined and
// y_step has to land exactly on y_end for the condition to end the loop.
// Each row starts at data + wpl * y (wpl is the row stride in uint32_t
// units) and bits x_start .. x_end - 1 of it are tested with GET_DATA_BIT.
// After the inner loop, pix_count is compared against min_count and
// max_count, and mid_rows against mid_width.
// NOTE(review): the declarations and updates of pix_count and mid_rows, the
// statements guarded by those comparisons, and the return statements are not
// present in this excerpt. Presumably pix_count is the number of set bits in
// the current row -- confirm against the full source.
201 static bool HScanForEdge(uint32_t* data,
int wpl,
int x_start,
int x_end,
202 int min_count,
int mid_width,
int max_count,
203 int y_end,
int y_step,
int* y_start) {
205 for (
int y = *y_start; y != y_end; y += y_step) {
208 uint32_t* line = data + wpl * y;
209 for (
int x = x_start; x < x_end; ++x) {
210 if (GET_DATA_BIT(line, x))
213 if (mid_rows == 0 && pix_count < min_count)
217 if (pix_count > max_count)
220 if (mid_rows > mid_width)
// Column-wise scan helper over bit-packed image data.
// Columns are visited from *x_start in steps of x_step; the loop condition
// is the inequality x != x_end, so column x_end itself is never examined.
// For each column the row pointer starts at data + y_start * wpl and is
// advanced by wpl for every y in [y_start, y_end), testing bit x of each
// row with GET_DATA_BIT.
// After the inner loop, pix_count is compared against min_count and
// max_count, and mid_cols against mid_width.
// NOTE(review): the declarations and updates of pix_count and mid_cols, the
// statements guarded by those comparisons, and the return statements are not
// present in this excerpt. Presumably pix_count is the number of set bits in
// the current column -- confirm against the full source.
233 static bool VScanForEdge(uint32_t* data,
int wpl,
int y_start,
int y_end,
234 int min_count,
int mid_width,
int max_count,
235 int x_end,
int x_step,
int* x_start) {
237 for (
int x = *x_start; x != x_end; x += x_step) {
239 uint32_t* line = data + y_start * wpl;
240 for (
int y = y_start; y < y_end; ++y, line += wpl) {
241 if (GET_DATA_BIT(line, x))
244 if (mid_cols == 0 && pix_count < min_count)
248 if (pix_count > max_count)
251 if (mid_cols > mid_width)
267 double min_fraction,
double max_fraction,
268 double max_skew_gradient,
269 int* x_start,
int* y_start,
270 int* x_end,
int* y_end) {
273 *x_end = pixGetWidth(pix);
275 *y_end = pixGetHeight(pix);
277 uint32_t* data = pixGetData(pix);
278 int wpl = pixGetWpl(pix);
279 bool any_cut =
false;
280 bool left_done =
false;
281 bool right_done =
false;
282 bool top_done =
false;
283 bool bottom_done =
false;
287 int width = *x_end - *x_start;
288 int min_count = static_cast<int>(width * min_fraction);
289 int max_count = static_cast<int>(width * max_fraction);
290 int edge_width = static_cast<int>(width * max_skew_gradient);
291 if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
292 max_count, *y_end, 1, y_start) && !top_done) {
297 if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
298 max_count, *y_start, -1, y_end) && !bottom_done) {
305 int height = *y_end - *y_start;
306 min_count = static_cast<int>(height * min_fraction);
307 max_count = static_cast<int>(height * max_fraction);
308 edge_width = static_cast<int>(height * max_skew_gradient);
309 if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
310 max_count, *x_end, 1, x_start) && !left_done) {
315 if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
316 max_count, *x_start, -1, x_end) && !right_done) {
325 return left_done && right_done && top_done && bottom_done;
333 int* x_end,
int* y_end) {
334 Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start,
336 Box* output_box =
nullptr;
337 pixClipBoxToForeground(pix, input_box,
nullptr, &output_box);
338 bool result = output_box !=
nullptr;
340 l_int32 x, y, width, height;
341 boxGetGeometry(output_box, &x, &y, &width, &height);
346 boxDestroy(&output_box);
348 boxDestroy(&input_box);
356 const uint8_t* line2,
357 const uint8_t* point) {
361 line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
362 point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
364 line_vector[L_ALPHA_CHANNEL] = 0;
367 cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE]
368 - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
369 cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED]
370 - line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
371 cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN]
372 - line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
373 cross[L_ALPHA_CHANNEL] = 0;
375 double cross_sq = 0.0;
376 double line_sq = 0.0;
378 cross_sq += static_cast<double>(cross[j]) * cross[j];
379 line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
381 if (line_sq == 0.0) {
384 return cross_sq / line_sq;
391 composeRGBPixel(r, g, b, &result);
399 else if (pixel >= 255.0)
401 return static_cast<uint8_t>(pixel);
415 Pix* color_map1, Pix* color_map2,
417 uint8_t* color1, uint8_t* color2) {
418 ASSERT_HOST(pix !=
nullptr && pixGetDepth(pix) == 32);
421 int width = pixGetWidth(pix);
422 int height = pixGetHeight(pix);
423 int left_pad = std::max(rect.
left() - 2 * factor, 0) / factor;
424 int top_pad = (rect.
top() + 2 * factor + (factor - 1)) / factor;
425 top_pad = std::min(height, top_pad);
426 int right_pad = (rect.
right() + 2 * factor + (factor - 1)) / factor;
427 right_pad = std::min(width, right_pad);
428 int bottom_pad = std::max(rect.
bottom() - 2 * factor, 0) / factor;
429 int width_pad = right_pad - left_pad;
430 int height_pad = top_pad - bottom_pad;
431 if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
434 Box* scaled_box = boxCreate(left_pad, height - top_pad,
435 width_pad, height_pad);
436 Pix* scaled = pixClipRectangle(pix, scaled_box,
nullptr);
439 STATS red_stats(0, 256);
440 STATS green_stats(0, 256);
441 STATS blue_stats(0, 256);
442 uint32_t* data = pixGetData(scaled);
444 for (
int y = 0; y < height_pad; ++y) {
445 for (
int x = 0; x < width_pad; ++x, ++data) {
446 int r = GET_DATA_BYTE(data, COLOR_RED);
447 int g = GET_DATA_BYTE(data, COLOR_GREEN);
448 int b = GET_DATA_BYTE(data, COLOR_BLUE);
450 green_stats.
add(g, 1);
451 blue_stats.
add(b, 1);
458 int best_l8 = static_cast<int>(red_stats.
ile(0.125f));
459 int best_u8 = static_cast<int>(ceil(red_stats.
ile(0.875f)));
460 int best_i8r = best_u8 - best_l8;
461 int x_color = COLOR_RED;
462 int y1_color = COLOR_GREEN;
463 int y2_color = COLOR_BLUE;
464 int l8 = static_cast<int>(green_stats.
ile(0.125f));
465 int u8 = static_cast<int>(ceil(green_stats.
ile(0.875f)));
466 if (u8 - l8 > best_i8r) {
470 x_color = COLOR_GREEN;
471 y1_color = COLOR_RED;
473 l8 = static_cast<int>(blue_stats.
ile(0.125f));
474 u8 = static_cast<int>(ceil(blue_stats.
ile(0.875f)));
475 if (u8 - l8 > best_i8r) {
479 x_color = COLOR_BLUE;
480 y1_color = COLOR_GREEN;
481 y2_color = COLOR_RED;
486 uint32_t* data = pixGetData(scaled);
487 for (
int im_y = 0; im_y < height_pad; ++im_y) {
488 for (
int im_x = 0; im_x < width_pad; ++im_x, ++data) {
489 int x = GET_DATA_BYTE(data, x_color);
490 int y1 = GET_DATA_BYTE(data, y1_color);
491 int y2 = GET_DATA_BYTE(data, y2_color);
496 double m1 = line1.
m();
497 double c1 = line1.
c(m1);
498 double m2 = line2.
m();
499 double c2 = line2.
c(m2);
500 double rms = line1.
rms(m1, c1) + line2.
rms(m2, c2);
504 color1[y1_color] =
ClipToByte(m1 * best_l8 + c1 + 0.5);
505 color1[y2_color] =
ClipToByte(m2 * best_l8 + c2 + 0.5);
508 color2[y1_color] =
ClipToByte(m1 * best_u8 + c1 + 0.5);
509 color2[y2_color] =
ClipToByte(m2 * best_u8 + c2 + 0.5);
516 color1[L_ALPHA_CHANNEL] = 0;
517 memcpy(color2, color1, 4);
519 if (color_map1 !=
nullptr) {
520 pixSetInRectArbitrary(color_map1, scaled_box,
523 color1[COLOR_BLUE]));
524 pixSetInRectArbitrary(color_map2, scaled_box,
527 color2[COLOR_BLUE]));
528 pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
531 boxDestroy(&scaled_box);
579 TBOX search_box(box1);
582 if (box1.
x_gap(box2) <= 0)
587 if (box1.
y_gap(box2) <= 0)
598 const FCOORD& rotation, Pix* pix) {
604 TBOX rotated_im_box(im_box);
605 rotated_im_box.
rotate(rotation);
606 Pix* rect_pix = pixCreate(box.
width(), box.
height(), 1);
607 pixRasterop(rect_pix, 0, 0, box.
width(), box.
height(),
608 PIX_SRC, pix, box.
left() - rotated_im_box.
left(),
609 rotated_im_box.
top() - box.
top());
611 pixCountPixels(rect_pix, &result,
nullptr);
612 pixDestroy(&rect_pix);
// Adjusts the horizontal extent of *slice.
// Steps visible here:
//   1. Copies of *slice and im_box are rotated by rerotation.
//   2. left/right are expressed relative to the rotated image box's left
//      edge; top/bottom are measured downwards from its top edge.
//   3. The four values are converted back to the rotated frame, stored with
//      set_to_given_coords(left, bottom, right, top), and rotated by
//      rotation.
//   4. slice's left edge is replaced with the resulting left edge.
// NOTE(review): pix is not used in the visible lines and nothing modifies
// left/right/top/bottom between steps 2 and 3; presumably an omitted call
// tightens them against pix. Any update of slice's right edge is likewise
// not visible -- confirm against the full source.
620 static void AttemptToShrinkBox(
const FCOORD& rotation,
const FCOORD& rerotation,
621 const TBOX& im_box, Pix* pix,
TBOX* slice) {
622 TBOX rotated_box(*slice);
623 rotated_box.rotate(rerotation);
624 TBOX rotated_im_box(im_box);
625 rotated_im_box.rotate(rerotation);
626 int left = rotated_box.left() - rotated_im_box.left();
627 int right = rotated_box.right() - rotated_im_box.left();
628 int top = rotated_im_box.top() - rotated_box.top();
629 int bottom = rotated_im_box.top() - rotated_box.bottom();
631 top = rotated_im_box.top() - top;
632 bottom = rotated_im_box.top() - bottom;
633 left += rotated_im_box.left();
634 right += rotated_im_box.left();
635 rotated_box.set_to_given_coords(left, bottom, right, top);
636 rotated_box.rotate(rotation);
637 slice->
set_left(rotated_box.left());
// Walks part_list with an iterator and, for the current partition's bounding
// box (part_box), builds up to four TBOX "slice" copies of it. Each slice is
// passed to AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice)
// and something is then inserted before the current element with
// add_before_stay_put(). The current partition can be extracted from the
// list and deleted. The trailing `while (!part_it.at_first())` shows the
// walk continues until the iterator is back at the first element.
// NOTE(review): the rotation/rerotation parameters, the edits applied to
// each slice, most of the conditions selecting which slices are made (only
// the box.top() < part_box.top() tests are visible), the arguments of
// add_before_stay_put(), and the start of the loop are not present in this
// excerpt -- confirm against the full source.
665 static void CutChunkFromParts(
const TBOX& box,
const TBOX& im_box,
667 Pix* pix, ColPartition_LIST* part_list) {
669 ColPartition_IT part_it(part_list);
671 ColPartition* part = part_it.data();
672 TBOX part_box = part->bounding_box();
679 if (box.
top() < part_box.
top()) {
680 TBOX slice(part_box);
684 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
685 part_it.add_before_stay_put(
692 TBOX slice(part_box);
694 if (box.
top() < part_box.
top())
700 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
701 part_it.add_before_stay_put(
708 TBOX slice(part_box);
710 if (box.
top() < part_box.
top())
716 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
717 part_it.add_before_stay_put(
724 TBOX slice(part_box);
728 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
729 part_it.add_before_stay_put(
735 delete part_it.extract();
738 }
while (!part_it.at_first());
// Fills part_list for one image region.
// Visible behaviour: pix_part is appended to part_list, then every partition
// returned by a rectangle search of im_box via rectsearch is examined.
// In one branch all entries of part_list are extracted and DeleteBoxes() is
// called on each. In another, taken when
// black_area * 2 < part_box.area() or im_box does not contain part_box,
// part_box's top is raised by padding / 2 and CutChunkFromParts() is called
// with the adjusted box. The loop also tests whether part_list has become
// empty.
// NOTE(review): the rectsearch parameter, the creation of pix_part, the
// computation of black_area and padding, the condition guarding the
// "extract everything" branch, and the action taken when part_list is empty
// are not present in this excerpt -- confirm against the full source.
748 static void DivideImageIntoParts(
const TBOX& im_box,
const FCOORD& rotation,
749 const FCOORD& rerotation, Pix* pix,
751 ColPartition_LIST* part_list) {
756 ColPartition_IT part_it(part_list);
757 part_it.add_after_then_move(pix_part);
759 rectsearch->StartRectSearch(im_box);
761 while ((part = rectsearch->NextRectSearch()) !=
nullptr) {
762 TBOX part_box = part->bounding_box();
765 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
766 ColPartition* pix_part = part_it.extract();
767 pix_part->DeleteBoxes();
776 if (black_area * 2 < part_box.
area() || !im_box.
contains(part_box)) {
781 part_box.
set_top(part_box.
top() + padding / 2);
783 CutChunkFromParts(part_box, im_box, rotation, rerotation,
790 if (part_list->empty()) {
// Tightens left_limit towards the left edge of box.
// Two passes are visible: a side search (NextSideSearch(true)) and, if that
// search ended on a non-null partition, a rectangle search of search_box.
// In both, a partition is considered only if part_box.y_gap(box) < 0, and
// left_limit is moved to part_box.right() when that edge lies strictly
// between the current left_limit and box.left().
// NOTE(review): the construction of `search` and `search_box`, any extra
// filter on the partitions, any early exit from the first loop, and the
// return statement are not present in this excerpt; presumably the updated
// left_limit is returned -- confirm against the full source.
798 static int ExpandImageLeft(
const TBOX& box,
int left_limit,
799 ColPartitionGrid* part_grid) {
804 while ((part =
search.NextSideSearch(
true)) !=
nullptr) {
806 const TBOX& part_box(part->bounding_box());
807 if (part_box.
y_gap(box) < 0) {
808 if (part_box.
right() > left_limit && part_box.
right() < box.
left())
809 left_limit = part_box.
right();
814 if (part !=
nullptr) {
817 search.StartRectSearch(search_box);
818 while ((part =
search.NextRectSearch()) !=
nullptr) {
820 const TBOX& part_box(part->bounding_box());
821 if (part_box.
y_gap(box) < 0) {
822 if (part_box.
right() > left_limit && part_box.
right() < box.
left()) {
823 left_limit = part_box.
right();
// Tightens right_limit towards the right edge of box.
// Two passes are visible: a side search (NextSideSearch(false)) and, if that
// search ended on a non-null partition, a rectangle search of search_box.
// In both, a partition is considered only if part_box.y_gap(box) < 0, and
// right_limit is moved to part_box.left() when that edge lies strictly
// between box.right() and the current right_limit.
// NOTE(review): the construction of `search` and `search_box`, any extra
// filter on the partitions, any early exit from the first loop, and the
// return statement are not present in this excerpt; presumably the updated
// right_limit is returned -- confirm against the full source.
834 static int ExpandImageRight(
const TBOX& box,
int right_limit,
835 ColPartitionGrid* part_grid) {
840 while ((part =
search.NextSideSearch(
false)) !=
nullptr) {
842 const TBOX& part_box(part->bounding_box());
843 if (part_box.
y_gap(box) < 0) {
844 if (part_box.
left() < right_limit && part_box.
left() > box.
right())
845 right_limit = part_box.
left();
850 if (part !=
nullptr) {
853 search.StartRectSearch(search_box);
854 while ((part =
search.NextRectSearch()) !=
nullptr) {
856 const TBOX& part_box(part->bounding_box());
857 if (part_box.
y_gap(box) < 0) {
858 if (part_box.
left() < right_limit && part_box.
left() > box.
right())
859 right_limit = part_box.
left();
// Tightens bottom_limit towards the bottom edge of box.
// Two passes are visible: a vertical search (NextVerticalSearch(true)) and,
// if that search ended on a non-null partition, a rectangle search of
// search_box. In both, a partition is considered only if
// part_box.x_gap(box) < 0, and bottom_limit is moved to part_box.top() when
// that edge lies strictly between the current bottom_limit and box.bottom().
// NOTE(review): the construction of `search` and `search_box`, any extra
// filter on the partitions, any early exit from the first loop, and the
// return statement are not present in this excerpt; presumably the updated
// bottom_limit is returned -- confirm against the full source.
869 static int ExpandImageBottom(
const TBOX& box,
int bottom_limit,
870 ColPartitionGrid* part_grid) {
875 while ((part =
search.NextVerticalSearch(
true)) !=
nullptr) {
877 const TBOX& part_box(part->bounding_box());
878 if (part_box.
x_gap(box) < 0) {
879 if (part_box.
top() > bottom_limit && part_box.
top() < box.
bottom())
880 bottom_limit = part_box.
top();
885 if (part !=
nullptr) {
888 search.StartRectSearch(search_box);
889 while ((part =
search.NextRectSearch()) !=
nullptr) {
891 const TBOX& part_box(part->bounding_box());
892 if (part_box.
x_gap(box) < 0) {
893 if (part_box.
top() > bottom_limit && part_box.
top() < box.
bottom())
894 bottom_limit = part_box.
top();
// Adjusts top_limit using partitions found above/around box.
// Two passes are visible: a vertical search (NextVerticalSearch(false)) and,
// if that search ended on a non-null partition, a rectangle search of
// search_box. In both, a partition is considered only if
// part_box.x_gap(box) < 0, and top_limit can be set to part_box.bottom().
// NOTE(review): the condition that guards each top_limit assignment is not
// present in this excerpt (the sibling Expand* helpers require the edge to
// lie strictly between the box edge and the current limit; presumably the
// same applies here). The construction of `search` and `search_box` and the
// return statement are also missing -- confirm against the full source.
904 static int ExpandImageTop(
const TBOX& box,
int top_limit,
905 ColPartitionGrid* part_grid) {
910 while ((part =
search.NextVerticalSearch(
false)) !=
nullptr) {
912 const TBOX& part_box(part->bounding_box());
913 if (part_box.
x_gap(box) < 0) {
915 top_limit = part_box.
bottom();
920 if (part !=
nullptr) {
923 search.StartRectSearch(search_box);
924 while ((part =
search.NextRectSearch()) !=
nullptr) {
926 const TBOX& part_box(part->bounding_box());
927 if (part_box.
x_gap(box) < 0) {
929 top_limit = part_box.
bottom();
942 const TBOX& limit_box,
943 ColPartitionGrid* part_grid,
TBOX* expanded_box) {
944 *expanded_box = im_box;
947 expanded_box->
set_left(ExpandImageLeft(im_box, limit_box.
left(),
951 expanded_box->
set_right(ExpandImageRight(im_box, limit_box.
right(),
955 expanded_box->
set_top(ExpandImageTop(im_box, limit_box.
top(), part_grid));
964 return expanded_box->
area() - im_box.
area();
// Grows a working copy (text_box) of *im_box one direction at a time.
// limit_box is built from part_grid's bleft()/tright() corners. The outer
// loop runs BND_COUNT times; in each pass the inner loop calls
// ExpandImageDir() for directions 0 .. BND_COUNT - 1, storing each candidate
// in expanded_boxes[bnd] and tracking the smallest area_delta in best_delta.
// The chosen direction is then flagged in dunnit[] and its candidate box
// becomes the new text_box.
// NOTE(review): the declarations of dunnit, best_delta, best_dir and
// expanded_boxes, the assignment of best_dir, any check of dunnit[] inside
// the inner loop, and the write-back of text_box to *im_box are not present
// in this excerpt -- confirm against the full source.
971 static void MaximalImageBoundingBox(ColPartitionGrid* part_grid,
TBOX* im_box) {
973 memset(dunnit, 0,
sizeof(dunnit));
974 TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
975 part_grid->tright().x(), part_grid->tright().y());
976 TBOX text_box(*im_box);
977 for (
int iteration = 0; iteration <
BND_COUNT; ++iteration) {
982 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
983 auto bnd = static_cast<BlobNeighbourDir>(dir);
986 int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid,
987 &expanded_boxes[bnd]);
988 if (best_delta < 0 || area_delta < best_delta) {
989 best_delta = area_delta;
995 dunnit[best_dir] =
true;
996 text_box = expanded_boxes[best_dir];
// Releases the boxes held by part. The visible lines call
// part->DeleteBoxes() in one place and part->SetBlobTypes() followed by
// part->DisownBoxes() in another.
// NOTE(review): the condition that selects between those two paths, and
// whether part itself is deleted afterwards, are not present in this
// excerpt -- confirm against the full source.
1005 static void DeletePartition(ColPartition* part) {
1009 part->DeleteBoxes();
1014 part->SetBlobTypes();
1015 part->DisownBoxes();
// Looks for a partition inside max_image_box to merge with the image
// partition *part_ptr.
// Visible behaviour:
//   - Partitions returned by a rectangle search of max_image_box are
//     examined. One path removes the partition from the search grid
//     (rectsearch->RemoveBBox()) and passes it to DeletePartition().
//   - For other candidates, dist is the sum of the squared x and y gaps
//     (each clamped at 0) between the candidate box and the image box.
//     Candidates whose dist exceeds either box's area are tested for and
//     rejected; the closest remaining one is remembered as best_part.
//   - If a best_part exists, image_part and best_part are both handed to
//     DeletePartition(), best_part is removed from part_grid, and the search
//     iterator is repositioned.
//   - When textord_tabfind_show_images > 1, progress is traced via tprintf.
// NOTE(review): the rectsearch parameter, the conditions choosing between
// the delete path and the distance path, the construction of the merged
// partition, the update of *part_ptr, and the return statements are not
// present in this excerpt -- confirm against the full source.
1032 static bool ExpandImageIntoParts(
const TBOX& max_image_box,
1034 ColPartitionGrid* part_grid,
1035 ColPartition** part_ptr) {
1036 ColPartition* image_part = *part_ptr;
1037 TBOX im_part_box = image_part->bounding_box();
1038 if (textord_tabfind_show_images > 1) {
1039 tprintf(
"Searching for merge with image part:");
1040 im_part_box.
print();
1042 max_image_box.
print();
1044 rectsearch->StartRectSearch(max_image_box);
1046 ColPartition* best_part =
nullptr;
1048 while ((part = rectsearch->NextRectSearch()) !=
nullptr) {
1049 if (textord_tabfind_show_images > 1) {
1050 tprintf(
"Considering merge with part:");
1052 if (im_part_box.
contains(part->bounding_box()))
1054 else if (!max_image_box.
contains(part->bounding_box()))
1055 tprintf(
"Not within text box\n");
1065 TBOX box = part->bounding_box();
1069 rectsearch->RemoveBBox();
1070 DeletePartition(part);
1073 int x_dist = std::max(0, box.
x_gap(im_part_box));
1074 int y_dist = std::max(0, box.
y_gap(im_part_box));
1075 int dist = x_dist * x_dist + y_dist * y_dist;
1076 if (dist > box.
area() || dist > im_part_box.
area())
1078 if (best_part ==
nullptr || dist < best_dist) {
1085 if (best_part !=
nullptr) {
1087 TBOX box = best_part->bounding_box();
1088 if (textord_tabfind_show_images > 1) {
1089 tprintf(
"Merging image part:");
1090 im_part_box.
print();
1098 DeletePartition(image_part);
1099 part_grid->RemoveBBox(best_part);
1100 DeletePartition(best_part);
1101 rectsearch->RepositionIterator();
// Returns an area total accumulated over every partition in part_list:
// for each element, intersect.area() is added to intersect_area, which
// starts at 0 and is returned at the end.
// NOTE(review): the line that computes `intersect` is not present in this
// excerpt; presumably it is the intersection of box with the partition's
// bounding box -- confirm against the full source.
1109 static int IntersectArea(
const TBOX& box, ColPartition_LIST* part_list) {
1110 int intersect_area = 0;
1111 ColPartition_IT part_it(part_list);
1113 for (part_it.mark_cycle_pt(); !part_it.cycled_list();
1114 part_it.forward()) {
1115 ColPartition* image_part = part_it.data();
1117 intersect_area += intersect.
area();
1119 return intersect_area;
// Tests part against the partitions in part_list: the area of part's
// bounding box is compared with IntersectArea(part_box, part_list), and the
// visible condition is true when the intersected area is more than half of
// the box area (area < 2 * intersect_area).
// NOTE(review): any preconditions on part or im_box (im_box is unused in
// the visible lines) and the return statements are not present in this
// excerpt -- confirm against the full source.
1127 static bool TestWeakIntersectedPart(
const TBOX& im_box,
1128 ColPartition_LIST* part_list,
1129 ColPartition* part) {
1132 const TBOX& part_box = part->bounding_box();
1134 int area = part_box.
area();
1135 int intersect_area = IntersectArea(part_box, part_list);
1136 if (area < 2 * intersect_area) {
// Applies TestWeakIntersectedPart() to two sets of partitions:
//   1. Every partition returned by a rectangle search of im_box. For those
//      that test true, the visible lines either remove the partition from
//      the search grid and pass it to DeletePartition(), or call
//      part->SetBlobTypes() on it.
//   2. Every partition in big_parts. Those that test true are extracted from
//      the list and passed to DeletePartition().
// NOTE(review): the construction of `rectsearch`, the condition choosing
// between delete and SetBlobTypes() in step 1, and any other changes made to
// the partition before SetBlobTypes() are not present in this excerpt --
// confirm against the full source.
1149 static void EliminateWeakParts(
const TBOX& im_box,
1150 ColPartitionGrid* part_grid,
1151 ColPartition_LIST* big_parts,
1152 ColPartition_LIST* part_list) {
1155 rectsearch.StartRectSearch(im_box);
1156 while ((part = rectsearch.NextRectSearch()) !=
nullptr) {
1157 if (TestWeakIntersectedPart(im_box, part_list, part)) {
1160 rectsearch.RemoveBBox();
1161 DeletePartition(part);
1167 part->SetBlobTypes();
1171 ColPartition_IT big_it(big_parts);
1172 for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
1173 part = big_it.data();
1174 if (TestWeakIntersectedPart(im_box, part_list, part)) {
1176 DeletePartition(big_it.extract());
// Searches part_grid around *box. A copy of *box (padded_box) is used as the
// rectangle-search area; any_text_in_padded_rect starts false and is set
// true inside the search loop, and is tested once the loop has finished.
// NOTE(review): how padded_box is enlarged, which partitions count as text,
// what is done with each part_box (including any change to *box), and the
// return values are not present in this excerpt -- confirm against the full
// source.
1185 static bool ScanForOverlappingText(ColPartitionGrid* part_grid,
TBOX* box) {
1187 TBOX padded_box(*box);
1189 rectsearch.StartRectSearch(padded_box);
1191 bool any_text_in_padded_rect =
false;
1192 while ((part = rectsearch.NextRectSearch()) !=
nullptr) {
1196 any_text_in_padded_rect =
true;
1197 const TBOX& part_box = part->bounding_box();
1203 if (!any_text_in_padded_rect)
// Drains image_parts: every partition is extracted from the list and, at the
// end of each iteration shown, passed to DeletePartition().
// For partitions meeting a condition that starts with
// !ScanForOverlappingText(part_grid, &part_box), the bounding box is rotated
// by rerotate and the matching rectangle of image_pix is set with
// pixRasterop(..., PIX_SET, ...). The rectangle's y origin is
// imageheight - top, i.e. the box's top edge measured down from the top of
// image_pix.
// NOTE(review): the image_pix parameter declaration, the action taken when
// image_pix is nullptr (presumably an early return), and the second half of
// the condition are not present in this excerpt -- confirm against the full
// source.
1212 static void MarkAndDeleteImageParts(
const FCOORD& rerotate,
1213 ColPartitionGrid* part_grid,
1214 ColPartition_LIST* image_parts,
1216 if (image_pix ==
nullptr)
1218 int imageheight = pixGetHeight(image_pix);
1219 ColPartition_IT part_it(image_parts);
1220 for (; !part_it.empty(); part_it.forward()) {
1221 ColPartition* part = part_it.extract();
1222 TBOX part_box = part->bounding_box();
1224 if (!ScanForOverlappingText(part_grid, &part_box) ||
1228 part_box.
rotate(rerotate);
1229 int left = part_box.
left();
1230 int top = part_box.
top();
1231 pixRasterop(image_pix, left, imageheight - top,
1232 part_box.
width(), part_box.
height(), PIX_SET,
nullptr, 0, 0);
1234 DeletePartition(part);
1249 ColPartition_LIST parts_list;
1250 ColPartition_IT part_it(&parts_list);
1257 part_it.add_after_then_move(part);
1262 MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1269 if (part_grid !=
nullptr)
return;
1271 gsearch.StartFullSearch();
1273 while ((part = gsearch.NextFullSearch()) !=
nullptr) {
1281 gsearch.RemoveBBox();
1282 DeletePartition(part);
1302 ColPartition_LIST* big_parts) {
1303 int imageheight = pixGetHeight(image_pix);
1309 if (boxa !=
nullptr && pixa !=
nullptr) nboxes = boxaGetCount(boxa);
1310 for (
int i = 0; i < nboxes; ++i) {
1311 l_int32 x, y, width, height;
1312 boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
1313 Pix* pix = pixaGetPix(pixa, i, L_CLONE);
1314 TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
1318 ColPartition_LIST part_list;
1319 DivideImageIntoParts(im_box, rotation, rerotation, pix,
1320 &rectsearch, &part_list);
1321 if (textord_tabfind_show_images && pixa_debug !=
nullptr) {
1322 pixa_debug->
AddPix(pix,
"ImageComponent");
1323 tprintf(
"Component has %d parts\n", part_list.length());
1326 if (!part_list.empty()) {
1327 ColPartition_IT part_it(&part_list);
1328 if (part_list.singleton()) {
1333 TBOX text_box(im_box);
1334 MaximalImageBoundingBox(part_grid, &text_box);
1335 while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
1336 part_it.set_to_list(&part_list);
1337 part_it.add_after_then_move(part);
1340 EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1342 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1345 part_grid->
InsertBBox(
true,
true, image_part);
1346 if (!part_it.at_last()) {
1356 DeleteSmallImages(part_grid);
1357 if (textord_tabfind_show_images) {