21 #pragma warning(disable:4244) // Conversion warnings
25 #include "config_auto.h"
123 denorm_(
NULL), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) {
126 initial_widths_win_ =
NULL;
128 diacritics_win_ =
NULL;
129 textlines_win_ =
NULL;
130 smoothed_win_ =
NULL;
134 if (widths_win_ !=
NULL) {
135 #ifndef GRAPHICS_DISABLED
137 #endif // GRAPHICS_DISABLED
143 delete initial_widths_win_;
145 delete textlines_win_;
146 delete smoothed_win_;
147 delete diacritics_win_;
156 BLOBNBOX_IT blob_it(&block->
blobs);
157 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
158 SetNeighbours(
false,
false, blob_it.data());
171 InsertBlobs(input_block);
173 while (cjk_merge && FixBrokenCJK(input_block));
175 FindTextlineFlowDirection(pageseg_mode,
false);
181 static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
182 int* num_vertical_blobs,
183 int* num_horizontal_blobs,
184 BLOBNBOX_CLIST* vertical_blobs,
185 BLOBNBOX_CLIST* horizontal_blobs,
186 BLOBNBOX_CLIST* nondescript_blobs) {
187 BLOBNBOX_C_IT v_it(vertical_blobs);
188 BLOBNBOX_C_IT h_it(horizontal_blobs);
189 BLOBNBOX_C_IT n_it(nondescript_blobs);
190 BLOBNBOX_IT blob_it(input_blobs);
191 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
194 float y_x =
static_cast<float>(box.
height()) / box.
width();
195 float x_y = 1.0f / y_x;
197 float ratio = x_y > y_x ? x_y : y_x;
201 ++*num_vertical_blobs;
202 if (ok_blob) v_it.add_after_then_move(blob);
204 ++*num_horizontal_blobs;
205 if (ok_blob) h_it.add_after_then_move(blob);
206 }
else if (ok_blob) {
207 n_it.add_after_then_move(blob);
221 BLOBNBOX_CLIST* osd_blobs) {
222 int vertical_boxes = 0;
223 int horizontal_boxes = 0;
225 BLOBNBOX_CLIST vertical_blobs;
226 BLOBNBOX_CLIST horizontal_blobs;
227 BLOBNBOX_CLIST nondescript_blobs;
228 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes,
229 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
230 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes,
231 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
233 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
234 horizontal_boxes, vertical_boxes,
235 horizontal_blobs.length(), vertical_blobs.length(),
236 nondescript_blobs.length());
237 if (osd_blobs !=
NULL && vertical_boxes == 0 && horizontal_boxes == 0) {
239 BLOBNBOX_C_IT osd_it(osd_blobs);
240 osd_it.add_list_after(&nondescript_blobs);
243 int min_vert_boxes =
static_cast<int>((vertical_boxes + horizontal_boxes) *
244 find_vertical_text_ratio);
245 if (vertical_boxes >= min_vert_boxes) {
246 if (osd_blobs !=
NULL) {
247 BLOBNBOX_C_IT osd_it(osd_blobs);
248 osd_it.add_list_after(&vertical_blobs);
252 if (osd_blobs !=
NULL) {
253 BLOBNBOX_C_IT osd_it(osd_blobs);
254 osd_it.add_list_after(&horizontal_blobs);
265 rerotation_.
set_x(rotation.
x());
266 rerotation_.
set_y(-rotation.
y());
274 ColPartition_LIST leader_parts;
275 FindLeadersAndMarkNoise(block, &leader_parts);
279 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
282 MarkLeaderNeighbours(part,
LR_LEFT);
283 MarkLeaderNeighbours(part,
LR_RIGHT);
304 TBOX search_box = box;
305 search_box.
pad(padding, padding);
312 rsearch.StartRectSearch(search_box);
313 while ((n = rsearch.NextRectSearch()) !=
NULL) {
314 if (n == bbox)
continue;
316 if (nbox.
height() > max_size) {
321 tprintf(
"Max neighbour size=%d for candidate line box at:", max_size);
324 if (max_size * kLineResidueSizeRatio < box.
height()) {
325 #ifndef GRAPHICS_DISABLED
326 if (leaders_win_ !=
NULL) {
333 #endif // GRAPHICS_DISABLED
356 Pix* nontext_pix,
const DENORM* denorm,
bool cjk_script,
359 nontext_map_ = nontext_pix;
360 projection_ = projection;
371 FindTextlineFlowDirection(pageseg_mode,
false);
384 FindTextlineFlowDirection(pageseg_mode,
true);
386 FindInitialPartitions(pageseg_mode, rerotation,
true, block,
387 diacritic_blobs, part_grid, big_parts, &skew);
389 tprintf(
"Detected %d diacritics\n", diacritic_blobs->length());
393 FindTextlineFlowDirection(pageseg_mode,
true);
394 r = FindInitialPartitions(pageseg_mode, rerotation,
false, block,
395 diacritic_blobs, part_grid, big_parts, &skew);
402 static void PrintBoxWidths(
BLOBNBOX* neighbour) {
404 tprintf(
"Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n",
417 FCOORD click(static_cast<float>(x), static_cast<float>(y));
421 PrintBoxWidths(neighbour);
432 tprintf(
"Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
433 "Good= %d %d %d %d\n",
456 void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK* block,
457 ColPartition_LIST* leader_parts) {
463 gsearch.StartFullSearch();
464 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
465 SetNeighbours(
true,
false, bbox);
467 ColPartition_IT part_it(leader_parts);
468 gsearch.StartFullSearch();
469 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
484 if (part->MarkAsLeaderIfMonospaced())
485 part_it.add_after_then_move(part);
491 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
495 BLOBNBOX_IT blob_it(&block->
blobs);
497 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
503 blob_it.add_to_end(small_it.extract());
510 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
513 small_it.add_to_end(noise_it.extract());
525 void StrokeWidth::InsertBlobs(
TO_BLOCK* block) {
533 void StrokeWidth::MarkLeaderNeighbours(
const ColPartition* part,
535 const TBOX& part_box = part->bounding_box();
540 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.
left()
544 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) !=
NULL) {
548 int x_gap = blob_box.
x_gap(part_box);
551 }
else if (best_blob ==
NULL || x_gap < best_gap) {
556 if (best_blob !=
NULL) {
561 #ifndef GRAPHICS_DISABLED
562 if (leaders_win_ !=
NULL) {
568 #endif // GRAPHICS_DISABLED
573 static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST* blobs) {
574 STATS sizes(0, gridsize * kMaxCJKSizeRatio);
575 BLOBNBOX_IT it(blobs);
576 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
580 if (width <= height * kCJKAspectRatio && height < width * kCJKAspectRatio)
581 sizes.add(height, 1);
583 return static_cast<int>(sizes.ile(0.75f) + 0.5);
591 bool StrokeWidth::FixBrokenCJK(
TO_BLOCK* block) {
592 BLOBNBOX_LIST* blobs = &block->
blobs;
593 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
597 BLOBNBOX_IT blob_it(blobs);
599 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
607 tprintf(
"Checking for Broken CJK (max size=%d):", max_size);
611 BLOBNBOX_CLIST overlapped_blobs;
612 AccumulateOverlaps(blob, debug, max_size, max_dist,
613 &bbox, &overlapped_blobs);
614 if (!overlapped_blobs.empty()) {
618 if (bbox.
width() > bbox.
height() * kCJKAspectRatio ||
621 tprintf(
"Bad final aspectratio:");
629 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
633 BLOBNBOX_C_IT n_it(&overlapped_blobs);
634 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
636 neighbour = n_it.data();
641 if (!n_it.cycled_list()) {
644 PrintBoxWidths(blob);
654 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
660 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
673 int num_remaining = 0;
674 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
691 static bool AcceptableCJKMerge(
const TBOX& bbox,
const TBOX& nbox,
692 bool debug,
int max_size,
int max_dist,
693 int* x_gap,
int* y_gap) {
694 *x_gap = bbox.
x_gap(nbox);
695 *y_gap = bbox.
y_gap(nbox);
699 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
702 if (*x_gap <= max_dist && *y_gap <= max_dist &&
703 merged.width() <= max_size && merged.height() <= max_size) {
705 double old_ratio =
static_cast<double>(bbox.
width()) / bbox.
height();
706 if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio;
707 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
708 if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio;
709 if (new_ratio <= old_ratio * kCJKAspectRatioIncrease)
719 void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX* not_this,
bool debug,
720 int max_size,
int max_dist,
721 TBOX* bbox, BLOBNBOX_CLIST* blobs) {
730 int x = (bbox->
left() + bbox->
right()) / 2;
731 int y = (bbox->
bottom() + bbox->
top()) / 2;
734 radsearch.StartRadSearch(x, y, kCJKRadius);
736 while ((neighbour = radsearch.NextRadSearch()) !=
NULL) {
737 if (neighbour == not_this)
continue;
740 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
744 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
750 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
751 if (nearests[dir] ==
NULL)
continue;
753 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
754 max_dist, &x_gap, &y_gap)) {
757 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
762 nearests[dir] =
NULL;
766 }
else if (x_gap < 0 && x_gap <= y_gap) {
769 if (nearests[dir] ==
NULL ||
770 y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
771 nearests[dir] = neighbour;
773 }
else if (y_gap < 0 && y_gap <= x_gap) {
776 if (nearests[dir] ==
NULL ||
777 x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
778 nearests[dir] = neighbour;
787 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
788 if (nearests[dir] ==
NULL)
continue;
791 tprintf(
"Testing for overlap with:");
795 blobs->shallow_clear();
797 tprintf(
"Final box overlaps nearest\n");
810 void StrokeWidth::FindTextlineFlowDirection(
PageSegMode pageseg_mode,
811 bool display_if_debugging) {
815 gsearch.StartFullSearch();
816 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
817 SetNeighbours(
false, display_if_debugging, bbox);
820 gsearch.StartFullSearch();
821 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
822 SimplifyObviousNeighbours(bbox);
825 gsearch.StartFullSearch();
826 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
827 if (FindingVerticalOnly(pageseg_mode)) {
830 }
else if (FindingHorizontalOnly(pageseg_mode)) {
834 SetNeighbourFlows(bbox);
839 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
842 gsearch.StartFullSearch();
843 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
844 SmoothNeighbourTypes(pageseg_mode,
false, bbox);
847 gsearch.StartFullSearch();
848 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
849 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
852 gsearch.StartFullSearch();
853 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
854 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
858 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
866 void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap,
868 int line_trap_count = 0;
869 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
871 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
873 if (line_trap_count > 0 && activate_line_trap) {
895 tprintf(
"FGN in dir %d for blob:", dir);
898 int top = blob_box.
top();
899 int bottom = blob_box.
bottom();
900 int left = blob_box.
left();
901 int right = blob_box.
right();
902 int width = right - left;
903 int height = top - bottom;
911 int line_trap_count = 0;
914 ? height / 2 : width / 2;
916 ? height / 3 : width / 3;
918 min_good_overlap = min_decent_overlap = 1;
920 int search_pad =
static_cast<int>(
924 TBOX search_box = blob_box;
937 search_box.
set_top(search_box.
top() + search_pad);
944 rectsearch.StartRectSearch(search_box);
946 double best_goodness = 0.0;
947 bool best_is_good =
false;
949 while ((neighbour = rectsearch.NextRectSearch()) !=
NULL) {
951 if (neighbour == blob)
953 int mid_x = (nbox.
left() + nbox.
right()) / 2;
954 if (mid_x < blob->left_rule() || mid_x > blob->
right_rule())
963 int n_width = nbox.
width();
964 int n_height = nbox.
height();
965 if (
MIN(n_width, n_height) > line_trap_min &&
966 MAX(n_width, n_height) < line_trap_max)
972 MAX(width, height)) &&
977 if (debug)
tprintf(
"Bad size\n");
991 perp_overlap = nbox.
width();
993 perp_overlap = overlap;
996 if (debug)
tprintf(
"On wrong side\n");
1003 perp_overlap = nbox.
height();
1005 perp_overlap = overlap;
1008 if (debug)
tprintf(
"On wrong side\n");
1013 if (-gap > overlap) {
1014 if (debug)
tprintf(
"Overlaps wrong way\n");
1017 if (perp_overlap < min_decent_overlap) {
1018 if (debug)
tprintf(
"Doesn't overlap enough\n");
1023 bool is_good = overlap >= min_good_overlap && !bad_sizes &&
1025 kStrokeWidthFractionTolerance,
1026 kStrokeWidthTolerance);
1030 if (gap < 1) gap = 1;
1031 double goodness = (1.0 + is_good) * overlap / gap;
1033 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
1034 goodness, best_goodness, is_good, overlap, gap);
1036 if (goodness > best_goodness) {
1037 best_neighbour = neighbour;
1038 best_goodness = goodness;
1039 best_is_good = is_good;
1043 return line_trap_count;
1047 static void ListNeighbours(
const BLOBNBOX* blob,
1048 BLOBNBOX_CLIST* neighbours) {
1049 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1052 if (neighbour !=
NULL) {
1053 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
1059 static void List2ndNeighbours(
const BLOBNBOX* blob,
1060 BLOBNBOX_CLIST* neighbours) {
1061 ListNeighbours(blob, neighbours);
1062 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1065 if (neighbour !=
NULL) {
1066 ListNeighbours(neighbour, neighbours);
1072 static void List3rdNeighbours(
const BLOBNBOX* blob,
1073 BLOBNBOX_CLIST* neighbours) {
1074 List2ndNeighbours(blob, neighbours);
1075 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1078 if (neighbour !=
NULL) {
1079 List2ndNeighbours(neighbour, neighbours);
1086 static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST* neighbours,
1087 int* pure_h_count,
int* pure_v_count) {
1090 BLOBNBOX_C_IT it(neighbours);
1091 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1093 int h_min, h_max, v_min, v_max;
1096 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1097 if (h_max < v_min ||
1101 if (debug)
tprintf(
"Horz at:");
1102 }
else if (v_max < h_min) {
1105 if (debug)
tprintf(
"Vert at:");
1107 if (debug)
tprintf(
"Neither at:");
1117 void StrokeWidth::SetNeighbourFlows(
BLOBNBOX* blob) {
1123 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:",
1127 BLOBNBOX_CLIST neighbours;
1128 List3rdNeighbours(blob, &neighbours);
1130 int pure_h_count = 0;
1131 int pure_v_count = 0;
1132 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1136 tprintf(
"SetFlows: h_count=%d, v_count=%d\n",
1137 pure_h_count, pure_v_count);
1139 if (!neighbours.empty()) {
1142 if (pure_h_count > 2 * pure_v_count) {
1145 }
else if (pure_v_count > 2 * pure_h_count) {
1158 static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
1159 int* pure_h_count,
int* pure_v_count) {
1160 BLOBNBOX_C_IT it(neighbours);
1161 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1173 void StrokeWidth::SimplifyObviousNeighbours(
BLOBNBOX* blob) {
1195 int h_min, h_max, v_min, v_max;
1197 if ((h_max + margin < v_min && h_max < margin / 2) ||
1202 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1212 void StrokeWidth::SmoothNeighbourTypes(
PageSegMode pageseg_mode,
bool reset_all,
1216 BLOBNBOX_CLIST neighbours;
1217 List2ndNeighbours(blob, &neighbours);
1219 int pure_h_count = 0;
1220 int pure_v_count = 0;
1221 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1226 tprintf(
"pure_h=%d, pure_v=%d\n",
1227 pure_h_count, pure_v_count);
1229 if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
1233 }
else if (pure_v_count > pure_h_count &&
1234 !FindingHorizontalOnly(pageseg_mode)) {
1243 tprintf(
"Clean on pass 3!\n");
1261 TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
1262 ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
1264 if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
1265 if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
1267 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1268 part_grid->DisplayBoxes(chains_win_);
1271 if (find_problems) {
1275 part_grid->SplitOverlappingPartitions(big_parts);
1276 EasyMerges(part_grid);
1277 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1279 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1282 grid_box, rerotation));
1283 int pre_overlap = part_grid->ComputeTotalOverlap(
NULL);
1284 TestDiacritics(part_grid, block);
1285 MergeDiacritics(block, part_grid);
1286 if (find_problems && diacritic_blobs !=
NULL &&
1287 DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid,
1292 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1293 part_grid->DisplayBoxes(textlines_win_);
1294 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1296 PartitionRemainingBlobs(pageseg_mode, part_grid);
1297 part_grid->SplitOverlappingPartitions(big_parts);
1298 EasyMerges(part_grid);
1299 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1302 grid_box, rerotation));
1305 grid_box, rerotation));
1307 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1308 part_grid->DisplayBoxes(smoothed_win_);
1317 bool StrokeWidth::DetectAndRemoveNoise(
int pre_overlap,
const TBOX& grid_box,
1319 ColPartitionGrid* part_grid,
1320 BLOBNBOX_LIST* diacritic_blobs) {
1321 ColPartitionGrid* noise_grid =
NULL;
1322 int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
1323 if (pre_overlap == 0) pre_overlap = 1;
1324 BLOBNBOX_IT diacritic_it(diacritic_blobs);
1325 if (noise_grid !=
NULL) {
1326 if (post_overlap > pre_overlap * kNoiseOverlapGrowthFactor &&
1331 noise_grid->DisplayBoxes(noise_win);
1333 part_grid->DeleteNonLeaderParts();
1336 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1344 rsearch.StartRectSearch(search_box);
1345 ColPartition* part = rsearch.NextRectSearch();
1350 diacritic_it.add_after_then_move(blob_it.extract());
1353 noise_grid->DeleteParts();
1357 noise_grid->DeleteParts();
1379 void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
1385 gsearch.StartFullSearch();
1386 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
1395 while (blob !=
NULL) {
1397 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
1399 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1400 while (blob !=
NULL) {
1402 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1404 CompletePartition(pageseg_mode, part, part_grid);
1425 void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
1431 gsearch.StartFullSearch();
1432 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
1439 while (blob !=
NULL) {
1441 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
1443 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1444 while (blob !=
NULL) {
1446 blob = MutualUnusedVNeighbour(blob,
BND_LEFT);
1448 CompletePartition(pageseg_mode, part, part_grid);
1460 void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid,
TO_BLOCK* block) {
1463 small_grid.InsertBlobList(&block->
blobs);
1464 int medium_diacritics = 0;
1465 int small_diacritics = 0;
1467 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1470 DiacriticBlob(&small_grid, blob)) {
1474 BLOBNBOX_IT blob_it(&block->
blobs);
1475 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1478 small_it.add_to_end(blob_it.extract());
1481 ColPartition* part = blob->
owner();
1482 if (part ==
NULL && DiacriticBlob(&small_grid, blob)) {
1483 ++medium_diacritics;
1485 small_it.add_to_end(blob_it.extract());
1486 }
else if (part !=
NULL && !part->block_owned() &&
1487 part->boxes_count() < 3) {
1493 BLOBNBOX_C_IT box_it(part->boxes());
1494 for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
1495 DiacriticBlob(&small_grid, box_it.data());
1497 if (box_it.cycled_list()) {
1499 while (!box_it.empty()) {
1508 ++medium_diacritics;
1515 small_it.add_to_end(blob_it.extract());
1516 part_grid->RemoveBBox(part);
1521 tprintf(
"Blob not available to be a diacritic at:");
1526 tprintf(
"Found %d small diacritics, %d medium\n",
1527 small_diacritics, medium_diacritics);
1537 bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid,
BLOBNBOX* blob) {
1543 small_box.bottom());
1545 tprintf(
"Testing blob for diacriticness at:");
1548 int x = (small_box.left() + small_box.right()) / 2;
1549 int y = (small_box.bottom() + small_box.top()) / 2;
1552 int height = small_box.height();
1567 int best_total_dist = 0;
1571 TBOX search_box(small_box);
1574 search_box.
pad(x_pad, y_pad);
1576 rsearch.SetUniqueMode(
true);
1578 rsearch.StartRectSearch(search_box);
1580 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1582 neighbour == blob || neighbour->
owner() == blob->
owner())
1589 tprintf(
"Neighbour not strong enough:");
1594 if (nbox.
height() < min_height) {
1596 tprintf(
"Neighbour not big enough:");
1601 int x_gap = small_box.x_gap(nbox);
1602 int y_gap = small_box.y_gap(nbox);
1606 if (debug)
tprintf(
"xgap=%d, y=%d, total dist=%d\n",
1607 x_gap, y_gap, total_distance);
1608 if (total_distance >
1611 tprintf(
"Neighbour with median size %d too far away:",
1619 tprintf(
"Computing reduced box for :");
1622 int left = small_box.left() - small_box.width();
1623 int right = small_box.right() + small_box.width();
1625 y_gap = small_box.
y_gap(nbox);
1626 if (best_x_overlap ==
NULL || y_gap < best_y_gap) {
1627 best_x_overlap = neighbour;
1635 tprintf(
"Shrunken box doesn't win:");
1639 if (best_y_overlap ==
NULL || total_distance < best_total_dist) {
1641 tprintf(
"New best y overlap:");
1644 best_y_overlap = neighbour;
1645 best_total_dist = total_distance;
1647 tprintf(
"New y overlap box doesn't win:");
1651 tprintf(
"Neighbour wrong side of a tab:");
1655 if (best_x_overlap !=
NULL &&
1656 (best_y_overlap ==
NULL ||
1661 tprintf(
"DiacriticBlob OK! (x-overlap:");
1667 if (best_y_overlap !=
NULL &&
1668 DiacriticXGapFilled(small_grid, small_box,
1670 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box())) {
1674 tprintf(
"DiacriticBlob OK! (y-overlap:");
1681 tprintf(
"DiacriticBlob fails:");
1683 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1684 if (best_y_overlap !=
NULL) {
1685 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1686 DiacriticXGapFilled(small_grid, small_box,
1688 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box()));
1707 bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
1708 const TBOX& diacritic_box,
1709 const TBOX& base_box) {
1713 TBOX occupied_box(base_box);
1715 while ((diacritic_gap = diacritic_box.
x_gap(occupied_box)) > max_gap) {
1716 TBOX search_box(occupied_box);
1717 if (diacritic_box.
left() > search_box.
right()) {
1727 rsearch.StartRectSearch(search_box);
1729 while ((neighbour = rsearch.NextRectSearch()) !=
NULL) {
1731 if (nbox.
x_gap(diacritic_box) < diacritic_gap) {
1732 if (nbox.
left() < occupied_box.left())
1734 if (nbox.
right() > occupied_box.right())
1735 occupied_box.set_right(nbox.
right());
1739 if (neighbour ==
NULL)
1746 void StrokeWidth::MergeDiacritics(
TO_BLOCK* block,
1747 ColPartitionGrid* part_grid) {
1749 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1755 if (part !=
NULL && !part->block_owned() && blob->
owner() ==
NULL &&
1759 part_grid->RemoveBBox(part);
1764 part_grid->InsertBBox(
true,
true, part);
1775 void StrokeWidth::RemoveLargeUnusedBlobs(
TO_BLOCK* block,
1776 ColPartitionGrid* part_grid,
1777 ColPartition_LIST* big_parts) {
1779 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1781 ColPartition* big_part = blob->
owner();
1782 if (big_part ==
NULL) {
1792 void StrokeWidth::PartitionRemainingBlobs(
PageSegMode pageseg_mode,
1793 ColPartitionGrid* part_grid) {
1796 int prev_grid_x = -1;
1797 int prev_grid_y = -1;
1798 BLOBNBOX_CLIST cell_list;
1799 BLOBNBOX_C_IT cell_it(&cell_list);
1800 bool cell_all_noise =
true;
1801 gsearch.StartFullSearch();
1802 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
1803 int grid_x = gsearch.GridX();
1804 int grid_y = gsearch.GridY();
1805 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1807 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1809 cell_it.set_to_list(&cell_list);
1810 prev_grid_x = grid_x;
1811 prev_grid_y = grid_y;
1812 cell_all_noise =
true;
1815 cell_it.add_to_end(bbox);
1817 cell_all_noise =
false;
1819 cell_all_noise =
false;
1822 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1828 void StrokeWidth::MakePartitionsFromCellList(
PageSegMode pageseg_mode,
1830 ColPartitionGrid* part_grid,
1831 BLOBNBOX_CLIST* cell_list) {
1832 if (cell_list->empty())
1834 BLOBNBOX_C_IT cell_it(cell_list);
1836 BLOBNBOX* bbox = cell_it.extract();
1840 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1841 part->AddBox(cell_it.extract());
1843 CompletePartition(pageseg_mode, part, part_grid);
1845 for (; !cell_it.empty(); cell_it.forward()) {
1846 BLOBNBOX* bbox = cell_it.extract();
1850 CompletePartition(pageseg_mode, part, part_grid);
1857 void StrokeWidth::CompletePartition(
PageSegMode pageseg_mode,
1859 ColPartitionGrid* part_grid) {
1860 part->ComputeLimits();
1866 if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
1867 value = part->boxes_count() == 1 ? 0 : -2;
1868 }
else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
1869 value = part->boxes_count() == 1 ? 0 : 2;
1871 part->SetRegionAndFlowTypesFromProjectionValue(value);
1873 part_grid->InsertBBox(
true,
true, part);
1878 void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
1887 bool StrokeWidth::OrientationSearchBox(ColPartition* part,
TBOX* box) {
1888 if (part->IsVerticalType()) {
1899 bool StrokeWidth::ConfirmEasyMerge(
const ColPartition* p1,
1900 const ColPartition* p2) {
1906 if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
1907 p1->HCoreOverlap(*p2) <= 0 &&
1908 ((!p1->IsSingleton() &&
1909 !p2->IsSingleton()) ||
1910 !p1->bounding_box().major_overlap(p2->bounding_box())))
1912 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
1913 p1->VCoreOverlap(*p2) <= 0 &&
1914 ((!p1->IsSingleton() &&
1915 !p2->IsSingleton()) ||
1916 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1917 !p1->OKDiacriticMerge(*p2,
false) &&
1918 !p2->OKDiacriticMerge(*p1,
false))))
1920 if (!p1->ConfirmNoTabViolation(*p2))
1924 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
1928 bool StrokeWidth::NoNoiseInBetween(
const TBOX& box1,
const TBOX& box2)
const {
1936 ScrollView* StrokeWidth::DisplayGoodBlobs(
const char* window_name,
1939 #ifndef GRAPHICS_DISABLED
1946 gsearch.StartFullSearch();
1948 while ((bbox = gsearch.NextFullSearch()) !=
NULL) {
1950 int left_x = box.
left();
1951 int right_x = box.
right();
1952 int top_y = box.
top();
1953 int bottom_y = box.
bottom();
1964 else if (goodness == 1)
1970 window->
Rectangle(left_x, bottom_y, right_x, top_y);
1978 #ifndef GRAPHICS_DISABLED
1982 int x = (blob_box.
left() + blob_box.
right()) / 2;
1983 window->
Line(x, top, x, bottom);
1984 #endif // GRAPHICS_DISABLED
1988 ScrollView* StrokeWidth::DisplayDiacritics(
const char* window_name,
1991 #ifndef GRAPHICS_DISABLED
1996 BLOBNBOX_IT it(&block->
blobs);
1997 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2000 window->
Pen(ScrollView::GREEN);
2001 DrawDiacriticJoiner(blob, window);
2009 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2012 window->
Pen(ScrollView::GREEN);
2013 DrawDiacriticJoiner(blob, window);
const double kShapePerimeterRatio
virtual void HandleClick(int x, int y)
void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void set_leader_on_right(bool flag)
bool IsVerticalType() const
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
static bool WithinTestRegion(int detail_level, int x, int y)
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
const double kLineResidueSizeRatio
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir)
static bool UnMergeableType(BlobRegionType type)
static bool DifferentSizes(int size1, int size2)
bool leader_on_left() const
bool joined_to_prev() const
static bool VeryDifferentSizes(int size1, int size2)
void set_owner(tesseract::ColPartition *new_owner)
bool horz_possible() const
const int kLineTrapLongest
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
#define BOOL_VAR(name, val, comment)
const double kNoiseOverlapGrowthFactor
void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good)
void AddBox(BLOBNBOX *box)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
bool textord_tabfind_only_strokewidths
const float kSizeRatioToReject
const double kDiacriticYPadRatio
const int kLineResiduePadRatio
void InsertBlobList(BLOBNBOX_LIST *blobs)
void set_flow(BlobTextFlowType value)
void DisplayProjection() const
void NeighbourGaps(int gaps[BND_COUNT]) const
const int kMaxCJKSizeRatio
void pad(int xpad, int ypad)
BLOBNBOX_LIST small_blobs
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
C_OUTLINE_LIST * out_list()
const double kStrokeWidthFractionTolerance
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
const double kMaxDiacriticGapToBaseCharHeight
void DeleteUnownedNoise()
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
int textord_debug_tabfind
const int kMaxLargeOverlaps
const double kBrokenCJKIterationFraction
virtual void HandleClick(int x, int y)
void StartRadSearch(int x, int y, int max_radius)
const double kNoiseOverlapAreaFactor
void set_x(float xin)
rewrite function
TBOX BoundsWithinLimits(int left, int right)
float area_stroke_width() const
int y_gap(const TBOX &box) const
BlobRegionType region_type() const
SVEvent * AwaitEvent(SVEventType type)
void set_y(float yin)
rewrite function
void set_base_char_blob(BLOBNBOX *blob)
BLOBNBOX_LIST noise_blobs
bool DefiniteIndividualFlow()
bool y_overlap(const TBOX &box) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
int textord_tabfind_show_strokewidths
const double kMaxDiacriticDistanceRatio
const ICOORD & bleft() const
void set_leader_on_left(bool flag)
Assume a single column of text of variable sizes.
void really_merge(BLOBNBOX *other)
#define INT_VAR(name, val, comment)
void compute_bounding_box()
const int kMostlyOneDirRatio
const double kCJKBrokenDistanceFraction
const double kDiacriticXPadRatio
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
void set_diacritic_box(const TBOX &diacritic_box)
bool ConfirmNoTabViolation(const BLOBNBOX &other) const
tesseract::ColPartition * owner() const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
void rotate_box(FCOORD rotation)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
bool good_stroke_neighbour(BlobNeighbourDir n) const
ScrollView * MakeWindow(int x, int y, const char *window_name)
void set_horz_possible(bool value)
int x_gap(const TBOX &box) const
float horz_stroke_width() const
bool major_y_overlap(const TBOX &box) const
bool vert_possible() const
const int kLineTrapShortest
int IntCastRounded(double x)
BLOBNBOX * neighbour(BlobNeighbourDir n) const
bool UniquelyHorizontal() const
void set_region_type(BlobRegionType new_type)
void set_vert_possible(bool value)
void Rectangle(int x1, int y1, int x2, int y2)
bool UniquelyVertical() const
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
float vert_stroke_width() const
bool contains(const FCOORD pt) const
const ICOORD & tright() const
const double kStrokeWidthTolerance
bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance, double constant_tolerance) const
const double kCJKAspectRatioIncrease
const double kNeighbourSearchFactor
const TBOX & bounding_box() const
const double kStrokeWidthFractionCJK
bool leader_on_right() const
void RemoveBBox(BLOBNBOX *bbox)
const double kStrokeWidthCJK
const double kLineResidueAspectRatio
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
int base_char_bottom() const
BlobTextFlowType flow() const
BLOBNBOX_LIST large_blobs
void Line(int x1, int y1, int x2, int y2)
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
const double kCJKAspectRatio
const double kMinDiacriticSizeRatio
ScrollView::Color BoxColor() const
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
bool overlap(const TBOX &box) const
void set_owns_cblob(bool value)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
int base_char_top() const
BLOBNBOX * base_char_blob() const
const int kCJKMaxComponents
void GridCoords(int x, int y, int *grid_x, int *grid_y) const