21 #include "config_auto.h" 112 :
BlobGrid(gridsize, bleft, tright), nontext_map_(nullptr), projection_(nullptr),
113 denorm_(nullptr), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) {
114 leaders_win_ =
nullptr;
115 widths_win_ =
nullptr;
116 initial_widths_win_ =
nullptr;
117 chains_win_ =
nullptr;
118 diacritics_win_ =
nullptr;
119 textlines_win_ =
nullptr;
120 smoothed_win_ =
nullptr;
124 if (widths_win_ !=
nullptr) {
125 #ifndef GRAPHICS_DISABLED 127 #endif // GRAPHICS_DISABLED 133 delete initial_widths_win_;
135 delete textlines_win_;
136 delete smoothed_win_;
137 delete diacritics_win_;
146 BLOBNBOX_IT blob_it(&block->
blobs);
147 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
148 SetNeighbours(
false,
false, blob_it.data());
161 InsertBlobs(input_block);
163 while (cjk_merge && FixBrokenCJK(input_block));
165 FindTextlineFlowDirection(pageseg_mode,
false);
171 static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
172 int* num_vertical_blobs,
173 int* num_horizontal_blobs,
174 BLOBNBOX_CLIST* vertical_blobs,
175 BLOBNBOX_CLIST* horizontal_blobs,
176 BLOBNBOX_CLIST* nondescript_blobs) {
177 BLOBNBOX_C_IT v_it(vertical_blobs);
178 BLOBNBOX_C_IT h_it(horizontal_blobs);
179 BLOBNBOX_C_IT n_it(nondescript_blobs);
180 BLOBNBOX_IT blob_it(input_blobs);
181 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
184 float y_x =
static_cast<float>(box.
height()) / box.
width();
185 float x_y = 1.0f / y_x;
187 float ratio = x_y > y_x ? x_y : y_x;
191 ++*num_vertical_blobs;
192 if (ok_blob) v_it.add_after_then_move(blob);
194 ++*num_horizontal_blobs;
195 if (ok_blob) h_it.add_after_then_move(blob);
196 }
else if (ok_blob) {
197 n_it.add_after_then_move(blob);
211 BLOBNBOX_CLIST* osd_blobs) {
212 int vertical_boxes = 0;
213 int horizontal_boxes = 0;
215 BLOBNBOX_CLIST vertical_blobs;
216 BLOBNBOX_CLIST horizontal_blobs;
217 BLOBNBOX_CLIST nondescript_blobs;
218 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes,
219 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
220 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes,
221 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
223 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
224 horizontal_boxes, vertical_boxes,
225 horizontal_blobs.length(), vertical_blobs.length(),
226 nondescript_blobs.length());
227 if (osd_blobs !=
nullptr && vertical_boxes == 0 && horizontal_boxes == 0) {
229 BLOBNBOX_C_IT osd_it(osd_blobs);
230 osd_it.add_list_after(&nondescript_blobs);
233 int min_vert_boxes =
static_cast<int>((vertical_boxes + horizontal_boxes) *
234 find_vertical_text_ratio);
235 if (vertical_boxes >= min_vert_boxes) {
236 if (osd_blobs !=
nullptr) {
237 BLOBNBOX_C_IT osd_it(osd_blobs);
238 osd_it.add_list_after(&vertical_blobs);
242 if (osd_blobs !=
nullptr) {
243 BLOBNBOX_C_IT osd_it(osd_blobs);
244 osd_it.add_list_after(&horizontal_blobs);
255 rerotation_.
set_x(rotation.
x());
256 rerotation_.
set_y(-rotation.
y());
264 ColPartition_LIST leader_parts;
265 FindLeadersAndMarkNoise(block, &leader_parts);
269 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
272 MarkLeaderNeighbours(part,
LR_LEFT);
273 MarkLeaderNeighbours(part,
LR_RIGHT);
294 TBOX search_box = box;
295 search_box.
pad(padding, padding);
302 rsearch.StartRectSearch(search_box);
303 while ((n = rsearch.NextRectSearch()) !=
nullptr) {
304 if (n == bbox)
continue;
306 if (nbox.
height() > max_height) {
307 max_height = nbox.
height();
311 tprintf(
"Max neighbour size=%d for candidate line box at:", max_height);
315 #ifndef GRAPHICS_DISABLED 316 if (leaders_win_ !=
nullptr) {
323 #endif // GRAPHICS_DISABLED 346 Pix* nontext_pix,
const DENORM* denorm,
bool cjk_script,
349 nontext_map_ = nontext_pix;
350 projection_ = projection;
361 FindTextlineFlowDirection(pageseg_mode,
false);
374 FindTextlineFlowDirection(pageseg_mode,
true);
376 FindInitialPartitions(pageseg_mode, rerotation,
true, block,
377 diacritic_blobs, part_grid, big_parts, &skew);
379 tprintf(
"Detected %d diacritics\n", diacritic_blobs->length());
383 FindTextlineFlowDirection(pageseg_mode,
true);
384 r = FindInitialPartitions(pageseg_mode, rerotation,
false, block,
385 diacritic_blobs, part_grid, big_parts, &skew);
387 nontext_map_ =
nullptr;
388 projection_ =
nullptr;
392 static void PrintBoxWidths(
BLOBNBOX* neighbour) {
// Debug print of the box corners followed by its h-, v- and p-width values.
// All three widths use the same one-decimal format; the last one previously
// read "%1.f" (width 1, precision 0), which dropped the fractional part.
394 tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
407 FCOORD click(static_cast<float>(x), static_cast<float>(y));
411 PrintBoxWidths(neighbour);
422 tprintf(
"Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" 423 "Good= %d %d %d %d\n",
446 void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK* block,
447 ColPartition_LIST* leader_parts) {
453 gsearch.StartFullSearch();
454 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
455 SetNeighbours(
true,
false, bbox);
457 ColPartition_IT part_it(leader_parts);
458 gsearch.StartFullSearch();
459 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
467 for (blob = bbox; blob !=
nullptr && blob->
flow() ==
BTFT_NONE;
474 if (part->MarkAsLeaderIfMonospaced())
475 part_it.add_after_then_move(part);
481 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
485 BLOBNBOX_IT blob_it(&block->
blobs);
487 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
493 blob_it.add_to_end(small_it.extract());
500 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
503 small_it.add_to_end(noise_it.extract());
515 void StrokeWidth::InsertBlobs(
TO_BLOCK* block) {
523 void StrokeWidth::MarkLeaderNeighbours(
const ColPartition* part,
525 const TBOX& part_box = part->bounding_box();
530 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.
left()
534 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) !=
nullptr) {
538 int x_gap = blob_box.
x_gap(part_box);
541 }
else if (best_blob ==
nullptr || x_gap < best_gap) {
546 if (best_blob !=
nullptr) {
551 #ifndef GRAPHICS_DISABLED 552 if (leaders_win_ !=
nullptr) {
558 #endif // GRAPHICS_DISABLED 563 static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST* blobs) {
565 BLOBNBOX_IT it(blobs);
566 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
571 sizes.add(height, 1);
573 return static_cast<int>(sizes.ile(0.75f) + 0.5);
581 bool StrokeWidth::FixBrokenCJK(
TO_BLOCK* block) {
582 BLOBNBOX_LIST* blobs = &block->
blobs;
583 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
587 BLOBNBOX_IT blob_it(blobs);
589 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
597 tprintf(
"Checking for Broken CJK (max size=%d):", max_height);
601 BLOBNBOX_CLIST overlapped_blobs;
602 AccumulateOverlaps(blob, debug, max_height, max_dist,
603 &bbox, &overlapped_blobs);
604 if (!overlapped_blobs.empty()) {
611 tprintf(
"Bad final aspectratio:");
619 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
623 BLOBNBOX_C_IT n_it(&overlapped_blobs);
624 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
626 neighbour = n_it.data();
631 if (!n_it.cycled_list()) {
634 PrintBoxWidths(blob);
644 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
650 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
663 int num_remaining = 0;
664 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
681 static bool AcceptableCJKMerge(
const TBOX& bbox,
const TBOX& nbox,
682 bool debug,
int max_size,
int max_dist,
683 int* x_gap,
int* y_gap) {
684 *x_gap = bbox.
x_gap(nbox);
685 *y_gap = bbox.
y_gap(nbox);
689 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
692 if (*x_gap <= max_dist && *y_gap <= max_dist &&
693 merged.width() <= max_size && merged.height() <= max_size) {
695 double old_ratio =
static_cast<double>(bbox.
width()) / bbox.
height();
696 if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio;
697 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
698 if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio;
709 void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX* not_this,
bool debug,
710 int max_size,
int max_dist,
711 TBOX* bbox, BLOBNBOX_CLIST* blobs) {
718 nearests[i] =
nullptr;
720 int x = (bbox->
left() + bbox->
right()) / 2;
721 int y = (bbox->
bottom() + bbox->
top()) / 2;
726 while ((neighbour = radsearch.NextRadSearch()) !=
nullptr) {
727 if (neighbour == not_this)
continue;
730 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
734 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
740 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
741 if (nearests[dir] ==
nullptr)
continue;
743 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
744 max_dist, &x_gap, &y_gap)) {
747 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
752 nearests[dir] =
nullptr;
756 }
else if (x_gap < 0 && x_gap <= y_gap) {
759 if (nearests[dir] ==
nullptr ||
760 y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
761 nearests[dir] = neighbour;
763 }
else if (y_gap < 0 && y_gap <= x_gap) {
766 if (nearests[dir] ==
nullptr ||
767 x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
768 nearests[dir] = neighbour;
777 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
778 if (nearests[dir] ==
nullptr)
continue;
781 tprintf(
"Testing for overlap with:");
785 blobs->shallow_clear();
787 tprintf(
"Final box overlaps nearest\n");
800 void StrokeWidth::FindTextlineFlowDirection(
PageSegMode pageseg_mode,
801 bool display_if_debugging) {
805 gsearch.StartFullSearch();
806 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
807 SetNeighbours(
false, display_if_debugging, bbox);
810 gsearch.StartFullSearch();
811 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
812 SimplifyObviousNeighbours(bbox);
815 gsearch.StartFullSearch();
816 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
817 if (FindingVerticalOnly(pageseg_mode)) {
820 }
else if (FindingHorizontalOnly(pageseg_mode)) {
824 SetNeighbourFlows(bbox);
829 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
832 gsearch.StartFullSearch();
833 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
834 SmoothNeighbourTypes(pageseg_mode,
false, bbox);
837 gsearch.StartFullSearch();
838 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
839 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
842 gsearch.StartFullSearch();
843 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
844 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
848 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
856 void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap,
858 int line_trap_count = 0;
859 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
861 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
863 if (line_trap_count > 0 && activate_line_trap) {
885 tprintf(
"FGN in dir %d for blob:", dir);
888 int top = blob_box.
top();
889 int bottom = blob_box.
bottom();
890 int left = blob_box.
left();
891 int right = blob_box.
right();
892 int width = right - left;
893 int height = top - bottom;
901 int line_trap_count = 0;
904 ? height / 2 : width / 2;
906 ? height / 3 : width / 3;
908 min_good_overlap = min_decent_overlap = 1;
910 int search_pad =
static_cast<int>(
914 TBOX search_box = blob_box;
927 search_box.
set_top(search_box.
top() + search_pad);
934 rectsearch.StartRectSearch(search_box);
936 double best_goodness = 0.0;
937 bool best_is_good =
false;
939 while ((neighbour = rectsearch.NextRectSearch()) !=
nullptr) {
941 if (neighbour == blob)
943 int mid_x = (nbox.
left() + nbox.
right()) / 2;
944 if (mid_x < blob->left_rule() || mid_x > blob->
right_rule())
953 int n_width = nbox.
width();
954 int n_height = nbox.
height();
955 if (std::min(n_width, n_height) > line_trap_min &&
956 std::max(n_width, n_height) < line_trap_max)
962 std::max(width, height)) &&
967 if (debug)
tprintf(
"Bad size\n");
979 overlap = std::min(static_cast<int>(nbox.
top()), top) - std::max(static_cast<int>(nbox.
bottom()), bottom);
981 perp_overlap = nbox.
width();
983 perp_overlap = overlap;
986 if (debug)
tprintf(
"On wrong side\n");
991 overlap = std::min(static_cast<int>(nbox.
right()), right) - std::max(static_cast<int>(nbox.
left()), left);
993 perp_overlap = nbox.
height();
995 perp_overlap = overlap;
998 if (debug)
tprintf(
"On wrong side\n");
1003 if (-gap > overlap) {
1004 if (debug)
tprintf(
"Overlaps wrong way\n");
1007 if (perp_overlap < min_decent_overlap) {
1008 if (debug)
tprintf(
"Doesn't overlap enough\n");
1013 bool is_good = overlap >= min_good_overlap && !bad_sizes &&
1020 if (gap < 1) gap = 1;
1021 double goodness = (1.0 + is_good) * overlap / gap;
1023 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
1024 goodness, best_goodness, is_good, overlap, gap);
1026 if (goodness > best_goodness) {
1027 best_neighbour = neighbour;
1028 best_goodness = goodness;
1029 best_is_good = is_good;
1033 return line_trap_count;
1037 static void ListNeighbours(
const BLOBNBOX* blob,
1038 BLOBNBOX_CLIST* neighbours) {
1039 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1042 if (neighbour !=
nullptr) {
1043 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
1049 static void List2ndNeighbours(
const BLOBNBOX* blob,
1050 BLOBNBOX_CLIST* neighbours) {
1051 ListNeighbours(blob, neighbours);
1052 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1055 if (neighbour !=
nullptr) {
1056 ListNeighbours(neighbour, neighbours);
1062 static void List3rdNeighbours(
const BLOBNBOX* blob,
1063 BLOBNBOX_CLIST* neighbours) {
1064 List2ndNeighbours(blob, neighbours);
1065 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1068 if (neighbour !=
nullptr) {
1069 List2ndNeighbours(neighbour, neighbours);
1076 static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST* neighbours,
1077 int* pure_h_count,
int* pure_v_count) {
1080 BLOBNBOX_C_IT it(neighbours);
1081 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1083 int h_min, h_max, v_min, v_max;
1086 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1087 if (h_max < v_min ||
1091 if (debug)
tprintf(
"Horz at:");
1092 }
else if (v_max < h_min) {
1095 if (debug)
tprintf(
"Vert at:");
1097 if (debug)
tprintf(
"Neither at:");
1107 void StrokeWidth::SetNeighbourFlows(
BLOBNBOX* blob) {
1113 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:",
1117 BLOBNBOX_CLIST neighbours;
1118 List3rdNeighbours(blob, &neighbours);
1120 int pure_h_count = 0;
1121 int pure_v_count = 0;
1122 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1126 tprintf(
"SetFlows: h_count=%d, v_count=%d\n",
1127 pure_h_count, pure_v_count);
1129 if (!neighbours.empty()) {
1132 if (pure_h_count > 2 * pure_v_count) {
1135 }
else if (pure_v_count > 2 * pure_h_count) {
1148 static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
1149 int* pure_h_count,
int* pure_v_count) {
1150 BLOBNBOX_C_IT it(neighbours);
1151 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1163 void StrokeWidth::SimplifyObviousNeighbours(
BLOBNBOX* blob) {
1185 int h_min, h_max, v_min, v_max;
1187 if ((h_max + margin < v_min && h_max < margin / 2) ||
1192 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1202 void StrokeWidth::SmoothNeighbourTypes(
PageSegMode pageseg_mode,
bool reset_all,
1206 BLOBNBOX_CLIST neighbours;
1207 List2ndNeighbours(blob, &neighbours);
1209 int pure_h_count = 0;
1210 int pure_v_count = 0;
1211 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1216 tprintf(
"pure_h=%d, pure_v=%d\n",
1217 pure_h_count, pure_v_count);
1219 if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
1223 }
else if (pure_v_count > pure_h_count &&
1224 !FindingHorizontalOnly(pageseg_mode)) {
1233 tprintf(
"Clean on pass 3!\n");
1251 TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
1252 ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
1254 if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
1255 if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
1257 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1258 part_grid->DisplayBoxes(chains_win_);
1261 if (find_problems) {
1265 part_grid->SplitOverlappingPartitions(big_parts);
1266 EasyMerges(part_grid);
1267 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1269 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1272 grid_box, rerotation));
1273 int pre_overlap = part_grid->ComputeTotalOverlap(
nullptr);
1274 TestDiacritics(part_grid, block);
1275 MergeDiacritics(block, part_grid);
1276 if (find_problems && diacritic_blobs !=
nullptr &&
1277 DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid,
1282 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1283 part_grid->DisplayBoxes(textlines_win_);
1284 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1286 PartitionRemainingBlobs(pageseg_mode, part_grid);
1287 part_grid->SplitOverlappingPartitions(big_parts);
1288 EasyMerges(part_grid);
1289 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1292 grid_box, rerotation));
1295 grid_box, rerotation));
1297 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1298 part_grid->DisplayBoxes(smoothed_win_);
1307 bool StrokeWidth::DetectAndRemoveNoise(
int pre_overlap,
const TBOX& grid_box,
1309 ColPartitionGrid* part_grid,
1310 BLOBNBOX_LIST* diacritic_blobs) {
1311 ColPartitionGrid* noise_grid =
nullptr;
1312 int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
1313 if (pre_overlap == 0) pre_overlap = 1;
1314 BLOBNBOX_IT diacritic_it(diacritic_blobs);
1315 if (noise_grid !=
nullptr) {
1321 noise_grid->DisplayBoxes(noise_win);
1323 part_grid->DeleteNonLeaderParts();
1326 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1334 rsearch.StartRectSearch(search_box);
1335 ColPartition* part = rsearch.NextRectSearch();
1336 if (part !=
nullptr) {
1340 diacritic_it.add_after_then_move(blob_it.extract());
1343 noise_grid->DeleteParts();
1347 noise_grid->DeleteParts();
1360 if (next_blob ==
nullptr || next_blob->
owner() !=
nullptr ||
1369 void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
1375 gsearch.StartFullSearch();
1376 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1381 (blob = MutualUnusedVNeighbour(bbox,
BND_ABOVE)) !=
nullptr) {
1385 while (blob !=
nullptr) {
1387 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
1389 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1390 while (blob !=
nullptr) {
1392 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1394 CompletePartition(pageseg_mode, part, part_grid);
1406 if (next_blob ==
nullptr || next_blob->
owner() !=
nullptr ||
1415 void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
1421 gsearch.StartFullSearch();
1422 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1425 (blob = MutualUnusedHNeighbour(bbox,
BND_RIGHT)) !=
nullptr) {
1429 while (blob !=
nullptr) {
1431 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
1433 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1434 while (blob !=
nullptr) {
// Keep walking left with the horizontal-neighbour helper, the same one used
// for the rightward walk and for the first step to the left. The vertical
// helper was used here, which is inconsistent with the rest of this
// horizontal chain builder (the vertical builder uses its own helper in both
// directions).
1436 blob = MutualUnusedHNeighbour(blob, BND_LEFT);
1438 CompletePartition(pageseg_mode, part, part_grid);
1450 void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid,
TO_BLOCK* block) {
1453 small_grid.InsertBlobList(&block->
blobs);
1454 int medium_diacritics = 0;
1455 int small_diacritics = 0;
1457 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1460 DiacriticBlob(&small_grid, blob)) {
1464 BLOBNBOX_IT blob_it(&block->
blobs);
1465 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1468 small_it.add_to_end(blob_it.extract());
1471 ColPartition* part = blob->
owner();
1472 if (part ==
nullptr && DiacriticBlob(&small_grid, blob)) {
1473 ++medium_diacritics;
1475 small_it.add_to_end(blob_it.extract());
1476 }
else if (part !=
nullptr && !part->block_owned() &&
1477 part->boxes_count() < 3) {
1483 BLOBNBOX_C_IT box_it(part->boxes());
1484 for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
1485 DiacriticBlob(&small_grid, box_it.data());
1487 if (box_it.cycled_list()) {
1489 while (!box_it.empty()) {
1498 ++medium_diacritics;
1505 small_it.add_to_end(blob_it.extract());
1506 part_grid->RemoveBBox(part);
1511 tprintf(
"Blob not available to be a diacritic at:");
1516 tprintf(
"Found %d small diacritics, %d medium\n",
1517 small_diacritics, medium_diacritics);
1527 bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid,
BLOBNBOX* blob) {
1533 small_box.bottom());
1535 tprintf(
"Testing blob for diacriticness at:");
1538 int x = (small_box.left() + small_box.right()) / 2;
1539 int y = (small_box.bottom() + small_box.top()) / 2;
1542 int height = small_box.height();
1555 BLOBNBOX* best_x_overlap =
nullptr;
1556 BLOBNBOX* best_y_overlap =
nullptr;
1557 int best_total_dist = 0;
1561 TBOX search_box(small_box);
1564 search_box.
pad(x_pad, y_pad);
1566 rsearch.SetUniqueMode(
true);
1568 rsearch.StartRectSearch(search_box);
1570 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1572 neighbour == blob || neighbour->
owner() == blob->
owner())
1579 tprintf(
"Neighbour not strong enough:");
1584 if (nbox.
height() < min_height) {
1586 tprintf(
"Neighbour not big enough:");
1591 int x_gap = small_box.x_gap(nbox);
1592 int y_gap = small_box.y_gap(nbox);
1596 if (debug)
tprintf(
"xgap=%d, y=%d, total dist=%d\n",
1597 x_gap, y_gap, total_distance);
1598 if (total_distance >
1601 tprintf(
"Neighbour with median size %d too far away:",
1609 tprintf(
"Computing reduced box for :");
1612 int left = small_box.left() - small_box.width();
1613 int right = small_box.right() + small_box.width();
1615 y_gap = small_box.
y_gap(nbox);
1616 if (best_x_overlap ==
nullptr || y_gap < best_y_gap) {
1617 best_x_overlap = neighbour;
1625 tprintf(
"Shrunken box doesn't win:");
1629 if (best_y_overlap ==
nullptr || total_distance < best_total_dist) {
1631 tprintf(
"New best y overlap:");
1634 best_y_overlap = neighbour;
1635 best_total_dist = total_distance;
1637 tprintf(
"New y overlap box doesn't win:");
1641 tprintf(
"Neighbour wrong side of a tab:");
1645 if (best_x_overlap !=
nullptr &&
1646 (best_y_overlap ==
nullptr ||
1651 tprintf(
"DiacriticBlob OK! (x-overlap:");
1657 if (best_y_overlap !=
nullptr &&
1658 DiacriticXGapFilled(small_grid, small_box,
1660 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box())) {
1664 tprintf(
"DiacriticBlob OK! (y-overlap:");
1671 tprintf(
"DiacriticBlob fails:");
1673 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1674 if (best_y_overlap !=
nullptr) {
1675 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1676 DiacriticXGapFilled(small_grid, small_box,
1678 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box()));
1697 bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
1698 const TBOX& diacritic_box,
1699 const TBOX& base_box) {
1703 TBOX occupied_box(base_box);
1705 while ((diacritic_gap = diacritic_box.
x_gap(occupied_box)) > max_gap) {
1706 TBOX search_box(occupied_box);
1707 if (diacritic_box.
left() > search_box.
right()) {
1717 rsearch.StartRectSearch(search_box);
1719 while ((neighbour = rsearch.NextRectSearch()) !=
nullptr) {
1721 if (nbox.
x_gap(diacritic_box) < diacritic_gap) {
1722 if (nbox.
left() < occupied_box.left())
1724 if (nbox.
right() > occupied_box.right())
1725 occupied_box.set_right(nbox.
right());
1729 if (neighbour ==
nullptr)
1736 void StrokeWidth::MergeDiacritics(
TO_BLOCK* block,
1737 ColPartitionGrid* part_grid) {
1739 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1745 if (part !=
nullptr && !part->block_owned() && blob->
owner() ==
nullptr &&
1749 part_grid->RemoveBBox(part);
1754 part_grid->InsertBBox(
true,
true, part);
1765 void StrokeWidth::RemoveLargeUnusedBlobs(
TO_BLOCK* block,
1766 ColPartitionGrid* part_grid,
1767 ColPartition_LIST* big_parts) {
1769 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1771 ColPartition* big_part = blob->
owner();
1772 if (big_part ==
nullptr) {
1782 void StrokeWidth::PartitionRemainingBlobs(
PageSegMode pageseg_mode,
1783 ColPartitionGrid* part_grid) {
1786 int prev_grid_x = -1;
1787 int prev_grid_y = -1;
1788 BLOBNBOX_CLIST cell_list;
1789 BLOBNBOX_C_IT cell_it(&cell_list);
1790 bool cell_all_noise =
true;
1791 gsearch.StartFullSearch();
1792 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1793 int grid_x = gsearch.GridX();
1794 int grid_y = gsearch.GridY();
1795 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1797 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1799 cell_it.set_to_list(&cell_list);
1800 prev_grid_x = grid_x;
1801 prev_grid_y = grid_y;
1802 cell_all_noise =
true;
1804 if (bbox->
owner() ==
nullptr) {
1805 cell_it.add_to_end(bbox);
1807 cell_all_noise =
false;
1809 cell_all_noise =
false;
1812 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1818 void StrokeWidth::MakePartitionsFromCellList(
PageSegMode pageseg_mode,
1820 ColPartitionGrid* part_grid,
1821 BLOBNBOX_CLIST* cell_list) {
1822 if (cell_list->empty())
1824 BLOBNBOX_C_IT cell_it(cell_list);
1826 BLOBNBOX* bbox = cell_it.extract();
1830 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1831 part->AddBox(cell_it.extract());
1833 CompletePartition(pageseg_mode, part, part_grid);
1835 for (; !cell_it.empty(); cell_it.forward()) {
1836 BLOBNBOX* bbox = cell_it.extract();
1840 CompletePartition(pageseg_mode, part, part_grid);
1847 void StrokeWidth::CompletePartition(
PageSegMode pageseg_mode,
1849 ColPartitionGrid* part_grid) {
1850 part->ComputeLimits();
1856 if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
1857 value = part->boxes_count() == 1 ? 0 : -2;
1858 }
else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
1859 value = part->boxes_count() == 1 ? 0 : 2;
1861 part->SetRegionAndFlowTypesFromProjectionValue(value);
1863 part_grid->InsertBBox(
true,
true, part);
1868 void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
1877 bool StrokeWidth::OrientationSearchBox(ColPartition* part,
TBOX* box) {
1878 if (part->IsVerticalType()) {
1889 bool StrokeWidth::ConfirmEasyMerge(
const ColPartition* p1,
1890 const ColPartition* p2) {
1896 if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
1897 p1->HCoreOverlap(*p2) <= 0 &&
1898 ((!p1->IsSingleton() &&
1899 !p2->IsSingleton()) ||
1900 !p1->bounding_box().major_overlap(p2->bounding_box())))
1902 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
1903 p1->VCoreOverlap(*p2) <= 0 &&
1904 ((!p1->IsSingleton() &&
1905 !p2->IsSingleton()) ||
1906 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1907 !p1->OKDiacriticMerge(*p2,
false) &&
1908 !p2->OKDiacriticMerge(*p1,
false))))
1910 if (!p1->ConfirmNoTabViolation(*p2))
1914 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
1918 bool StrokeWidth::NoNoiseInBetween(
const TBOX& box1,
const TBOX& box2)
const {
1926 ScrollView* StrokeWidth::DisplayGoodBlobs(
const char* window_name,
1929 #ifndef GRAPHICS_DISABLED 1936 gsearch.StartFullSearch();
1938 while ((bbox = gsearch.NextFullSearch()) !=
nullptr) {
1940 int left_x = box.
left();
1941 int right_x = box.
right();
1942 int top_y = box.
top();
1943 int bottom_y = box.
bottom();
1954 else if (goodness == 1)
1960 window->
Rectangle(left_x, bottom_y, right_x, top_y);
1968 #ifndef GRAPHICS_DISABLED 1970 int top = std::max(static_cast<int>(blob_box.
top()), blob->
base_char_top());
1972 int x = (blob_box.
left() + blob_box.
right()) / 2;
1973 window->
Line(x, top, x, bottom);
1974 #endif // GRAPHICS_DISABLED 1978 ScrollView* StrokeWidth::DisplayDiacritics(
const char* window_name,
1981 #ifndef GRAPHICS_DISABLED 1986 BLOBNBOX_IT it(&block->
blobs);
1987 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1991 DrawDiacriticJoiner(blob, window);
1999 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2003 DrawDiacriticJoiner(blob, window);
float area_stroke_width() const
bool major_y_overlap(const TBOX &box) const
int textord_tabfind_show_strokewidths
void InsertBlobList(BLOBNBOX_LIST *blobs)
static bool DifferentSizes(int size1, int size2)
void set_vert_possible(bool value)
bool DefiniteIndividualFlow()
ScrollView * MakeWindow(int x, int y, const char *window_name)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
const int kMaxCJKSizeRatio
#define BOOL_VAR(name, val, comment)
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
float horz_stroke_width() const
int y_gap(const TBOX &box) const
void set_diacritic_box(const TBOX &diacritic_box)
void set_leader_on_right(bool flag)
const double kStrokeWidthFractionCJK
TBOX BoundsWithinLimits(int left, int right)
const ICOORD & bleft() const
void set_owns_cblob(bool value)
BlobTextFlowType flow() const
int median_height() const
void set_base_char_blob(BLOBNBOX *blob)
static bool UnMergeableType(BlobRegionType type)
static bool WithinTestRegion(int detail_level, int x, int y)
const double kCJKAspectRatioIncrease
const double kNoiseOverlapGrowthFactor
int x_gap(const TBOX &box) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
int base_char_top() const
void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const
void set_x(float xin)
rewrite function
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
void DisplayProjection() const
const double kLineResidueSizeRatio
virtual void HandleClick(int x, int y)
const double kCJKBrokenDistanceFraction
const int kMostlyOneDirRatio
const float kSizeRatioToReject
void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good)
int base_char_bottom() const
const int kLineTrapLongest
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
void set_horz_possible(bool value)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
const double kNeighbourSearchFactor
void rotate_box(FCOORD rotation)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
static bool VeryDifferentSizes(int size1, int size2)
void StartRadSearch(int x, int y, int max_radius)
void set_region_type(BlobRegionType new_type)
bool leader_on_right() const
bool UniquelyHorizontal() const
SVEvent * AwaitEvent(SVEventType type)
Assume a single column of text of variable sizes.
const double kMaxDiacriticDistanceRatio
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
virtual void HandleClick(int x, int y)
void set_owner(tesseract::ColPartition *new_owner)
bool good_stroke_neighbour(BlobNeighbourDir n) const
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
const double kLineResidueAspectRatio
bool horz_possible() const
void RemoveBBox(BLOBNBOX *bbox)
BlobRegionType region_type() const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
int textord_debug_tabfind
bool joined_to_prev() const
int IntCastRounded(double x)
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
void NeighbourGaps(int gaps[BND_COUNT]) const
BLOBNBOX * base_char_blob() const
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
const double kStrokeWidthFractionTolerance
DLLSYM void tprintf(const char *format,...)
float vert_stroke_width() const
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
const double kDiacriticYPadRatio
const int kCJKMaxComponents
void DeleteUnownedNoise()
const double kMinDiacriticSizeRatio
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void AddBox(BLOBNBOX *box)
const double kDiacriticXPadRatio
const double kMaxDiacriticGapToBaseCharHeight
void set_flow(BlobTextFlowType value)
BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir)
void set_leader_on_left(bool flag)
const double kNoiseOverlapAreaFactor
bool vert_possible() const
bool overlap(const TBOX &box) const
bool textord_tabfind_only_strokewidths
bool y_overlap(const TBOX &box) const
bool contains(const FCOORD pt) const
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
C_OUTLINE_LIST * out_list()
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
const TBOX & bounding_box() const
const double kBrokenCJKIterationFraction
bool IsVerticalType() const
bool ConfirmNoTabViolation(const BLOBNBOX &other) const
bool leader_on_left() const
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
ScrollView::Color BoxColor() const
void Rectangle(int x1, int y1, int x2, int y2)
tesseract::ColPartition * owner() const
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
const double kStrokeWidthCJK
BLOBNBOX_LIST large_blobs
bool UniquelyVertical() const
const double kStrokeWidthTolerance
BLOBNBOX * neighbour(BlobNeighbourDir n) const
void set_y(float yin)
rewrite function
const int kLineResiduePadRatio
const double kCJKAspectRatio
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
void really_merge(BLOBNBOX *other)
void pad(int xpad, int ypad)
#define INT_VAR(name, val, comment)
void Line(int x1, int y1, int x2, int y2)
const ICOORD & tright() const
void compute_bounding_box()
BLOBNBOX_LIST small_blobs
const int kLineTrapShortest
bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance, double constant_tolerance) const
BLOBNBOX_LIST noise_blobs
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)