32 #include "config_auto.h"
// Tunable parameters for the old baseline / x-height fitter in this file.
// Each declaration gives a variable name, a default value and a help string.

// Tested in find_textlines to choose between two alternative calls that both
// take the blob count, the row baseline and the jump limit.
37 static BOOL_VAR (textord_really_old_xheight,
false,
38 "Use original wiseowl xheight");
// Gates the tprintf diagnostics emitted during baseline generation.
40 static BOOL_VAR (textord_debug_baselines,
false,
"Debug baseline generation");
// Consulted when the supplied spline is null or does not span the blobs.
41 static BOOL_VAR (textord_oldbl_paradef,
true,
"Use para default mechanism");
// Part of the loop condition that keeps re-splitting spline segments.
42 static BOOL_VAR (textord_oldbl_split_splines,
true,
"Split stepped splines");
// Tested after the biggest partition has been picked, before the merge pass.
43 static BOOL_VAR (textord_oldbl_merge_parts,
true,
"Merge suspect partitions");
// When set, correlate_with_stats additionally requires xcount >= fullcount
// and desccount >= rowcount / 2 before averaging the respective heights.
44 static BOOL_VAR (oldbl_corrfix,
true,
"Improve correlation of heights");
// NOTE(review): the opening line of this declaration is missing from the
// listing; presumably it declares oldbl_xhfix, which is read where the mode
// threshold is computed - confirm against the full file.
46 "Fix bug in modes threshold for xheights");
// Changes blob strength weighting, the mode factor and the left/right overlap
// test used when picking modes and x-heights.
47 static BOOL_VAR(textord_ocropus_mode,
false,
"Make baselines for ocropus");
// A blob height must exceed this fraction of the current height estimate
// before it is added to the height statistics.
48 static double_VAR (oldbl_xhfract, 0.4,
"Fraction of est allowed in calc");
// A line is flagged as holed when the largest loss count exceeds this value.
49 static INT_VAR (oldbl_holed_losscount, 10,
50 "Max lost before fallback line used");
// Applied symmetrically: height < width * size and width < height * size.
// Also read (compared against 1) when choosing the mode threshold.
51 static double_VAR (oldbl_dot_error_size, 1.26,
"Max aspect ratio of a dot");
// Multiplied by the estimated line height to give the jump limit.
52 static double_VAR (textord_oldbl_jumplimit, 0.15,
53 "X fraction for new partition");
// Numeric constants for the baseline / x-height code.
// NOTE(review): the uses of X_HEIGHT_FRACTION, DESCENDER_FRACTION,
// MIN_ASC_FRACTION, MIN_DESC_FRACTION, MINASCRISE, MAXHEIGHTVARIANCE and
// HEIGHTBUCKETS are not visible in this excerpt; their meaning is presumably
// what the names suggest - confirm against the full file.
56 #define X_HEIGHT_FRACTION 0.7
57 #define DESCENDER_FRACTION 0.5
58 #define MIN_ASC_FRACTION 0.20
59 #define MIN_DESC_FRACTION 0.25
60 #define MINASCRISE 2.0
61 #define MAXHEIGHTVARIANCE 0.15
// Fraction of the blob span (rightedge - leftedge) added to leftedge when
// testing whether an existing spline's second x coordinate is acceptable.
63 #define MAXOVERLAP 0.1
65 #define HEIGHTBUCKETS 200
// Absolute value. The argument is expanded more than once, so do not pass an
// expression with side effects.
70 #define ABS(x) ((x)<0 ? (-(x)) : (x))
80 void Textord::make_old_baselines(
TO_BLOCK* block,
85 TO_ROW_IT row_it = block->
get_rows();
88 prev_baseline =
nullptr;
89 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
91 find_textlines(block, row, 2,
nullptr);
92 if (row->
xheight <= 0 && prev_baseline !=
nullptr)
93 find_textlines(block, row, 2, prev_baseline);
97 prev_baseline =
nullptr;
99 if (textord_debug_baselines)
100 tprintf(
"Row baseline generation failed on row at (%d,%d)\n",
101 blob_it.data()->bounding_box().left(),
102 blob_it.data()->bounding_box().bottom());
105 correlate_lines(block, gradient);
118 void Textord::correlate_lines(
TO_BLOCK *block,
float gradient) {
122 TO_ROW_IT row_it = block->
get_rows ();
124 rowcount = row_it.length ();
131 std::vector <TO_ROW *> rows(rowcount);
133 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
135 rows[rowindex++] = row_it.data ();
138 correlate_neighbours(block, &rows[0], rowcount);
141 block->
xheight = static_cast<float>(correlate_with_stats(&rows[0], rowcount, block));
158 void Textord::correlate_neighbours(
TO_BLOCK *block,
168 for (rowindex = 0; rowindex < rowcount; rowindex++) {
169 row = rows[rowindex];
172 for (otherrow = rowindex - 2;
174 && (rows[otherrow]->
xheight < 0.0
178 for (otherrow = rowindex + 1;
180 && (rows[otherrow]->
xheight < 0.0
185 find_textlines(block, row, 2, &rows[upperrow]->
baseline);
186 if (row->
xheight < 0 && lowerrow < rowcount)
187 find_textlines(block, row, 2, &rows[lowerrow]->
baseline);
190 find_textlines(block, row, 1, &rows[upperrow]->
baseline);
191 else if (lowerrow < rowcount)
192 find_textlines(block, row, 1, &rows[lowerrow]->
baseline);
197 for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
198 row = rows[rowindex];
202 biggest = std::max(biggest, row->
xheight);
214 int Textord::correlate_with_stats(
TO_ROW **rows,
230 xcount = fullcount = desccount = 0;
231 lineheight = ascheight = fullheight = descheight = 0.0;
232 for (rowindex = 0; rowindex < rowcount; rowindex++) {
233 row = rows[rowindex];
250 if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
251 lineheight /= xcount;
253 fullheight = lineheight + ascheight / xcount;
259 fullheight /= fullcount;
263 if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
264 descheight /= desccount;
269 if (lineheight > 0.0f)
274 for (rowindex = 0; rowindex < rowcount; rowindex++) {
275 row = rows[rowindex];
281 row->
ascrise = fullheight - lineheight;
300 if (row->
ascrise < minascheight)
304 if (row->
descdrop > mindescheight) {
313 return static_cast<int>(lineheight);
323 void Textord::find_textlines(
TO_BLOCK *block,
328 bool holed_line =
false;
339 blobcount = row->
blob_list ()->length ();
341 std::vector<char> partids(blobcount);
343 std::vector<int> xcoords(blobcount);
345 std::vector<int> ycoords(blobcount);
347 std::vector<TBOX> blobcoords(blobcount);
349 std::vector<float> ydiffs(blobcount);
352 holed_line, blobcount);
354 jumplimit = lineheight * textord_oldbl_jumplimit;
360 (
"\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
361 block->
line_size, lineheight, jumplimit);
368 &xcoords[0], &ycoords[0], spline, &row->
baseline, jumplimit);
369 #ifndef GRAPHICS_DISABLED
375 &partcount, &partids[0], partsizes,
376 &row->
baseline, jumplimit, &ydiffs[0]);
378 &partids[0], bestpart, &xcoords[0], &ycoords[0]);
380 &xcoords[0], &ycoords[0], degree, pointcount, xstarts);
384 &xcoords[0], &ycoords[0], pointcount, degree);
386 while (textord_oldbl_split_splines
388 &xcoords[0], xstarts, segments));
391 &partids[0], partsizes, partcount, bestpart);
403 if (textord_really_old_xheight) {
405 blobcount, &row->
baseline, jumplimit);
408 blobcount, &row->
baseline, jumplimit);
440 if (blob_it.empty ())
444 blob_it.mark_cycle_pt ();
448 if (blobcoords[blobindex].height () > lineheight * 0.25)
449 heightstat.
add (blobcoords[blobindex].height (), 1);
451 || blobcoords[blobindex].height () > lineheight * 0.25
452 || blob_it.cycled_list ()) {
457 if (blobcoords[blobindex].height ()
458 < blobcoords[blobindex].
width () * oldbl_dot_error_size
459 && blobcoords[blobindex].
width ()
460 < blobcoords[blobindex].
height () * oldbl_dot_error_size) {
467 if (losscount > maxlosscount)
469 maxlosscount = losscount;
473 while (!blob_it.cycled_list ());
475 holed_line = maxlosscount > oldbl_holed_losscount;
476 outcount = blobindex;
480 return static_cast<int>(heightstat.
ile (0.25));
482 return blobcoords[0].
height ();
508 float prevy, thisy, nexty;
510 float maxmax, minmin;
521 leftedge = blobcoords[0].
left ();
523 rightedge = blobcoords[blobcount - 1].
right ();
524 if (spline ==
nullptr
525 || spline->segments < 3
527 || spline->xcoords[1] > leftedge +
MAXOVERLAP * (rightedge - leftedge)
528 || spline->xcoords[spline->segments - 1] < rightedge
530 if (textord_oldbl_paradef)
532 xstarts[0] = blobcoords[0].
left () - 1;
533 for (blobindex = 0; blobindex < blobcount; blobindex++) {
534 xcoords[blobindex] = (blobcoords[blobindex].
left ()
535 + blobcoords[blobindex].
right ()) / 2;
536 ycoords[blobindex] = blobcoords[blobindex].
bottom ();
538 xstarts[1] = blobcoords[blobcount - 1].
right () + 1;
542 *
baseline =
QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
544 if (blobcount >= 3) {
548 maxmax = minmin = 0.0f;
549 thisy = ycoords[0] -
baseline->y (xcoords[0]);
550 nexty = ycoords[1] -
baseline->y (xcoords[1]);
551 for (blobindex = 2; blobindex < blobcount; blobindex++) {
554 nexty = ycoords[blobindex] -
baseline->y (xcoords[blobindex]);
556 if (
ABS (thisy - prevy) < jumplimit &&
ABS (thisy - nexty) < jumplimit) {
562 if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
564 || (y1 > y2 && y2 <= y3))) {
567 xturns[segment] = x2;
568 yturns[segment] = y2;
573 maxmax = minmin = y3;
582 x2 = blobcoords[blobindex - 1].
right ();
588 if (maxmax - minmin > jumplimit) {
590 for (blobindex = 0, segment = 1; blobindex < ycount;
592 if (yturns[blobindex] > minmin + jumplimit
593 || yturns[blobindex] < maxmax - jumplimit) {
596 || yturns[blobindex] > prevy + jumplimit
597 || yturns[blobindex] < prevy - jumplimit) {
599 xstarts[segment] = xturns[blobindex];
601 prevy = yturns[blobindex];
604 else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
606 || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
607 xstarts[segment - 1] = xturns[blobindex];
609 prevy = yturns[blobindex];
613 xstarts[segment] = blobcoords[blobcount - 1].
right () + 1;
616 *
baseline =
QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
622 shift =
ICOORD (0, static_cast<int16_t>(blobcoords[0].bottom ()
623 - spline->
y (blobcoords[0].
right ())));
657 leftedge = blobcoords[0].
left ();
659 rightedge = blobcoords[blobcount - 1].
right();
660 for (blobindex = 0; blobindex < blobcount; blobindex++) {
661 lms.
Add(
ICOORD((blobcoords[blobindex].left() +
662 blobcoords[blobindex].right()) / 2,
663 blobcoords[blobindex].bottom()));
666 xstarts[0] = leftedge;
667 xstarts[1] = rightedge;
669 coeffs[1] = gradient;
672 if (spline !=
nullptr
673 && spline->segments >= 3
675 && spline->xcoords[1] <= leftedge +
MAXOVERLAP * (rightedge - leftedge)
676 && spline->xcoords[spline->segments - 1] >= rightedge
679 x = (leftedge + rightedge) / 2.0;
680 shift =
ICOORD (0, static_cast<int16_t>(gradient * x + c - spline->
y (x)));
713 for (bestpart = 0; bestpart <
MAXPARTS; bestpart++)
714 partsizes[bestpart] = 0;
716 startx =
get_ydiffs (blobcoords, blobcount, spline, ydiffs);
720 float last_delta = 0.0f;
721 for (blobindex = startx; blobindex < blobcount; blobindex++) {
723 diff = ydiffs[blobindex];
725 tprintf (
"%d(%d,%d), ", blobindex,
726 blobcoords[blobindex].left (),
727 blobcoords[blobindex].bottom ());
730 &drift, &last_delta, numparts);
732 partids[blobindex] = bestpart;
733 partsizes[bestpart]++;
741 for (blobindex = startx; blobindex >= 0; blobindex--) {
742 diff = ydiffs[blobindex];
744 tprintf (
"%d(%d,%d), ", blobindex,
745 blobcoords[blobindex].left (),
746 blobcoords[blobindex].bottom ());
749 &drift, &last_delta, numparts);
751 partids[blobindex] = bestpart;
752 partsizes[bestpart]++;
755 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
756 if (partsizes[bestpart] >= partsizes[biggestpart])
757 biggestpart = bestpart;
758 if (textord_oldbl_merge_parts)
797 prevpart = biggestpart;
800 for (blobindex = 0; blobindex < blobcount; blobindex++) {
801 if (partids[blobindex] != prevpart) {
805 if (prevpart != biggestpart && runlength >
MAXBADRUN) {
807 for (test_blob = startx; test_blob < blobindex; test_blob++) {
808 coord =
FCOORD ((blobcoords[test_blob].left ()
809 + blobcoords[test_blob].right ()) / 2.0,
810 blobcoords[test_blob].bottom ());
811 stats.
add (coord.
x (), coord.
y ());
817 tprintf (
"Fitted line y=%g x + %g\n", m, c);
820 for (test_blob = 1; !found_one
821 && (startx - test_blob >= 0
822 || blobindex + test_blob <= blobcount); test_blob++) {
823 if (startx - test_blob >= 0
824 && partids[startx - test_blob] == biggestpart) {
826 coord =
FCOORD ((blobcoords[startx - test_blob].left ()
827 + blobcoords[startx -
828 test_blob].right ()) /
831 test_blob].bottom ());
832 diff = m * coord.
x () + c - coord.
y ();
835 (
"Diff of common blob to suspect part=%g at (%g,%g)\n",
836 diff, coord.
x (), coord.
y ());
837 if (diff < jumplimit && -diff < jumplimit)
840 if (blobindex + test_blob <= blobcount
841 && partids[blobindex + test_blob - 1] == biggestpart) {
844 FCOORD ((blobcoords[blobindex + test_blob - 1].
845 left () + blobcoords[blobindex + test_blob -
847 blobcoords[blobindex + test_blob -
849 diff = m * coord.
x () + c - coord.
y ();
852 (
"Diff of common blob to suspect part=%g at (%g,%g)\n",
853 diff, coord.
x (), coord.
y ());
854 if (diff < jumplimit && -diff < jumplimit)
861 (
"Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
862 runlength, biggestpart, prevpart,
863 blobcoords[startx].left (),
864 blobcoords[startx].bottom ());
866 partsizes[prevpart] -= runlength;
867 for (test_blob = startx; test_blob < blobindex; test_blob++)
868 partids[test_blob] = biggestpart;
871 prevpart = partids[blobindex];
907 bestsum = static_cast<float>(INT32_MAX);
909 lastx = blobcoords[0].
left ();
911 for (blobindex = 0; blobindex < blobcount; blobindex++) {
913 xcentre = (blobcoords[blobindex].
left () + blobcoords[blobindex].
right ()) >> 1;
915 drift += spline->
step (lastx, xcentre);
917 diff = blobcoords[blobindex].
bottom ();
918 diff -= spline->
y (xcentre);
920 ydiffs[blobindex] = diff;
923 diffsum -=
ABS (ydiffs[blobindex - 3]);
924 diffsum +=
ABS (diff);
925 if (blobindex >= 2 && diffsum < bestsum) {
927 bestindex = blobindex - 1;
962 delta = diff - partdiffs[lastpart] - *drift;
964 tprintf (
"Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
966 if (
ABS (delta) > jumplimit / 2) {
968 bestdelta = diff - partdiffs[0] - *drift;
970 for (partition = 1; partition < *partcount; partition++) {
971 delta = diff - partdiffs[partition] - *drift;
972 if (
ABS (delta) <
ABS (bestdelta)) {
974 bestpart = partition;
979 if (
ABS (bestdelta) > jumplimit
981 bestpart = (*partcount)++;
983 partdiffs[bestpart] = diff - *drift;
991 if (bestpart == lastpart
992 && (
ABS (delta - *lastdelta) < jumplimit / 2
993 ||
ABS (delta) < jumplimit / 2))
995 *drift = (3 * *drift + delta) / 3;
1025 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1026 if (partids[blobindex] == bestpart) {
1028 xcoords[pointcount] = (blobcoords[blobindex].
left () + blobcoords[blobindex].
right ()) >> 1;
1029 ycoords[pointcount++] = blobcoords[blobindex].
bottom ();
1049 int degree,
int pointcount,
1054 int lastmin, lastmax;
1059 xstarts[0] = xcoords[0] - 1;
1060 max_x = xcoords[pointcount - 1] + 1;
1064 if (pointcount > 3) {
1066 lastmax = lastmin = 0;
1067 while (ptindex < pointcount - 1 && turncount <
SPLINESIZE - 1) {
1069 if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1070 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT) {
1071 if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
1073 turnpoints[turncount++] = lastmax;
1076 else if (ycoords[ptindex] < ycoords[lastmin]) {
1082 if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1083 if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT) {
1084 if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
1086 turnpoints[turncount++] = lastmin;
1089 else if (ycoords[ptindex] > ycoords[lastmax]) {
1096 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT
1097 && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1100 turnpoints[turncount++] = lastmax;
1102 turnpoints[turncount++] = ptindex;
1104 else if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT
1106 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1109 turnpoints[turncount++] = lastmin;
1111 turnpoints[turncount++] = ptindex;
1113 else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
1115 if (ycoords[ptindex] > ycoords[lastmax])
1116 turnpoints[turncount++] = ptindex;
1118 turnpoints[turncount++] = lastmax;
1120 else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
1122 if (ycoords[ptindex] < ycoords[lastmin])
1123 turnpoints[turncount++] = ptindex;
1125 turnpoints[turncount++] = lastmin;
1130 tprintf (
"First turn is %d at (%d,%d)\n",
1131 turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
1132 for (segment = 1; segment < turncount; segment++) {
1134 lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1137 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
1139 for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
1142 for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
1145 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
1146 + xcoords[turnpoints[segment - 1]]
1147 + xcoords[turnpoints[segment]] + 2) / 4;
1150 tprintf (
"Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
1151 segment, turnpoints[segment],
1152 xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1153 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
1156 xstarts[segment] = max_x;
1178 int startindex, centreindex, endindex;
1179 float leftcoord, rightcoord;
1180 int leftindex, rightindex;
1185 for (segment = 1; segment < segments - 1; segment++) {
1186 step =
baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1187 (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1190 if (step > jumplimit) {
1191 while (xcoords[startindex] < xstarts[segment - 1])
1193 centreindex = startindex;
1194 while (xcoords[centreindex] < xstarts[segment])
1196 endindex = centreindex;
1197 while (xcoords[endindex] < xstarts[segment + 1])
1200 if (textord_debug_baselines)
1201 tprintf (
"Too many segments to resegment spline!!\n");
1204 while (centreindex - startindex <
1207 while (endindex - centreindex <
1210 leftindex = (startindex + startindex + centreindex) / 3;
1211 rightindex = (centreindex + endindex + endindex) / 3;
1213 (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1215 (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1216 while (xcoords[leftindex] > leftcoord
1219 while (xcoords[leftindex] < leftcoord
1220 && centreindex - leftindex >
1223 if (xcoords[leftindex] - leftcoord >
1224 leftcoord - xcoords[leftindex - 1])
1226 while (xcoords[rightindex] > rightcoord
1227 && rightindex - centreindex >
1230 while (xcoords[rightindex] < rightcoord
1233 if (xcoords[rightindex] - rightcoord >
1234 rightcoord - xcoords[rightindex - 1])
1236 if (textord_debug_baselines)
1237 tprintf (
"Splitting spline at %d with step %g at (%d,%d)\n",
1240 step ((xstarts[segment - 1] +
1241 xstarts[segment]) / 2.0,
1243 xstarts[segment + 1]) / 2.0),
1244 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1245 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1247 (xcoords[leftindex - 1] +
1248 xcoords[leftindex]) / 2,
1249 (xcoords[rightindex - 1] +
1250 xcoords[rightindex]) / 2, segments);
1253 else if (textord_debug_baselines) {
1255 (
"Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
1256 startindex, centreindex, endindex,
1280 int coord2,
int &segments
1284 for (index = segments; index > segment; index--)
1285 xstarts[index + 1] = xstarts[index];
1287 xstarts[segment] = coord1;
1288 xstarts[segment + 1] = coord2;
1320 for (partition = 0; partition < partcount; partition++)
1321 partsteps[partition] = 0.0;
1322 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1323 xcentre = (blobcoords[blobindex].
left ()
1324 + blobcoords[blobindex].
right ()) >> 1;
1327 static_cast<int>(static_cast<unsigned char>(partids[blobindex]));
1328 if (part_id != bestpart) {
1330 if (runlength > biggestrun)
1331 biggestrun = runlength;
1332 partsteps[part_id] += blobcoords[blobindex].
bottom()
1342 poscount = negcount = 0;
1344 for (partition = 0; partition < partcount; partition++) {
1345 if (partition != bestpart) {
1347 if (partsizes[partition] == 0)
1348 partsteps[partition] = 0;
1350 partsteps[partition] /= partsizes[partition];
1354 && partsizes[partition] > poscount) {
1355 poscount = partsizes[partition];
1358 && partsizes[partition] > negcount) {
1360 bestneg = partsteps[partition];
1362 negcount = partsizes[partition];
1367 partsteps[bestpart] /= blobcount;
1401 if (blobcount > 1) {
1402 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1403 xcentre = (blobcoords[blobindex].
left ()
1404 + blobcoords[blobindex].
right ()) / 2;
1406 height = static_cast<int>(blobcoords[blobindex].top () -
baseline->y (xcentre) + 0.5);
1407 if (height > initialheight * oldbl_xhfract
1409 heightstat.
add (height, 1);
1412 lineheight = static_cast<int>(heightstat.
ile (0.25));
1413 if (lineheight <= 0)
1414 lineheight = static_cast<int>(heightstat.
ile (0.5));
1417 lineheight = initialheight;
1420 lineheight = static_cast<int>(blobcoords[0].top ()
1421 -
baseline->y ((blobcoords[0].left ()
1422 + blobcoords[0].
right ()) / 2) +
1428 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
1430 xcentre = (blobcoords[blobindex].
left ()
1431 + blobcoords[blobindex].
right ()) / 2;
1432 diff = blobcoords[blobindex].
top () -
baseline->y (xcentre);
1434 if (diff > lineheight + jumplimit) {
1438 else if (diff > lineheight - jumplimit) {
1446 xsum = static_cast<float>(lineheight);
1449 row->
ascrise = ascenders / asccount - xsum;
1470 int init_lineheight,
1483 const int kBaselineTouch = 2;
1484 const int kGoodStrength = 8;
1485 const float kMinHeight = 0.25;
1487 sign_bit = row->
xheight > 0 ? 1 : -1;
1492 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1493 int xcenter = (blobcoords[blobindex].
left () +
1494 blobcoords[blobindex].
right ()) / 2;
1496 float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
1497 int strength = textord_ocropus_mode &&
1498 bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1499 int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
1500 if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
1501 if (height > lineheight * oldbl_xhfract
1503 heightstat.
add (height, strength);
1505 if (xcenter > rights[height])
1506 rights[height] = xcenter;
1507 if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
1508 lefts[height] = xcenter;
1511 mode_count += strength;
1515 mode_threshold = static_cast<int>(blobcount * 0.1);
1516 if (oldbl_dot_error_size > 1 || oldbl_xhfix)
1517 mode_threshold = static_cast<int>(mode_count * 0.1);
1520 tprintf (
"blobcount=%d, mode_count=%d, mode_t=%d\n",
1521 blobcount, mode_count, mode_threshold);
1525 for (blobindex = 0; blobindex <
MODENUM; blobindex++)
1526 tprintf (
"mode[%d]=%d ", blobindex, modelist[blobindex]);
1529 pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
1554 int modelist[],
int modenum
1558 int last_max = INT32_MAX;
1562 int mode_factor = textord_ocropus_mode ?
1565 for (mode_count = 0; mode_count < modenum; mode_count++) {
1567 for (i = 0; i < statnum; i++) {
1570 ((stats->
pile_count (i) == last_max) && (i > last_i))) {
1577 total_max += last_max;
1578 if (last_max <= total_max / mode_factor)
1580 modelist[mode_count] = mode;
1593 int lefts[],
int rights[],
1595 int mode_threshold) {
1600 int found_one_bigger =
false;
1601 int best_x_height = 0;
1605 for (x = 0; x <
MODENUM; x++) {
1606 for (y = 0; y <
MODENUM; y++) {
1608 if (modelist[x] && modelist[y] &&
1609 heightstat->
pile_count (modelist[x]) > mode_threshold &&
1610 (!textord_ocropus_mode ||
1611 std::min(rights[modelist[x]], rights[modelist[y]]) >
1612 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1613 ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]);
1614 if (1.2 < ratio && ratio < 1.8) {
1616 best_x_height = modelist[x];
1617 num_in_best = heightstat->
pile_count (modelist[x]);
1621 found_one_bigger =
false;
1622 for (z = 0; z <
MODENUM; z++) {
1623 if (modelist[z] == best_x_height + 1 &&
1624 (!textord_ocropus_mode ||
1625 std::min(rights[modelist[x]], rights[modelist[y]]) >
1626 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1627 ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]);
1628 if ((1.2 < ratio && ratio < 1.8) &&
1631 num_in_best * 0.5) {
1633 found_one_bigger =
true;
1639 while (found_one_bigger);
1643 best_asc = modelist[y];
1644 num_in_best = heightstat->
pile_count (modelist[y]);
1648 found_one_bigger =
false;
1649 for (z = 0; z <
MODENUM; z++) {
1650 if (modelist[z] > best_asc &&
1651 (!textord_ocropus_mode ||
1652 std::min(rights[modelist[x]], rights[modelist[y]]) >
1653 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1654 ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height);
1655 if ((1.2 < ratio && ratio < 1.8) &&
1658 num_in_best * 0.5) {
1659 best_asc = modelist[z];
1660 found_one_bigger =
true;
1666 while (found_one_bigger);
1668 row->
xheight = static_cast<float>(best_x_height);
1669 row->
ascrise = static_cast<float>(best_asc) - best_x_height;
1676 best_x_height = modelist[0];
1677 num_in_best = heightstat->
pile_count (best_x_height);
1680 found_one_bigger =
false;
1681 for (z = 1; z <
MODENUM; z++) {
1683 if ((modelist[z] == best_x_height + 1) &&
1684 (heightstat->
pile_count (modelist[z]) > num_in_best * 0.5)) {
1686 found_one_bigger =
true;
1691 while (found_one_bigger);
1694 row->
xheight = static_cast<float>(best_x_height);