33 #include "config_auto.h" 41 "Use original wiseowl xheight");
49 "Fix bug in modes threshold for xheights");
53 "Max lost before fallback line used");
56 "X fraction for new partition");
59 #define X_HEIGHT_FRACTION 0.7 60 #define DESCENDER_FRACTION 0.5 61 #define MIN_ASC_FRACTION 0.20 62 #define MIN_DESC_FRACTION 0.25 63 #define MINASCRISE 2.0 64 #define MAXHEIGHTVARIANCE 0.15 66 #define MAXOVERLAP 0.1 68 #define HEIGHTBUCKETS 200 69 #define DELTAHEIGHT 5.0 76 #define ABS(x) ((x)<0 ? (-(x)) : (x)) 86 void Textord::make_old_baselines(
TO_BLOCK* block,
91 TO_ROW_IT row_it = block->
get_rows();
94 prev_baseline =
nullptr;
95 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
97 find_textlines(block, row, 2,
nullptr);
98 if (row->
xheight <= 0 && prev_baseline !=
nullptr)
99 find_textlines(block, row, 2, prev_baseline);
103 prev_baseline =
nullptr;
106 tprintf(
"Row baseline generation failed on row at (%d,%d)\n",
107 blob_it.data()->bounding_box().left(),
108 blob_it.data()->bounding_box().bottom());
111 correlate_lines(block, gradient);
124 void Textord::correlate_lines(
TO_BLOCK *block,
float gradient) {
128 TO_ROW_IT row_it = block->
get_rows ();
130 rowcount = row_it.length ();
137 std::vector <TO_ROW *> rows(rowcount);
139 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
141 rows[rowindex++] = row_it.data ();
144 correlate_neighbours(block, &rows[0], rowcount);
147 block->
xheight = (float) correlate_with_stats(&rows[0], rowcount, block);
164 void Textord::correlate_neighbours(
TO_BLOCK *block,
174 for (rowindex = 0; rowindex < rowcount; rowindex++) {
175 row = rows[rowindex];
178 for (otherrow = rowindex - 2;
180 && (rows[otherrow]->
xheight < 0.0
184 for (otherrow = rowindex + 1;
186 && (rows[otherrow]->
xheight < 0.0
191 find_textlines(block, row, 2, &rows[upperrow]->
baseline);
192 if (row->
xheight < 0 && lowerrow < rowcount)
193 find_textlines(block, row, 2, &rows[lowerrow]->
baseline);
196 find_textlines(block, row, 1, &rows[upperrow]->
baseline);
197 else if (lowerrow < rowcount)
198 find_textlines(block, row, 1, &rows[lowerrow]->
baseline);
203 for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
204 row = rows[rowindex];
208 biggest = std::max(biggest, row->
xheight);
220 int Textord::correlate_with_stats(
TO_ROW **rows,
236 xcount = fullcount = desccount = 0;
237 lineheight = ascheight = fullheight = descheight = 0.0;
238 for (rowindex = 0; rowindex < rowcount; rowindex++) {
239 row = rows[rowindex];
257 lineheight /= xcount;
259 fullheight = lineheight + ascheight / xcount;
265 fullheight /= fullcount;
269 if (desccount > 0 && (!
oldbl_corrfix || desccount >= rowcount / 2))
270 descheight /= desccount;
275 if (lineheight > 0.0f)
280 for (rowindex = 0; rowindex < rowcount; rowindex++) {
281 row = rows[rowindex];
287 row->
ascrise = fullheight - lineheight;
306 if (row->
ascrise < minascheight)
310 if (row->
descdrop > mindescheight) {
319 return (
int) lineheight;
329 void Textord::find_textlines(
TO_BLOCK *block,
334 bool holed_line =
false;
345 blobcount = row->
blob_list ()->length ();
347 std::vector<char> partids(blobcount);
349 std::vector<int> xcoords(blobcount);
351 std::vector<int> ycoords(blobcount);
353 std::vector<TBOX> blobcoords(blobcount);
355 std::vector<float> ydiffs(blobcount);
358 holed_line, blobcount);
366 (
"\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
367 block->
line_size, lineheight, jumplimit);
374 &xcoords[0], &ycoords[0], spline, &row->
baseline, jumplimit);
375 #ifndef GRAPHICS_DISABLED 381 &partcount, &partids[0], partsizes,
382 &row->
baseline, jumplimit, &ydiffs[0]);
384 &partids[0], bestpart, &xcoords[0], &ycoords[0]);
386 &xcoords[0], &ycoords[0], degree, pointcount, xstarts);
390 &xcoords[0], &ycoords[0], pointcount, degree);
394 &xcoords[0], xstarts, segments));
397 &partids[0], partsizes, partcount, bestpart);
411 blobcount, &row->
baseline, jumplimit);
414 blobcount, &row->
baseline, jumplimit);
446 if (blob_it.empty ())
450 blob_it.mark_cycle_pt ();
454 if (blobcoords[blobindex].height () > lineheight * 0.25)
455 heightstat.
add (blobcoords[blobindex].height (), 1);
457 || blobcoords[blobindex].height () > lineheight * 0.25
458 || blob_it.cycled_list ()) {
463 if (blobcoords[blobindex].height ()
465 && blobcoords[blobindex].
width ()
473 if (losscount > maxlosscount)
475 maxlosscount = losscount;
479 while (!blob_it.cycled_list ());
482 outcount = blobindex;
486 return (
int) heightstat.
ile (0.25);
488 return blobcoords[0].
height ();
514 float prevy, thisy, nexty;
516 float maxmax, minmin;
527 leftedge = blobcoords[0].
left ();
529 rightedge = blobcoords[blobcount - 1].
right ();
530 if (spline ==
nullptr 531 || spline->segments < 3
533 || spline->xcoords[1] > leftedge +
MAXOVERLAP * (rightedge - leftedge)
534 || spline->xcoords[spline->segments - 1] < rightedge
538 xstarts[0] = blobcoords[0].
left () - 1;
539 for (blobindex = 0; blobindex < blobcount; blobindex++) {
540 xcoords[blobindex] = (blobcoords[blobindex].
left ()
541 + blobcoords[blobindex].
right ()) / 2;
542 ycoords[blobindex] = blobcoords[blobindex].
bottom ();
544 xstarts[1] = blobcoords[blobcount - 1].
right () + 1;
548 *
baseline =
QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
550 if (blobcount >= 3) {
554 maxmax = minmin = 0.0f;
555 thisy = ycoords[0] -
baseline->y (xcoords[0]);
556 nexty = ycoords[1] -
baseline->y (xcoords[1]);
557 for (blobindex = 2; blobindex < blobcount; blobindex++) {
560 nexty = ycoords[blobindex] -
baseline->y (xcoords[blobindex]);
562 if (
ABS (thisy - prevy) < jumplimit &&
ABS (thisy - nexty) < jumplimit) {
568 if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
570 || (y1 > y2 && y2 <= y3))) {
573 xturns[segment] = x2;
574 yturns[segment] = y2;
579 maxmax = minmin = y3;
588 x2 = blobcoords[blobindex - 1].
right ();
594 if (maxmax - minmin > jumplimit) {
596 for (blobindex = 0, segment = 1; blobindex < ycount;
598 if (yturns[blobindex] > minmin + jumplimit
599 || yturns[blobindex] < maxmax - jumplimit) {
602 || yturns[blobindex] > prevy + jumplimit
603 || yturns[blobindex] < prevy - jumplimit) {
605 xstarts[segment] = xturns[blobindex];
607 prevy = yturns[blobindex];
610 else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
612 || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
613 xstarts[segment - 1] = xturns[blobindex];
615 prevy = yturns[blobindex];
619 xstarts[segment] = blobcoords[blobcount - 1].
right () + 1;
622 *
baseline =
QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
628 shift =
ICOORD (0, (int16_t) (blobcoords[0].bottom ()
629 - spline->
y (blobcoords[0].
right ())));
663 leftedge = blobcoords[0].
left ();
665 rightedge = blobcoords[blobcount - 1].
right();
666 for (blobindex = 0; blobindex < blobcount; blobindex++) {
667 lms.
Add(
ICOORD((blobcoords[blobindex].left() +
668 blobcoords[blobindex].right()) / 2,
669 blobcoords[blobindex].bottom()));
672 xstarts[0] = leftedge;
673 xstarts[1] = rightedge;
675 coeffs[1] = gradient;
678 if (spline !=
nullptr 679 && spline->segments >= 3
681 && spline->xcoords[1] <= leftedge +
MAXOVERLAP * (rightedge - leftedge)
682 && spline->xcoords[spline->segments - 1] >= rightedge
685 x = (leftedge + rightedge) / 2.0;
686 shift =
ICOORD (0, (int16_t) (gradient * x + c - spline->
y (x)));
719 for (bestpart = 0; bestpart <
MAXPARTS; bestpart++)
720 partsizes[bestpart] = 0;
722 startx =
get_ydiffs (blobcoords, blobcount, spline, ydiffs);
726 float last_delta = 0.0f;
727 for (blobindex = startx; blobindex < blobcount; blobindex++) {
729 diff = ydiffs[blobindex];
731 tprintf (
"%d(%d,%d), ", blobindex,
732 blobcoords[blobindex].left (),
733 blobcoords[blobindex].bottom ());
736 &drift, &last_delta, numparts);
738 partids[blobindex] = bestpart;
739 partsizes[bestpart]++;
747 for (blobindex = startx; blobindex >= 0; blobindex--) {
748 diff = ydiffs[blobindex];
750 tprintf (
"%d(%d,%d), ", blobindex,
751 blobcoords[blobindex].left (),
752 blobcoords[blobindex].bottom ());
755 &drift, &last_delta, numparts);
757 partids[blobindex] = bestpart;
758 partsizes[bestpart]++;
761 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
762 if (partsizes[bestpart] >= partsizes[biggestpart])
763 biggestpart = bestpart;
803 prevpart = biggestpart;
806 for (blobindex = 0; blobindex < blobcount; blobindex++) {
807 if (partids[blobindex] != prevpart) {
811 if (prevpart != biggestpart && runlength >
MAXBADRUN) {
813 for (test_blob = startx; test_blob < blobindex; test_blob++) {
814 coord =
FCOORD ((blobcoords[test_blob].left ()
815 + blobcoords[test_blob].right ()) / 2.0,
816 blobcoords[test_blob].bottom ());
817 stats.
add (coord.
x (), coord.
y ());
823 tprintf (
"Fitted line y=%g x + %g\n", m, c);
826 for (test_blob = 1; !found_one
827 && (startx - test_blob >= 0
828 || blobindex + test_blob <= blobcount); test_blob++) {
829 if (startx - test_blob >= 0
830 && partids[startx - test_blob] == biggestpart) {
832 coord =
FCOORD ((blobcoords[startx - test_blob].left ()
833 + blobcoords[startx -
834 test_blob].right ()) /
837 test_blob].bottom ());
838 diff = m * coord.
x () + c - coord.
y ();
841 (
"Diff of common blob to suspect part=%g at (%g,%g)\n",
842 diff, coord.
x (), coord.
y ());
843 if (diff < jumplimit && -diff < jumplimit)
846 if (blobindex + test_blob <= blobcount
847 && partids[blobindex + test_blob - 1] == biggestpart) {
850 FCOORD ((blobcoords[blobindex + test_blob - 1].
851 left () + blobcoords[blobindex + test_blob -
853 blobcoords[blobindex + test_blob -
855 diff = m * coord.
x () + c - coord.
y ();
858 (
"Diff of common blob to suspect part=%g at (%g,%g)\n",
859 diff, coord.
x (), coord.
y ());
860 if (diff < jumplimit && -diff < jumplimit)
867 (
"Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
868 runlength, biggestpart, prevpart,
869 blobcoords[startx].left (),
870 blobcoords[startx].bottom ());
872 partsizes[prevpart] -= runlength;
873 for (test_blob = startx; test_blob < blobindex; test_blob++)
874 partids[test_blob] = biggestpart;
877 prevpart = partids[blobindex];
913 bestsum = (float) INT32_MAX;
915 lastx = blobcoords[0].
left ();
917 for (blobindex = 0; blobindex < blobcount; blobindex++) {
919 xcentre = (blobcoords[blobindex].
left () + blobcoords[blobindex].
right ()) >> 1;
921 drift += spline->
step (lastx, xcentre);
923 diff = blobcoords[blobindex].
bottom ();
924 diff -= spline->
y (xcentre);
926 ydiffs[blobindex] = diff;
929 diffsum -=
ABS (ydiffs[blobindex - 3]);
930 diffsum +=
ABS (diff);
931 if (blobindex >= 2 && diffsum < bestsum) {
933 bestindex = blobindex - 1;
968 delta = diff - partdiffs[lastpart] - *drift;
970 tprintf (
"Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
972 if (
ABS (delta) > jumplimit / 2) {
974 bestdelta = diff - partdiffs[0] - *drift;
976 for (partition = 1; partition < *partcount; partition++) {
977 delta = diff - partdiffs[partition] - *drift;
978 if (
ABS (delta) <
ABS (bestdelta)) {
980 bestpart = partition;
985 if (
ABS (bestdelta) > jumplimit
987 bestpart = (*partcount)++;
989 partdiffs[bestpart] = diff - *drift;
997 if (bestpart == lastpart
998 && (
ABS (delta - *lastdelta) < jumplimit / 2
999 ||
ABS (delta) < jumplimit / 2))
1001 *drift = (3 * *drift + delta) / 3;
1031 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1032 if (partids[blobindex] == bestpart) {
1034 xcoords[pointcount] = (blobcoords[blobindex].
left () + blobcoords[blobindex].
right ()) >> 1;
1035 ycoords[pointcount++] = blobcoords[blobindex].
bottom ();
1055 int degree,
int pointcount,
1060 int lastmin, lastmax;
1065 xstarts[0] = xcoords[0] - 1;
1066 max_x = xcoords[pointcount - 1] + 1;
1070 if (pointcount > 3) {
1072 lastmax = lastmin = 0;
1073 while (ptindex < pointcount - 1 && turncount <
SPLINESIZE - 1) {
1075 if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1076 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT) {
1077 if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
1079 turnpoints[turncount++] = lastmax;
1082 else if (ycoords[ptindex] < ycoords[lastmin]) {
1088 if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1089 if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT) {
1090 if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
1092 turnpoints[turncount++] = lastmin;
1095 else if (ycoords[ptindex] > ycoords[lastmax]) {
1102 if (ycoords[ptindex] < ycoords[lastmax] -
TURNLIMIT 1103 && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1106 turnpoints[turncount++] = lastmax;
1108 turnpoints[turncount++] = ptindex;
1110 else if (ycoords[ptindex] > ycoords[lastmin] +
TURNLIMIT 1112 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1115 turnpoints[turncount++] = lastmin;
1117 turnpoints[turncount++] = ptindex;
1119 else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
1121 if (ycoords[ptindex] > ycoords[lastmax])
1122 turnpoints[turncount++] = ptindex;
1124 turnpoints[turncount++] = lastmax;
1126 else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
1128 if (ycoords[ptindex] < ycoords[lastmin])
1129 turnpoints[turncount++] = ptindex;
1131 turnpoints[turncount++] = lastmin;
1136 tprintf (
"First turn is %d at (%d,%d)\n",
1137 turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
1138 for (segment = 1; segment < turncount; segment++) {
1140 lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1143 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
1145 for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
1148 for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
1151 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
1152 + xcoords[turnpoints[segment - 1]]
1153 + xcoords[turnpoints[segment]] + 2) / 4;
1156 tprintf (
"Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
1157 segment, turnpoints[segment],
1158 xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
1159 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
1162 xstarts[segment] = max_x;
1184 int startindex, centreindex, endindex;
1185 float leftcoord, rightcoord;
1186 int leftindex, rightindex;
1191 for (segment = 1; segment < segments - 1; segment++) {
1192 step =
baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1193 (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1196 if (step > jumplimit) {
1197 while (xcoords[startindex] < xstarts[segment - 1])
1199 centreindex = startindex;
1200 while (xcoords[centreindex] < xstarts[segment])
1202 endindex = centreindex;
1203 while (xcoords[endindex] < xstarts[segment + 1])
1207 tprintf (
"Too many segments to resegment spline!!\n");
1210 while (centreindex - startindex <
1213 while (endindex - centreindex <
1216 leftindex = (startindex + startindex + centreindex) / 3;
1217 rightindex = (centreindex + endindex + endindex) / 3;
1219 (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1221 (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1222 while (xcoords[leftindex] > leftcoord
1225 while (xcoords[leftindex] < leftcoord
1226 && centreindex - leftindex >
1229 if (xcoords[leftindex] - leftcoord >
1230 leftcoord - xcoords[leftindex - 1])
1232 while (xcoords[rightindex] > rightcoord
1233 && rightindex - centreindex >
1236 while (xcoords[rightindex] < rightcoord
1239 if (xcoords[rightindex] - rightcoord >
1240 rightcoord - xcoords[rightindex - 1])
1243 tprintf (
"Splitting spline at %d with step %g at (%d,%d)\n",
1246 step ((xstarts[segment - 1] +
1247 xstarts[segment]) / 2.0,
1249 xstarts[segment + 1]) / 2.0),
1250 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1251 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1253 (xcoords[leftindex - 1] +
1254 xcoords[leftindex]) / 2,
1255 (xcoords[rightindex - 1] +
1256 xcoords[rightindex]) / 2, segments);
1261 (
"Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
1262 startindex, centreindex, endindex,
1286 int coord2,
int &segments
1290 for (index = segments; index > segment; index--)
1291 xstarts[index + 1] = xstarts[index];
1293 xstarts[segment] = coord1;
1294 xstarts[segment + 1] = coord2;
1326 for (partition = 0; partition < partcount; partition++)
1327 partsteps[partition] = 0.0;
1328 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1329 xcentre = (blobcoords[blobindex].
left ()
1330 + blobcoords[blobindex].
right ()) >> 1;
1333 static_cast<int>(
static_cast<unsigned char>(partids[blobindex]));
1334 if (part_id != bestpart) {
1336 if (runlength > biggestrun)
1337 biggestrun = runlength;
1338 partsteps[part_id] += blobcoords[blobindex].
bottom()
1348 poscount = negcount = 0;
1350 for (partition = 0; partition < partcount; partition++) {
1351 if (partition != bestpart) {
1353 if (partsizes[partition] == 0)
1354 partsteps[partition] = 0;
1356 partsteps[partition] /= partsizes[partition];
1360 && partsizes[partition] > poscount) {
1361 poscount = partsizes[partition];
1364 && partsizes[partition] > negcount) {
1366 bestneg = partsteps[partition];
1368 negcount = partsizes[partition];
1373 partsteps[bestpart] /= blobcount;
1407 if (blobcount > 1) {
1408 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1409 xcentre = (blobcoords[blobindex].
left ()
1410 + blobcoords[blobindex].
right ()) / 2;
1412 height = (int) (blobcoords[blobindex].top () -
baseline->y (xcentre) + 0.5);
1415 heightstat.
add (height, 1);
1418 lineheight = (int) heightstat.
ile (0.25);
1419 if (lineheight <= 0)
1420 lineheight = (int) heightstat.
ile (0.5);
1423 lineheight = initialheight;
1426 lineheight = (int) (blobcoords[0].top ()
1427 -
baseline->y ((blobcoords[0].left ()
1428 + blobcoords[0].right ()) / 2) +
1434 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
1436 xcentre = (blobcoords[blobindex].
left ()
1437 + blobcoords[blobindex].
right ()) / 2;
1438 diff = blobcoords[blobindex].
top () -
baseline->y (xcentre);
1440 if (diff > lineheight + jumplimit) {
1444 else if (diff > lineheight - jumplimit) {
1452 xsum = (float) lineheight;
1455 row->
ascrise = ascenders / asccount - xsum;
1476 int init_lineheight,
1489 const int kBaselineTouch = 2;
1490 const int kGoodStrength = 8;
1491 const float kMinHeight = 0.25;
1493 sign_bit = row->
xheight > 0 ? 1 : -1;
1498 for (blobindex = 0; blobindex < blobcount; blobindex++) {
1499 int xcenter = (blobcoords[blobindex].
left () +
1500 blobcoords[blobindex].
right ()) / 2;
1502 float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
1504 bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1505 int height =
static_cast<int>(blobcoords[blobindex].
top () - base + 0.5);
1506 if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
1509 heightstat.
add (height, strength);
1511 if (xcenter > rights[height])
1512 rights[height] = xcenter;
1513 if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
1514 lefts[height] = xcenter;
1517 mode_count += strength;
1521 mode_threshold = (int) (blobcount * 0.1);
1523 mode_threshold = (int) (mode_count * 0.1);
1526 tprintf (
"blobcount=%d, mode_count=%d, mode_t=%d\n",
1527 blobcount, mode_count, mode_threshold);
1531 for (blobindex = 0; blobindex <
MODENUM; blobindex++)
1532 tprintf (
"mode[%d]=%d ", blobindex, modelist[blobindex]);
1535 pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
1560 int modelist[],
int modenum
1564 int last_max = INT32_MAX;
1571 for (mode_count = 0; mode_count < modenum; mode_count++) {
1573 for (i = 0; i < statnum; i++) {
1576 ((stats->
pile_count (i) == last_max) && (i > last_i))) {
1583 total_max += last_max;
1584 if (last_max <= total_max / mode_factor)
1586 modelist[mode_count] = mode;
1599 int lefts[],
int rights[],
1601 int mode_threshold) {
1606 int found_one_bigger =
FALSE;
1607 int best_x_height = 0;
1611 for (x = 0; x <
MODENUM; x++) {
1612 for (y = 0; y <
MODENUM; y++) {
1614 if (modelist[x] && modelist[y] &&
1615 heightstat->
pile_count (modelist[x]) > mode_threshold &&
1617 std::min(rights[modelist[x]], rights[modelist[y]]) >
1618 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1619 ratio = (float) modelist[y] / (
float) modelist[x];
1620 if (1.2 < ratio && ratio < 1.8) {
1622 best_x_height = modelist[x];
1623 num_in_best = heightstat->
pile_count (modelist[x]);
1627 found_one_bigger =
FALSE;
1628 for (z = 0; z <
MODENUM; z++) {
1629 if (modelist[z] == best_x_height + 1 &&
1631 std::min(rights[modelist[x]], rights[modelist[y]]) >
1632 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1633 ratio = (float) modelist[y] / (
float) modelist[z];
1634 if ((1.2 < ratio && ratio < 1.8) &&
1637 num_in_best * 0.5) {
1639 found_one_bigger =
TRUE;
1645 while (found_one_bigger);
1649 best_asc = modelist[y];
1650 num_in_best = heightstat->
pile_count (modelist[y]);
1654 found_one_bigger =
FALSE;
1655 for (z = 0; z <
MODENUM; z++) {
1656 if (modelist[z] > best_asc &&
1658 std::min(rights[modelist[x]], rights[modelist[y]]) >
1659 std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1660 ratio = (float) modelist[z] / (
float) best_x_height;
1661 if ((1.2 < ratio && ratio < 1.8) &&
1664 num_in_best * 0.5) {
1665 best_asc = modelist[z];
1666 found_one_bigger =
TRUE;
1672 while (found_one_bigger);
1674 row->
xheight = (float) best_x_height;
1675 row->
ascrise = (float) best_asc - best_x_height;
1682 best_x_height = modelist[0];
1683 num_in_best = heightstat->
pile_count (best_x_height);
1686 found_one_bigger =
FALSE;
1687 for (z = 1; z <
MODENUM; z++) {
1689 if ((modelist[z] == best_x_height + 1) &&
1690 (heightstat->
pile_count (modelist[z]) > num_in_best * 0.5)) {
1692 found_one_bigger =
TRUE;
1697 while (found_one_bigger);
1700 row->
xheight = (float) best_x_height;
EXTERN bool textord_oldbl_split_splines
int get_blob_coords(TO_ROW *row, int32_t lineheight, TBOX *blobcoords, bool &holed_line, int &outcount)
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
void find_top_modes(STATS *stats, int statnum, int modelist[], int modenum)
void plot(ScrollView *window, ScrollView::Color colour) const
EXTERN double textord_oldbl_jumplimit
int segment_spline(TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
int32_t pile_count(int32_t value) const
#define DESCENDER_FRACTION
#define BOOL_VAR(name, val, comment)
void make_first_xheight(TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
#define MAXHEIGHTVARIANCE
int choose_partition(float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
EXTERN double oldbl_dot_error_size
#define double_VAR(name, val, comment)
void old_first_xheight(TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
const int kMinModeFactorOcropus
int textord_spline_medianwin
void extrapolate(double gradient, int left, int right)
#define X_HEIGHT_FRACTION
bool overlap(QSPLINE *spline2, double fraction)
EXTERN double oldbl_xhfract
void compute_block_xheight(TO_BLOCK *block, float gradient)
EXTERN bool textord_oldbl_paradef
static const double kXHeightFraction
void add(double x, double y)
EXTERN bool oldbl_corrfix
double step(double x1, double x2)
void Add(const ICOORD &pt)
void find_lesser_parts(TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
bool textord_show_final_rows
double ile(double frac) const
bool split_stepped_spline(QSPLINE *baseline, float jumplimit, int *xcoords, int *xstarts, int &segments)
FCOORD classify_rotation() const
DLLSYM void tprintf(const char *format,...)
void make_first_baseline(TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
void add(int32_t value, int32_t count)
EXTERN ScrollView * to_win
void set_cell_over_xheight(float ratio)
void merge_oldbl_parts(TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
EXTERN bool textord_debug_baselines
int partition_line(TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
void set_xheight(int32_t height)
set char size
EXTERN bool textord_ocropus_mode
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
int get_ydiffs(TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
EXTERN int oldbl_holed_losscount
#define MIN_DESC_FRACTION
EXTERN bool textord_oldbl_merge_parts
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
void pick_x_height(TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)
EXTERN bool textord_oldbl_debug
void insert_spline_point(int xstarts[], int segment, int coord1, int coord2, int &segments)
BLOBNBOX_LIST * blob_list()
int32_t get_total() const
void make_holed_baseline(TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
#define INT_VAR(name, val, comment)
EXTERN bool textord_really_old_xheight
int partition_coords(TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])