tesseract  5.0.0-alpha-619-ge9db
oldbasel.h File Reference
#include "params.h"
#include "blobbox.h"

Go to the source code of this file.

Functions

int get_blob_coords (TO_ROW *row, int32_t lineheight, TBOX *blobcoords, bool &holed_line, int &outcount)
 
void make_first_baseline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], QSPLINE *spline, QSPLINE *baseline, float jumplimit)
 
void make_holed_baseline (TBOX blobcoords[], int blobcount, QSPLINE *spline, QSPLINE *baseline, float gradient)
 
int partition_line (TBOX blobcoords[], int blobcount, int *numparts, char partids[], int partsizes[], QSPLINE *spline, float jumplimit, float ydiffs[])
 
void merge_oldbl_parts (TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
 
int get_ydiffs (TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
 
int choose_partition (float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *last_delta, int *partcount)
 
int partition_coords (TBOX blobcoords[], int blobcount, char partids[], int bestpart, int xcoords[], int ycoords[])
 
int segment_spline (TBOX blobcoords[], int blobcount, int xcoords[], int ycoords[], int degree, int pointcount, int xstarts[])
 
bool split_stepped_spline (QSPLINE *baseline, float jumplimit, int *xcoords, int *xstarts, int &segments)
 
void insert_spline_point (int xstarts[], int segment, int coord1, int coord2, int &segments)
 
void find_lesser_parts (TO_ROW *row, TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int partcount, int bestpart)
 
void old_first_xheight (TO_ROW *row, TBOX blobcoords[], int initialheight, int blobcount, QSPLINE *baseline, float jumplimit)
 
void make_first_xheight (TO_ROW *row, TBOX blobcoords[], int lineheight, int init_lineheight, int blobcount, QSPLINE *baseline, float jumplimit)
 
int * make_height_array (TBOX blobcoords[], int blobcount, QSPLINE *baseline)
 
void find_top_modes (STATS *stats, int statnum, int modelist[], int modenum)
 
void pick_x_height (TO_ROW *row, int modelist[], int lefts[], int rights[], STATS *heightstat, int mode_threshold)
 

Variables

bool textord_oldbl_debug = false
 

Function Documentation

◆ choose_partition()

int choose_partition ( float  diff,
float  partdiffs[],
int  lastpart,
float  jumplimit,
float *  drift,
float *  last_delta,
int *  partcount 
)

Definition at line 933 of file oldbasel.cpp.

949  {
950  int partition; /*partition no */
951  int bestpart; /*best new partition */
952  float bestdelta; /*best gap from a part */
953  float delta; /*diff from part */
954 
955  if (lastpart < 0) {
956  partdiffs[0] = diff;
957  lastpart = 0; /*first point */
958  *drift = 0.0f;
959  *lastdelta = 0.0f;
960  }
961  /*adjusted diff from part */
962  delta = diff - partdiffs[lastpart] - *drift;
963  if (textord_oldbl_debug) {
964  tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
965  }
966  if (ABS (delta) > jumplimit / 2) {
967  /*delta on part 0 */
968  bestdelta = diff - partdiffs[0] - *drift;
969  bestpart = 0; /*0 best so far */
970  for (partition = 1; partition < *partcount; partition++) {
971  delta = diff - partdiffs[partition] - *drift;
972  if (ABS (delta) < ABS (bestdelta)) {
973  bestdelta = delta;
974  bestpart = partition; /*part with nearest jump */
975  }
976  }
977  delta = bestdelta;
978  /*too far away */
979  if (ABS (bestdelta) > jumplimit
980  && *partcount < MAXPARTS) { /*and spare part left */
981  bestpart = (*partcount)++; /*best was new one */
982  /*start new one */
983  partdiffs[bestpart] = diff - *drift;
984  delta = 0.0f;
985  }
986  }
987  else {
988  bestpart = lastpart; /*best was last one */
989  }
990 
991  if (bestpart == lastpart
992  && (ABS (delta - *lastdelta) < jumplimit / 2
993  || ABS (delta) < jumplimit / 2))
994  /*smooth the drift */
995  *drift = (3 * *drift + delta) / 3;

◆ find_lesser_parts()

void find_lesser_parts ( TO_ROW *  row,
TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  partcount,
int  bestpart 
)

Definition at line 1287 of file oldbasel.cpp.

1308  {
1309  int blobindex; /*index of blob */
1310  int partition; /*current partition */
1311  int xcentre; /*centre of blob */
1312  int poscount; /*count of best up step */
1313  int negcount; /*count of best down step */
1314  float partsteps[MAXPARTS]; /*average step to part */
1315  float bestneg; /*best down step */
1316  int runlength; /*length of bad run */
1317  int biggestrun; /*biggest bad run */
1318 
1319  biggestrun = 0;
1320  for (partition = 0; partition < partcount; partition++)
1321  partsteps[partition] = 0.0; /*zero accumulators */
1322  for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
1323  xcentre = (blobcoords[blobindex].left ()
1324  + blobcoords[blobindex].right ()) >> 1;
1325  /*in other parts */
1326  int part_id =
1327  static_cast<int>(static_cast<unsigned char>(partids[blobindex]));
1328  if (part_id != bestpart) {
1329  runlength++; /*run of non bests */
1330  if (runlength > biggestrun)
1331  biggestrun = runlength;
1332  partsteps[part_id] += blobcoords[blobindex].bottom()
1333  - row->baseline.y(xcentre);
1334  }
1335  else
1336  runlength = 0;
1337  }
1338  if (biggestrun > MAXBADRUN)
1339  row->xheight = -1.0f; /*failed */
1340  else
1341  row->xheight = 1.0f; /*success */
1342  poscount = negcount = 0;
1343  bestneg = 0.0; /*no step yet */
1344  for (partition = 0; partition < partcount; partition++) {
1345  if (partition != bestpart) {
1346  // by jetsoft divide by zero possible
1347  if (partsizes[partition] == 0)
1348  partsteps[partition] = 0;
1349  else
1350  partsteps[partition] /= partsizes[partition];
1351  //
1352 
1353  if (partsteps[partition] >= MINASCRISE
1354  && partsizes[partition] > poscount) {
1355  poscount = partsizes[partition];
1356  }

◆ find_top_modes()

void find_top_modes ( STATS *  stats,
int  statnum,
int  modelist[],
int  modenum 
)

Definition at line 1535 of file oldbasel.cpp.

1555  {
1556  int mode_count;
1557  int last_i = 0;
1558  int last_max = INT32_MAX;
1559  int i;
1560  int mode;
1561  int total_max = 0;
1562  int mode_factor = textord_ocropus_mode ?
1564 
1565  for (mode_count = 0; mode_count < modenum; mode_count++) {
1566  mode = 0;

◆ get_blob_coords()

int get_blob_coords ( TO_ROW *  row,
int32_t  lineheight,
TBOX *  blobcoords,
bool &  holed_line,
int &  outcount 
)

Definition at line 423 of file oldbasel.cpp.

431  {
432  //blobs
433  BLOBNBOX_IT blob_it = row->blob_list ();
434  int blobindex; /*no along text line */
435  int losscount; //lost blobs
436  int maxlosscount; //greatest lost blobs
437  /*height stat collection */
438  STATS heightstat (0, MAXHEIGHT);
439 
440  if (blob_it.empty ())
441  return 0; //none
442  maxlosscount = 0;
443  losscount = 0;
444  blob_it.mark_cycle_pt ();
445  blobindex = 0;
446  do {
447  blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
448  if (blobcoords[blobindex].height () > lineheight * 0.25)
449  heightstat.add (blobcoords[blobindex].height (), 1);
450  if (blobindex == 0
451  || blobcoords[blobindex].height () > lineheight * 0.25
452  || blob_it.cycled_list ()) {
453  blobindex++; /*no of merged blobs */
454  losscount = 0;
455  }
456  else {
457  if (blobcoords[blobindex].height ()
458  < blobcoords[blobindex].width () * oldbl_dot_error_size
459  && blobcoords[blobindex].width ()
460  < blobcoords[blobindex].height () * oldbl_dot_error_size) {
461  //counts as dot
462  blobindex++;
463  losscount = 0;
464  }
465  else {
466  losscount++; //lost it
467  if (losscount > maxlosscount)
468  //remember max
469  maxlosscount = losscount;
470  }
471  }
472  }
473  while (!blob_it.cycled_list ());
474 
475  holed_line = maxlosscount > oldbl_holed_losscount;
476  outcount = blobindex; /*total blobs */
477 
478  if (heightstat.get_total () > 1)
479  /*guess x-height */
480  return static_cast<int>(heightstat.ile (0.25));
481  else

◆ get_ydiffs()

int get_ydiffs ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  spline,
float  ydiffs[] 
)

Definition at line 883 of file oldbasel.cpp.

895  {
896  int blobindex; /*current blob */
897  int xcentre; /*xcoord */
898  int lastx; /*last xcentre */
899  float diffsum; /*sum of diffs */
900  float diff; /*current difference */
901  float drift; /*sum of spline steps */
902  float bestsum; /*smallest diffsum */
903  int bestindex; /*index of bestsum */
904 
905  diffsum = 0.0f;
906  bestindex = 0;
907  bestsum = static_cast<float>(INT32_MAX);
908  drift = 0.0f;
909  lastx = blobcoords[0].left ();
910  /*do each blob in row */
911  for (blobindex = 0; blobindex < blobcount; blobindex++) {
912  /*centre of blob */
913  xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
914  //step functions in spline
915  drift += spline->step (lastx, xcentre);
916  lastx = xcentre;
917  diff = blobcoords[blobindex].bottom ();
918  diff -= spline->y (xcentre);
919  diff += drift;
920  ydiffs[blobindex] = diff; /*store difference */
921  if (blobindex > 2)
922  /*remove old one */
923  diffsum -= ABS (ydiffs[blobindex - 3]);
924  diffsum += ABS (diff); /*add new one */

◆ insert_spline_point()

void insert_spline_point ( int  xstarts[],
int  segment,
int  coord1,
int  coord2,
int &  segments 
)

Definition at line 1264 of file oldbasel.cpp.

1281  {

◆ make_first_baseline()

void make_first_baseline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
QSPLINE *  spline,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 492 of file oldbasel.cpp.

503  {
504  int leftedge; /*left edge of line */
505  int rightedge; /*right edge of line */
506  int blobindex; /*current blob */
507  int segment; /*current segment */
508  float prevy, thisy, nexty; /*3 y coords */
509  float y1, y2, y3; /*3 smooth blobs */
510  float maxmax, minmin; /*absolute limits */
511  int x2 = 0; /*right edge of old y3 */
512  int ycount; /*no of ycoords in use */
513  float yturns[SPLINESIZE]; /*y coords of turn pts */
514  int xturns[SPLINESIZE]; /*xcoords of turn pts */
515  int xstarts[SPLINESIZE + 1];
516  int segments; //no of segments
517  ICOORD shift; //shift of spline
518 
519  prevy = 0;
520  /*left edge of row */
521  leftedge = blobcoords[0].left ();
522  /*right edge of line */
523  rightedge = blobcoords[blobcount - 1].right ();
524  if (spline == nullptr /*no given spline */
525  || spline->segments < 3 /*or trivial */
526  /*or too non-overlap */
527  || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
528  || spline->xcoords[spline->segments - 1] < rightedge
529  - MAXOVERLAP * (rightedge - leftedge)) {
530  if (textord_oldbl_paradef)
531  return; //use default
532  xstarts[0] = blobcoords[0].left () - 1;
533  for (blobindex = 0; blobindex < blobcount; blobindex++) {
534  xcoords[blobindex] = (blobcoords[blobindex].left ()
535  + blobcoords[blobindex].right ()) / 2;
536  ycoords[blobindex] = blobcoords[blobindex].bottom ();
537  }
538  xstarts[1] = blobcoords[blobcount - 1].right () + 1;
539  segments = 1; /*no of segments */
540 
541  /*linear */
542  *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
543 
544  if (blobcount >= 3) {
545  y1 = y2 = y3 = 0.0f;
546  ycount = 0;
547  segment = 0; /*no of segments */
548  maxmax = minmin = 0.0f;
549  thisy = ycoords[0] - baseline->y (xcoords[0]);
550  nexty = ycoords[1] - baseline->y (xcoords[1]);
551  for (blobindex = 2; blobindex < blobcount; blobindex++) {
552  prevy = thisy; /*shift ycoords */
553  thisy = nexty;
554  nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
555  /*middle of smooth y */
556  if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
557  y1 = y2; /*shift window */
558  y2 = y3;
559  y3 = thisy; /*middle point */
560  ycount++;
561  /*local max */
562  if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
563  /*local min */
564  || (y1 > y2 && y2 <= y3))) {
565  if (segment < SPLINESIZE - 2) {
566  /*turning pt */
567  xturns[segment] = x2;
568  yturns[segment] = y2;
569  segment++; /*no of spline segs */
570  }
571  }
572  if (ycount == 1) {
573  maxmax = minmin = y3;/*initialise limits */
574  }
575  else {
576  if (y3 > maxmax)
577  maxmax = y3; /*biggest max */
578  if (y3 < minmin)
579  minmin = y3; /*smallest min */
580  }
581  /*possible turning pt */
582  x2 = blobcoords[blobindex - 1].right ();
583  }
584  }
585 
586  jumplimit *= 1.2;
587  /*must be wavy */
588  if (maxmax - minmin > jumplimit) {
589  ycount = segment; /*no of segments */
590  for (blobindex = 0, segment = 1; blobindex < ycount;
591  blobindex++) {
592  if (yturns[blobindex] > minmin + jumplimit
593  || yturns[blobindex] < maxmax - jumplimit) {
594  /*significant peak */
595  if (segment == 1
596  || yturns[blobindex] > prevy + jumplimit
597  || yturns[blobindex] < prevy - jumplimit) {
598  /*different to previous */
599  xstarts[segment] = xturns[blobindex];
600  segment++;
601  prevy = yturns[blobindex];
602  }
603  /*bigger max */
604  else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
605  /*smaller min */
606  || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
607  xstarts[segment - 1] = xturns[blobindex];
608  /*improved previous */
609  prevy = yturns[blobindex];
610  }
611  }
612  }
613  xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
614  segments = segment; /*no of segments */
615  /*linear */
616  *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
617  }
618  }
619  }
620  else {
621  *baseline = *spline; /*copy it */
622  shift = ICOORD (0, static_cast<int16_t>(blobcoords[0].bottom ()
623  - spline->y (blobcoords[0].right ())));

◆ make_first_xheight()

void make_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  lineheight,
int  init_lineheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1451 of file oldbasel.cpp.

1474  {
1475  STATS heightstat (0, HEIGHTBUCKETS);
1476  int lefts[HEIGHTBUCKETS];
1477  int rights[HEIGHTBUCKETS];
1478  int modelist[MODENUM];
1479  int blobindex;
1480  int mode_count; //blobs to count in thr
1481  int sign_bit;
1482  int mode_threshold;
1483  const int kBaselineTouch = 2; // This really should change with resolution.
1484  const int kGoodStrength = 8; // Strength of baseline-touching heights.
1485  const float kMinHeight = 0.25; // Min fraction of lineheight to use.
1486 
1487  sign_bit = row->xheight > 0 ? 1 : -1;
1488 
1489  memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));
1490  memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));
1491  mode_count = 0;
1492  for (blobindex = 0; blobindex < blobcount; blobindex++) {
1493  int xcenter = (blobcoords[blobindex].left () +
1494  blobcoords[blobindex].right ()) / 2;
1495  float base = baseline->y(xcenter);
1496  float bottomdiff = fabs(base - blobcoords[blobindex].bottom());
1497  int strength = textord_ocropus_mode &&
1498  bottomdiff <= kBaselineTouch ? kGoodStrength : 1;
1499  int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);
1500  if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {
1501  if (height > lineheight * oldbl_xhfract
1502  && height > textord_min_xheight) {
1503  heightstat.add (height, strength);
1504  if (height < HEIGHTBUCKETS) {
1505  if (xcenter > rights[height])
1506  rights[height] = xcenter;
1507  if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
1508  lefts[height] = xcenter;
1509  }
1510  }
1511  mode_count += strength;
1512  }
1513  }
1514 
1515  mode_threshold = static_cast<int>(blobcount * 0.1);
1516  if (oldbl_dot_error_size > 1 || oldbl_xhfix)
1517  mode_threshold = static_cast<int>(mode_count * 0.1);
1518 
1519  if (textord_oldbl_debug) {
1520  tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
1521  blobcount, mode_count, mode_threshold);
1522  }
1523  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);

◆ make_height_array()

int* make_height_array ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  baseline 
)

◆ make_holed_baseline()

void make_holed_baseline ( TBOX  blobcoords[],
int  blobcount,
QSPLINE *  spline,
QSPLINE *  baseline,
float  gradient 
)

Definition at line 634 of file oldbasel.cpp.

644  {
645  int leftedge; /*left edge of line */
646  int rightedge; /*right edge of line */
647  int blobindex; /*current blob */
648  float x; //centre of row
649  ICOORD shift; //shift of spline
650 
651  tesseract::DetLineFit lms; // straight baseline
652  int32_t xstarts[2]; //straight line
653  double coeffs[3];
654  float c; //line parameter
655 
656  /*left edge of row */
657  leftedge = blobcoords[0].left ();
658  /*right edge of line */
659  rightedge = blobcoords[blobcount - 1].right();
660  for (blobindex = 0; blobindex < blobcount; blobindex++) {
661  lms.Add(ICOORD((blobcoords[blobindex].left() +
662  blobcoords[blobindex].right()) / 2,
663  blobcoords[blobindex].bottom()));
664  }
665  lms.ConstrainedFit(gradient, &c);
666  xstarts[0] = leftedge;
667  xstarts[1] = rightedge;
668  coeffs[0] = 0;
669  coeffs[1] = gradient;
670  coeffs[2] = c;
671  *baseline = QSPLINE (1, xstarts, coeffs);
672  if (spline != nullptr /*no given spline */
673  && spline->segments >= 3 /*or trivial */
674  /*or too non-overlap */
675  && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
676  && spline->xcoords[spline->segments - 1] >= rightedge
677  - MAXOVERLAP * (rightedge - leftedge)) {
678  *baseline = *spline; /*copy it */
679  x = (leftedge + rightedge) / 2.0;

◆ merge_oldbl_parts()

void merge_oldbl_parts ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  biggestpart,
float  jumplimit 
)

Definition at line 771 of file oldbasel.cpp.

784  {
785  bool found_one; //found a bestpart blob
786  bool close_one; //found was close enough
787  int blobindex; /*no along text line */
788  int prevpart; //previous iteration
789  int runlength; //no in this part
790  float diff; /*difference from line */
791  int startx; /*index of start blob */
792  int test_blob; //another index
793  FCOORD coord; //blob coordinate
794  float m, c; //fitted line
795  QLSQ stats; //line stuff
796 
797  prevpart = biggestpart;
798  runlength = 0;
799  startx = 0;
800  for (blobindex = 0; blobindex < blobcount; blobindex++) {
801  if (partids[blobindex] != prevpart) {
802  // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
803  // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
804  // prevpart,partids[blobindex],runlength);
805  if (prevpart != biggestpart && runlength > MAXBADRUN) {
806  stats.clear ();
807  for (test_blob = startx; test_blob < blobindex; test_blob++) {
808  coord = FCOORD ((blobcoords[test_blob].left ()
809  + blobcoords[test_blob].right ()) / 2.0,
810  blobcoords[test_blob].bottom ());
811  stats.add (coord.x (), coord.y ());
812  }
813  stats.fit (1);
814  m = stats.get_b ();
815  c = stats.get_c ();
817  tprintf ("Fitted line y=%g x + %g\n", m, c);
818  found_one = false;
819  close_one = false;
820  for (test_blob = 1; !found_one
821  && (startx - test_blob >= 0
822  || blobindex + test_blob <= blobcount); test_blob++) {
823  if (startx - test_blob >= 0
824  && partids[startx - test_blob] == biggestpart) {
825  found_one = true;
826  coord = FCOORD ((blobcoords[startx - test_blob].left ()
827  + blobcoords[startx -
828  test_blob].right ()) /
829  2.0,
830  blobcoords[startx -
831  test_blob].bottom ());
832  diff = m * coord.x () + c - coord.y ();
834  tprintf
835  ("Diff of common blob to suspect part=%g at (%g,%g)\n",
836  diff, coord.x (), coord.y ());
837  if (diff < jumplimit && -diff < jumplimit)
838  close_one = true;
839  }
840  if (blobindex + test_blob <= blobcount
841  && partids[blobindex + test_blob - 1] == biggestpart) {
842  found_one = true;
843  coord =
844  FCOORD ((blobcoords[blobindex + test_blob - 1].
845  left () + blobcoords[blobindex + test_blob -
846  1].right ()) / 2.0,
847  blobcoords[blobindex + test_blob -
848  1].bottom ());
849  diff = m * coord.x () + c - coord.y ();
851  tprintf
852  ("Diff of common blob to suspect part=%g at (%g,%g)\n",
853  diff, coord.x (), coord.y ());
854  if (diff < jumplimit && -diff < jumplimit)
855  close_one = true;
856  }
857  }
858  if (close_one) {
860  tprintf
861  ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
862  runlength, biggestpart, prevpart,
863  blobcoords[startx].left (),
864  blobcoords[startx].bottom ());
865  //switch sides
866  partsizes[prevpart] -= runlength;
867  for (test_blob = startx; test_blob < blobindex; test_blob++)
868  partids[test_blob] = biggestpart;
869  }
870  }
871  prevpart = partids[blobindex];
872  runlength = 1;

◆ old_first_xheight()

void old_first_xheight ( TO_ROW *  row,
TBOX  blobcoords[],
int  initialheight,
int  blobcount,
QSPLINE *  baseline,
float  jumplimit 
)

Definition at line 1367 of file oldbasel.cpp.

1388  {
1389  int blobindex; /*current blob */
1390  /*height statistics */
1391  STATS heightstat (0, MAXHEIGHT);
1392  int height; /*height of blob */
1393  int xcentre; /*centre of blob */
1394  int lineheight; /*approx xheight */
1395  float ascenders; /*ascender sum */
1396  int asccount; /*no of ascenders */
1397  float xsum; /*xheight sum */
1398  int xcount; /*xheight count */
1399  float diff; /*height difference */
1400 
1401  if (blobcount > 1) {
1402  for (blobindex = 0; blobindex < blobcount; blobindex++) {
1403  xcentre = (blobcoords[blobindex].left ()
1404  + blobcoords[blobindex].right ()) / 2;
1405  /*height of blob */
1406  height = static_cast<int>(blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5);
1407  if (height > initialheight * oldbl_xhfract
1408  && height > textord_min_xheight)
1409  heightstat.add (height, 1);
1410  }
1411  if (heightstat.get_total () > 3) {
1412  lineheight = static_cast<int>(heightstat.ile (0.25));
1413  if (lineheight <= 0)
1414  lineheight = static_cast<int>(heightstat.ile (0.5));
1415  }
1416  else
1417  lineheight = initialheight;
1418  }
1419  else {
1420  lineheight = static_cast<int>(blobcoords[0].top ()
1421  - baseline->y ((blobcoords[0].left ()
1422  + blobcoords[0].right ()) / 2) +
1423  0.5);
1424  }
1425 
1426  xsum = 0.0f;
1427  xcount = 0;
1428  for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
1429  blobindex++) {
1430  xcentre = (blobcoords[blobindex].left ()
1431  + blobcoords[blobindex].right ()) / 2;
1432  diff = blobcoords[blobindex].top () - baseline->y (xcentre);
1433  /*is it ascender */
1434  if (diff > lineheight + jumplimit) {
1435  ascenders += diff;
1436  asccount++; /*count ascenders */
1437  }
1438  else if (diff > lineheight - jumplimit) {
1439  xsum += diff; /*mean xheight */
1440  xcount++;

◆ partition_coords()

int partition_coords ( TBOX  blobcoords[],
int  blobcount,
char  partids[],
int  bestpart,
int  xcoords[],
int  ycoords[] 
)

Definition at line 1004 of file oldbasel.cpp.

1020  {
1021  int blobindex; /*no along text line */
1022  int pointcount; /*no of points */
1023 
1024  pointcount = 0;

◆ partition_line()

int partition_line ( TBOX  blobcoords[],
int  blobcount,
int *  numparts,
char  partids[],
int  partsizes[],
QSPLINE *  spline,
float  jumplimit,
float  ydiffs[] 
)

Definition at line 691 of file oldbasel.cpp.

705  {
706  int blobindex; /*no along text line */
707  int bestpart; /*best new partition */
708  int biggestpart; /*part with most members */
709  float diff; /*difference from line */
710  int startx; /*index of start blob */
711  float partdiffs[MAXPARTS]; /*step between parts */
712 
713  for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
714  partsizes[bestpart] = 0; /*zero them all */
715 
716  startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
717  *numparts = 1; /*1 partition */
718  bestpart = -1; /*first point */
719  float drift = 0.0f;
720  float last_delta = 0.0f;
721  for (blobindex = startx; blobindex < blobcount; blobindex++) {
722  /*do each blob in row */
723  diff = ydiffs[blobindex]; /*diff from line */
724  if (textord_oldbl_debug) {
725  tprintf ("%d(%d,%d), ", blobindex,
726  blobcoords[blobindex].left (),
727  blobcoords[blobindex].bottom ());
728  }
729  bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit,
730  &drift, &last_delta, numparts);
731  /*record partition */
732  partids[blobindex] = bestpart;
733  partsizes[bestpart]++; /*another in it */
734  }
735 
736  bestpart = -1; /*first point */
737  drift = 0.0f;
738  last_delta = 0.0f;
739  partsizes[0]--; /*doing 1st pt again */
740  /*do each blob in row */
741  for (blobindex = startx; blobindex >= 0; blobindex--) {
742  diff = ydiffs[blobindex]; /*diff from line */
743  if (textord_oldbl_debug) {
744  tprintf ("%d(%d,%d), ", blobindex,
745  blobcoords[blobindex].left (),
746  blobcoords[blobindex].bottom ());
747  }
748  bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit,
749  &drift, &last_delta, numparts);
750  /*record partition */
751  partids[blobindex] = bestpart;
752  partsizes[bestpart]++; /*another in it */
753  }
754 
755  for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
756  if (partsizes[bestpart] >= partsizes[biggestpart])
757  biggestpart = bestpart; /*new biggest */
758  if (textord_oldbl_merge_parts)
759  merge_oldbl_parts(blobcoords,
760  blobcount,
761  partids,

◆ pick_x_height()

void pick_x_height ( TO_ROW *  row,
int  modelist[],
int  lefts[],
int  rights[],
STATS *  heightstat,
int  mode_threshold 
)

Definition at line 1574 of file oldbasel.cpp.

1595  {
1596  int x;
1597  int y;
1598  int z;
1599  float ratio;
1600  int found_one_bigger = false;
1601  int best_x_height = 0;
1602  int best_asc = 0;
1603  int num_in_best;
1604 
1605  for (x = 0; x < MODENUM; x++) {
1606  for (y = 0; y < MODENUM; y++) {
1607  /* Check for two modes */
1608  if (modelist[x] && modelist[y] &&
1609  heightstat->pile_count (modelist[x]) > mode_threshold &&
1610  (!textord_ocropus_mode ||
1611  std::min(rights[modelist[x]], rights[modelist[y]]) >
1612  std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1613  ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]);
1614  if (1.2 < ratio && ratio < 1.8) {
1615  /* Two modes found */
1616  best_x_height = modelist[x];
1617  num_in_best = heightstat->pile_count (modelist[x]);
1618 
1619  /* Try to get one higher */
1620  do {
1621  found_one_bigger = false;
1622  for (z = 0; z < MODENUM; z++) {
1623  if (modelist[z] == best_x_height + 1 &&
1624  (!textord_ocropus_mode ||
1625  std::min(rights[modelist[x]], rights[modelist[y]]) >
1626  std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1627  ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]);
1628  if ((1.2 < ratio && ratio < 1.8) &&
1629  /* Should be half of best */
1630  heightstat->pile_count (modelist[z]) >
1631  num_in_best * 0.5) {
1632  best_x_height++;
1633  found_one_bigger = true;
1634  break;
1635  }
1636  }
1637  }
1638  }
1639  while (found_one_bigger);
1640 
1641  /* try to get a higher ascender */
1642 
1643  best_asc = modelist[y];
1644  num_in_best = heightstat->pile_count (modelist[y]);
1645 
1646  /* Try to get one higher */
1647  do {
1648  found_one_bigger = false;
1649  for (z = 0; z < MODENUM; z++) {
1650  if (modelist[z] > best_asc &&
1651  (!textord_ocropus_mode ||
1652  std::min(rights[modelist[x]], rights[modelist[y]]) >
1653  std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
1654  ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height);
1655  if ((1.2 < ratio && ratio < 1.8) &&
1656  /* Should be half of best */
1657  heightstat->pile_count (modelist[z]) >
1658  num_in_best * 0.5) {
1659  best_asc = modelist[z];
1660  found_one_bigger = true;
1661  break;
1662  }
1663  }
1664  }
1665  }
1666  while (found_one_bigger);
1667 
1668  row->xheight = static_cast<float>(best_x_height);
1669  row->ascrise = static_cast<float>(best_asc) - best_x_height;
1670  return;
1671  }
1672  }
1673  }
1674  }
1675 
1676  best_x_height = modelist[0]; /* Single Mode found */
1677  num_in_best = heightstat->pile_count (best_x_height);
1678  do {
1679  /* Try to get one higher */
1680  found_one_bigger = false;

◆ segment_spline()

int segment_spline ( TBOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
int  degree,
int  pointcount,
int  xstarts[] 
)

Definition at line 1034 of file oldbasel.cpp.

1051  {
1052  int ptindex; /*no along text line */
1053  int segment; /*partition no */
1054  int lastmin, lastmax; /*possible turn points */
1055  int turnpoints[SPLINESIZE]; /*good turning points */
1056  int turncount; /*no of turning points */
1057  int max_x; //max specified coord
1058 
1059  xstarts[0] = xcoords[0] - 1; //leftmost defined pt
1060  max_x = xcoords[pointcount - 1] + 1;
1061  if (degree < 2)
1062  pointcount = 0;
1063  turncount = 0; /*no turning points yet */
1064  if (pointcount > 3) {
1065  ptindex = 1;
1066  lastmax = lastmin = 0; /*start with first one */
1067  while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
1068  /*minimum */
1069  if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
1070  if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
1071  if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
1072  /*new max point */
1073  turnpoints[turncount++] = lastmax;
1074  lastmin = ptindex; /*latest minimum */
1075  }
1076  else if (ycoords[ptindex] < ycoords[lastmin]) {
1077  lastmin = ptindex; /*lower minimum */
1078  }
1079  }
1080 
1081  /*maximum */
1082  if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
1083  if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
1084  if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
1085  /*new min point */
1086  turnpoints[turncount++] = lastmin;
1087  lastmax = ptindex; /*latest maximum */
1088  }
1089  else if (ycoords[ptindex] > ycoords[lastmax]) {
1090  lastmax = ptindex; /*higher maximum */
1091  }
1092  }
1093  ptindex++;
1094  }
1095  /*possible global min */
1096  if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
1097  && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
1098  if (turncount < SPLINESIZE - 1)
1099  /*2 more turns */
1100  turnpoints[turncount++] = lastmax;
1101  if (turncount < SPLINESIZE - 1)
1102  turnpoints[turncount++] = ptindex;
1103  }
1104  else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
1105  /*possible global max */
1106  && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
1107  if (turncount < SPLINESIZE - 1)
1108  /*2 more turns */
1109  turnpoints[turncount++] = lastmin;
1110  if (turncount < SPLINESIZE - 1)
1111  turnpoints[turncount++] = ptindex;
1112  }
1113  else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
1114  && turncount < SPLINESIZE - 1) {
1115  if (ycoords[ptindex] > ycoords[lastmax])
1116  turnpoints[turncount++] = ptindex;
1117  else
1118  turnpoints[turncount++] = lastmax;
1119  }
1120  else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
1121  && turncount < SPLINESIZE - 1) {
1122  if (ycoords[ptindex] < ycoords[lastmin])
1123  turnpoints[turncount++] = ptindex;
1124  else
1125  turnpoints[turncount++] = lastmin;
1126  }
1127  }
1128 
1129  if (textord_oldbl_debug && turncount > 0)
1130  tprintf ("First turn is %d at (%d,%d)\n",
1131  turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
1132  for (segment = 1; segment < turncount; segment++) {
1133  /*centre y coord */
1134  lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
1135 
1136  /* fix alg so that it works with both rising and falling sections */
1137  if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
1138  /*find rising y centre */
1139  for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
1140  else
1141  /*find falling y centre */
1142  for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
1143 
1144  /*centre x */
1145  xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
1146  + xcoords[turnpoints[segment - 1]]
1147  + xcoords[turnpoints[segment]] + 2) / 4;
1148  /*halfway between turns */

◆ split_stepped_spline()

bool split_stepped_spline ( QSPLINE *  baseline,
float  jumplimit,
int *  xcoords,
int *  xstarts,
int &  segments 
)

Definition at line 1158 of file oldbasel.cpp.

1175  {
1176  bool doneany; //return value
1177  int segment; /*partition no */
1178  int startindex, centreindex, endindex;
1179  float leftcoord, rightcoord;
1180  int leftindex, rightindex;
1181  float step; //spline step
1182 
1183  doneany = false;
1184  startindex = 0;
1185  for (segment = 1; segment < segments - 1; segment++) {
1186  step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
1187  (xstarts[segment] + xstarts[segment + 1]) / 2.0);
1188  if (step < 0)
1189  step = -step;
1190  if (step > jumplimit) {
1191  while (xcoords[startindex] < xstarts[segment - 1])
1192  startindex++;
1193  centreindex = startindex;
1194  while (xcoords[centreindex] < xstarts[segment])
1195  centreindex++;
1196  endindex = centreindex;
1197  while (xcoords[endindex] < xstarts[segment + 1])
1198  endindex++;
1199  if (segments >= SPLINESIZE) {
1200  if (textord_debug_baselines)
1201  tprintf ("Too many segments to resegment spline!!\n");
1202  }
1203  else if (endindex - startindex >= textord_spline_medianwin * 3) {
1204  while (centreindex - startindex <
1205  textord_spline_medianwin * 3 / 2)
1206  centreindex++;
1207  while (endindex - centreindex <
1208  textord_spline_medianwin * 3 / 2)
1209  centreindex--;
1210  leftindex = (startindex + startindex + centreindex) / 3;
1211  rightindex = (centreindex + endindex + endindex) / 3;
1212  leftcoord =
1213  (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
1214  rightcoord =
1215  (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
1216  while (xcoords[leftindex] > leftcoord
1217  && leftindex - startindex > textord_spline_medianwin)
1218  leftindex--;
1219  while (xcoords[leftindex] < leftcoord
1220  && centreindex - leftindex >
1222  leftindex++;
1223  if (xcoords[leftindex] - leftcoord >
1224  leftcoord - xcoords[leftindex - 1])
1225  leftindex--;
1226  while (xcoords[rightindex] > rightcoord
1227  && rightindex - centreindex >
1229  rightindex--;
1230  while (xcoords[rightindex] < rightcoord
1231  && endindex - rightindex > textord_spline_medianwin)
1232  rightindex++;
1233  if (xcoords[rightindex] - rightcoord >
1234  rightcoord - xcoords[rightindex - 1])
1235  rightindex--;
1236  if (textord_debug_baselines)
1237  tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
1238  xstarts[segment],
1239  baseline->
1240  step ((xstarts[segment - 1] +
1241  xstarts[segment]) / 2.0,
1242  (xstarts[segment] +
1243  xstarts[segment + 1]) / 2.0),
1244  (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
1245  (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
1246  insert_spline_point (xstarts, segment,
1247  (xcoords[leftindex - 1] +
1248  xcoords[leftindex]) / 2,
1249  (xcoords[rightindex - 1] +
1250  xcoords[rightindex]) / 2, segments);
1251  doneany = true;
1252  }
1253  else if (textord_debug_baselines) {
1254  tprintf
1255  ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",

Variable Documentation

◆ textord_oldbl_debug

bool textord_oldbl_debug = false

"Debug old baseline generation"

Definition at line 38 of file oldbasel.cpp.

QLSQ
Definition: quadlsq.h:24
merge_oldbl_parts
void merge_oldbl_parts(TBOX blobcoords[], int blobcount, char partids[], int partsizes[], int biggestpart, float jumplimit)
Definition: oldbasel.cpp:771
QLSQ::fit
void fit(int degree)
Definition: quadlsq.cpp:95
ABS
#define ABS(x)
Definition: oldbasel.cpp:69
baseline
Definition: mfoutline.h:62
FCOORD::y
float y() const
Definition: points.h:209
ICOORD
integer coordinate
Definition: points.h:30
QSPLINE
Definition: quspline.h:31
FCOORD::x
float x() const
Definition: points.h:206
TBOX::top
int16_t top() const
Definition: rect.h:57
MAXBADRUN
#define MAXBADRUN
Definition: oldbasel.cpp:63
STATS::pile_count
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
QLSQ::get_c
double get_c()
Definition: quadlsq.h:66
FCOORD
Definition: points.h:187
tesseract::DetLineFit
Definition: detlinefit.h:56
MAXHEIGHT
#define MAXHEIGHT
Definition: oldbasel.cpp:61
textord_min_xheight
int textord_min_xheight
Definition: makerow.cpp:67
TBOX::height
int16_t height() const
Definition: rect.h:107
QLSQ::get_b
double get_b()
Definition: quadlsq.h:63
MINASCRISE
#define MINASCRISE
Definition: oldbasel.cpp:59
QLSQ::clear
void clear()
Definition: quadlsq.cpp:32
get_ydiffs
int get_ydiffs(TBOX blobcoords[], int blobcount, QSPLINE *spline, float ydiffs[])
Definition: oldbasel.cpp:883
textord_spline_medianwin
int textord_spline_medianwin
Definition: makerow.cpp:64
MODENUM
#define MODENUM
Definition: oldbasel.cpp:65
TURNLIMIT
#define TURNLIMIT
Definition: oldbasel.cpp:54
find_top_modes
void find_top_modes(STATS *stats, int statnum, int modelist[], int modenum)
Definition: oldbasel.cpp:1535
tesseract::DetLineFit::ConstrainedFit
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
textord_oldbl_debug
bool textord_oldbl_debug
Definition: oldbasel.cpp:38
HEIGHTBUCKETS
#define HEIGHTBUCKETS
Definition: oldbasel.cpp:64
TBOX::width
int16_t width() const
Definition: rect.h:114
TBOX::bottom
int16_t bottom() const
Definition: rect.h:64
choose_partition
int choose_partition(float diff, float partdiffs[], int lastpart, float jumplimit, float *drift, float *lastdelta, int *partcount)
Definition: oldbasel.cpp:933
QLSQ::add
void add(double x, double y)
Definition: quadlsq.cpp:53
TO_ROW::xheight
float xheight
Definition: blobbox.h:656
STATS
Definition: statistc.h:30
insert_spline_point
void insert_spline_point(int xstarts[], int segment, int coord1, int coord2, int &segments)
Definition: oldbasel.cpp:1264
box_next_pre_chopped
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:657
kMinModeFactor
const int kMinModeFactor
Definition: oldbasel.cpp:1532
QSPLINE::step
double step(double x1, double x2)
Definition: quspline.cpp:178
QSPLINE::y
double y(double x) const
Definition: quspline.cpp:202
kMinModeFactorOcropus
const int kMinModeFactorOcropus
Definition: oldbasel.cpp:1531
TBOX::left
int16_t left() const
Definition: rect.h:71
MAXPARTS
#define MAXPARTS
Definition: oldbasel.cpp:66
TBOX::right
int16_t right() const
Definition: rect.h:78
tesseract::DetLineFit::Add
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
MAXOVERLAP
#define MAXOVERLAP
Definition: oldbasel.cpp:62
TO_ROW::ascrise
float ascrise
Definition: blobbox.h:658
TO_ROW::baseline
QSPLINE baseline
Definition: blobbox.h:669
SPLINESIZE
#define SPLINESIZE
Definition: oldbasel.cpp:67
TO_ROW::blob_list
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:599