tesseract  4.0.0-1-g2a2b
pithsync.cpp File Reference
#include <cmath>
#include <cfloat>
#include <vector>
#include "makerow.h"
#include "pitsync1.h"
#include "topitch.h"
#include "pithsync.h"
#include "tprintf.h"

Go to the source code of this file.

Macros

#define PROJECTION_MARGIN   10
 

Functions

double check_pitch_sync2 (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 
double check_pitch_sync3 (int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 

Macro Definition Documentation

◆ PROJECTION_MARGIN

#define PROJECTION_MARGIN   10

Definition at line 29 of file pithsync.cpp.

Function Documentation

◆ check_pitch_sync2()

double check_pitch_sync2 ( BLOBNBOX_IT *  blob_it,
int16_t  blob_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 294 of file pithsync.cpp.

307  {
308  bool faking; //illegal cut pt
309  bool mid_cut; //cheap cut pt.
310  int16_t x; //current coord
311  int16_t blob_index; //blob number
312  int16_t left_edge; //of word
313  int16_t right_edge; //of word
314  int16_t array_origin; //x coord of array
315  int16_t offset; //dist to legal area
316  int16_t zero_count; //projection zero
317  int16_t best_left_x = 0; //for equals
318  int16_t best_right_x = 0; //right edge
319  TBOX this_box; //bounding box
320  TBOX next_box; //box of next blob
321  FPSEGPT *segpt; //segment point
322  double best_cost; //best path
323  double mean_sum; //computes result
324  FPCUTPT *best_end; //end of best path
325  int16_t best_fake; //best fake level
326  int16_t best_count; //no of cuts
327  BLOBNBOX_IT this_it; //copy iterator
328  FPSEGPT_IT seg_it = seg_list; //output iterator
329 
330  // tprintf("Computing sync on word of %d blobs with pitch %d\n",
331  // blob_count, pitch);
332  // if (blob_count==8 && pitch==27)
333  // projection->print(stdout,TRUE);
334  zero_count = 0;
335  if (pitch < 3)
336  pitch = 3; //nothing ludicrous
337  if ((pitch - 3) / 2 < pitch_error)
338  pitch_error = (pitch - 3) / 2;
339  this_it = *blob_it;
340  this_box = box_next (&this_it);//get box
341  // left_edge=this_box.left(); //left of word
342  // right_edge=this_box.right();
343  // for (blob_index=1;blob_index<blob_count;blob_index++)
344  // {
345  // this_box=box_next(&this_it);
346  // if (this_box.right()>right_edge)
347  // right_edge=this_box.right();
348  // }
349  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
350  && left_edge < projection_right; left_edge++);
351  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
352  && right_edge > left_edge; right_edge--);
353  ASSERT_HOST (right_edge >= left_edge);
354  if (pitsync_linear_version >= 4)
355  return check_pitch_sync3 (projection_left, projection_right, zero_count,
356  pitch, pitch_error, projection,
357  projection_scale, occupation_count, seg_list,
358  start, end);
359  array_origin = left_edge - pitch;
360  // array of points
361  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
362  for (x = array_origin; x < left_edge; x++)
363  //free cuts
364  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
365  zero_count, pitch, x, 0);
366  for (offset = 0; offset <= pitch_error; offset++, x++)
367  //not quite free
368  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
369  zero_count, pitch, x, offset);
370 
371  this_it = *blob_it;
372  best_cost = FLT_MAX;
373  best_end = nullptr;
374  this_box = box_next (&this_it);//first box
375  next_box = box_next (&this_it);//second box
376  blob_index = 1;
377  while (x < right_edge - pitch_error) {
378  if (x > this_box.right () + pitch_error && blob_index < blob_count) {
379  this_box = next_box;
380  next_box = box_next (&this_it);
381  blob_index++;
382  }
383  faking = false;
384  mid_cut = false;
385  if (x <= this_box.left ())
386  offset = 0;
387  else if (x <= this_box.left () + pitch_error)
388  offset = x - this_box.left ();
389  else if (x >= this_box.right ())
390  offset = 0;
391  else if (x >= next_box.left () && blob_index < blob_count) {
392  offset = x - next_box.left ();
393  if (this_box.right () - x < offset)
394  offset = this_box.right () - x;
395  }
396  else if (x >= this_box.right () - pitch_error)
397  offset = this_box.right () - x;
398  else if (x - this_box.left () > pitch * pitsync_joined_edge
399  && this_box.right () - x > pitch * pitsync_joined_edge) {
400  mid_cut = true;
401  offset = 0;
402  }
403  else {
404  faking = true;
405  offset = projection->pile_count (x);
406  }
407  cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
408  faking, mid_cut, offset, projection,
409  projection_scale, zero_count, pitch,
410  pitch_error);
411  x++;
412  }
413 
414  best_fake = INT16_MAX;
415  best_cost = INT32_MAX;
416  best_count = INT16_MAX;
417  while (x < right_edge + pitch) {
418  offset = x < right_edge ? right_edge - x : 0;
419  cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
420  false, false, offset, projection,
421  projection_scale, zero_count, pitch,
422  pitch_error);
423  cutpts[x - array_origin].terminal = true;
424  if (cutpts[x - array_origin].index () +
425  cutpts[x - array_origin].fake_count <= best_count + best_fake) {
426  if (cutpts[x - array_origin].fake_count < best_fake
427  || (cutpts[x - array_origin].fake_count == best_fake
428  && cutpts[x - array_origin].cost_function () < best_cost)) {
429  best_fake = cutpts[x - array_origin].fake_count;
430  best_cost = cutpts[x - array_origin].cost_function ();
431  best_left_x = x;
432  best_right_x = x;
433  best_count = cutpts[x - array_origin].index ();
434  }
435  else if (cutpts[x - array_origin].fake_count == best_fake
436  && x == best_right_x + 1
437  && cutpts[x - array_origin].cost_function () == best_cost) {
438  //exactly equal
439  best_right_x = x;
440  }
441  }
442  x++;
443  }
444  ASSERT_HOST (best_fake < INT16_MAX);
445 
446  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
447  if (this_box.right () == textord_test_x
448  && this_box.top () == textord_test_y) {
449  for (x = left_edge - pitch; x < right_edge + pitch; x++) {
450  tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
451  x, cutpts[x - array_origin].cost_function (),
452  cutpts[x - array_origin].sum (),
453  cutpts[x - array_origin].squares (),
454  cutpts[x - array_origin].previous ()->position ());
455  }
456  }
457  occupation_count = -1;
458  do {
459  for (x = best_end->position () - pitch + pitch_error;
460  x < best_end->position () - pitch_error
461  && projection->pile_count (x) == 0; x++);
462  if (x < best_end->position () - pitch_error)
463  occupation_count++;
464  //copy it
465  segpt = new FPSEGPT (best_end);
466  seg_it.add_before_then_move (segpt);
467  best_end = best_end->previous ();
468  }
469  while (best_end != nullptr);
470  seg_it.move_to_last ();
471  mean_sum = seg_it.data ()->sum ();
472  mean_sum = mean_sum * mean_sum / best_count;
473  if (seg_it.data ()->squares () - mean_sum < 0)
474  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
475  seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
476  // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
477  // blob_count,pitch,seg_it.data()->squares()-mean_sum,
478  // occupation_count);
479  return seg_it.data ()->squares () - mean_sum;
480 }
int32_t pile_count(int32_t value) const
Definition: statistc.h:78
int32_t position()
Definition: pithsync.h:68
double pitsync_joined_edge
Definition: pitsync1.cpp:27
TBOX
Definition: rect.h:34
FPCUTPT * previous()
Definition: pithsync.h:80
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:637
double sum()
Definition: pithsync.h:77
int16_t left() const
Definition: rect.h:72
int16_t top() const
Definition: rect.h:58
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
int textord_test_y
Definition: makerow.cpp:62
int textord_test_x
Definition: makerow.cpp:61
int16_t right() const
Definition: rect.h:79
double check_pitch_sync3(int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:491
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ check_pitch_sync3()

double check_pitch_sync3 ( int16_t  projection_left,
int16_t  projection_right,
int16_t  zero_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 491 of file pithsync.cpp.

503  {
504  bool faking; //illegal cut pt
505  bool mid_cut; //cheap cut pt.
506  int16_t left_edge; //of word
507  int16_t right_edge; //of word
508  int16_t x; //current coord
509  int16_t array_origin; //x coord of array
510  int16_t offset; //dist to legal area
511  int16_t projection_offset; //from scaled projection
512  int16_t prev_zero; //previous zero dist
513  int16_t next_zero; //next zero dist
514  int16_t zero_offset; //scan window
515  int16_t best_left_x = 0; //for equals
516  int16_t best_right_x = 0; //right edge
517  FPSEGPT *segpt; //segment point
518  int minindex; //next input position
519  int test_index; //index to mins
520  double best_cost; //best path
521  double mean_sum; //computes result
522  FPCUTPT *best_end; //end of best path
523  int16_t best_fake; //best fake level
524  int16_t best_count; //no of cuts
525  FPSEGPT_IT seg_it = seg_list; //output iterator
526 
527  end = (end - start) % pitch;
528  if (pitch < 3)
529  pitch = 3; //nothing ludicrous
530  if ((pitch - 3) / 2 < pitch_error)
531  pitch_error = (pitch - 3) / 2;
532  //min dist of zero
533  zero_offset = (int16_t) (pitch * pitsync_joined_edge);
534  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
535  && left_edge < projection_right; left_edge++);
536  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
537  && right_edge > left_edge; right_edge--);
538  array_origin = left_edge - pitch;
539  // array of points
540  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
541  // local min results
542  std::vector<BOOL8> mins(pitch_error * 2 + 1);
543  for (x = array_origin; x < left_edge; x++)
544  //free cuts
545  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
546  zero_count, pitch, x, 0);
547  prev_zero = left_edge - 1;
548  for (offset = 0; offset <= pitch_error; offset++, x++)
549  //not quite free
550  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
551  zero_count, pitch, x, offset);
552 
553  best_cost = FLT_MAX;
554  best_end = nullptr;
555  for (offset = -pitch_error, minindex = 0; offset < pitch_error;
556  offset++, minindex++)
557  mins[minindex] = projection->local_min (x + offset);
558  next_zero = x + zero_offset + 1;
559  for (offset = next_zero - 1; offset >= x; offset--) {
560  if (projection->pile_count (offset) <= zero_count) {
561  next_zero = offset;
562  break;
563  }
564  }
565  while (x < right_edge - pitch_error) {
566  mins[minindex] = projection->local_min (x + pitch_error);
567  minindex++;
568  if (minindex > pitch_error * 2)
569  minindex = 0;
570  faking = false;
571  mid_cut = false;
572  offset = 0;
573  if (projection->pile_count (x) <= zero_count) {
574  prev_zero = x;
575  }
576  else {
577  for (offset = 1; offset <= pitch_error; offset++)
578  if (projection->pile_count (x + offset) <= zero_count
579  || projection->pile_count (x - offset) <= zero_count)
580  break;
581  }
582  if (offset > pitch_error) {
583  if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
584  for (offset = 0; offset <= pitch_error; offset++) {
585  test_index = minindex + pitch_error + offset;
586  if (test_index > pitch_error * 2)
587  test_index -= pitch_error * 2 + 1;
588  if (mins[test_index])
589  break;
590  test_index = minindex + pitch_error - offset;
591  if (test_index > pitch_error * 2)
592  test_index -= pitch_error * 2 + 1;
593  if (mins[test_index])
594  break;
595  }
596  }
597  if (offset > pitch_error) {
598  offset = projection->pile_count (x);
599  faking = true;
600  }
601  else {
602  projection_offset =
603  (int16_t) (projection->pile_count (x) / projection_scale);
604  if (projection_offset > offset)
605  offset = projection_offset;
606  mid_cut = true;
607  }
608  }
609  if ((start == 0 && end == 0)
610  || !textord_fast_pitch_test
611  || (x - projection_left - start) % pitch <= end)
612  cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
613  faking, mid_cut, offset, projection,
614  projection_scale, zero_count, pitch,
615  pitch_error);
616  else
617  cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x,
618  faking, mid_cut, offset,
619  projection, projection_scale,
620  zero_count, pitch,
621  pitch_error);
622  x++;
623  if (next_zero < x || next_zero == x + zero_offset)
624  next_zero = x + zero_offset + 1;
625  if (projection->pile_count (x + zero_offset) <= zero_count)
626  next_zero = x + zero_offset;
627  }
628 
629  best_fake = INT16_MAX;
630  best_cost = INT32_MAX;
631  best_count = INT16_MAX;
632  while (x < right_edge + pitch) {
633  offset = x < right_edge ? right_edge - x : 0;
634  cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
635  false, false, offset, projection,
636  projection_scale, zero_count, pitch,
637  pitch_error);
638  cutpts[x - array_origin].terminal = true;
639  if (cutpts[x - array_origin].index () +
640  cutpts[x - array_origin].fake_count <= best_count + best_fake) {
641  if (cutpts[x - array_origin].fake_count < best_fake
642  || (cutpts[x - array_origin].fake_count == best_fake
643  && cutpts[x - array_origin].cost_function () < best_cost)) {
644  best_fake = cutpts[x - array_origin].fake_count;
645  best_cost = cutpts[x - array_origin].cost_function ();
646  best_left_x = x;
647  best_right_x = x;
648  best_count = cutpts[x - array_origin].index ();
649  }
650  else if (cutpts[x - array_origin].fake_count == best_fake
651  && x == best_right_x + 1
652  && cutpts[x - array_origin].cost_function () == best_cost) {
653  //exactly equal
654  best_right_x = x;
655  }
656  }
657  x++;
658  }
659  ASSERT_HOST (best_fake < INT16_MAX);
660 
661  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
662  // for (x=left_edge-pitch;x<right_edge+pitch;x++)
663  // {
664  // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
665  // x,cutpts[x-array_origin].cost_function(),
666  // cutpts[x-array_origin].sum(),
667  // cutpts[x-array_origin].squares(),
668  // cutpts[x-array_origin].previous()->position());
669  // }
670  occupation_count = -1;
671  do {
672  for (x = best_end->position () - pitch + pitch_error;
673  x < best_end->position () - pitch_error
674  && projection->pile_count (x) == 0; x++);
675  if (x < best_end->position () - pitch_error)
676  occupation_count++;
677  //copy it
678  segpt = new FPSEGPT (best_end);
679  seg_it.add_before_then_move (segpt);
680  best_end = best_end->previous ();
681  }
682  while (best_end != nullptr);
683  seg_it.move_to_last ();
684  mean_sum = seg_it.data ()->sum ();
685  mean_sum = mean_sum * mean_sum / best_count;
686  if (seg_it.data ()->squares () - mean_sum < 0)
687  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
688  seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
689  return seg_it.data ()->squares () - mean_sum;
690 }
EXTERN bool textord_fast_pitch_test
Definition: topitch.cpp:46
int32_t pile_count(int32_t value) const
Definition: statistc.h:78
int32_t position()
Definition: pithsync.h:68
double pitsync_joined_edge
Definition: pitsync1.cpp:27
bool local_min(int32_t x) const
Definition: statistc.cpp:261
FPCUTPT * previous()
Definition: pithsync.h:80
double sum()
Definition: pithsync.h:77
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
#define ASSERT_HOST(x)
Definition: errcode.h:84