tesseract  5.0.0-alpha-619-ge9db
pithsync.cpp File Reference
#include <cmath>
#include <cfloat>
#include <vector>
#include "makerow.h"
#include "pitsync1.h"
#include "topitch.h"
#include "pithsync.h"
#include "tprintf.h"

Go to the source code of this file.

Functions

double check_pitch_sync2 (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 
double check_pitch_sync3 (int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 

Function Documentation

◆ check_pitch_sync2()

double check_pitch_sync2 ( BLOBNBOX_IT *  blob_it,
int16_t  blob_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 286 of file pithsync.cpp.

304  {
305  bool faking; //illegal cut pt
306  bool mid_cut; //cheap cut pt.
307  int16_t x; //current coord
308  int16_t blob_index; //blob number
309  int16_t left_edge; //of word
310  int16_t right_edge; //of word
311  int16_t array_origin; //x coord of array
312  int16_t offset; //dist to legal area
313  int16_t zero_count; //projection zero
314  int16_t best_left_x = 0; //for equals
315  int16_t best_right_x = 0; //right edge
316  TBOX this_box; //bounding box
317  TBOX next_box; //box of next blob
318  FPSEGPT *segpt; //segment point
319  double best_cost; //best path
320  double mean_sum; //computes result
321  FPCUTPT *best_end; //end of best path
322  int16_t best_fake; //best fake level
323  int16_t best_count; //no of cuts
324  BLOBNBOX_IT this_it; //copy iterator
325  FPSEGPT_IT seg_it = seg_list; //output iterator
326 
327  // tprintf("Computing sync on word of %d blobs with pitch %d\n",
328  // blob_count, pitch);
329  // if (blob_count==8 && pitch==27)
330  // projection->print(stdout,true);
331  zero_count = 0;
332  if (pitch < 3)
333  pitch = 3; //nothing ludicrous
334  if ((pitch - 3) / 2 < pitch_error)
335  pitch_error = (pitch - 3) / 2;
336  this_it = *blob_it;
337  this_box = box_next (&this_it);//get box
338  // left_edge=this_box.left(); //left of word
339  // right_edge=this_box.right();
340  // for (blob_index=1;blob_index<blob_count;blob_index++)
341  // {
342  // this_box=box_next(&this_it);
343  // if (this_box.right()>right_edge)
344  // right_edge=this_box.right();
345  // }
346  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
347  && left_edge < projection_right; left_edge++);
348  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
349  && right_edge > left_edge; right_edge--);
350  ASSERT_HOST (right_edge >= left_edge);
351  if (pitsync_linear_version >= 4)
352  return check_pitch_sync3 (projection_left, projection_right, zero_count,
353  pitch, pitch_error, projection,
354  projection_scale, occupation_count, seg_list,
355  start, end);
356  array_origin = left_edge - pitch;
357  // array of points
358  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
359  for (x = array_origin; x < left_edge; x++)
360  //free cuts
361  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
362  zero_count, pitch, x, 0);
363  for (offset = 0; offset <= pitch_error; offset++, x++)
364  //not quite free
365  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
366  zero_count, pitch, x, offset);
367 
368  this_it = *blob_it;
369  best_cost = FLT_MAX;
370  best_end = nullptr;
371  this_box = box_next (&this_it);//first box
372  next_box = box_next (&this_it);//second box
373  blob_index = 1;
374  while (x < right_edge - pitch_error) {
375  if (x > this_box.right () + pitch_error && blob_index < blob_count) {
376  this_box = next_box;
377  next_box = box_next (&this_it);
378  blob_index++;
379  }
380  faking = false;
381  mid_cut = false;
382  if (x <= this_box.left ())
383  offset = 0;
384  else if (x <= this_box.left () + pitch_error)
385  offset = x - this_box.left ();
386  else if (x >= this_box.right ())
387  offset = 0;
388  else if (x >= next_box.left () && blob_index < blob_count) {
389  offset = x - next_box.left ();
390  if (this_box.right () - x < offset)
391  offset = this_box.right () - x;
392  }
393  else if (x >= this_box.right () - pitch_error)
394  offset = this_box.right () - x;
395  else if (x - this_box.left () > pitch * pitsync_joined_edge
396  && this_box.right () - x > pitch * pitsync_joined_edge) {
397  mid_cut = true;
398  offset = 0;
399  }
400  else {
401  faking = true;
402  offset = projection->pile_count (x);
403  }
404  cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
405  faking, mid_cut, offset, projection,
406  projection_scale, zero_count, pitch,
407  pitch_error);
408  x++;
409  }
410 
411  best_fake = INT16_MAX;
412  best_cost = INT32_MAX;
413  best_count = INT16_MAX;
414  while (x < right_edge + pitch) {
415  offset = x < right_edge ? right_edge - x : 0;
416  cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
417  false, false, offset, projection,
418  projection_scale, zero_count, pitch,
419  pitch_error);
420  cutpts[x - array_origin].terminal = true;
421  if (cutpts[x - array_origin].index () +
422  cutpts[x - array_origin].fake_count <= best_count + best_fake) {
423  if (cutpts[x - array_origin].fake_count < best_fake
424  || (cutpts[x - array_origin].fake_count == best_fake
425  && cutpts[x - array_origin].cost_function () < best_cost)) {
426  best_fake = cutpts[x - array_origin].fake_count;
427  best_cost = cutpts[x - array_origin].cost_function ();
428  best_left_x = x;
429  best_right_x = x;
430  best_count = cutpts[x - array_origin].index ();
431  }
432  else if (cutpts[x - array_origin].fake_count == best_fake
433  && x == best_right_x + 1
434  && cutpts[x - array_origin].cost_function () == best_cost) {
435  //exactly equal
436  best_right_x = x;
437  }
438  }
439  x++;
440  }
441  ASSERT_HOST (best_fake < INT16_MAX);
442 
443  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
444  if (this_box.right () == textord_test_x
445  && this_box.top () == textord_test_y) {
446  for (x = left_edge - pitch; x < right_edge + pitch; x++) {
447  tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
448  x, cutpts[x - array_origin].cost_function (),
449  cutpts[x - array_origin].sum (),
450  cutpts[x - array_origin].squares (),
451  cutpts[x - array_origin].previous ()->position ());
452  }
453  }
454  occupation_count = -1;
455  do {
456  for (x = best_end->position () - pitch + pitch_error;
457  x < best_end->position () - pitch_error
458  && projection->pile_count (x) == 0; x++);
459  if (x < best_end->position () - pitch_error)
460  occupation_count++;
461  //copy it
462  segpt = new FPSEGPT (best_end);
463  seg_it.add_before_then_move (segpt);
464  best_end = best_end->previous ();
465  }
466  while (best_end != nullptr);
467  seg_it.move_to_last ();
468  mean_sum = seg_it.data ()->sum ();
469  mean_sum = mean_sum * mean_sum / best_count;
470  if (seg_it.data ()->squares () - mean_sum < 0)
471  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
472  seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);

◆ check_pitch_sync3()

double check_pitch_sync3 ( int16_t  projection_left,
int16_t  projection_right,
int16_t  zero_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 482 of file pithsync.cpp.

500  {
501  bool faking; //illegal cut pt
502  bool mid_cut; //cheap cut pt.
503  int16_t left_edge; //of word
504  int16_t right_edge; //of word
505  int16_t x; //current coord
506  int16_t array_origin; //x coord of array
507  int16_t offset; //dist to legal area
508  int16_t projection_offset; //from scaled projection
509  int16_t prev_zero; //previous zero dist
510  int16_t next_zero; //next zero dist
511  int16_t zero_offset; //scan window
512  int16_t best_left_x = 0; //for equals
513  int16_t best_right_x = 0; //right edge
514  FPSEGPT *segpt; //segment point
515  int minindex; //next input position
516  int test_index; //index to mins
517  double best_cost; //best path
518  double mean_sum; //computes result
519  FPCUTPT *best_end; //end of best path
520  int16_t best_fake; //best fake level
521  int16_t best_count; //no of cuts
522  FPSEGPT_IT seg_it = seg_list; //output iterator
523 
524  end = (end - start) % pitch;
525  if (pitch < 3)
526  pitch = 3; //nothing ludicrous
527  if ((pitch - 3) / 2 < pitch_error)
528  pitch_error = (pitch - 3) / 2;
529  //min dist of zero
530  zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
531  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
532  && left_edge < projection_right; left_edge++);
533  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
534  && right_edge > left_edge; right_edge--);
535  array_origin = left_edge - pitch;
536  // array of points
537  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
538  // local min results
539  std::vector<bool> mins(pitch_error * 2 + 1);
540  for (x = array_origin; x < left_edge; x++)
541  //free cuts
542  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
543  zero_count, pitch, x, 0);
544  prev_zero = left_edge - 1;
545  for (offset = 0; offset <= pitch_error; offset++, x++)
546  //not quite free
547  cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
548  zero_count, pitch, x, offset);
549 
550  best_cost = FLT_MAX;
551  best_end = nullptr;
552  for (offset = -pitch_error, minindex = 0; offset < pitch_error;
553  offset++, minindex++)
554  mins[minindex] = projection->local_min (x + offset);
555  next_zero = x + zero_offset + 1;
556  for (offset = next_zero - 1; offset >= x; offset--) {
557  if (projection->pile_count (offset) <= zero_count) {
558  next_zero = offset;
559  break;
560  }
561  }
562  while (x < right_edge - pitch_error) {
563  mins[minindex] = projection->local_min (x + pitch_error);
564  minindex++;
565  if (minindex > pitch_error * 2)
566  minindex = 0;
567  faking = false;
568  mid_cut = false;
569  offset = 0;
570  if (projection->pile_count (x) <= zero_count) {
571  prev_zero = x;
572  }
573  else {
574  for (offset = 1; offset <= pitch_error; offset++)
575  if (projection->pile_count (x + offset) <= zero_count
576  || projection->pile_count (x - offset) <= zero_count)
577  break;
578  }
579  if (offset > pitch_error) {
580  if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
581  for (offset = 0; offset <= pitch_error; offset++) {
582  test_index = minindex + pitch_error + offset;
583  if (test_index > pitch_error * 2)
584  test_index -= pitch_error * 2 + 1;
585  if (mins[test_index])
586  break;
587  test_index = minindex + pitch_error - offset;
588  if (test_index > pitch_error * 2)
589  test_index -= pitch_error * 2 + 1;
590  if (mins[test_index])
591  break;
592  }
593  }
594  if (offset > pitch_error) {
595  offset = projection->pile_count (x);
596  faking = true;
597  }
598  else {
599  projection_offset =
600  static_cast<int16_t>(projection->pile_count (x) / projection_scale);
601  if (projection_offset > offset)
602  offset = projection_offset;
603  mid_cut = true;
604  }
605  }
606  if ((start == 0 && end == 0)
607  || !textord_fast_pitch_test
608  || (x - projection_left - start) % pitch <= end)
609  cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
610  faking, mid_cut, offset, projection,
611  projection_scale, zero_count, pitch,
612  pitch_error);
613  else
614  cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x,
615  faking, mid_cut, offset,
616  projection, projection_scale,
617  zero_count, pitch,
618  pitch_error);
619  x++;
620  if (next_zero < x || next_zero == x + zero_offset)
621  next_zero = x + zero_offset + 1;
622  if (projection->pile_count (x + zero_offset) <= zero_count)
623  next_zero = x + zero_offset;
624  }
625 
626  best_fake = INT16_MAX;
627  best_cost = INT32_MAX;
628  best_count = INT16_MAX;
629  while (x < right_edge + pitch) {
630  offset = x < right_edge ? right_edge - x : 0;
631  cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
632  false, false, offset, projection,
633  projection_scale, zero_count, pitch,
634  pitch_error);
635  cutpts[x - array_origin].terminal = true;
636  if (cutpts[x - array_origin].index () +
637  cutpts[x - array_origin].fake_count <= best_count + best_fake) {
638  if (cutpts[x - array_origin].fake_count < best_fake
639  || (cutpts[x - array_origin].fake_count == best_fake
640  && cutpts[x - array_origin].cost_function () < best_cost)) {
641  best_fake = cutpts[x - array_origin].fake_count;
642  best_cost = cutpts[x - array_origin].cost_function ();
643  best_left_x = x;
644  best_right_x = x;
645  best_count = cutpts[x - array_origin].index ();
646  }
647  else if (cutpts[x - array_origin].fake_count == best_fake
648  && x == best_right_x + 1
649  && cutpts[x - array_origin].cost_function () == best_cost) {
650  //exactly equal
651  best_right_x = x;
652  }
653  }
654  x++;
655  }
656  ASSERT_HOST (best_fake < INT16_MAX);
657 
658  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
659  // for (x=left_edge-pitch;x<right_edge+pitch;x++)
660  // {
661  // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
662  // x,cutpts[x-array_origin].cost_function(),
663  // cutpts[x-array_origin].sum(),
664  // cutpts[x-array_origin].squares(),
665  // cutpts[x-array_origin].previous()->position());
666  // }
667  occupation_count = -1;
668  do {
669  for (x = best_end->position () - pitch + pitch_error;
670  x < best_end->position () - pitch_error
671  && projection->pile_count (x) == 0; x++);
672  if (x < best_end->position () - pitch_error)
673  occupation_count++;
674  //copy it
675  segpt = new FPSEGPT (best_end);
676  seg_it.add_before_then_move (segpt);
677  best_end = best_end->previous ();
678  }
679  while (best_end != nullptr);
680  seg_it.move_to_last ();
681  mean_sum = seg_it.data ()->sum ();
pitsync_joined_edge
double pitsync_joined_edge
Definition: pitsync1.cpp:25
check_pitch_sync3
double check_pitch_sync3(int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:482
ASSERT_HOST
#define ASSERT_HOST(x)
Definition: errcode.h:87
TBOX::top
int16_t top() const
Definition: rect.h:57
STATS::pile_count
int32_t pile_count(int32_t value) const
Definition: statistc.h:75
FPCUTPT::previous
FPCUTPT * previous()
Definition: pithsync.h:78
textord_fast_pitch_test
bool textord_fast_pitch_test
Definition: topitch.cpp:42
textord_test_y
int textord_test_y
Definition: makerow.cpp:61
FPCUTPT
Definition: pithsync.h:27
FPCUTPT::position
int32_t position()
Definition: pithsync.h:66
STATS::local_min
bool local_min(int32_t x) const
Definition: statistc.cpp:240
FPCUTPT::sum
double sum()
Definition: pithsync.h:75
box_next
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:629
TBOX::left
int16_t left() const
Definition: rect.h:71
TBOX::right
int16_t right() const
Definition: rect.h:78
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
FPSEGPT
Definition: pitsync1.h:31
textord_test_x
int textord_test_x
Definition: makerow.cpp:60
TBOX
Definition: rect.h:33