tesseract  4.0.0-1-g2a2b
blobbox.h File Reference
#include <cinttypes>
#include <cmath>
#include <cstdint>
#include "elst.h"
#include "elst2.h"
#include "errcode.h"
#include "ocrblock.h"
#include "params.h"
#include "pdblock.h"
#include "points.h"
#include "quspline.h"
#include "rect.h"
#include "scrollview.h"
#include "statistc.h"
#include "stepblob.h"
#include "tprintf.h"
#include "werd.h"

Go to the source code of this file.

Classes

class  BLOBNBOX
 
class  TO_ROW
 
class  TO_BLOCK
 

Namespaces

 tesseract
 

Enumerations

enum  PITCH_TYPE {
  PITCH_DUNNO, PITCH_DEF_FIXED, PITCH_MAYBE_FIXED, PITCH_DEF_PROP,
  PITCH_MAYBE_PROP, PITCH_CORR_FIXED, PITCH_CORR_PROP
}
 
enum  TabType {
  TT_NONE, TT_DELETED, TT_MAYBE_RAGGED, TT_MAYBE_ALIGNED,
  TT_CONFIRMED, TT_VLINE
}
 
enum  BlobRegionType {
  BRT_NOISE, BRT_HLINE, BRT_VLINE, BRT_RECTIMAGE,
  BRT_POLYIMAGE, BRT_UNKNOWN, BRT_VERT_TEXT, BRT_TEXT,
  BRT_COUNT
}
 
enum  BlobNeighbourDir {
  BND_LEFT, BND_BELOW, BND_RIGHT, BND_ABOVE,
  BND_COUNT
}
 
enum  BlobSpecialTextType {
  BSTT_NONE, BSTT_ITALIC, BSTT_DIGIT, BSTT_MATH,
  BSTT_UNCLEAR, BSTT_SKIP, BSTT_COUNT
}
 
enum  BlobTextFlowType {
  BTFT_NONE, BTFT_NONTEXT, BTFT_NEIGHBOURS, BTFT_CHAIN,
  BTFT_STRONG_CHAIN, BTFT_TEXT_ON_IMAGE, BTFT_LEADER, BTFT_COUNT
}
 

Functions

BlobNeighbourDir DirOtherWay (BlobNeighbourDir dir)
 
bool DominatesInMerge (BlobTextFlowType type1, BlobTextFlowType type2)
 
void find_cblob_limits (C_BLOB *blob, float leftx, float rightx, FCOORD rotation, float &ymin, float &ymax)
 
void find_cblob_vlimits (C_BLOB *blob, float leftx, float rightx, float &ymin, float &ymax)
 
void find_cblob_hlimits (C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)
 
C_BLOB * crotate_cblob (C_BLOB *blob, FCOORD rotation)
 
TBOX box_next (BLOBNBOX_IT *it)
 
TBOX box_next_pre_chopped (BLOBNBOX_IT *it)
 
void vertical_cblob_projection (C_BLOB *blob, STATS *stats)
 
void vertical_coutline_projection (C_OUTLINE *outline, STATS *stats)
 
void plot_blob_list (ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
 

Variables

double textord_error_weight = 3
 

Enumeration Type Documentation

◆ BlobNeighbourDir

Enumerator
BND_LEFT 
BND_BELOW 
BND_RIGHT 
BND_ABOVE 
BND_COUNT 

Definition at line 88 of file blobbox.h.

88  {
89  BND_LEFT,
90  BND_BELOW,
91  BND_RIGHT,
92  BND_ABOVE,
93  BND_COUNT
94 };

◆ BlobRegionType

Enumerator
BRT_NOISE 
BRT_HLINE 
BRT_VLINE 
BRT_RECTIMAGE 
BRT_POLYIMAGE 
BRT_UNKNOWN 
BRT_VERT_TEXT 
BRT_TEXT 
BRT_COUNT 

Definition at line 73 of file blobbox.h.

73  {
74  BRT_NOISE, // Neither text nor image.
75  BRT_HLINE, // Horizontal separator line.
76  BRT_VLINE, // Vertical separator line.
77  BRT_RECTIMAGE, // Rectangular image.
78  BRT_POLYIMAGE, // Non-rectangular image.
79  BRT_UNKNOWN, // Not determined yet.
80  BRT_VERT_TEXT, // Vertical alignment, not necessarily vertically oriented.
81  BRT_TEXT, // Convincing text.
82 
83  BRT_COUNT // Number of possibilities.
84 };

◆ BlobSpecialTextType

Enumerator
BSTT_NONE 
BSTT_ITALIC 
BSTT_DIGIT 
BSTT_MATH 
BSTT_UNCLEAR 
BSTT_SKIP 
BSTT_COUNT 

Definition at line 97 of file blobbox.h.

97  {
98  BSTT_NONE, // No special.
99  BSTT_ITALIC, // Italic style.
100  BSTT_DIGIT, // Digit symbols.
101  BSTT_MATH, // Mathematical symbols (not including digit).
102  BSTT_UNCLEAR, // Characters with low recognition rate.
103  BSTT_SKIP, // Characters that we skip labeling (usually too small).
104  BSTT_COUNT
105 };

◆ BlobTextFlowType

Enumerator
BTFT_NONE 
BTFT_NONTEXT 
BTFT_NEIGHBOURS 
BTFT_CHAIN 
BTFT_STRONG_CHAIN 
BTFT_TEXT_ON_IMAGE 
BTFT_LEADER 
BTFT_COUNT 

Definition at line 115 of file blobbox.h.

115  {
116  BTFT_NONE, // No text flow set yet.
117  BTFT_NONTEXT, // Flow too poor to be likely text.
118  BTFT_NEIGHBOURS, // Neighbours support flow in this direction.
119  BTFT_CHAIN, // There is a weak chain of text in this direction.
120  BTFT_STRONG_CHAIN, // There is a strong chain of text in this direction.
121  BTFT_TEXT_ON_IMAGE, // There is a strong chain of text on an image.
122  BTFT_LEADER, // Leader dots/dashes etc.
123  BTFT_COUNT
124 };

◆ PITCH_TYPE

enum PITCH_TYPE
Enumerator
PITCH_DUNNO 
PITCH_DEF_FIXED 
PITCH_MAYBE_FIXED 
PITCH_DEF_PROP 
PITCH_MAYBE_PROP 
PITCH_CORR_FIXED 
PITCH_CORR_PROP 

Definition at line 45 of file blobbox.h.

46 {
47  PITCH_DUNNO, // insufficient data
48  PITCH_DEF_FIXED, // definitely fixed
49  PITCH_MAYBE_FIXED, // could be
50  PITCH_DEF_PROP,
51  PITCH_MAYBE_PROP,
52  PITCH_CORR_FIXED,
53  PITCH_CORR_PROP
54 };

◆ TabType

enum TabType
Enumerator
TT_NONE 
TT_DELETED 
TT_MAYBE_RAGGED 
TT_MAYBE_ALIGNED 
TT_CONFIRMED 
TT_VLINE 

Definition at line 60 of file blobbox.h.

60  {
61  TT_NONE, // Not a tab.
62  TT_DELETED, // Not a tab after detailed analysis.
63  TT_MAYBE_RAGGED, // Initial designation of a tab-stop candidate.
64  TT_MAYBE_ALIGNED, // Initial designation of a tab-stop candidate.
65  TT_CONFIRMED, // Aligned with neighbours.
66  TT_VLINE // Detected as a vertical line.
67 };

Function Documentation

◆ box_next()

TBOX box_next ( BLOBNBOX_IT *  it)

Definition at line 637 of file blobbox.cpp.

639  {
640  BLOBNBOX *blob; //current blob
641  TBOX result; //total box
642 
643  blob = it->data ();
644  result = blob->bounding_box ();
645  do {
646  it->forward ();
647  blob = it->data ();
648  if (blob->cblob() == nullptr)
649  //was pre-chopped
650  result += blob->bounding_box ();
651  }
652  //until next real blob
653  while ((blob->cblob() == nullptr) || blob->joined_to_prev());
654  return result;
655 }
TBOX
Definition: rect.h:34
bool joined_to_prev() const
Definition: blobbox.h:257
const TBOX & bounding_box() const
Definition: blobbox.h:231
C_BLOB * cblob() const
Definition: blobbox.h:269

◆ box_next_pre_chopped()

TBOX box_next_pre_chopped ( BLOBNBOX_IT *  it)

Definition at line 666 of file blobbox.cpp.

668  {
669  BLOBNBOX *blob; //current blob
670  TBOX result; //total box
671 
672  blob = it->data ();
673  result = blob->bounding_box ();
674  do {
675  it->forward ();
676  blob = it->data ();
677  }
678  //until next real blob
679  while (blob->joined_to_prev ());
680  return result;
681 }
TBOX
Definition: rect.h:34
bool joined_to_prev() const
Definition: blobbox.h:257
const TBOX & bounding_box() const
Definition: blobbox.h:231

◆ crotate_cblob()

C_BLOB* crotate_cblob ( C_BLOB *  blob,
FCOORD  rotation 
)

Definition at line 612 of file blobbox.cpp.

615  {
616  C_OUTLINE_LIST out_list; //output outlines
617  //input outlines
618  C_OUTLINE_IT in_it = blob->out_list ();
619  //output outlines
620  C_OUTLINE_IT out_it = &out_list;
621 
622  for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
623  out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
624  }
625  return new C_BLOB (&out_list);
626 }
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70

◆ DirOtherWay()

BlobNeighbourDir DirOtherWay ( BlobNeighbourDir  dir)
inline

Definition at line 107 of file blobbox.h.

107  {
108  return static_cast<BlobNeighbourDir>(dir ^ 2);
109 }
BlobNeighbourDir
Definition: blobbox.h:88

◆ DominatesInMerge()

bool DominatesInMerge ( BlobTextFlowType  type1,
BlobTextFlowType  type2 
)
inline

Definition at line 130 of file blobbox.h.

130  {
131  // LEADER always loses.
132  if (type1 == BTFT_LEADER) return false;
133  if (type2 == BTFT_LEADER) return true;
134  // With those out of the way, the ordering of the enum determines the result.
135  return type1 >= type2;
136 }

◆ find_cblob_hlimits()

void find_cblob_hlimits ( C_BLOB *  blob,
float  bottomy,
float  topy,
float &  xmin,
float &  xmax 
)

Definition at line 577 of file blobbox.cpp.

582  {
583  int16_t stepindex; //current point
584  ICOORD pos; //current coords
585  ICOORD vec; //rotated step
586  C_OUTLINE *outline; //current outline
587  //outlines
588  C_OUTLINE_IT out_it = blob->out_list ();
589 
590  xmin = (float) INT32_MAX;
591  xmax = (float) -INT32_MAX;
592  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
593  outline = out_it.data ();
594  pos = outline->start_pos (); //get coords
595  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
596  //inside
597  if (pos.y () >= bottomy && pos.y () <= topy) {
598  UpdateRange(pos.x(), &xmin, &xmax);
599  }
600  vec = outline->step (stepindex);
601  pos += vec; //move to next
602  }
603  }
604 }
int16_t y() const
access function
Definition: points.h:57
const ICOORD & start_pos() const
Definition: coutln.h:148
ICOORD
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
int32_t pathlength() const
Definition: coutln.h:135
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
ICOORD step(int index) const
Definition: coutln.h:144
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:121

◆ find_cblob_limits()

void find_cblob_limits ( C_BLOB *  blob,
float  leftx,
float  rightx,
FCOORD  rotation,
float &  ymin,
float &  ymax 
)

Definition at line 500 of file blobbox.cpp.

506  {
507  int16_t stepindex; //current point
508  ICOORD pos; //current coords
509  ICOORD vec; //rotated step
510  C_OUTLINE *outline; //current outline
511  //outlines
512  C_OUTLINE_IT out_it = blob->out_list ();
513 
514  ymin = (float) INT32_MAX;
515  ymax = (float) -INT32_MAX;
516  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
517  outline = out_it.data ();
518  pos = outline->start_pos (); //get coords
519  pos.rotate (rotation);
520  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
521  //inside
522  if (pos.x () >= leftx && pos.x () <= rightx) {
523  UpdateRange(pos.y(), &ymin, &ymax);
524  }
525  vec = outline->step (stepindex);
526  vec.rotate (rotation);
527  pos += vec; //move to next
528  }
529  }
530 }
int16_t y() const
access function
Definition: points.h:57
const ICOORD & start_pos() const
Definition: coutln.h:148
ICOORD
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
int32_t pathlength() const
Definition: coutln.h:135
void rotate(const FCOORD &vec)
Definition: points.h:537
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
ICOORD step(int index) const
Definition: coutln.h:144
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:121

◆ find_cblob_vlimits()

void find_cblob_vlimits ( C_BLOB *  blob,
float  leftx,
float  rightx,
float &  ymin,
float &  ymax 
)

Definition at line 540 of file blobbox.cpp.

545  {
546  int16_t stepindex; //current point
547  ICOORD pos; //current coords
548  ICOORD vec; //rotated step
549  C_OUTLINE *outline; //current outline
550  //outlines
551  C_OUTLINE_IT out_it = blob->out_list ();
552 
553  ymin = (float) INT32_MAX;
554  ymax = (float) -INT32_MAX;
555  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
556  outline = out_it.data ();
557  pos = outline->start_pos (); //get coords
558  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
559  //inside
560  if (pos.x () >= leftx && pos.x () <= rightx) {
561  UpdateRange(pos.y(), &ymin, &ymax);
562  }
563  vec = outline->step (stepindex);
564  pos += vec; //move to next
565  }
566  }
567 }
int16_t y() const
access function
Definition: points.h:57
const ICOORD & start_pos() const
Definition: coutln.h:148
ICOORD
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
int32_t pathlength() const
Definition: coutln.h:135
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
ICOORD step(int index) const
Definition: coutln.h:144
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:121

◆ plot_blob_list()

void plot_blob_list ( ScrollView *  win,
BLOBNBOX_LIST *  list,
ScrollView::Color  body_colour,
ScrollView::Color  child_colour 
)

Definition at line 1087 of file blobbox.cpp.

1090  { // colour of child
1091  BLOBNBOX_IT it = list;
1092  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1093  it.data()->plot(win, body_colour, child_colour);
1094  }
1095 }

◆ vertical_cblob_projection()

void vertical_cblob_projection ( C_BLOB *  blob,
STATS *  stats 
)

Definition at line 869 of file blobbox.cpp.

872  {
873  //outlines of blob
874  C_OUTLINE_IT out_it = blob->out_list ();
875 
876  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
877  vertical_coutline_projection (out_it.data (), stats);
878  }
879 }
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
void vertical_coutline_projection(C_OUTLINE *outline, STATS *stats)
Definition: blobbox.cpp:889

◆ vertical_coutline_projection()

void vertical_coutline_projection ( C_OUTLINE *  outline,
STATS *  stats 
)

Definition at line 889 of file blobbox.cpp.

892  {
893  ICOORD pos; //current point
894  ICOORD step; //edge step
895  int32_t length; //of outline
896  int16_t stepindex; //current step
897  C_OUTLINE_IT out_it = outline->child ();
898 
899  pos = outline->start_pos ();
900  length = outline->pathlength ();
901  for (stepindex = 0; stepindex < length; stepindex++) {
902  step = outline->step (stepindex);
903  if (step.x () > 0) {
904  stats->add (pos.x (), -pos.y ());
905  } else if (step.x () < 0) {
906  stats->add (pos.x () - 1, pos.y ());
907  }
908  pos += step;
909  }
910 
911  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
912  vertical_coutline_projection (out_it.data (), stats);
913  }
914 }
int16_t y() const
access function
Definition: points.h:57
const ICOORD & start_pos() const
Definition: coutln.h:148
ICOORD
integer coordinate
Definition: points.h:32
int16_t x() const
access function
Definition: points.h:53
int32_t pathlength() const
Definition: coutln.h:135
void add(int32_t value, int32_t count)
Definition: statistc.cpp:100
C_OUTLINE_LIST * child()
Definition: coutln.h:108
void vertical_coutline_projection(C_OUTLINE *outline, STATS *stats)
Definition: blobbox.cpp:889
ICOORD step(int index) const
Definition: coutln.h:144

Variable Documentation

◆ textord_error_weight

double textord_error_weight = 3

"Weighting for error in believability"