tesseract  5.0.0-alpha-619-ge9db
werd.h File Reference
#include "bits16.h"
#include "elst2.h"
#include "params.h"
#include "stepblob.h"
#include <tesseract/strngs.h>
#include "ocrrow.h"

Go to the source code of this file.

Classes

class  WERD
 

Enumerations

enum  WERD_FLAGS {
  W_SEGMENTED, W_ITALIC, W_BOLD, W_BOL,
  W_EOL, W_NORMALIZED, W_SCRIPT_HAS_XHEIGHT, W_SCRIPT_IS_LATIN,
  W_DONT_CHOP, W_REP_CHAR, W_FUZZY_SP, W_FUZZY_NON,
  W_INVERSE
}
 
enum  DISPLAY_FLAGS {
  DF_BOX, DF_TEXT, DF_POLYGONAL, DF_EDGE_STEP,
  DF_BN_POLYGONAL, DF_BLAMER
}
 

Functions

int word_comparator (const void *word1p, const void *word2p)
 

Enumeration Type Documentation

◆ DISPLAY_FLAGS

Enumerator
DF_BOX 

Bounding box.

DF_TEXT 

Correct ASCII text.

DF_POLYGONAL 

Polygonal approximation.

DF_EDGE_STEP 

Edge steps.

DF_BN_POLYGONAL 

Baseline-normalised polygonal approximation.

DF_BLAMER 

Blamer information.

Definition at line 43 of file werd.h.

44  {
45  /* Display flags bit number allocations */
46  DF_BOX,
47  DF_TEXT,
48  DF_POLYGONAL,
49  DF_EDGE_STEP,
50  DF_BN_POLYGONAL,
51  DF_BLAMER
52  };

◆ WERD_FLAGS

enum WERD_FLAGS
Enumerator
W_SEGMENTED 

correctly segmented

W_ITALIC 

italic text

W_BOLD 

bold text

W_BOL 

start of line

W_EOL 

end of line

W_NORMALIZED 

flags

W_SCRIPT_HAS_XHEIGHT 

x-height concept makes sense.

W_SCRIPT_IS_LATIN 

Special case: Latin script, for y-splitting.

W_DONT_CHOP 

fixed pitch chopped

W_REP_CHAR 

repeated character

W_FUZZY_SP 

fuzzy space

W_FUZZY_NON 

fuzzy nonspace

W_INVERSE 

white on black

Definition at line 27 of file werd.h.

28  {
29  W_SEGMENTED,
30  W_ITALIC,
31  W_BOLD,
32  W_BOL,
33  W_EOL,
34  W_NORMALIZED,
35  W_SCRIPT_HAS_XHEIGHT,
36  W_SCRIPT_IS_LATIN,
37  W_DONT_CHOP,
38  W_REP_CHAR,
39  W_FUZZY_SP,
40  W_FUZZY_NON,
41  W_INVERSE
42  };

Function Documentation

◆ word_comparator()

int word_comparator ( const void *  word1p,
const void *  word2p 
)

word_comparator()

Word comparator used to sort a word list so that the words are in increasing order of their left edge.

Definition at line 369 of file werd.cpp.

370  {
371  const WERD* word1 = *reinterpret_cast<const WERD* const*>(word1p);
372  const WERD* word2 = *reinterpret_cast<const WERD* const*>(word2p);
373  return word1->bounding_box().left() - word2->bounding_box().left();
374  }
W_SCRIPT_IS_LATIN
Special case: Latin script, for y-splitting.
Definition: werd.h:50
W_REP_CHAR
repeated character
Definition: werd.h:52
W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:51
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
DF_TEXT
Correct ASCII text.
Definition: werd.h:46
W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
Definition: werd.h:49
W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:54
W_EOL
end of line
Definition: werd.h:47
DF_BLAMER
Blamer information.
Definition: werd.h:50
DF_EDGE_STEP
Edge steps.
Definition: werd.h:48
W_NORMALIZED
flags
Definition: werd.h:48
DF_BOX
Bounding box.
Definition: werd.h:45
DF_BN_POLYGONAL
Baseline-normalised polygonal approximation.
Definition: werd.h:49
W_INVERSE
white on black
Definition: werd.h:55
W_FUZZY_SP
fuzzy space
Definition: werd.h:53
WERD
Definition: werd.h:55
TBOX::left
int16_t left() const
Definition: rect.h:71
W_BOLD
bold text
Definition: werd.h:45
DF_POLYGONAL
Polygonal approximation.
Definition: werd.h:47
W_SEGMENTED
correctly segmented
Definition: werd.h:43
W_BOL
start of line
Definition: werd.h:46
W_ITALIC
italic text
Definition: werd.h:44