tesseract  5.0.0-alpha-619-ge9db
werd.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: werd.h
3  * Description: Code for the WERD class.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1991, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef WERD_H
20 #define WERD_H
21 
22 #include "bits16.h"
23 #include "elst2.h"
24 #include "params.h"
25 #include "stepblob.h"
26 #include <tesseract/strngs.h>
27 
// Bit numbers of the per-word flags. WERD::flag() / WERD::set_flag() pass
// these values straight to BITS16::bit() / BITS16::set_bit(), so the
// enumerator order fixes the bit positions and must not be rearranged.
//
// W_SCRIPT_HAS_XHEIGHT and W_SCRIPT_IS_LATIN were missing from this listing
// (its embedded line numbers jump over them); they are restored here in the
// position given by the cross-reference index, between W_NORMALIZED and
// W_DONT_CHOP.
enum WERD_FLAGS {
  W_SEGMENTED,           ///< correctly segmented
  W_ITALIC,              ///< italic text
  W_BOLD,                ///< bold text
  W_BOL,                 ///< start of line
  W_EOL,                 ///< end of line
  W_NORMALIZED,          ///< flags
  W_SCRIPT_HAS_XHEIGHT,  ///< x-height concept makes sense.
  W_SCRIPT_IS_LATIN,     ///< Special case latin for y. splitting.
  W_DONT_CHOP,           ///< fixed pitch chopped
  W_REP_CHAR,            ///< repeated character
  W_FUZZY_SP,            ///< fuzzy space
  W_FUZZY_NON,           ///< fuzzy nonspace
  W_INVERSE              ///< white on black
};
43 
// Bit numbers for the word display flags; presumably the values passed to
// WERD::display_flag() / WERD::set_display_flag() -- confirm against callers.
//
// The enum header and its enumerators were missing from this listing (only
// the inner comment and the closing brace survived); they are restored here
// in the order given by the cross-reference index.
enum DISPLAY_FLAGS {
  /* Display flags bit number allocations */
  DF_BOX,           ///< Bounding box.
  DF_TEXT,          ///< Correct ascii.
  DF_POLYGONAL,     ///< Polyg approx.
  DF_EDGE_STEP,     ///< Edge steps.
  DF_BN_POLYGONAL,  ///< BL normalised poly approx.
  DF_BLAMER         ///< Blamer information.
};
53 
54 class ROW; // forward decl
55 
56 class WERD : public ELIST2_LINK {
57  public:
58  WERD() = default;
59  // WERD constructed with:
60  // blob_list - blobs of the word (we take this list's contents)
61  // blanks - number of blanks before the word
62  // text - correct text (outlives WERD)
63  WERD(C_BLOB_LIST* blob_list, uint8_t blanks, const char* text);
64 
65  // WERD constructed from:
66  // blob_list - blobs in the word
67  // clone - werd to clone flags, etc from.
68  WERD(C_BLOB_LIST* blob_list, WERD* clone);
69 
70  // Construct a WERD from a single_blob and clone the flags from this.
71  // W_BOL and W_EOL flags are set according to the given values.
72  WERD* ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob);
73 
74  ~WERD() = default;
75 
76  // assignment
77  WERD& operator=(const WERD& source);
78 
79  // This method returns a new werd constructed using the blobs in the input
80  // all_blobs list, which correspond to the blobs in this werd object. The
81  // blobs used to construct the new word are consumed and removed from the
82  // input all_blobs list.
83  // Returns nullptr if the word couldn't be constructed.
84  // Returns original blobs for which no matches were found in the output list
85  // orphan_blobs (appends).
86  WERD* ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
87  C_BLOB_LIST* orphan_blobs);
88 
89  // Accessors for reject / DUFF blobs in various formats
90  C_BLOB_LIST* rej_cblob_list() { // compact format
91  return &rej_cblobs;
92  }
93 
94  // Accessors for good blobs in various formats.
95  C_BLOB_LIST* cblob_list() { // get compact blobs
96  return &cblobs;
97  }
98 
99  uint8_t space() { // access function
100  return blanks;
101  }
102  void set_blanks(uint8_t new_blanks) { blanks = new_blanks; }
103  int script_id() const { return script_id_; }
104  void set_script_id(int id) { script_id_ = id; }
105 
106  // Returns the (default) bounding box including all the dots.
107  TBOX bounding_box() const; // compute bounding box
108  // Returns the bounding box including the desired combination of upper and
109  // lower noise/diacritic elements.
110  TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
111  // Returns the bounding box of only the good blobs.
112  TBOX true_bounding_box() const;
113 
114  const char* text() const { return correct.c_str(); }
115  void set_text(const char* new_text) { correct = new_text; }
116 
117  bool flag(WERD_FLAGS mask) const { return flags.bit(mask); }
118  void set_flag(WERD_FLAGS mask, bool value) { flags.set_bit(mask, value); }
119 
120  bool display_flag(uint8_t flag) const { return disp_flags.bit(flag); }
121  void set_display_flag(uint8_t flag, bool value) {
122  disp_flags.set_bit(flag, value);
123  }
124 
125  WERD* shallow_copy(); // shallow copy word
126 
127  // reposition word by vector
128  void move(const ICOORD vec);
129 
130  // join other's blobs onto this werd, emptying out other.
131  void join_on(WERD* other);
132 
133  // copy other's blobs onto this word, leaving other intact.
134  void copy_on(WERD* other);
135 
136  // tprintf word metadata (but not blob innards)
137  void print();
138 
139 #ifndef GRAPHICS_DISABLED
140  // plot word on window in a uniform colour
141  void plot(ScrollView* window, ScrollView::Color colour);
142 
143  // Get the next color in the (looping) rainbow.
145 
146  // plot word on window in a rainbow of colours
147  void plot(ScrollView* window);
148 
149  // plot rejected blobs in a rainbow of colours
150  void plot_rej_blobs(ScrollView* window);
151 #endif // GRAPHICS_DISABLED
152 
153  // Removes noise from the word by moving small outlines to the rej_cblobs
154  // list, based on the size_threshold.
155  void CleanNoise(float size_threshold);
156 
157  // Extracts all the noise outlines and stuffs the pointers into the given
158  // vector of outlines. Afterwards, the outlines vector owns the pointers.
160  // Adds the selected outlines to the indcated real blobs, and puts the rest
161  // back in rej_cblobs where they came from. Where the target_blobs entry is
162  // nullptr, a run of wanted outlines is put into a single new blob.
163  // Ownership of the outlines is transferred back to the word. (Hence
164  // GenericVector and not PointerVector.)
165  // Returns true if any new blob was added to the start of the word, which
166  // suggests that it might need joining to the word before it, and likewise
167  // sets make_next_word_fuzzy true if any new blob was added to the end.
168  bool AddSelectedOutlines(const GenericVector<bool>& wanted,
169  const GenericVector<C_BLOB*>& target_blobs,
170  const GenericVector<C_OUTLINE*>& outlines,
171  bool* make_next_word_fuzzy);
172 
173  private:
174  uint8_t blanks = 0; // no of blanks
175  BITS16 flags; // flags about word
176  BITS16 disp_flags; // display flags
177  int16_t script_id_ = 0; // From unicharset.
178  STRING correct; // correct text
179  C_BLOB_LIST cblobs; // compacted blobs
180  C_BLOB_LIST rej_cblobs; // DUFF blobs
181 };
182 
184 #include "ocrrow.h" // placed here due to
// Comparator that orders words by increasing left edge. The signature is the
// one qsort(3) expects; presumably each argument points at a WERD* element of
// the array being sorted -- confirm against the definition in werd.cpp.
// Returns an int as required by qsort(3).
int word_comparator(const void* word1p, const void* word2p);
187 #endif
W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
Definition: werd.h:50
ScrollView
Definition: scrollview.h:97
strngs.h
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
WERD::set_script_id
void set_script_id(int id)
Definition: werd.h:103
BITS16::bit
bool bit(uint8_t bit_num) const
Definition: bits16.h:65
W_REP_CHAR
repeated character
Definition: werd.h:52
WERD::NextColor
static ScrollView::Color NextColor(ScrollView::Color colour)
Definition: werd.cpp:291
WERD::~WERD
~WERD()=default
WERD::copy_on
void copy_on(WERD *other)
Definition: werd.cpp:220
WERD::shallow_copy
WERD * shallow_copy()
Definition: werd.cpp:333
W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:51
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
DF_TEXT
Correct ascii.
Definition: werd.h:46
WERD::display_flag
bool display_flag(uint8_t flag) const
Definition: werd.h:119
WERD::plot
void plot(ScrollView *window, ScrollView::Color colour)
Definition: werd.cpp:282
params.h
ICOORD
integer coordinate
Definition: points.h:30
W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
Definition: werd.h:49
WERD::AddSelectedOutlines
bool AddSelectedOutlines(const GenericVector< bool > &wanted, const GenericVector< C_BLOB * > &target_blobs, const GenericVector< C_OUTLINE * > &outlines, bool *make_next_word_fuzzy)
Definition: werd.cpp:523
STRING
Definition: strngs.h:45
BITS16
Definition: bits16.h:24
WERD::ConstructWerdWithNewBlobs
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:387
WERD::set_display_flag
void set_display_flag(uint8_t flag, bool value)
Definition: werd.h:120
elst2.h
C_BLOB
Definition: stepblob.h:36
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
WERD::print
void print()
Definition: werd.cpp:252
ELIST2IZEH
#define ELIST2IZEH(CLASSNAME)
Definition: elst2.h:917
W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:54
WERD::space
uint8_t space()
Definition: werd.h:98
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
WERD_FLAGS
WERD_FLAGS
Definition: werd.h:27
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:117
bits16.h
W_EOL
end of line
Definition: werd.h:47
DF_BLAMER
Blamer information.
Definition: werd.h:50
stepblob.h
DF_EDGE_STEP
Edge steps.
Definition: werd.h:48
WERD::CleanNoise
void CleanNoise(float size_threshold)
Definition: werd.cpp:481
WERD::set_text
void set_text(const char *new_text)
Definition: werd.h:114
word_comparator
int word_comparator(const void *word1p, const void *word2p)
Definition: werd.cpp:369
W_NORMALIZED
flags
Definition: werd.h:48
GenericVector
Definition: baseapi.h:40
DF_BOX
Bounding box.
Definition: werd.h:45
DF_BN_POLYGONAL
BL normalised poly approx.
Definition: werd.h:49
W_INVERSE
white on black
Definition: werd.h:55
BITS16::set_bit
void set_bit(uint8_t bit_num, bool value)
Definition: bits16.h:56
WERD::true_bounding_box
TBOX true_bounding_box() const
Definition: werd.cpp:168
WERD::text
const char * text() const
Definition: werd.h:113
WERD::ConstructFromSingleBlob
WERD * ConstructFromSingleBlob(bool bol, bool eol, C_BLOB *blob)
Definition: werd.cpp:124
W_FUZZY_SP
fuzzy space
Definition: werd.h:53
WERD::script_id
int script_id() const
Definition: werd.h:102
WERD
Definition: werd.h:55
WERD::restricted_bounding_box
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: werd.cpp:151
ROW
Definition: ocrrow.h:35
DISPLAY_FLAGS
DISPLAY_FLAGS
Definition: werd.h:43
ELIST2_LINK
Definition: elst2.h:53
WERD::operator=
WERD & operator=(const WERD &source)
Definition: werd.cpp:348
WERD::set_blanks
void set_blanks(uint8_t new_blanks)
Definition: werd.h:101
WERD::plot_rej_blobs
void plot_rej_blobs(ScrollView *window)
Definition: werd.cpp:319
WERD::join_on
void join_on(WERD *other)
Definition: werd.cpp:198
WERD::move
void move(const ICOORD vec)
Definition: werd.cpp:185
ScrollView::Color
Color
Definition: scrollview.h:100
WERD::GetNoiseOutlines
void GetNoiseOutlines(GenericVector< C_OUTLINE * > *outlines)
Definition: werd.cpp:505
W_BOLD
bold text
Definition: werd.h:45
DF_POLYGONAL
Polyg approx.
Definition: werd.h:47
W_SEGMENTED
correctly segmented
Definition: werd.h:43
WERD::rej_cblob_list
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:89
WERD::WERD
WERD()=default
W_BOL
start of line
Definition: werd.h:46
W_ITALIC
italic text
Definition: werd.h:44
TBOX
Definition: rect.h:33