tesseract  5.0.0-alpha-619-ge9db
devanagari_processing.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15 
16 #include "allheaders.h"
17 #include "ocrblock.h"
18 #include "params.h"
19 
20 struct Pix;
21 struct Box;
22 struct Boxa;
23 
24 extern
26  "Debug level for split shiro-rekha process.");
27 
28 extern
30  "Whether to create a debug image for split shiro-rekha process.");
31 
32 class TBOX;
33 
34 namespace tesseract {
35 
37  public:
39  hist_ = nullptr;
40  length_ = 0;
41  }
42 
44  Clear();
45  }
46 
47  void Clear() {  // Releases the histogram buffer and resets the length to 0.
48  delete[] hist_;
49  hist_ = nullptr; length_ = 0;  // Null hist_ so that a repeated Clear(), or the destructor's Clear(), does not delete[] the same buffer twice.
50  }
51 
52  int* hist() const { return hist_; }
53 
54  int length() const {
55  return length_;
56  }
57 
58  // Methods to construct histograms from images. These clear any existing data.
59  void ConstructVerticalCountHist(Pix* pix);
60  void ConstructHorizontalCountHist(Pix* pix);
61 
62  // This method returns the global maximum of the histogram. The frequency of
63  // the global maximum is returned in count, if specified.
64  int GetHistogramMaximum(int* count) const;
65 
66  private:
67  int* hist_;
68  int length_;
69 };
70 
72  public:
74  NO_SPLIT = 0, // No splitting is performed for the phase.
75  MINIMAL_SPLIT, // Blobs are split minimally.
76  MAXIMAL_SPLIT // Blobs are split maximally.
77  };
78 
80  virtual ~ShiroRekhaSplitter();
81 
82  // Top-level method to perform splitting based on current settings.
83  // Returns true if a split was actually performed.
84  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
85  // splitting. If false, the ocr_split_strategy_ is used.
86  bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
87 
88  // Clears the memory held by this object.
89  void Clear();
90 
91  // Refreshes the words in the segmentation block list by using blobs in the
92  // input blob list.
93  // The segmentation block list must be set.
94  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
95 
96  // Returns true if the split strategies for pageseg and ocr are different.
98  return pageseg_split_strategy_ != ocr_split_strategy_;
99  }
100 
101  // This only keeps a copy of the block list pointer. The list object must
102  // still be alive when Split() is called. This block list is used as a golden
103  // segmentation when performing splitting.
104  void set_segmentation_block_list(BLOCK_LIST* block_list) {
105  segmentation_block_list_ = block_list;
106  }
107 
108  static const int kUnspecifiedXheight = -1;
109 
110  void set_global_xheight(int xheight) {
111  global_xheight_ = xheight;
112  }
113 
114  void set_perform_close(bool perform) {
115  perform_close_ = perform;
116  }
117 
118  // Returns the image obtained from shiro-rekha splitting. The returned object
119  // is owned by this class. Callers may want to clone the returned pix to keep
120  // it alive beyond the lifetime of the ShiroRekhaSplitter object.
121  Pix* splitted_image() {
122  return splitted_image_;
123  }
124 
125  // On setting the input image, a clone of it is owned by this class.
126  void set_orig_pix(Pix* pix);
127 
128  // Returns the input image provided to the object. This object is owned by
129  // this class. Callers may want to clone the returned pix to work with it.
130  Pix* orig_pix() {
131  return orig_pix_;
132  }
133 
135  return ocr_split_strategy_;
136  }
137 
139  ocr_split_strategy_ = strategy;
140  }
141 
143  return pageseg_split_strategy_;
144  }
145 
147  pageseg_split_strategy_ = strategy;
148  }
149 
150  BLOCK_LIST* segmentation_block_list() {
151  return segmentation_block_list_;
152  }
153 
154  // This method returns the computed mode-height of blobs in the pix.
155  // It also prunes very small blobs from calculation. Could be used to provide
156  // a global xheight estimate for images which have the same point-size text.
157  static int GetModeHeight(Pix* pix);
158 
159  private:
160  // Method to perform a close operation on the input image. The xheight
161  // estimate decides the size of sel used.
162  static void PerformClose(Pix* pix, int xheight_estimate);
163 
164  // This method resolves the cc bbox to a particular row and returns the row's
165  // xheight. This uses segmentation_block_list_ if available, else just returns
166  // the global_xheight_ estimate currently set in the object.
167  int GetXheightForCC(Box* cc_bbox);
168 
169  // Returns, via regions_to_clear, a list of regions (boxes) which should be
170  // cleared in the original image so as to perform shiro-rekha splitting. Pix is
171  // assumed to carry at most one word. The xheight measure could be the global
172  // estimate, the row estimate, or unspecified. If unspecified, over-splitting
173  // may occur, since a conservative estimate of stroke width along with an
174  // associated multiplier is used in its place. It is advisable to have a
175  // specified xheight when splitting for classification/training.
176  void SplitWordShiroRekha(SplitStrategy split_strategy,
177  Pix* pix,
178  int xheight,
179  int word_left,
180  int word_top,
181  Boxa* regions_to_clear);
182 
183  // Returns a new box object for the corresponding TBOX, based on the original
184  // image's coordinate system.
185  Box* GetBoxForTBOX(const TBOX& tbox) const;
186 
187  // This method returns y-extents of the shiro-rekha computed from the input
188  // word image.
189  static void GetShiroRekhaYExtents(Pix* word_pix,
190  int* shirorekha_top,
191  int* shirorekha_bottom,
192  int* shirorekha_ylevel);
193 
194  Pix* orig_pix_; // Just a clone of the input image passed.
195  Pix* splitted_image_; // Image produced after the last splitting round. The
196  // object is owned by this class.
197  SplitStrategy pageseg_split_strategy_;
198  SplitStrategy ocr_split_strategy_;
199  Pix* debug_image_;
200  // This block list is used as a golden segmentation when performing splitting.
201  BLOCK_LIST* segmentation_block_list_;
202  int global_xheight_;
203  bool perform_close_; // Whether a morphological close operation should be
204  // performed before CCs are run through splitting.
205 };
206 
207 } // namespace tesseract.
208 
209 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
tesseract::ShiroRekhaSplitter::set_ocr_split_strategy
void set_ocr_split_strategy(SplitStrategy strategy)
Definition: devanagari_processing.h:138
tesseract::ShiroRekhaSplitter::set_pageseg_split_strategy
void set_pageseg_split_strategy(SplitStrategy strategy)
Definition: devanagari_processing.h:146
tesseract::ShiroRekhaSplitter::set_orig_pix
void set_orig_pix(Pix *pix)
Definition: devanagari_processing.cpp:68
tesseract::PixelHistogram::ConstructVerticalCountHist
void ConstructVerticalCountHist(Pix *pix)
Definition: devanagari_processing.cpp:469
tesseract::PixelHistogram::length
int length() const
Definition: devanagari_processing.h:54
tesseract::PixelHistogram::GetHistogramMaximum
int GetHistogramMaximum(int *count) const
Definition: devanagari_processing.cpp:455
devanagari_split_debuglevel
int devanagari_split_debuglevel
Definition: devanagari_processing.cpp:34
params.h
tesseract::ShiroRekhaSplitter::set_perform_close
void set_perform_close(bool perform)
Definition: devanagari_processing.h:114
tesseract::ShiroRekhaSplitter::Clear
void Clear()
Definition: devanagari_processing.cpp:56
tesseract::ShiroRekhaSplitter::ocr_split_strategy
SplitStrategy ocr_split_strategy() const
Definition: devanagari_processing.h:134
tesseract::ShiroRekhaSplitter::set_segmentation_block_list
void set_segmentation_block_list(BLOCK_LIST *block_list)
Definition: devanagari_processing.h:104
tesseract::ShiroRekhaSplitter::splitted_image
Pix * splitted_image()
Definition: devanagari_processing.h:121
tesseract::ShiroRekhaSplitter::segmentation_block_list
BLOCK_LIST * segmentation_block_list()
Definition: devanagari_processing.h:150
tesseract::ShiroRekhaSplitter::Split
bool Split(bool split_for_pageseg, DebugPixa *pixa_debug)
Definition: devanagari_processing.cpp:80
tesseract::ShiroRekhaSplitter::MINIMAL_SPLIT
Definition: devanagari_processing.h:75
tesseract::PixelHistogram::~PixelHistogram
~PixelHistogram()
Definition: devanagari_processing.h:43
tesseract::ShiroRekhaSplitter::GetModeHeight
static int GetModeHeight(Pix *pix)
Definition: devanagari_processing.cpp:410
tesseract::ShiroRekhaSplitter::orig_pix
Pix * orig_pix()
Definition: devanagari_processing.h:130
tesseract::ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs
void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs)
Definition: devanagari_processing.cpp:356
tesseract::ShiroRekhaSplitter::kUnspecifiedXheight
static const int kUnspecifiedXheight
Definition: devanagari_processing.h:108
tesseract::ShiroRekhaSplitter
Definition: devanagari_processing.h:71
tesseract::ShiroRekhaSplitter::HasDifferentSplitStrategies
bool HasDifferentSplitStrategies() const
Definition: devanagari_processing.h:97
tesseract::PixelHistogram::Clear
void Clear()
Definition: devanagari_processing.h:47
tesseract
Definition: baseapi.h:65
tesseract::ShiroRekhaSplitter::MAXIMAL_SPLIT
Definition: devanagari_processing.h:76
INT_VAR_H
#define INT_VAR_H(name, val, comment)
Definition: params.h:292
tesseract::ShiroRekhaSplitter::ShiroRekhaSplitter
ShiroRekhaSplitter()
Definition: devanagari_processing.cpp:41
tesseract::DebugPixa
Definition: debugpixa.h:10
tesseract::PixelHistogram::ConstructHorizontalCountHist
void ConstructHorizontalCountHist(Pix *pix)
Definition: devanagari_processing.cpp:487
tesseract::PixelHistogram::hist
int * hist() const
Definition: devanagari_processing.h:52
tesseract::PixelHistogram
Definition: devanagari_processing.h:36
tesseract::ShiroRekhaSplitter::set_global_xheight
void set_global_xheight(int xheight)
Definition: devanagari_processing.h:110
count
int count(LIST var_list)
Definition: oldlist.cpp:79
BOOL_VAR_H
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:294
ocrblock.h
tesseract::ShiroRekhaSplitter::pageseg_split_strategy
SplitStrategy pageseg_split_strategy() const
Definition: devanagari_processing.h:142
tesseract::ShiroRekhaSplitter::~ShiroRekhaSplitter
virtual ~ShiroRekhaSplitter()
Definition: devanagari_processing.cpp:52
tesseract::PixelHistogram::PixelHistogram
PixelHistogram()
Definition: devanagari_processing.h:38
devanagari_split_debugimage
bool devanagari_split_debugimage
Definition: devanagari_processing.cpp:37
tesseract::ShiroRekhaSplitter::SplitStrategy
SplitStrategy
Definition: devanagari_processing.h:73
tesseract::ShiroRekhaSplitter::NO_SPLIT
Definition: devanagari_processing.h:74
TBOX
Definition: rect.h:33