tesseract  3.05.02
tesseract_cube_combiner.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tesseract_cube_combiner.h
3  * Description: Declaration of the Tesseract & Cube results combiner Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The TesseractCubeCombiner class provides the functionality of combining
21 // the recognition results of Tesseract and Cube at the word level
22 
23 #ifndef TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
24 #define TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
25 
26 #include <string>
27 #include <vector>
28 #include "pageres.h"
29 
30 namespace tesseract {
31 
32 class CubeObject;
33 class NeuralNet;
34 class CubeRecoContext;
35 class WordAltList;
36 
36 // Combines the word-level recognition results of Tesseract and Cube.
37 class TesseractCubeCombiner {
38  public:
39  explicit TesseractCubeCombiner(CubeRecoContext *cube_cntxt);
40  virtual ~TesseractCubeCombiner();
41 
42  // There are 2 public methods for combining the results of tesseract
43  // and cube. Both return the probability that the Tesseract result is
44  // correct. The difference between the two interfaces is in how the
45  // passed-in CubeObject is used.
46 
47  // The CubeObject parameter is used for 2 purposes: 1) to retrieve
48  // cube's alt list, and 2) to compute cube's word cost for the
49  // tesseract result. Both uses may modify the state of the
50  // CubeObject (including the BeamSearch state) with a call to
51  // RecognizeWord().
52  float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj);
53 
54  // The alt_list parameter is expected to have been extracted from the
55  // CubeObject that recognized the word to be combined. The cube_obj
56  // parameter passed in is a separate instance to be used only by
57  // the combiner.
58  float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj,
59  WordAltList *alt_list);
60 
61  // Public method for computing the combiner features. The agreement
62  // output parameter will be true if both answers are identical,
63  // false otherwise. Modifies the cube_alt_list, so no assumptions
64  // should be made about its state upon return.
65  bool ComputeCombinerFeatures(const std::string &tess_res,
66  int tess_confidence,
67  CubeObject *cube_obj,
68  WordAltList *cube_alt_list,
69  std::vector<double> *features,
70  bool *agreement);
71 
72  // Is the word valid according to Tesseract's language model
73  bool ValidWord(const std::string &str);
74 
75  // Loads the combiner neural network from file, using cube_cntxt_
76  // to find path.
77  bool LoadCombinerNet();
78  private:
79  // Normalizes a UTF-8 string: converts it to UTF32, optionally strips
80  // punctuation and/or normalizes case, and then converts back.
81  std::string NormalizeString(const std::string &str, bool remove_punc, bool norm_case);
82 
83  // Compares 2 strings after optionally normalizing them and/or stripping
84  // punctuation.
85  int CompareStrings(const std::string &str1, const std::string &str2, bool ignore_punc,
86  bool norm_case);
87 
88  NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object
89  CubeRecoContext *cube_cntxt_; // used for language ID and data paths
90 };
91 }
92 
93 #endif // TESSERACT_CCMAIN_TESSERACT_CUBE_COMBINER_H
TesseractCubeCombiner(CubeRecoContext *cube_cntxt)
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj)
bool ValidWord(const std::string &str)
bool ComputeCombinerFeatures(const std::string &tess_res, int tess_confidence, CubeObject *cube_obj, WordAltList *cube_alt_list, std::vector< double > *features, bool *agreement)