tesseract  5.0.0-alpha-619-ge9db
lstm_test.cc
Go to the documentation of this file.
1 // (C) Copyright 2017, Google Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 // http://www.apache.org/licenses/LICENSE-2.0
6 // Unless required by applicable law or agreed to in writing, software
7 // distributed under the License is distributed on an "AS IS" BASIS,
8 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 // See the License for the specific language governing permissions and
10 // limitations under the License.
11 
12 // Generating the training data:
13 // If the format of the lstmf (ImageData) file changes, the training data will
14 // have to be regenerated as follows:
15 //
16 // Use --xsize 800 for text2image to be similar to original training data.
17 //
18 // src/training/tesstrain.sh --fonts_dir /usr/share/fonts --lang eng \
19 // --linedata_only --noextract_font_properties --langdata_dir ../langdata_lstm \
20 // --tessdata_dir ../tessdata --output_dir ~/tesseract/test/testdata \
21 // --fontlist "Arial" --maxpages 10
22 //
23 
24 #include "lstm_test.h"
25 
26 namespace tesseract {
27 
28 // Tests that some simple networks can learn Arial and meet accuracy targets.
29 TEST_F(LSTMTrainerTest, BasicTest) {
30  // A Convolver sliding window classifier without LSTM.
31  SetupTrainer(
32  "[1,32,0,1 Ct5,5,16 Mp4,4 Ct1,1,16 Ct3,3,128 Mp4,1 Ct1,1,64 S2,1 "
33  "Ct1,1,64O1c1]",
34  "no-lstm", "eng/eng.unicharset", "eng.Arial.exp0.lstmf", false, false,
35  2e-4, false, "eng");
36  double non_lstm_err = TrainIterations(kTrainerIterations * 4);
37  EXPECT_LT(non_lstm_err, 98);
38  LOG(INFO) << "********** Expected < 98 ************\n" ;
39 
40  // A basic single-layer, single direction LSTM.
41  SetupTrainerEng("[1,1,0,32 Lfx100 O1c1]", "1D-lstm", false, false);
42  double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
43  EXPECT_LT(lstm_uni_err, 86);
44  LOG(INFO) << "********** Expected < 86 ************\n" ;
45  // Beats the convolver. (Although it does have a lot more weights, it still
46  // iterates faster.)
47  EXPECT_LT(lstm_uni_err, non_lstm_err);
48 }
49 
50 // Color learns almost as fast as normalized grey/2D.
51 TEST_F(LSTMTrainerTest, ColorTest) {
52  // A basic single-layer, single direction LSTM.
53  SetupTrainerEng("[1,32,0,3 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
54  "2D-color-lstm", true, true);
55  double lstm_uni_err = TrainIterations(kTrainerIterations);
56  EXPECT_LT(lstm_uni_err, 85);
57 // EXPECT_GT(lstm_uni_err, 66);
58  LOG(INFO) << "********** Expected < 85 ************\n" ;
59 }
60 
61 TEST_F(LSTMTrainerTest, BidiTest) {
62  // A basic single-layer, bi-di 1d LSTM.
63  SetupTrainerEng("[1,1,0,32 Lbx100 O1c1]", "bidi-lstm", false, false);
64  double lstm_bi_err = TrainIterations(kTrainerIterations);
65  EXPECT_LT(lstm_bi_err, 75);
66  LOG(INFO) << "********** Expected < 75 ************\n" ;
67  // Int mode training is dead, so convert the trained network to int and check
68  // that its error rate is close to the float version.
69  TestIntMode(kTrainerIterations);
70 }
71 
72 // Tests that a 2d-2-layer network learns correctly.
73 // It takes a lot of iterations to get there.
75  // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
76  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
77  "2-D-2-layer-lstm", false, false);
78  double lstm_2d_err = TrainIterations(kTrainerIterations * 3 / 2 );
79  EXPECT_LT(lstm_2d_err, 98);
80 // EXPECT_GT(lstm_2d_err, 90);
81  LOG(INFO) << "********** Expected < 98 ************\n" ;
82  // Int mode training is dead, so convert the trained network to int and check
83  // that its error rate is close to the float version.
84  TestIntMode(kTrainerIterations);
85 }
86 
87 // Tests that a 2d-2-layer network with Adam does *a lot* better than
88 // without it.
89 TEST_F(LSTMTrainerTest, TestAdam) {
90  // A 2-layer LSTM with a 2-D feature-extracting LSTM on the bottom.
91  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
92  "2-D-2-layer-lstm", false, true);
93  double lstm_2d_err = TrainIterations(kTrainerIterations);
94  EXPECT_LT(lstm_2d_err, 70);
95  LOG(INFO) << "********** Expected < 70 ************\n" ;
96  TestIntMode(kTrainerIterations);
97 }
98 
99 // Trivial test of training speed on a fairly complex network.
100 TEST_F(LSTMTrainerTest, SpeedTest) {
101  SetupTrainerEng(
102  "[1,30,0,1 Ct5,5,16 Mp2,2 L2xy24 Ct1,1,48 Mp5,1 Ct1,1,32 S3,1 Lbx64 "
103  "O1c1]",
104  "2-D-2-layer-lstm", false, true);
105  TrainIterations(kTrainerIterations);
106  LOG(INFO) << "********** *** ************\n" ;
107 }
108 
109 // Tests that two identical networks trained the same get the same results.
110 // Also tests that the same happens with a serialize/deserialize in the middle.
111 TEST_F(LSTMTrainerTest, DeterminismTest) {
112  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
113  "2-D-2-layer-lstm", false, false);
114  double lstm_2d_err_a = TrainIterations(kTrainerIterations);
115  double act_error_a = trainer_->ActivationError();
116  double char_error_a = trainer_->CharError();
117  GenericVector<char> trainer_a_data;
118  EXPECT_TRUE(trainer_->SaveTrainingDump(NO_BEST_TRAINER, trainer_.get(),
119  &trainer_a_data));
120  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
121  "2-D-2-layer-lstm", false, false);
122  double lstm_2d_err_b = TrainIterations(kTrainerIterations);
123  double act_error_b = trainer_->ActivationError();
124  double char_error_b = trainer_->CharError();
125  EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
126  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
127  EXPECT_FLOAT_EQ(char_error_a, char_error_b);
128  // Now train some more iterations.
129  lstm_2d_err_b = TrainIterations(kTrainerIterations / 3);
130  act_error_b = trainer_->ActivationError();
131  char_error_b = trainer_->CharError();
132  // Unpack into a new trainer and train that some more too.
133  SetupTrainerEng("[1,32,0,1 S4,2 L2xy16 Ct1,1,16 S8,1 Lbx100 O1c1]",
134  "2-D-2-layer-lstm", false, false);
135  EXPECT_TRUE(trainer_->ReadTrainingDump(trainer_a_data, trainer_.get()));
136  lstm_2d_err_a = TrainIterations(kTrainerIterations / 3);
137  act_error_a = trainer_->ActivationError();
138  char_error_a = trainer_->CharError();
139  EXPECT_FLOAT_EQ(lstm_2d_err_a, lstm_2d_err_b);
140  EXPECT_FLOAT_EQ(act_error_a, act_error_b);
141  EXPECT_FLOAT_EQ(char_error_a, char_error_b);
142  LOG(INFO) << "********** *** ************\n" ;
143 }
144 
145 // The baseline network against which to test the built-in softmax.
146 TEST_F(LSTMTrainerTest, SoftmaxBaselineTest) {
147  // A basic single-layer, single direction LSTM.
148  SetupTrainerEng("[1,1,0,32 Lfx96 O1c1]", "1D-lstm", false, true);
149  double lstm_uni_err = TrainIterations(kTrainerIterations * 2);
150  EXPECT_LT(lstm_uni_err, 60);
151 // EXPECT_GT(lstm_uni_err, 48);
152  LOG(INFO) << "********** Expected < 60 ************\n" ;
153  // Check that it works in int mode too.
154  TestIntMode(kTrainerIterations);
155  // If we run TestIntMode again, it tests that int_mode networks can
156  // serialize and deserialize correctly.
157  double delta = TestIntMode(kTrainerIterations);
158  // The two tests (both of int mode this time) should be almost identical.
159  LOG(INFO) << "Delta in Int mode error rates = " << delta << "\n";
160  EXPECT_LT(delta, 0.01);
161 }
162 
163 // Tests that the built-in softmax does better than the external one,
164 // which has an error rate slightly less than 55%, as tested by
165 // SoftmaxBaselineTest.
166 TEST_F(LSTMTrainerTest, SoftmaxTest) {
167  // LSTM with a built-in softmax can beat the external softmax.
168  SetupTrainerEng("[1,1,0,32 LS96]", "Lstm-+-softmax", false, true);
169  double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
170  EXPECT_LT(lstm_sm_err, 49.0);
171  LOG(INFO) << "********** Expected < 49 ************\n" ;
172  // Check that it works in int mode too.
173  TestIntMode(kTrainerIterations);
174 }
175 
176 // Tests that the built-in encoded softmax does better than the external one.
177 // It takes a lot of iterations to get there.
178 TEST_F(LSTMTrainerTest, EncodedSoftmaxTest) {
179  // LSTM with a built-in encoded softmax can beat the external softmax.
180  SetupTrainerEng("[1,1,0,32 LE96]", "Lstm-+-softmax", false, true);
181  double lstm_sm_err = TrainIterations(kTrainerIterations * 2);
182  EXPECT_LT(lstm_sm_err, 62.0);
183  LOG(INFO) << "********** Expected < 62 ************\n" ;
184  // Check that it works in int mode too.
185  TestIntMode(kTrainerIterations);
186 }
187 
188 // Tests that layer access methods work correctly.
189 TEST_F(LSTMTrainerTest, TestLayerAccess) {
190  // A 2-layer LSTM with a Squashed feature-extracting LSTM on the bottom.
191  SetupTrainerEng("[1,32,0,1 Ct5,5,16 Mp2,2 Lfys32 Lbx128 O1c1]", "SQU-lstm",
192  false, false);
193  // Number of layers.
194  const int kNumLayers = 8;
195  // Expected layer names.
196  const char* kLayerIds[kNumLayers] = {":0", ":1:0", ":1:1", ":2",
197  ":3:0", ":4:0", ":4:1:0", ":5"};
198  const char* kLayerNames[kNumLayers] = {"Input", "Convolve", "ConvNL",
199  "Maxpool", "Lfys32", "Lbx128LTR",
200  "Lbx128", "Output"};
201  // Expected number of weights.
202  const int kNumWeights[kNumLayers] = {0,
203  0,
204  16 * (25 + 1),
205  0,
206  32 * (4 * (32 + 16 + 1)),
207  128 * (4 * (128 + 32 + 1)),
208  128 * (4 * (128 + 32 + 1)),
209  112 * (2 * 128 + 1)};
210 
211  GenericVector<STRING> layers = trainer_->EnumerateLayers();
212  EXPECT_EQ(kNumLayers, layers.size());
213  for (int i = 0; i < kNumLayers && i < layers.size(); ++i) {
214  EXPECT_STREQ(kLayerIds[i], layers[i].c_str());
215  EXPECT_STREQ(kLayerNames[i],
216  trainer_->GetLayer(layers[i])->name().c_str());
217  EXPECT_EQ(kNumWeights[i], trainer_->GetLayer(layers[i])->num_weights());
218  }
219 }
220 
221 } // namespace tesseract.
INFO
Definition: log.h:29
tesseract::kTrainerIterations
const int kTrainerIterations
Definition: lstm_test.h:35
tesseract::TEST_F
TEST_F(EquationFinderTest, IdentifySpecialText)
Definition: equationdetect_test.cc:181
tesseract
Definition: baseapi.h:65
lstm_test.h
GenericVector< char >
tesseract::LSTMTrainerTest
Definition: lstm_test.h:46
GenericVector::get
T & get(int index) const
Definition: genericvector.h:716
LOG
Definition: cleanapi_test.cc:19
GenericVector::size
int size() const
Definition: genericvector.h:71
tesseract::NO_BEST_TRAINER
Definition: lstmtrainer.h:58