tesseract  4.0.0-1-g2a2b
lstmtester.h
Go to the documentation of this file.
1 // File: lstmtester.h
3 // Description: Top-level line evaluation class for LSTM-based networks.
4 // Author: Ray Smith
5 // Created: Wed Nov 23 11:05:06 PST 2016
6 //
7 // (C) Copyright 2016, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_TRAINING_LSTMTESTER_H_
20 #define TESSERACT_TRAINING_LSTMTESTER_H_
21 
22 #include "genericvector.h"
23 #include "lstmtrainer.h"
24 #include "strngs.h"
25 #include "svutil.h"
26 
27 namespace tesseract {
28 
29 class LSTMTester {
30  public:
31  LSTMTester(int64_t max_memory);
32 
33  // Loads a set of lstmf files that were created using the lstm.train config to
34  // tesseract into memory ready for testing. Returns false if nothing was
35  // loaded. The arg is a filename of a file that lists the filenames, with one
36  // name per line. Conveniently, tesstrain.sh generates such a file, along
37  // with the files themselves.
38  bool LoadAllEvalData(const STRING& filenames_file);
39  // Loads a set of lstmf files that were created using the lstm.train config to
40  // tesseract into memory ready for testing. Returns false if nothing was
41  // loaded.
42  bool LoadAllEvalData(const GenericVector<STRING>& filenames);
43 
44  // Runs an evaluation asynchronously on the stored eval data and returns a
45  // string describing the results of the previous test. Args match TestCallback
46  // declared in lstmtrainer.h:
47  // iteration: Current learning iteration number.
48  // training_errors: If not null, is an array of size ET_COUNT, indexed by
49  // the ErrorTypes enum and indicates the current errors measured by the
50  // trainer, and this is a serious request to run an evaluation. If null,
51  // then the caller is just polling for the results of the previous eval.
52  // model_data: is the model to evaluate, which should be a serialized
53  // LSTMTrainer.
54  // training_stage: an arbitrary number on the progress of training.
55  STRING RunEvalAsync(int iteration, const double* training_errors,
56  const TessdataManager& model_mgr, int training_stage);
57  // Runs an evaluation synchronously on the stored eval data and returns a
58  // string describing the results. Args as RunEvalAsync, except verbosity,
59  // which outputs errors, if 1, or all results if 2.
60  STRING RunEvalSync(int iteration, const double* training_errors,
61  const TessdataManager& model_mgr, int training_stage,
62  int verbosity);
63 
64  private:
65  // Static helper thread function for RunEvalAsync, with a specific signature
66  // required by SVSync::StartThread. Actually a member function pretending to
67  // be static, its arg is a this pointer that it will cast back to LSTMTester*
68  // to call RunEvalSync using the stored args that RunEvalAsync saves in *this.
69  // LockIfNotRunning must have returned true before calling ThreadFunc, and
70  // it will call UnlockRunning to release the lock after RunEvalSync completes.
71  static void* ThreadFunc(void* lstmtester_void);
72  // Returns true if there is currently nothing running, and takes the lock
73  // if there is nothing running.
74  bool LockIfNotRunning();
75  // Releases the running lock.
76  void UnlockRunning();
77 
78  // The data to test with.
79  DocumentCache test_data_;
80  int total_pages_;
81  // Flag that indicates an asynchronous test is currently running.
82  // Protected by running_mutex_.
83  bool async_running_;
84  SVMutex running_mutex_;
85  // Stored copies of the args for use while running asynchronously.
86  int test_iteration_;
87  const double* test_training_errors_;
88  TessdataManager test_model_mgr_;
89  int test_training_stage_;
90  STRING test_result_;
91 };
92 
93 } // namespace tesseract
94 
95 #endif // TESSERACT_TRAINING_LSTMTESTER_H_
bool LoadAllEvalData(const STRING &filenames_file)
Definition: lstmtester.cpp:30
LSTMTester(int64_t max_memory)
Definition: lstmtester.cpp:24
STRING RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage, int verbosity)
Definition: lstmtester.cpp:82
STRING RunEvalAsync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage)
Definition: lstmtester.cpp:52
Definition: strngs.h:45
Definition: svutil.h:78