tesseract  5.0.0-alpha-619-ge9db
lstmtester.h
Go to the documentation of this file.
1 // File: lstmtester.h
3 // Description: Top-level line evaluation class for LSTM-based networks.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2016, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 
18 #ifndef TESSERACT_TRAINING_LSTMTESTER_H_
19 #define TESSERACT_TRAINING_LSTMTESTER_H_
20 
21 #include <mutex>
23 #include "lstmtrainer.h"
24 #include <tesseract/strngs.h>
25 
26 namespace tesseract {
27 
// Evaluates LSTM-based networks on a stored set of eval data, either
// synchronously (RunEvalSync) or asynchronously (RunEvalAsync).
28 class LSTMTester {
29  public:
    // max_memory: NOTE(review): presumably the memory limit for the eval data
    // cache (test_data_) -- confirm against the definition in lstmtester.cpp.
    // NOTE(review): single-argument constructor is not marked explicit.
30  LSTMTester(int64_t max_memory);
31 
32  // Loads a set of lstmf files that were created using the lstm.train config to
33  // tesseract into memory ready for testing. Returns false if nothing was
34  // loaded. The arg is a filename of a file that lists the filenames, with one
35  // name per line. Conveniently, tesstrain.sh generates such a file, along
36  // with the files themselves.
37  bool LoadAllEvalData(const STRING& filenames_file);
38  // Loads a set of lstmf files that were created using the lstm.train config to
39  // tesseract into memory ready for testing. Returns false if nothing was
40  // loaded. The arg is the list of filenames itself.
41  bool LoadAllEvalData(const GenericVector<STRING>& filenames);
42 
43  // Runs an evaluation asynchronously on the stored eval data and returns a
44  // string describing the results of the previous test. Args match TestCallback
45  // declared in lstmtrainer.h:
46  // iteration: Current learning iteration number.
47  // training_errors: If not null, is an array of size ET_COUNT, indexed by
48  // the ErrorTypes enum and indicates the current errors measured by the
49  // trainer, and this is a serious request to run an evaluation. If null,
50  // then the caller is just polling for the results of the previous eval.
51  // model_mgr: holds the model to evaluate, which should be a serialized
52  // LSTMTrainer.
53  // training_stage: an arbitrary number on the progress of training.
54  STRING RunEvalAsync(int iteration, const double* training_errors,
55  const TessdataManager& model_mgr, int training_stage);
56  // Runs an evaluation synchronously on the stored eval data and returns a
57  // string describing the results. Args are as for RunEvalAsync, plus
58  // verbosity: 1 outputs errors, 2 outputs all results.
59  STRING RunEvalSync(int iteration, const double* training_errors,
60  const TessdataManager& model_mgr, int training_stage,
61  int verbosity);
62 
63  private:
64  // Helper thread function for RunEvalAsync.
65  // LockIfNotRunning must have returned true before ThreadFunc is called;
66  // ThreadFunc calls UnlockRunning to release the lock after RunEvalSync completes.
67  void ThreadFunc();
68  // Returns true if there is currently nothing running, and takes the lock
69  // if there is nothing running.
70  bool LockIfNotRunning();
71  // Releases the running lock.
72  void UnlockRunning();
73 
74  // The data to test with.
75  DocumentCache test_data_;
    // NOTE(review): presumably the number of pages held in test_data_ --
    // confirm in lstmtester.cpp.
76  int total_pages_ = 0;
77  // Flag that indicates an asynchronous test is currently running.
78  // Protected by running_mutex_.
79  bool async_running_ = false;
80  std::mutex running_mutex_;
81  // Stored copies of the args for use while running asynchronously.
82  int test_iteration_ = 0;
    // NOTE(review): this stores the pointer, not a copy of the array; presumably
    // the caller's array must stay valid while the async eval runs -- confirm
    // how lstmtester.cpp uses it.
83  const double* test_training_errors_ = nullptr;
84  TessdataManager test_model_mgr_;
85  int test_training_stage_ = 0;
    // NOTE(review): presumably the result string of the most recent eval, which
    // RunEvalAsync reports on a later call -- confirm in lstmtester.cpp.
86  STRING test_result_;
87 };
88 
89 } // namespace tesseract
90 
91 #endif // TESSERACT_TRAINING_LSTMTESTER_H_
tesseract::LSTMTester
Definition: lstmtester.h:28
strngs.h
tesseract::TessdataManager
Definition: tessdatamanager.h:126
lstmtrainer.h
STRING
Definition: strngs.h:45
tesseract::DocumentCache
Definition: imagedata.h:320
genericvector.h
tesseract
Definition: baseapi.h:65
tesseract::LSTMTester::RunEvalAsync
STRING RunEvalAsync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage)
Definition: lstmtester.cpp:53
GenericVector< STRING >
tesseract::LSTMTester::LSTMTester
LSTMTester(int64_t max_memory)
Definition: lstmtester.cpp:25
tesseract::LSTMTester::RunEvalSync
STRING RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage, int verbosity)
Definition: lstmtester.cpp:84
tesseract::LSTMTester::LoadAllEvalData
bool LoadAllEvalData(const STRING &filenames_file)
Definition: lstmtester.cpp:31