#include <lstmtrainer.h>

Inheritance diagram for tesseract::LSTMTrainer:

Public Member Functions
	LSTMTrainer ()

	LSTMTrainer (FileReader file_reader, FileWriter file_writer, CheckPointReader checkpoint_reader, CheckPointWriter checkpoint_writer, const char *model_base, const char *checkpoint_name, int debug_interval, int64_t max_memory)

virtual	~LSTMTrainer ()

bool	TryLoadingCheckpoint (const char filename, const char old_traineddata)

void	InitCharSet (const std::string &traineddata_path)

void	InitCharSet (const TessdataManager &mgr)

bool	InitNetwork (const STRING &network_spec, int append_index, int net_flags, float weight_range, float learning_rate, float momentum, float adam_beta)

int	InitTensorFlowNetwork (const std::string &tf_proto)

void	InitIterations ()

double	ActivationError () const

double	CharError () const

const double *	error_rates () const

double	best_error_rate () const

int	best_iteration () const

int	learning_iteration () const

int32_t	improvement_steps () const

void	set_perfect_delay (int delay)

const GenericVector< char > &	best_trainer () const

double	NewSingleError (ErrorTypes type) const

double	LastSingleError (ErrorTypes type) const

const DocumentCache &	training_data () const

DocumentCache *	mutable_training_data ()

Trainability	GridSearchDictParams (const ImageData *trainingdata, int iteration, double min_dict_ratio, double dict_ratio_step, double max_dict_ratio, double min_cert_offset, double cert_offset_step, double max_cert_offset, STRING *results)

void	DebugNetwork ()

bool	LoadAllTrainingData (const GenericVector< STRING > &filenames, CachingStrategy cache_strategy, bool randomly_rotate)

bool	MaintainCheckpoints (TestCallback tester, STRING *log_msg)

bool	MaintainCheckpointsSpecific (int iteration, const GenericVector< char > train_model, const GenericVector< char > rec_model, TestCallback tester, STRING *log_msg)

void	PrepareLogMsg (STRING *log_msg) const

void	LogIterations (const char intro_str, STRING log_msg) const

bool	TransitionTrainingStage (float error_threshold)

int	CurrentTrainingStage () const

bool	Serialize (SerializeAmount serialize_amount, const TessdataManager mgr, TFile fp) const

bool	DeSerialize (const TessdataManager *mgr, TFile *fp)

void	StartSubtrainer (STRING *log_msg)

SubTrainerResult	UpdateSubtrainer (STRING *log_msg)

void	ReduceLearningRates (LSTMTrainer samples_trainer, STRING log_msg)

int	ReduceLayerLearningRates (double factor, int num_samples, LSTMTrainer *samples_trainer)

bool	EncodeString (const STRING &str, GenericVector< int > *labels) const

const ImageData *	TrainOnLine (LSTMTrainer *samples_trainer, bool batch)

Trainability	TrainOnLine (const ImageData *trainingdata, bool batch)

Trainability	PrepareForBackward (const ImageData *trainingdata, NetworkIO *fwd_outputs, NetworkIO *targets)

bool	SaveTrainingDump (SerializeAmount serialize_amount, const LSTMTrainer trainer, GenericVector< char > data) const

bool	ReadTrainingDump (const GenericVector< char > &data, LSTMTrainer *trainer) const

bool	ReadSizedTrainingDump (const char data, int size, LSTMTrainer trainer) const

bool	ReadLocalTrainingDump (const TessdataManager mgr, const char data, int size)

void	SetupCheckpointInfo ()

bool	SaveTraineddata (const STRING &filename)

void	SaveRecognitionDump (GenericVector< char > *data) const

STRING	DumpFilename () const

void	FillErrorBuffer (double new_error, ErrorTypes type)

std::vector< int >	MapRecoder (const UNICHARSET &old_chset, const UnicharCompress &old_recoder) const

Public Member Functions inherited from tesseract::LSTMRecognizer
	LSTMRecognizer ()

	~LSTMRecognizer ()

int	NumOutputs () const

int	training_iteration () const

int	sample_iteration () const

double	learning_rate () const

LossType	OutputLossType () const

bool	SimpleTextOutput () const

bool	IsIntMode () const

bool	IsRecoding () const

bool	IsTensorFlow () const

GenericVector< STRING >	EnumerateLayers () const

Network *	GetLayer (const STRING &id) const

float	GetLayerLearningRate (const STRING &id) const

void	ScaleLearningRate (double factor)

void	ScaleLayerLearningRate (const STRING &id, double factor)

void	ConvertToInt ()

const UNICHARSET &	GetUnicharset () const

const UnicharCompress &	GetRecoder () const

const Dict *	GetDict () const

void	SetIteration (int iteration)

int	NumInputs () const

int	null_char () const

bool	Load (const char lang, TessdataManager mgr)

bool	Serialize (const TessdataManager mgr, TFile fp) const

bool	DeSerialize (const TessdataManager *mgr, TFile *fp)

bool	LoadCharsets (const TessdataManager *mgr)

bool	LoadRecoder (TFile *fp)

bool	LoadDictionary (const char lang, TessdataManager mgr)

void	RecognizeLine (const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)

void	OutputStats (const NetworkIO &outputs, float min_output, float mean_output, float *sd)

bool	RecognizeLine (const ImageData &image_data, bool invert, bool debug, bool re_invert, bool upside_down, float scale_factor, NetworkIO inputs, NetworkIO *outputs)

STRING	DecodeLabels (const GenericVector< int > &labels)

void	DisplayForward (const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)

void	LabelsFromOutputs (const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)

Static Public Member Functions
static bool	EncodeString (const STRING &str, const UNICHARSET &unicharset, const UnicharCompress *recoder, bool simple_text, int null_char, GenericVector< int > *labels)

Protected Member Functions
void	InitCharSet ()

void	SetNullChar ()

void	EmptyConstructor ()

bool	DebugLSTMTraining (const NetworkIO &inputs, const ImageData &trainingdata, const NetworkIO &fwd_outputs, const GenericVector< int > &truth_labels, const NetworkIO &outputs)

void	DisplayTargets (const NetworkIO &targets, const char *window_name, ScrollView **window)

bool	ComputeTextTargets (const NetworkIO &outputs, const GenericVector< int > &truth_labels, NetworkIO *targets)

bool	ComputeCTCTargets (const GenericVector< int > &truth_labels, NetworkIO *outputs, NetworkIO *targets)

double	ComputeErrorRates (const NetworkIO &deltas, double char_error, double word_error)

double	ComputeRMSError (const NetworkIO &deltas)

double	ComputeWinnerError (const NetworkIO &deltas)

double	ComputeCharError (const GenericVector< int > &truth_str, const GenericVector< int > &ocr_str)

double	ComputeWordError (STRING *truth_str, STRING *ocr_str)

void	UpdateErrorBuffer (double new_error, ErrorTypes type)

void	RollErrorBuffers ()

STRING	UpdateErrorGraph (int iteration, double error_rate, const GenericVector< char > &model_data, TestCallback tester)

Protected Member Functions inherited from tesseract::LSTMRecognizer
void	SetRandomSeed ()

void	DisplayLSTMOutput (const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)

void	DebugActivationPath (const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)

void	DebugActivationRange (const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)

void	LabelsViaReEncode (const NetworkIO &output, GenericVector< int > labels, GenericVector< int > xcoords)

void	LabelsViaSimpleText (const NetworkIO &output, GenericVector< int > labels, GenericVector< int > xcoords)

const char *	DecodeLabel (const GenericVector< int > &labels, int start, int end, int decoded)

const char *	DecodeSingleLabel (int label)

Protected Attributes
ScrollView *	align_win_

ScrollView *	target_win_

ScrollView *	ctc_win_

ScrollView *	recon_win_

int	debug_interval_

int	checkpoint_iteration_

STRING	model_base_

STRING	checkpoint_name_

bool	randomly_rotate_

DocumentCache	training_data_

STRING	best_model_name_

int	num_training_stages_

FileReader	file_reader_

FileWriter	file_writer_

CheckPointReader	checkpoint_reader_

CheckPointWriter	checkpoint_writer_

double	best_error_rate_

double	best_error_rates_ [ET_COUNT]

int	best_iteration_

double	worst_error_rate_

double	worst_error_rates_ [ET_COUNT]

int	worst_iteration_

int	stall_iteration_

GenericVector< char >	best_model_data_

GenericVector< char >	worst_model_data_

GenericVector< char >	best_trainer_

LSTMTrainer *	sub_trainer_

float	error_rate_of_last_saved_best_

int	training_stage_

GenericVector< double >	best_error_history_

GenericVector< int >	best_error_iterations_

int32_t	improvement_steps_

int	learning_iteration_

int	prev_sample_iteration_

int	perfect_delay_

int	last_perfect_training_iteration_

GenericVector< double >	error_buffers_ [ET_COUNT]

double	error_rates_ [ET_COUNT]

TessdataManager	mgr_

Protected Attributes inherited from tesseract::LSTMRecognizer
Network *	network_

CCUtil	ccutil_

UnicharCompress	recoder_

STRING	network_str_

int32_t	training_flags_

int32_t	training_iteration_

int32_t	sample_iteration_

int32_t	null_char_

float	learning_rate_

float	momentum_

float	adam_beta_

TRand	randomizer_

NetworkScratch	scratch_space_

Dict *	dict_

RecodeBeamSearch *	search_

ScrollView *	debug_win_

Static Protected Attributes
static const int	kRollingBufferSize_ = 1000

Detailed Description

Definition at line 89 of file lstmtrainer.h.

Constructor & Destructor Documentation

◆ LSTMTrainer() [1/2]

tesseract::LSTMTrainer::LSTMTrainer ( )

Definition at line 73 of file lstmtrainer.cpp.

     // Default construction: random rotation off, training_data_ built with
     // 0, the default file reader/writer functions, checkpoint callbacks bound
     // to this object's ReadTrainingDump/SaveTrainingDump, and no sub-trainer.
     : randomly_rotate_(false),
       training_data_(0),
       file_reader_(LoadDataFromFile),
       file_writer_(SaveDataToFile),
       checkpoint_reader_(
           NewPermanentTessCallback(this, &LSTMTrainer::ReadTrainingDump)),
       checkpoint_writer_(
           NewPermanentTessCallback(this, &LSTMTrainer::SaveTrainingDump)),
       sub_trainer_(nullptr) {
   // Nulls the debug windows and resets the stage and iteration state.
   EmptyConstructor();
   // A zero interval makes DebugLSTMTraining skip its debug output.
   debug_interval_ = 0;
 }

◆ LSTMTrainer() [2/2]

tesseract::LSTMTrainer::LSTMTrainer	(	FileReader	file_reader,
		FileWriter	file_writer,
		CheckPointReader	checkpoint_reader,
		CheckPointWriter	checkpoint_writer,
		const char *	model_base,
		const char *	checkpoint_name,
		int	debug_interval,
		int64_t	max_memory
	)

Definition at line 87 of file lstmtrainer.cpp.

     : randomly_rotate_(false),
       training_data_(max_memory),
       file_reader_(file_reader),
       file_writer_(file_writer),
       checkpoint_reader_(checkpoint_reader),
       checkpoint_writer_(checkpoint_writer),
       sub_trainer_(nullptr),
       mgr_(file_reader) {
   EmptyConstructor();
   if (file_reader_ == nullptr) file_reader_ = LoadDataFromFile;
   if (file_writer_ == nullptr) file_writer_ = SaveDataToFile;
   if (checkpoint_reader_ == nullptr) {
     checkpoint_reader_ =
         NewPermanentTessCallback(this, &LSTMTrainer::ReadTrainingDump);
   }
   if (checkpoint_writer_ == nullptr) {
     checkpoint_writer_ =
         NewPermanentTessCallback(this, &LSTMTrainer::SaveTrainingDump);
   }
   debug_interval_ = debug_interval;
   model_base_ = model_base;
   checkpoint_name_ = checkpoint_name;
 }

◆ ~LSTMTrainer()

tesseract::LSTMTrainer::~LSTMTrainer ( )

virtual

Definition at line 116 of file lstmtrainer.cpp.

                           {
   // Debug windows; EmptyConstructor sets these to nullptr initially.
   delete align_win_;
   delete target_win_;
   delete ctc_win_;
   delete recon_win_;
   // The checkpoint callbacks are deleted here, including any that were
   // passed in through the constructor.
   delete checkpoint_reader_;
   delete checkpoint_writer_;
   // Sub-trainer, if one was created (nullptr otherwise).
   delete sub_trainer_;
 }

Member Function Documentation

◆ ActivationError()

double tesseract::LSTMTrainer::ActivationError ( ) const

inline

Definition at line 136 of file lstmtrainer.h.

                                  {
     // Returns the ET_DELTA entry of error_rates_.
     return error_rates_[ET_DELTA];
   }

◆ best_error_rate()

double tesseract::LSTMTrainer::best_error_rate ( ) const

inline

Definition at line 143 of file lstmtrainer.h.

                                  {
     // Returns the current value of best_error_rate_.
     return best_error_rate_;
   }

◆ best_iteration()

int tesseract::LSTMTrainer::best_iteration ( ) const

inline

Definition at line 146 of file lstmtrainer.h.

                              {
     // Returns the current value of best_iteration_.
     return best_iteration_;
   }

◆ best_trainer()

const GenericVector<char>& tesseract::LSTMTrainer::best_trainer ( ) const

inline

Definition at line 152 of file lstmtrainer.h.

152 { return best_trainer_; }  // Returns best_trainer_ by const reference.

tesseract::LSTMTrainer::best_trainer_

GenericVector< char > best_trainer_

Definition: lstmtrainer.h:447

◆ CharError()

double tesseract::LSTMTrainer::CharError ( ) const

inline

Definition at line 139 of file lstmtrainer.h.

139 { return error_rates_[ET_CHAR_ERROR]; }  // Returns the ET_CHAR_ERROR entry of error_rates_.

tesseract::ET_CHAR_ERROR

Definition: lstmtrainer.h:41

tesseract::LSTMTrainer::error_rates_

double error_rates_[ET_COUNT]

Definition: lstmtrainer.h:481

◆ ComputeCharError()

double tesseract::LSTMTrainer::ComputeCharError	(	const GenericVector< int > &	truth_str,
		const GenericVector< int > &	ocr_str
	)

protected

Definition at line 1187 of file lstmtrainer.cpp.

                                                                         {
   GenericVector<int> label_counts;
   label_counts.init_to_size(NumOutputs(), 0);
   int truth_size = 0;
   for (int i = 0; i < truth_str.size(); ++i) {
     if (truth_str[i] != null_char_) {
       ++label_counts[truth_str[i]];
       ++truth_size;
     }
   }
   for (int i = 0; i < ocr_str.size(); ++i) {
     if (ocr_str[i] != null_char_) {
       --label_counts[ocr_str[i]];
     }
   }
   int char_errors = 0;
   for (int i = 0; i < label_counts.size(); ++i) {
     char_errors += abs(label_counts[i]);
   }
   if (truth_size == 0) {
     return (char_errors == 0) ? 0.0 : 1.0;
   }
   return static_cast<double>(char_errors) / truth_size;
 }

◆ ComputeCTCTargets()

bool tesseract::LSTMTrainer::ComputeCTCTargets	(	const GenericVector< int > &	truth_labels,
		NetworkIO *	outputs,
		NetworkIO *	targets
	)

protected

Definition at line 1119 of file lstmtrainer.cpp.

                                                                             {
   // Bottom-clip outputs to a minimum probability.
   CTC::NormalizeProbs(outputs);
   return CTC::ComputeCTCTargets(truth_labels, null_char_,
                                 outputs->float_array(), targets);
 }

◆ ComputeErrorRates()

double tesseract::LSTMTrainer::ComputeErrorRates	(	const NetworkIO &	deltas,
		double	char_error,
		double	word_error
	)

protected

Definition at line 1130 of file lstmtrainer.cpp.

                                                                             {
   // Feeds one sample's errors into the per-type buffers via
   // UpdateErrorBuffer and returns the delta (winner) error for that sample.
   UpdateErrorBuffer(ComputeRMSError(deltas), ET_RMS);
   // Delta error is the fraction of timesteps with >0.5 error in the top choice
   // score. If zero, then the top choice characters are guaranteed correct,
   // even when there is residue in the RMS error.
   double delta_error = ComputeWinnerError(deltas);
   UpdateErrorBuffer(delta_error, ET_DELTA);
   // The word and character errors are supplied by the caller.
   UpdateErrorBuffer(word_error, ET_WORD_RECERR);
   UpdateErrorBuffer(char_error, ET_CHAR_ERROR);
   // Skip ratio measures the difference between sample_iteration_ and
   // training_iteration_, which reflects the number of unusable samples,
   // usually due to unencodable truth text, or the text not fitting in the
   // space for the output.
   // The value recorded here is sample_iteration_ - prev_sample_iteration_.
   double skip_count = sample_iteration_ - prev_sample_iteration_;
   UpdateErrorBuffer(skip_count, ET_SKIP_RATIO);
   return delta_error;
 }

◆ ComputeRMSError()

double tesseract::LSTMTrainer::ComputeRMSError ( const NetworkIO & deltas )

protected

Definition at line 1150 of file lstmtrainer.cpp.

                                                            {
   double total_error = 0.0;
   int width = deltas.Width();
   int num_classes = deltas.NumFeatures();
   for (int t = 0; t < width; ++t) {
     const float* class_errs = deltas.f(t);
     for (int c = 0; c < num_classes; ++c) {
       double error = class_errs[c];
       total_error += error * error;
     }
   }
   return sqrt(total_error / (width * num_classes));
 }

◆ ComputeTextTargets()

bool tesseract::LSTMTrainer::ComputeTextTargets	(	const NetworkIO &	outputs,
		const GenericVector< int > &	truth_labels,
		NetworkIO *	targets
	)

protected

Definition at line 1099 of file lstmtrainer.cpp.

                                                          {
   if (truth_labels.size() > targets->Width()) {
     tprintf("Error: transcription %s too long to fit into target of width %d\n",
             DecodeLabels(truth_labels).string(), targets->Width());
     return false;
   }
   for (int i = 0; i < truth_labels.size() && i < targets->Width(); ++i) {
     targets->SetActivations(i, truth_labels[i], 1.0);
   }
   for (int i = truth_labels.size(); i < targets->Width(); ++i) {
     targets->SetActivations(i, null_char_, 1.0);
   }
   return true;
 }

◆ ComputeWinnerError()

double tesseract::LSTMTrainer::ComputeWinnerError ( const NetworkIO & deltas )

protected

Definition at line 1169 of file lstmtrainer.cpp.

                                                               {
   int num_errors = 0;
   int width = deltas.Width();
   int num_classes = deltas.NumFeatures();
   for (int t = 0; t < width; ++t) {
     const float* class_errs = deltas.f(t);
     for (int c = 0; c < num_classes; ++c) {
       float abs_delta = fabs(class_errs[c]);
       // TODO(rays) Filtering cases where the delta is very large to cut out
       // GT errors doesn't work. Find a better way or get better truth.
       if (0.5 <= abs_delta)
         ++num_errors;
     }
   }
   return static_cast<double>(num_errors) / width;
 }

◆ ComputeWordError()

double tesseract::LSTMTrainer::ComputeWordError	(	STRING *	truth_str,
		STRING *	ocr_str
	)

protected

Definition at line 1215 of file lstmtrainer.cpp.

                                                                        {
   using StrMap = std::unordered_map<std::string, int, std::hash<std::string>>;
   GenericVector<STRING> truth_words, ocr_words;
   truth_str->split(' ', &truth_words);
   if (truth_words.empty()) return 0.0;
   ocr_str->split(' ', &ocr_words);
   StrMap word_counts;
   for (int i = 0; i < truth_words.size(); ++i) {
     std::string truth_word(truth_words[i].string());
     StrMap::iterator it = word_counts.find(truth_word);
     if (it == word_counts.end())
       word_counts.insert(std::make_pair(truth_word, 1));
     else
       ++it->second;
   }
   for (int i = 0; i < ocr_words.size(); ++i) {
     std::string ocr_word(ocr_words[i].string());
     StrMap::iterator it = word_counts.find(ocr_word);
     if (it == word_counts.end())
       word_counts.insert(std::make_pair(ocr_word, -1));
     else
       --it->second;
   }
   int word_recall_errs = 0;
   for (StrMap::const_iterator it = word_counts.begin(); it != word_counts.end();
        ++it) {
     if (it->second > 0) word_recall_errs += it->second;
   }
   return static_cast<double>(word_recall_errs) / truth_words.size();
 }

◆ CurrentTrainingStage()

int tesseract::LSTMTrainer::CurrentTrainingStage ( ) const

inline

Definition at line 211 of file lstmtrainer.h.

211 { return training_stage_; }  // Returns the current value of training_stage_.

tesseract::LSTMTrainer::training_stage_

int training_stage_

Definition: lstmtrainer.h:454

◆ DebugLSTMTraining()

bool tesseract::LSTMTrainer::DebugLSTMTraining	(	const NetworkIO &	inputs,
		const ImageData &	trainingdata,
		const NetworkIO &	fwd_outputs,
		const GenericVector< int > &	truth_labels,
		const NetworkIO &	outputs
	)

protected

Definition at line 1029 of file lstmtrainer.cpp.

                                                               {
   const STRING& truth_text = DecodeLabels(truth_labels);
   if (truth_text.string() == nullptr || truth_text.length() <= 0) {
     tprintf("Empty truth string at decode time!\n");
     return false;
   }
   if (debug_interval_ != 0) {
     // Get class labels, xcoords and string.
     GenericVector<int> labels;
     GenericVector<int> xcoords;
     LabelsFromOutputs(outputs, &labels, &xcoords);
     STRING text = DecodeLabels(labels);
     tprintf("Iteration %d: ALIGNED TRUTH : %s\n",
             training_iteration(), text.string());
     if (debug_interval_ > 0 && training_iteration() % debug_interval_ == 0) {
       tprintf("TRAINING activation path for truth string %s\n",
               truth_text.string());
       DebugActivationPath(outputs, labels, xcoords);
       DisplayForward(inputs, labels, xcoords, "LSTMTraining", &align_win_);
       if (OutputLossType() == LT_CTC) {
         DisplayTargets(fwd_outputs, "CTC Outputs", &ctc_win_);
         DisplayTargets(outputs, "CTC Targets", &target_win_);
       }
     }
   }
   return true;
 }

◆ DebugNetwork()

void tesseract::LSTMTrainer::DebugNetwork ( )

Definition at line 293 of file lstmtrainer.cpp.

                                {
   // Delegates to the network's DebugWeights().
   network_->DebugWeights();
 }

◆ DeSerialize()

bool tesseract::LSTMTrainer::DeSerialize	(	const TessdataManager *	mgr,
		TFile *	fp
	)

Definition at line 468 of file lstmtrainer.cpp.

                                                                    {
   // Reads the trainer state from fp. The base recognizer is read first, and
   // every field after it is read in a fixed order; any failed read returns
   // false, apart from the special case below.
   if (!LSTMRecognizer::DeSerialize(mgr, fp)) return false;
   if (!fp->DeSerialize(&learning_iteration_)) {
     // Special case. If we successfully decoded the recognizer, but fail here
     // then it means we were just given a recognizer, so issue a warning and
     // allow it.
     tprintf("Warning: LSTMTrainer deserialized an LSTMRecognizer!\n");
     learning_iteration_ = 0;
     network_->SetEnableTraining(TS_ENABLED);
     return true;
   }
   if (!fp->DeSerialize(&prev_sample_iteration_)) return false;
   if (!fp->DeSerialize(&perfect_delay_)) return false;
   if (!fp->DeSerialize(&last_perfect_training_iteration_)) return false;
   // One error buffer per error type, followed by the error rates array.
   for (int i = 0; i < ET_COUNT; ++i) {
     if (!error_buffers_[i].DeSerialize(fp)) return false;
   }
   if (!fp->DeSerialize(&error_rates_[0], countof(error_rates_))) return false;
   if (!fp->DeSerialize(&training_stage_)) return false;
   // The stored amount value decides how much more of the stream is read.
   uint8_t amount;
   if (!fp->DeSerialize(&amount)) return false;
   if (amount == LIGHT) return true;  // Don't read the rest.
   if (!fp->DeSerialize(&best_error_rate_)) return false;
   if (!fp->DeSerialize(&best_error_rates_[0], countof(best_error_rates_))) return false;
   if (!fp->DeSerialize(&best_iteration_)) return false;
   if (!fp->DeSerialize(&worst_error_rate_)) return false;
   if (!fp->DeSerialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
   if (!fp->DeSerialize(&worst_iteration_)) return false;
   if (!fp->DeSerialize(&stall_iteration_)) return false;
   if (!best_model_data_.DeSerialize(fp)) return false;
   if (!worst_model_data_.DeSerialize(fp)) return false;
   // best_trainer_ is read unless amount is NO_BEST_TRAINER.
   if (amount != NO_BEST_TRAINER && !best_trainer_.DeSerialize(fp)) return false;
   // The sub-trainer is read as a nested dump; an empty dump means none.
   GenericVector<char> sub_data;
   if (!sub_data.DeSerialize(fp)) return false;
   // Any existing sub-trainer is discarded before the new one is installed.
   delete sub_trainer_;
   if (sub_data.empty()) {
     sub_trainer_ = nullptr;
   } else {
     sub_trainer_ = new LSTMTrainer();
     if (!ReadTrainingDump(sub_data, sub_trainer_)) return false;
   }
   if (!best_error_history_.DeSerialize(fp)) return false;
   if (!best_error_iterations_.DeSerialize(fp)) return false;
   return fp->DeSerialize(&improvement_steps_);
 }

◆ DisplayTargets()

void tesseract::LSTMTrainer::DisplayTargets	(	const NetworkIO &	targets,
		const char *	window_name,
		ScrollView **	window
	)

protected

Definition at line 1062 of file lstmtrainer.cpp.

                                                                                {
 #ifndef GRAPHICS_DISABLED  // do nothing if there's no graphics.
   // Draws one outline per feature in *window: for each run of timesteps
   // whose scaled target is at least 1, the pen follows the scaled values
   // and then returns along the baseline to where the run started.
   int width = targets.Width();
   int num_features = targets.NumFeatures();
   Network::ClearWindow(true, window_name, width * kTargetXScale, kTargetYScale,
                        window);
   for (int c = 0; c < num_features; ++c) {
     // Pen color is picked per feature index, cycling through the palette.
     int color = c % (ScrollView::GREEN_YELLOW - 1) + 2;
     (*window)->Pen(static_cast<ScrollView::Color>(color));
     // start_t is the first timestep of the run being drawn, or -1 for none.
     int start_t = -1;
     for (int t = 0; t < width; ++t) {
       double target = targets.f(t)[c];
       target *= kTargetYScale;
       if (target >= 1) {
         if (start_t < 0) {
           // A new run begins: place the cursor one step back on the baseline.
           (*window)->SetCursor(t - 1, 0);
           start_t = t;
         }
         (*window)->DrawTo(t, target);
       } else if (start_t >= 0) {
         // The run ended: drop to the baseline and draw back to its start.
         (*window)->DrawTo(t, 0);
         (*window)->DrawTo(start_t - 1, 0);
         start_t = -1;
       }
     }
     if (start_t >= 0) {
       // Close a run that was still open at the last timestep.
       (*window)->DrawTo(width, 0);
       (*window)->DrawTo(start_t - 1, 0);
     }
   }
   (*window)->Update();
 #endif  // GRAPHICS_DISABLED
 }

◆ DumpFilename()

STRING tesseract::LSTMTrainer::DumpFilename ( ) const

Definition at line 940 of file lstmtrainer.cpp.

                                        {
   // Builds the name from, in order: model_base_, best_error_rate_, "_",
   // best_iteration_ and the ".checkpoint" suffix.
   STRING filename;
   filename.add_str_double(model_base_.string(), best_error_rate_);
   filename.add_str_int("_", best_iteration_);
   filename += ".checkpoint";
   return filename;
 }

◆ EmptyConstructor()

void tesseract::LSTMTrainer::EmptyConstructor ( )

protected

Definition at line 1014 of file lstmtrainer.cpp.

                                    {
   align_win_ = nullptr;
   target_win_ = nullptr;
   ctc_win_ = nullptr;
   recon_win_ = nullptr;
   checkpoint_iteration_ = 0;
   training_stage_ = 0;
   num_training_stages_ = 2;
   InitIterations();
 }

◆ EncodeString() [1/2]

bool tesseract::LSTMTrainer::EncodeString	(	const STRING &	str,
		GenericVector< int > *	labels
	)		const

inline

Definition at line 246 of file lstmtrainer.h.

                                                                          {
     return EncodeString(str, GetUnicharset(), IsRecoding() ? &recoder_ : nullptr,
                         SimpleTextOutput(), null_char_, labels);
   }

◆ EncodeString() [2/2]

bool tesseract::LSTMTrainer::EncodeString	(	const STRING &	str,
		const UNICHARSET &	unicharset,
		const UnicharCompress *	recoder,
		bool	simple_text,
		int	null_char,
		GenericVector< int > *	labels
	)

static

Definition at line 718 of file lstmtrainer.cpp.

                                                                           {
   if (str.string() == nullptr || str.length() <= 0) {
     tprintf("Empty truth string!\n");
     return false;
   }
   int err_index;
   GenericVector<int> internal_labels;
   labels->truncate(0);
   if (!simple_text) labels->push_back(null_char);
   std::string cleaned = unicharset.CleanupString(str.string());
   if (unicharset.encode_string(cleaned.c_str(), true, &internal_labels, nullptr,
                                &err_index)) {
     bool success = true;
     for (int i = 0; i < internal_labels.size(); ++i) {
       if (recoder != nullptr) {
         // Re-encode labels via recoder.
         RecodedCharID code;
         int len = recoder->EncodeUnichar(internal_labels[i], &code);
         if (len > 0) {
           for (int j = 0; j < len; ++j) {
             labels->push_back(code(j));
             if (!simple_text) labels->push_back(null_char);
           }
         } else {
           success = false;
           err_index = 0;
           break;
         }
       } else {
         labels->push_back(internal_labels[i]);
         if (!simple_text) labels->push_back(null_char);
       }
     }
     if (success) return true;
   }
   tprintf("Encoding of string failed! Failure bytes:");
   while (err_index < cleaned.size()) {
     tprintf(" %x", cleaned[err_index++]);
   }
   tprintf("\n");
   return false;
 }

◆ error_rates()

const double* tesseract::LSTMTrainer::error_rates ( ) const

inline

Definition at line 140 of file lstmtrainer.h.

                                     {
     // Returns a pointer to the first element of the error_rates_ array.
     return error_rates_;
   }

◆ FillErrorBuffer()

void tesseract::LSTMTrainer::FillErrorBuffer	(	double	new_error,
		ErrorTypes	type
	)

Definition at line 949 of file lstmtrainer.cpp.

                                                                    {
   for (int i = 0; i < kRollingBufferSize_; ++i)
     error_buffers_[type][i] = new_error;
   error_rates_[type] = 100.0 * new_error;
 }

◆ GridSearchDictParams()

Trainability tesseract::LSTMTrainer::GridSearchDictParams	(	const ImageData *	trainingdata,
		int	iteration,
		double	min_dict_ratio,
		double	dict_ratio_step,
		double	max_dict_ratio,
		double	min_cert_offset,
		double	cert_offset_step,
		double	max_cert_offset,
		STRING *	results
	)

Definition at line 243 of file lstmtrainer.cpp.

                                                                       {
   // Runs the forward pass once, then decodes the outputs repeatedly over a
   // grid of (dict ratio, cert offset) pairs, appending the word error for
   // each pair to *results. A no-dictionary baseline is recorded first as
   // "0,0=".
   // NOTE(review): both loops advance by the caller's step arguments; a step
   // <= 0 with min < max would never terminate - confirm callers always pass
   // positive steps.
   sample_iteration_ = iteration;
   NetworkIO fwd_outputs, targets;
   Trainability result =
       PrepareForBackward(trainingdata, &fwd_outputs, &targets);
   // Nothing to search if the sample is unusable or there is no dictionary.
   if (result == UNENCODABLE || result == HI_PRECISION_ERR || dict_ == nullptr)
     return result;
 
   // Encode/decode the truth to get the normalization.
   GenericVector<int> truth_labels, ocr_labels, xcoords;
   ASSERT_HOST(EncodeString(trainingdata->transcription(), &truth_labels));
   // NO-dict error.
   RecodeBeamSearch base_search(recoder_, null_char_, SimpleTextOutput(), nullptr);
   base_search.Decode(fwd_outputs, 1.0, 0.0, RecodeBeamSearch::kMinCertainty,
                      nullptr);
   base_search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
   STRING truth_text = DecodeLabels(truth_labels);
   STRING ocr_text = DecodeLabels(ocr_labels);
   double baseline_error = ComputeWordError(&truth_text, &ocr_text);
   results->add_str_double("0,0=", baseline_error);
 
   // Dictionary-assisted search, reused for every grid point.
   RecodeBeamSearch search(recoder_, null_char_, SimpleTextOutput(), dict_);
   for (double r = min_dict_ratio; r < max_dict_ratio; r += dict_ratio_step) {
     for (double c = min_cert_offset; c < max_cert_offset;
          c += cert_offset_step) {
       search.Decode(fwd_outputs, r, c, RecodeBeamSearch::kMinCertainty, nullptr);
       search.ExtractBestPathAsLabels(&ocr_labels, &xcoords);
       // Both strings are re-decoded before every ComputeWordError call.
       truth_text = DecodeLabels(truth_labels);
       ocr_text = DecodeLabels(ocr_labels);
       // This is destructive on both strings.
       double word_error = ComputeWordError(&truth_text, &ocr_text);
       // Log the first grid point and any non-finite word error.
       if ((r == min_dict_ratio && c == min_cert_offset) ||
           !std::isfinite(word_error)) {
         STRING t = DecodeLabels(truth_labels);
         STRING o = DecodeLabels(ocr_labels);
         tprintf("r=%g, c=%g, truth=%s, ocr=%s, wderr=%g, truth[0]=%d\n", r, c,
                 t.string(), o.string(), word_error, truth_labels[0]);
       }
       // Appends " <r>,<c>=<word_error>" for this grid point.
       results->add_str_double(" ", r);
       results->add_str_double(",", c);
       results->add_str_double("=", word_error);
     }
   }
   return result;
 }

◆ improvement_steps()

int32_t tesseract::LSTMTrainer::improvement_steps ( ) const

inline

Definition at line 150 of file lstmtrainer.h.

150 { return improvement_steps_; }  // Returns the current value of improvement_steps_.

tesseract::LSTMTrainer::improvement_steps_

int32_t improvement_steps_

Definition: lstmtrainer.h:460

◆ InitCharSet() [1/3]

void tesseract::LSTMTrainer::InitCharSet ( const std::string & traineddata_path )

inline

Definition at line 109 of file lstmtrainer.h.

                                                       {
     // Initializes mgr_ from the file at traineddata_path, asserting that the
     // call succeeds, then runs the protected InitCharSet() on it.
     ASSERT_HOST(mgr_.Init(traineddata_path.c_str()));
     InitCharSet();
   }

◆ InitCharSet() [2/3]

void tesseract::LSTMTrainer::InitCharSet ( const TessdataManager & mgr )

inline

Definition at line 113 of file lstmtrainer.h.

                                                {
     // Copies the given manager into mgr_, then runs the protected
     // InitCharSet() on it.
     mgr_ = mgr;
     InitCharSet();
   }

◆ InitCharSet() [3/3]

void tesseract::LSTMTrainer::InitCharSet ( )

protected

Definition at line 992 of file lstmtrainer.cpp.

                               {
   EmptyConstructor();
   training_flags_ = TF_COMPRESS_UNICHARSET;
   // Initialize the unicharset and recoder.
   if (!LoadCharsets(&mgr_)) {
     ASSERT_HOST(
         "Must provide a traineddata containing lstm_unicharset and"
         " lstm_recoder!\n" != nullptr);
   }
   SetNullChar();
 }

◆ InitIterations()

void tesseract::LSTMTrainer::InitIterations ( )

Definition at line 218 of file lstmtrainer.cpp.

                                  {
   sample_iteration_ = 0;
   training_iteration_ = 0;
   learning_iteration_ = 0;
   prev_sample_iteration_ = 0;
   best_error_rate_ = 100.0;
   best_iteration_ = 0;
   worst_error_rate_ = 0.0;
   worst_iteration_ = 0;
   stall_iteration_ = kMinStallIterations;
   improvement_steps_ = kMinStallIterations;
   perfect_delay_ = 0;
   last_perfect_training_iteration_ = 0;
   for (int i = 0; i < ET_COUNT; ++i) {
     best_error_rates_[i] = 100.0;
     worst_error_rates_[i] = 0.0;
     error_buffers_[i].init_to_size(kRollingBufferSize_, 0.0);
     error_rates_[i] = 100.0;
   }
   error_rate_of_last_saved_best_ = kMinStartedErrorRate;
 }

◆ InitNetwork()

bool tesseract::LSTMTrainer::InitNetwork	(	const STRING &	network_spec,
		int	append_index,
		int	net_flags,
		float	weight_range,
		float	learning_rate,
		float	momentum,
		float	adam_beta
	)

Definition at line 171 of file lstmtrainer.cpp.

                                                {
   mgr_.SetVersionString(mgr_.VersionString() + ":" + network_spec.string());
   adam_beta_ = adam_beta;
   learning_rate_ = learning_rate;
   momentum_ = momentum;
   SetNullChar();
   if (!NetworkBuilder::InitNetwork(recoder_.code_range(), network_spec,
                                    append_index, net_flags, weight_range,
                                    &randomizer_, &network_)) {
     return false;
   }
   network_str_ += network_spec;
   tprintf("Built network:%s from request %s\n",
           network_->spec().string(), network_spec.string());
   tprintf(
       "Training parameters:\n  Debug interval = %d,"
       " weights = %g, learning rate = %g, momentum=%g\n",
       debug_interval_, weight_range, learning_rate_, momentum_);
   tprintf("null char=%d\n", null_char_);
   return true;
 }

◆ InitTensorFlowNetwork()

int tesseract::LSTMTrainer::InitTensorFlowNetwork ( const std::string & tf_proto )

Definition at line 198 of file lstmtrainer.cpp.

                                                                 {
 #ifdef INCLUDE_TENSORFLOW
   delete network_;
   TFNetwork* tf_net = new TFNetwork("TensorFlow");
   training_iteration_ = tf_net->InitFromProtoStr(tf_proto);
   if (training_iteration_ == 0) {
     tprintf("InitFromProtoStr failed!!\n");
     return 0;
   }
   network_ = tf_net;
   ASSERT_HOST(recoder_.code_range() == tf_net->num_classes());
   return training_iteration_;
 #else
   tprintf("TensorFlow not compiled in! -DINCLUDE_TENSORFLOW\n");
   return 0;
 #endif
 }

◆ LastSingleError()

double tesseract::LSTMTrainer::LastSingleError ( ErrorTypes type ) const

inline

Definition at line 160 of file lstmtrainer.h.

                                                 {
     return error_buffers_[type]
                          [(training_iteration() + kRollingBufferSize_ - 1) %
                           kRollingBufferSize_];
   }

◆ learning_iteration()

int tesseract::LSTMTrainer::learning_iteration ( ) const

inline

Definition at line 149 of file lstmtrainer.h.

// Accessor: returns the current value of learning_iteration_.
149 { return learning_iteration_; }

tesseract::LSTMTrainer::learning_iteration_

int learning_iteration_

Definition: lstmtrainer.h:464

◆ LoadAllTrainingData()

bool tesseract::LSTMTrainer::LoadAllTrainingData	(	const GenericVector< STRING > &	filenames,
		CachingStrategy	cache_strategy,
		bool	randomly_rotate
	)

Definition at line 300 of file lstmtrainer.cpp.

                                                             {
   randomly_rotate_ = randomly_rotate;
   training_data_.Clear();
   return training_data_.LoadDocuments(filenames, cache_strategy, file_reader_);
 }

◆ LogIterations()

void tesseract::LSTMTrainer::LogIterations	(	const char *	intro_str,
		STRING *	log_msg
	)		const

Definition at line 412 of file lstmtrainer.cpp.

                                                                             {
   *log_msg += intro_str;
   log_msg->add_str_int(" iteration ", learning_iteration());
   log_msg->add_str_int("/", training_iteration());
   log_msg->add_str_int("/", sample_iteration());
 }

◆ MaintainCheckpoints()

bool tesseract::LSTMTrainer::MaintainCheckpoints	(	TestCallback	tester,
		STRING *	log_msg
	)

Definition at line 312 of file lstmtrainer.cpp.

                                                                             {
   // Builds a status line in *log_msg, updates the best/worst bookkeeping
   // from the current char error, starts or updates a sub-trainer when
   // training has stalled, and writes checkpoints. Returns true if a new
   // best or new worst error was seen, or if the sub-trainer reported
   // anything other than STR_NONE.
   // NOTE(review): checkpoint_writer_ and file_writer_ are null-checked only
   // for the final checkpoint write; the earlier uses of checkpoint_writer_,
   // checkpoint_reader_ and file_writer_ dereference them unguarded. Confirm
   // they can never be null here.
   PrepareLogMsg(log_msg);
   double error_rate = CharError();
   int iteration = learning_iteration();
   if (iteration >= stall_iteration_ &&
       error_rate > best_error_rate_ * (1.0 + kSubTrainerMarginFraction) &&
       best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
     // It hasn't got any better in a long while, and is a margin worse than the
     // best, so go back to the best model and try a different learning rate.
     StartSubtrainer(log_msg);
   }
   SubTrainerResult sub_trainer_result = STR_NONE;
   if (sub_trainer_ != nullptr) {
     sub_trainer_result = UpdateSubtrainer(log_msg);
     if (sub_trainer_result == STR_REPLACED) {
       // Reset the inputs, as we have overwritten *this.
       error_rate = CharError();
       iteration = learning_iteration();
       PrepareLogMsg(log_msg);
     }
   }
   bool result = true;  // Something interesting happened.
   GenericVector<char> rec_model_data;
   if (error_rate < best_error_rate_) {
     // New best: record it, drop any sub-trainer, and push the stall point
     // forward by kMinStallIterations.
     SaveRecognitionDump(&rec_model_data);
     log_msg->add_str_double(" New best char error = ", error_rate);
     *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
     // If sub_trainer_ is not nullptr, either *this beat it to a new best, or it
     // just overwrote *this. In either case, we have finished with it.
     delete sub_trainer_;
     sub_trainer_ = nullptr;
     stall_iteration_ = learning_iteration() + kMinStallIterations;
     if (TransitionTrainingStage(kStageTransitionThreshold)) {
       log_msg->add_str_int(" Transitioned to stage ", CurrentTrainingStage());
     }
     // Keep an in-memory dump of this trainer as the new best_trainer_.
     checkpoint_writer_->Run(NO_BEST_TRAINER, this, &best_trainer_);
     // Only write the best model to disk when it improves on the last saved
     // best by the kBestCheckpointFraction factor.
     if (error_rate < error_rate_of_last_saved_best_ * kBestCheckpointFraction) {
       STRING best_model_name = DumpFilename();
       if (!(*file_writer_)(best_trainer_, best_model_name)) {
         *log_msg += " failed to write best model:";
       } else {
         *log_msg += " wrote best model:";
         error_rate_of_last_saved_best_ = best_error_rate_;
       }
       *log_msg += best_model_name;
     }
   } else if (error_rate > worst_error_rate_) {
     // New worst: record it, and revert to the best model if the gap between
     // worst and best exceeds kMinDivergenceRate.
     SaveRecognitionDump(&rec_model_data);
     log_msg->add_str_double(" New worst char error = ", error_rate);
     *log_msg += UpdateErrorGraph(iteration, error_rate, rec_model_data, tester);
     if (worst_error_rate_ > best_error_rate_ + kMinDivergenceRate &&
         best_error_rate_ < kMinStartedErrorRate && !best_trainer_.empty()) {
       // Error rate has ballooned. Go back to the best model.
       *log_msg += "\nDivergence! ";
       // Copy best_trainer_ before reading it, as it will get overwritten.
       GenericVector<char> revert_data(best_trainer_);
       if (checkpoint_reader_->Run(revert_data, this)) {
         LogIterations("Reverted to", log_msg);
         ReduceLearningRates(this, log_msg);
       } else {
         LogIterations("Failed to Revert at", log_msg);
       }
       // If it fails again, we will wait twice as long before reverting again.
       stall_iteration_ = iteration + 2 * (iteration - learning_iteration());
       // Re-save the best trainer with the new learning rates and stall
       // iteration.
       checkpoint_writer_->Run(NO_BEST_TRAINER, this, &best_trainer_);
     }
   } else {
     // Something interesting happened only if the sub_trainer_ was trained.
     result = sub_trainer_result != STR_NONE;
   }
   if (checkpoint_writer_ != nullptr && file_writer_ != nullptr &&
       checkpoint_name_.length() > 0) {
     // Write a current checkpoint.
     GenericVector<char> checkpoint;
     if (!checkpoint_writer_->Run(FULL, this, &checkpoint) ||
         !(*file_writer_)(checkpoint, checkpoint_name_)) {
       *log_msg += " failed to write checkpoint.";
     } else {
       *log_msg += " wrote checkpoint.";
     }
   }
   *log_msg += "\n";
   return result;
 }

◆ MaintainCheckpointsSpecific()

bool tesseract::LSTMTrainer::MaintainCheckpointsSpecific	(	int	iteration,
		const GenericVector< char > *	train_model,
		const GenericVector< char > *	rec_model,
		TestCallback	tester,
		STRING *	log_msg
	)

◆ MapRecoder()

std::vector< int > tesseract::LSTMTrainer::MapRecoder	(	const UNICHARSET &	old_chset,
		const UnicharCompress &	old_recoder
	)		const

Definition at line 957 of file lstmtrainer.cpp.

                                                                            {
   // Returns a vector with one entry per code of the current recoder_. Entry
   // c holds the matching code from old_recoder, or -1 if none was found.
   int num_new_codes = recoder_.code_range();
   int num_new_unichars = GetUnicharset().size();
   std::vector<int> code_map(num_new_codes, -1);
   for (int c = 0; c < num_new_codes; ++c) {
     int old_code = -1;
     // Find all new unichar_ids that recode to something that includes c.
     // The <= is to include the null char, which may be beyond the unicharset.
     for (int uid = 0; uid <= num_new_unichars; ++uid) {
       RecodedCharID codes;
       int length = recoder_.EncodeUnichar(uid, &codes);
       // Locate the position of code c within this unichar's encoding.
       int code_index = 0;
       while (code_index < length && codes(code_index) != c) ++code_index;
       if (code_index == length) continue;  // c is not used by this unichar.
       // The old unicharset must have the same unichar.
       // For the extra uid == num_new_unichars slot, the last id of the old
       // unicharset is used instead of a lookup by string.
       int old_uid =
           uid < num_new_unichars
               ? old_chset.unichar_to_id(GetUnicharset().id_to_unichar(uid))
               : old_chset.size() - 1;
       if (old_uid == INVALID_UNICHAR_ID) continue;
       // The encoding of old_uid at the same code_index is the old code.
       RecodedCharID old_codes;
       if (code_index < old_recoder.EncodeUnichar(old_uid, &old_codes)) {
         old_code = old_codes(code_index);
         break;  // The first usable match wins.
       }
     }
     code_map[c] = old_code;
   }
   return code_map;
 }

◆ mutable_training_data()

DocumentCache* tesseract::LSTMTrainer::mutable_training_data ( )

inline

Definition at line 168 of file lstmtrainer.h.

// Accessor: returns a mutable pointer to the training_data_ member.
168 { return &training_data_; }

tesseract::LSTMTrainer::training_data_

DocumentCache training_data_

Definition: lstmtrainer.h:414

◆ NewSingleError()

double tesseract::LSTMTrainer::NewSingleError ( ErrorTypes type ) const

inline

Definition at line 154 of file lstmtrainer.h.

                                                {
     return error_buffers_[type][training_iteration() % kRollingBufferSize_];
   }

◆ PrepareForBackward()

Trainability tesseract::LSTMTrainer::PrepareForBackward	(	const ImageData *	trainingdata,
		NetworkIO *	fwd_outputs,
		NetworkIO *	targets
	)

Definition at line 798 of file lstmtrainer.cpp.

                                                                  {
   // Runs the forward pass on trainingdata and fills *targets ready for
   // back-propagation: the truth is encoded, targets are computed according
   // to the output loss type, and finally fwd_outputs is subtracted from the
   // targets. Returns UNENCODABLE on any failure along the way, PERFECT when
   // the computed delta error is exactly zero, HI_PRECISION_ERR when
   // AnySuspiciousTruth(kHighConfidence) reports true, else TRAINABLE.
   if (trainingdata == nullptr) {
     tprintf("Null trainingdata.\n");
     return UNENCODABLE;
   }
   // Debug output is enabled on every debug_interval_-th training iteration.
   bool debug = debug_interval_ > 0 &&
       training_iteration() % debug_interval_ == 0;
   GenericVector<int> truth_labels;
   if (!EncodeString(trainingdata->transcription(), &truth_labels)) {
     tprintf("Can't encode transcription: '%s' in language '%s'\n",
             trainingdata->transcription().string(),
             trainingdata->language().string());
     return UNENCODABLE;
   }
   bool upside_down = false;
   if (randomly_rotate_) {
     // This ensures consistent training results.
     SetRandomSeed();
     upside_down = randomizer_.SignedRand(1.0) > 0.0;
     if (upside_down) {
       // Modify the truth labels to match the rotation:
       // Apart from space and null, increment the label. This changes the
       // script-id to the same script-id but upside-down.
       // The labels need to be reversed in order, as the first is now the last.
       for (int c = 0; c < truth_labels.size(); ++c) {
         if (truth_labels[c] != UNICHAR_SPACE && truth_labels[c] != null_char_)
           ++truth_labels[c];
       }
       truth_labels.reverse();
     }
   }
   // Reject transcriptions that consist solely of spaces and null chars.
   int w = 0;
   while (w < truth_labels.size() &&
          (truth_labels[w] == UNICHAR_SPACE || truth_labels[w] == null_char_))
     ++w;
   if (w == truth_labels.size()) {
     tprintf("Blank transcription: %s\n",
             trainingdata->transcription().string());
     return UNENCODABLE;
   }
   float image_scale;
   NetworkIO inputs;
   // The same flag, true when the sample has no boxes, is passed for two of
   // RecognizeLine's arguments.
   bool invert = trainingdata->boxes().empty();
   if (!RecognizeLine(*trainingdata, invert, debug, invert, upside_down,
                      &image_scale, &inputs, fwd_outputs)) {
     tprintf("Image not trainable\n");
     return UNENCODABLE;
   }
   targets->Resize(*fwd_outputs, network_->NumOutputs());
   // Compute the targets according to the loss type of the output layer.
   LossType loss_type = OutputLossType();
   if (loss_type == LT_SOFTMAX) {
     if (!ComputeTextTargets(*fwd_outputs, truth_labels, targets)) {
       tprintf("Compute simple targets failed!\n");
       return UNENCODABLE;
     }
   } else if (loss_type == LT_CTC) {
     if (!ComputeCTCTargets(truth_labels, fwd_outputs, targets)) {
       tprintf("Compute CTC targets failed!\n");
       return UNENCODABLE;
     }
   } else {
     tprintf("Logistic outputs not implemented yet!\n");
     return UNENCODABLE;
   }
   GenericVector<int> ocr_labels;
   GenericVector<int> xcoords;
   LabelsFromOutputs(*fwd_outputs, &ocr_labels, &xcoords);
   // CTC does not produce correct target labels to begin with.
   if (loss_type != LT_CTC) {
     LabelsFromOutputs(*targets, &truth_labels, &xcoords);
   }
   if (!DebugLSTMTraining(inputs, *trainingdata, *fwd_outputs, truth_labels,
                          *targets)) {
     tprintf("Input width was %d\n", inputs.Width());
     return UNENCODABLE;
   }
   STRING ocr_text = DecodeLabels(ocr_labels);
   STRING truth_text = DecodeLabels(truth_labels);
   // From here on *targets holds the result of subtracting the forward
   // outputs, which is what the error rates below are computed from.
   targets->SubtractAllFromFloat(*fwd_outputs);
   if (debug_interval_ != 0) {
     tprintf("Iteration %d: BEST OCR TEXT : %s\n", training_iteration(),
             ocr_text.string());
   }
   double char_error = ComputeCharError(truth_labels, ocr_labels);
   double word_error = ComputeWordError(&truth_text, &ocr_text);
   double delta_error = ComputeErrorRates(*targets, char_error, word_error);
   if (debug_interval_ != 0) {
     tprintf("File %s page %d %s:\n", trainingdata->imagefilename().string(),
             trainingdata->page_number(), delta_error == 0.0 ? "(Perfect)" : "");
   }
   if (delta_error == 0.0) return PERFECT;
   if (targets->AnySuspiciousTruth(kHighConfidence)) return HI_PRECISION_ERR;
   return TRAINABLE;
 }

◆ PrepareLogMsg()

void tesseract::LSTMTrainer::PrepareLogMsg ( STRING * log_msg ) const

Definition at line 400 of file lstmtrainer.cpp.

                                                      {
   LogIterations("At", log_msg);
   log_msg->add_str_double(", Mean rms=", error_rates_[ET_RMS]);
   log_msg->add_str_double("%, delta=", error_rates_[ET_DELTA]);
   log_msg->add_str_double("%, char train=", error_rates_[ET_CHAR_ERROR]);
   log_msg->add_str_double("%, word train=", error_rates_[ET_WORD_RECERR]);
   log_msg->add_str_double("%, skip ratio=", error_rates_[ET_SKIP_RATIO]);
   *log_msg += "%, ";
 }

◆ ReadLocalTrainingDump()

bool tesseract::LSTMTrainer::ReadLocalTrainingDump	(	const TessdataManager *	mgr,
		const char *	data,
		int	size
	)

Definition at line 909 of file lstmtrainer.cpp.

                                                                     {
   if (size == 0) {
     tprintf("Warning: data size is 0 in LSTMTrainer::ReadLocalTrainingDump\n");
     return false;
   }
   TFile fp;
   fp.Open(data, size);
   return DeSerialize(mgr, &fp);
 }

◆ ReadSizedTrainingDump()

bool tesseract::LSTMTrainer::ReadSizedTrainingDump	(	const char *	data,
		int	size,
		LSTMTrainer *	trainer
	)		const

inline

Definition at line 296 of file lstmtrainer.h.

                                                           {
     // Forwards to trainer->ReadLocalTrainingDump, passing this object's mgr_
     // together with the raw buffer; the result of that call is returned.
     return trainer->ReadLocalTrainingDump(&mgr_, data, size);
   }

◆ ReadTrainingDump()

bool tesseract::LSTMTrainer::ReadTrainingDump	(	const GenericVector< char > &	data,
		LSTMTrainer *	trainer
	)		const

inline

Definition at line 291 of file lstmtrainer.h.

                                                     {
     if (data.empty()) return false;
     return ReadSizedTrainingDump(&data[0], data.size(), trainer);
   }

◆ ReduceLayerLearningRates()

int tesseract::LSTMTrainer::ReduceLayerLearningRates	(	double	factor,
		int	num_samples,
		LSTMTrainer *	samples_trainer
	)

Definition at line 609 of file lstmtrainer.cpp.

                                                                         {
   enum WhichWay {
     LR_DOWN,  // Learning rate will go down by factor.
     LR_SAME,  // Learning rate will stay the same.
     LR_COUNT  // Size of arrays.
   };
   GenericVector<STRING> layers = EnumerateLayers();
   int num_layers = layers.size();
   GenericVector<int> num_weights;
   num_weights.init_to_size(num_layers, 0);
   GenericVector<double> bad_sums[LR_COUNT];
   GenericVector<double> ok_sums[LR_COUNT];
   for (int i = 0; i < LR_COUNT; ++i) {
     bad_sums[i].init_to_size(num_layers, 0.0);
     ok_sums[i].init_to_size(num_layers, 0.0);
   }
   double momentum_factor = 1.0 / (1.0 - momentum_);
   GenericVector<char> orig_trainer;
   samples_trainer->SaveTrainingDump(LIGHT, this, &orig_trainer);
   for (int i = 0; i < num_layers; ++i) {
     Network* layer = GetLayer(layers[i]);
     num_weights[i] = layer->IsTraining() ? layer->num_weights() : 0;
   }
   int iteration = sample_iteration();
   for (int s = 0; s < num_samples; ++s) {
     // Which way will we modify the learning rate?
     for (int ww = 0; ww < LR_COUNT; ++ww) {
       // Transfer momentum to learning rate and adjust by the ww factor.
       float ww_factor = momentum_factor;
       if (ww == LR_DOWN) ww_factor *= factor;
       // Make a copy of *this, so we can mess about without damaging anything.
       LSTMTrainer copy_trainer;
       samples_trainer->ReadTrainingDump(orig_trainer, &copy_trainer);
       // Clear the updates, doing nothing else.
       copy_trainer.network_->Update(0.0, 0.0, 0.0, 0);
       // Adjust the learning rate in each layer.
       for (int i = 0; i < num_layers; ++i) {
         if (num_weights[i] == 0) continue;
         copy_trainer.ScaleLayerLearningRate(layers[i], ww_factor);
       }
       copy_trainer.SetIteration(iteration);
       // Train on the sample, but keep the update in updates_ instead of
       // applying to the weights.
       const ImageData* trainingdata =
           copy_trainer.TrainOnLine(samples_trainer, true);
       if (trainingdata == nullptr) continue;
       // We'll now use this trainer again for each layer.
       GenericVector<char> updated_trainer;
       samples_trainer->SaveTrainingDump(LIGHT, &copy_trainer, &updated_trainer);
       for (int i = 0; i < num_layers; ++i) {
         if (num_weights[i] == 0) continue;
         LSTMTrainer layer_trainer;
         samples_trainer->ReadTrainingDump(updated_trainer, &layer_trainer);
         Network* layer = layer_trainer.GetLayer(layers[i]);
         // Update the weights in just the layer, using Adam if enabled.
         layer->Update(0.0, momentum_, adam_beta_,
                       layer_trainer.training_iteration_ + 1);
         // Zero the updates matrix again.
         layer->Update(0.0, 0.0, 0.0, 0);
         // Train again on the same sample, again holding back the updates.
         layer_trainer.TrainOnLine(trainingdata, true);
         // Count the sign changes in the updates in layer vs in copy_trainer.
         float before_bad = bad_sums[ww][i];
         float before_ok = ok_sums[ww][i];
         layer->CountAlternators(*copy_trainer.GetLayer(layers[i]),
                                 &ok_sums[ww][i], &bad_sums[ww][i]);
         float bad_frac =
             bad_sums[ww][i] + ok_sums[ww][i] - before_bad - before_ok;
         if (bad_frac > 0.0f)
           bad_frac = (bad_sums[ww][i] - before_bad) / bad_frac;
       }
     }
     ++iteration;
   }
   int num_lowered = 0;
   for (int i = 0; i < num_layers; ++i) {
     if (num_weights[i] == 0) continue;
     Network* layer = GetLayer(layers[i]);
     float lr = GetLayerLearningRate(layers[i]);
     double total_down = bad_sums[LR_DOWN][i] + ok_sums[LR_DOWN][i];
     double total_same = bad_sums[LR_SAME][i] + ok_sums[LR_SAME][i];
     double frac_down = bad_sums[LR_DOWN][i] / total_down;
     double frac_same = bad_sums[LR_SAME][i] / total_same;
     tprintf("Layer %d=%s: lr %g->%g%%, lr %g->%g%%", i, layer->name().string(),
             lr * factor, 100.0 * frac_down, lr, 100.0 * frac_same);
     if (frac_down < frac_same * kImprovementFraction) {
       tprintf(" REDUCED\n");
       ScaleLayerLearningRate(layers[i], factor);
       ++num_lowered;
     } else {
       tprintf(" SAME\n");
     }
   }
   if (num_lowered == 0) {
     // Just lower everything to make sure.
     for (int i = 0; i < num_layers; ++i) {
       if (num_weights[i] > 0) {
         ScaleLayerLearningRate(layers[i], factor);
         ++num_lowered;
       }
     }
   }
   return num_lowered;
 }

◆ ReduceLearningRates()

void tesseract::LSTMTrainer::ReduceLearningRates	(	LSTMTrainer *	samples_trainer,
		STRING *	log_msg
	)

Definition at line 590 of file lstmtrainer.cpp.

                                                        {
   if (network_->TestFlag(NF_LAYER_SPECIFIC_LR)) {
     int num_reduced = ReduceLayerLearningRates(
         kLearningRateDecay, kNumAdjustmentIterations, samples_trainer);
     log_msg->add_str_int("\nReduced learning rate on layers: ", num_reduced);
   } else {
     ScaleLearningRate(kLearningRateDecay);
     log_msg->add_str_double("\nReduced learning rate to :", learning_rate_);
   }
   *log_msg += "\n";
 }

◆ RollErrorBuffers()

void tesseract::LSTMTrainer::RollErrorBuffers ( )

protected

Definition at line 1261 of file lstmtrainer.cpp.

                                    {
   prev_sample_iteration_ = sample_iteration_;
   if (NewSingleError(ET_DELTA) > 0.0)
     ++learning_iteration_;
   else
     last_perfect_training_iteration_ = training_iteration_;
   ++training_iteration_;
   if (debug_interval_ != 0) {
     tprintf("Mean rms=%g%%, delta=%g%%, train=%g%%(%g%%), skip ratio=%g%%\n",
             error_rates_[ET_RMS], error_rates_[ET_DELTA],
             error_rates_[ET_CHAR_ERROR], error_rates_[ET_WORD_RECERR],
             error_rates_[ET_SKIP_RATIO]);
   }
 }

◆ SaveRecognitionDump()

void tesseract::LSTMTrainer::SaveRecognitionDump ( GenericVector< char > * data ) const

Definition at line 930 of file lstmtrainer.cpp.

                                                                        {
   // Serializes the recognizer part of this trainer (via
   // LSTMRecognizer::Serialize) into *data.
   TFile fp;
   fp.OpenWrite(data);
   // Training is switched to TS_TEMP_DISABLE for the duration of the dump and
   // switched back with TS_RE_ENABLE afterwards.
   network_->SetEnableTraining(TS_TEMP_DISABLE);
   // NOTE(review): the serialization call lives inside ASSERT_HOST, so it
   // only runs if the macro always evaluates its argument - confirm.
   ASSERT_HOST(LSTMRecognizer::Serialize(&mgr_, &fp));
   network_->SetEnableTraining(TS_RE_ENABLE);
 }

◆ SaveTraineddata()

bool tesseract::LSTMTrainer::SaveTraineddata ( const STRING & filename )

Definition at line 921 of file lstmtrainer.cpp.

                                                         {
   GenericVector<char> recognizer_data;
   SaveRecognitionDump(&recognizer_data);
   mgr_.OverwriteEntry(TESSDATA_LSTM, &recognizer_data[0],
                       recognizer_data.size());
   return mgr_.SaveFile(filename, file_writer_);
 }

◆ SaveTrainingDump()

bool tesseract::LSTMTrainer::SaveTrainingDump	(	SerializeAmount	serialize_amount,
		const LSTMTrainer *	trainer,
		GenericVector< char > *	data
	)		const

Definition at line 900 of file lstmtrainer.cpp.

                                                                     {
   TFile fp;
   fp.OpenWrite(data);
   return trainer->Serialize(serialize_amount, &mgr_, &fp);
 }

◆ Serialize()

bool tesseract::LSTMTrainer::Serialize	(	SerializeAmount	serialize_amount,
		const TessdataManager *	mgr,
		TFile *	fp
	)		const

Definition at line 431 of file lstmtrainer.cpp.

                                                                           {
   // Writes the trainer state to fp, returning false as soon as any write
   // fails. The order of the writes below defines the serialized layout.
   // The recognizer part comes first, followed by the iteration counters,
   // the rolling error buffers and the mean error rates.
   if (!LSTMRecognizer::Serialize(mgr, fp)) return false;
   if (!fp->Serialize(&learning_iteration_)) return false;
   if (!fp->Serialize(&prev_sample_iteration_)) return false;
   if (!fp->Serialize(&perfect_delay_)) return false;
   if (!fp->Serialize(&last_perfect_training_iteration_)) return false;
   for (int i = 0; i < ET_COUNT; ++i) {
     if (!error_buffers_[i].Serialize(fp)) return false;
   }
   if (!fp->Serialize(&error_rates_[0], countof(error_rates_))) return false;
   if (!fp->Serialize(&training_stage_)) return false;
   // The amount is stored as a single byte in the stream.
   uint8_t amount = serialize_amount;
   if (!fp->Serialize(&amount)) return false;
   if (serialize_amount == LIGHT) return true;  // We are done.
   // Everything below is written only for the non-LIGHT amounts.
   if (!fp->Serialize(&best_error_rate_)) return false;
   if (!fp->Serialize(&best_error_rates_[0], countof(best_error_rates_))) return false;
   if (!fp->Serialize(&best_iteration_)) return false;
   if (!fp->Serialize(&worst_error_rate_)) return false;
   if (!fp->Serialize(&worst_error_rates_[0], countof(worst_error_rates_))) return false;
   if (!fp->Serialize(&worst_iteration_)) return false;
   if (!fp->Serialize(&stall_iteration_)) return false;
   if (!best_model_data_.Serialize(fp)) return false;
   if (!worst_model_data_.Serialize(fp)) return false;
   // best_trainer_ is skipped when the caller asked for NO_BEST_TRAINER.
   if (serialize_amount != NO_BEST_TRAINER && !best_trainer_.Serialize(fp))
     return false;
   // The sub-trainer, if any, is embedded as a LIGHT dump; with no
   // sub-trainer an empty vector is written in its place.
   GenericVector<char> sub_data;
   if (sub_trainer_ != nullptr && !SaveTrainingDump(LIGHT, sub_trainer_, &sub_data))
     return false;
   if (!sub_data.Serialize(fp)) return false;
   if (!best_error_history_.Serialize(fp)) return false;
   if (!best_error_iterations_.Serialize(fp)) return false;
   return fp->Serialize(&improvement_steps_);
 }

◆ set_perfect_delay()

void tesseract::LSTMTrainer::set_perfect_delay ( int delay )

inline

Definition at line 151 of file lstmtrainer.h.

// Setter: stores delay in perfect_delay_.
151 { perfect_delay_ = delay; }

tesseract::LSTMTrainer::perfect_delay_

int perfect_delay_

Definition: lstmtrainer.h:472

◆ SetNullChar()

void tesseract::LSTMTrainer::SetNullChar ( )

protected

Definition at line 1005 of file lstmtrainer.cpp.

                               {
   null_char_ = GetUnicharset().has_special_codes() ? UNICHAR_BROKEN
                                                    : GetUnicharset().size();
   RecodedCharID code;
   recoder_.EncodeUnichar(null_char_, &code);
   null_char_ = code(0);
 }

◆ SetupCheckpointInfo()

void tesseract::LSTMTrainer::SetupCheckpointInfo ( )

◆ StartSubtrainer()

void tesseract::LSTMTrainer::StartSubtrainer ( STRING * log_msg )

Definition at line 517 of file lstmtrainer.cpp.

                                                  {
   delete sub_trainer_;
   sub_trainer_ = new LSTMTrainer();
   if (!checkpoint_reader_->Run(best_trainer_, sub_trainer_)) {
     *log_msg += " Failed to revert to previous best for trial!";
     delete sub_trainer_;
     sub_trainer_ = nullptr;
   } else {
     log_msg->add_str_int(" Trial sub_trainer_ from iteration ",
                          sub_trainer_->training_iteration());
     // Reduce learning rate so it doesn't diverge this time.
     sub_trainer_->ReduceLearningRates(this, log_msg);
     // If it fails again, we will wait twice as long before reverting again.
     int stall_offset =
         learning_iteration() - sub_trainer_->learning_iteration();
     stall_iteration_ = learning_iteration() + 2 * stall_offset;
     sub_trainer_->stall_iteration_ = stall_iteration_;
     // Re-save the best trainer with the new learning rates and stall iteration.
     checkpoint_writer_->Run(NO_BEST_TRAINER, sub_trainer_, &best_trainer_);
   }
 }

◆ training_data()

const DocumentCache& tesseract::LSTMTrainer::training_data ( ) const

inline

Definition at line 165 of file lstmtrainer.h.

                                              {
     // Read-only access to the training_data_ member.
     return training_data_;
   }

◆ TrainOnLine() [1/2]

const ImageData* tesseract::LSTMTrainer::TrainOnLine	(	LSTMTrainer *	samples_trainer,
		bool	batch
	)

inline

Definition at line 259 of file lstmtrainer.h.

                                                                          {
     int sample_index = sample_iteration();
     const ImageData* image =
         samples_trainer->training_data_.GetPageBySerial(sample_index);
     if (image != nullptr) {
       Trainability trainable = TrainOnLine(image, batch);
       if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
         return nullptr;  // Sample was unusable.
       }
     } else {
       ++sample_iteration_;
     }
     return image;
   }

◆ TrainOnLine() [2/2]

Trainability tesseract::LSTMTrainer::TrainOnLine	(	const ImageData *	trainingdata,
		bool	batch
	)

Definition at line 765 of file lstmtrainer.cpp.

                                                   {
   // Trains on a single sample: runs PrepareForBackward, then back-propagates
   // and updates the network unless the sample was unusable. Returns the
   // Trainability reported by PrepareForBackward.
   NetworkIO fwd_outputs, targets;
   Trainability trainable =
       PrepareForBackward(trainingdata, &fwd_outputs, &targets);
   // The sample counter advances even when the sample turns out unusable.
   ++sample_iteration_;
   if (trainable == UNENCODABLE || trainable == NOT_BOXED) {
     return trainable;  // Sample was unusable.
   }
   bool debug = debug_interval_ > 0 &&
       training_iteration() % debug_interval_ == 0;
   // Run backprop on the output.
   // PERFECT samples are back-propagated only once more than perfect_delay_
   // training iterations have passed since the last perfect one.
   NetworkIO bp_deltas;
   if (network_->IsTraining() &&
       (trainable != PERFECT ||
        training_iteration() >
            last_perfect_training_iteration_ + perfect_delay_)) {
     network_->Backward(debug, targets, &scratch_space_, &bp_deltas);
     // In batch mode -1.0f is passed in place of the momentum.
     network_->Update(learning_rate_, batch ? -1.0f : momentum_, adam_beta_,
                      training_iteration_ + 1);
   }
 #ifndef GRAPHICS_DISABLED
   // With a debug interval of 1 and a debug window, wait for a click before
   // continuing.
   if (debug_interval_ == 1 && debug_win_ != nullptr) {
     delete debug_win_->AwaitEvent(SVET_CLICK);
   }
 #endif  // GRAPHICS_DISABLED
   // Roll the memory of past means.
   RollErrorBuffers();
   return trainable;
 }

◆ TransitionTrainingStage()

bool tesseract::LSTMTrainer::TransitionTrainingStage ( float error_threshold )

Definition at line 421 of file lstmtrainer.cpp.

                                                                {
   if (best_error_rate_ < error_threshold &&
       training_stage_ + 1 < num_training_stages_) {
     ++training_stage_;
     return true;
   }
   return false;
 }

◆ TryLoadingCheckpoint()

bool tesseract::LSTMTrainer::TryLoadingCheckpoint	(	const char *	filename,
		const char *	old_traineddata
	)

Definition at line 128 of file lstmtrainer.cpp.

                                                                     {
   // Loads the checkpoint in filename into *this. If the network's output
   // count no longer matches the recoder's code range, the old traineddata
   // is used to remap the network outputs to the new codes. Returns false if
   // any read fails or if remapping is needed but no old traineddata is given.
   GenericVector<char> data;
   if (!(*file_reader_)(filename, &data)) return false;
   tprintf("Loaded file %s, unpacking...\n", filename);
   if (!checkpoint_reader_->Run(data, this)) return false;
   // NOTE(review): shape is never read below - confirm whether this call is
   // needed for a side effect or is left over.
   StaticShape shape = network_->OutputShape(network_->InputShape());
   // NOTE(review): `filename == old_traineddata` compares the two pointers,
   // not the string contents - confirm that identity is what is intended.
   if (((old_traineddata == nullptr || *old_traineddata == '\0') &&
        network_->NumOutputs() == recoder_.code_range()) ||
       filename == old_traineddata) {
     return true;  // Normal checkpoint load complete.
   }
   tprintf("Code range changed from %d to %d!\n", network_->NumOutputs(),
           recoder_.code_range());
   if (old_traineddata == nullptr || *old_traineddata == '\0') {
     tprintf("Must supply the old traineddata for code conversion!\n");
     return false;
   }
   // Load the old unicharset and recoder from the old traineddata.
   TessdataManager old_mgr;
   ASSERT_HOST(old_mgr.Init(old_traineddata));
   TFile fp;
   if (!old_mgr.GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false;
   UNICHARSET old_chset;
   if (!old_chset.load_from_file(&fp, false)) return false;
   if (!old_mgr.GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false;
   UnicharCompress old_recoder;
   if (!old_recoder.DeSerialize(&fp)) return false;
   std::vector<int> code_map = MapRecoder(old_chset, old_recoder);
   // Set the null_char_ to the new value.
   int old_null_char = null_char_;
   SetNullChar();
   // Map the softmax(s) in the network.
   network_->RemapOutputs(old_recoder.code_range(), code_map);
   tprintf("Previous null char=%d mapped to %d\n", old_null_char, null_char_);
   return true;
 }

◆ UpdateErrorBuffer()

void tesseract::LSTMTrainer::UpdateErrorBuffer	(	double	new_error,
		ErrorTypes	type
	)

protected

Definition at line 1248 of file lstmtrainer.cpp.

                                                                      {
   int index = training_iteration_ % kRollingBufferSize_;
   error_buffers_[type][index] = new_error;
   // Compute the mean error.
   int mean_count = std::min(training_iteration_ + 1, error_buffers_[type].size());
   double buffer_sum = 0.0;
   for (int i = 0; i < mean_count; ++i) buffer_sum += error_buffers_[type][i];
   double mean = buffer_sum / mean_count;
   // Trim precision to 1/1000 of 1%.
   error_rates_[type] = IntCastRounded(100000.0 * mean) / 1000.0;
 }

◆ UpdateErrorGraph()

STRING tesseract::LSTMTrainer::UpdateErrorGraph	(	int	iteration,
		double	error_rate,
		const GenericVector< char > &	model_data,
		TestCallback	tester
	)

protected

Definition at line 1280 of file lstmtrainer.cpp.

                                                             {
   // Records the given (iteration, error_rate) sample against the stored
   // best/worst bookkeeping and, when a tester callback is supplied, runs it
   // on a previously stored model. The returned string is whatever the
   // tester produced; `result` is left untouched when no tester call is made.
   //
   // iteration:  iteration number the sample belongs to.
   // error_rate: error rate compared against best_error_rate_.
   // model_data: serialized model stored as the new best/worst model data.
   // tester:     optional callback (may be nullptr) run on a stored model.
   if (error_rate > best_error_rate_
       && iteration < best_iteration_ + kErrorGraphInterval) {
     // Too soon to record a new point.
     // Nothing is recorded here; the stored worst model (if any) is handed
     // to the tester with nullptr in place of the error rates.
     if (tester != nullptr && !worst_model_data_.empty()) {
       mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                           worst_model_data_.size());
       return tester->Run(worst_iteration_, nullptr, mgr_, CurrentTrainingStage());
     } else {
       return "";
     }
   }
   STRING result;
   // NOTE: there are 2 asymmetries here:
   // 1. We are computing the global minimum, but the local maximum in between.
   // 2. If the tester returns an empty string, indicating that it is busy,
   //    call it repeatedly on new local maxima to test the previous min, but
   //    not the other way around, as there is little point testing the maxima
   //    between very frequent minima.
   if (error_rate < best_error_rate_) {
     // This is a new (global) minimum.
     // The previous worst model is tested first; best_model_data_ is only
     // replaced inside this branch, i.e. when a tester exists and a worst
     // model was stored.
     if (tester != nullptr && !worst_model_data_.empty()) {
       mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                           worst_model_data_.size());
       result = tester->Run(worst_iteration_, worst_error_rates_, mgr_,
                            CurrentTrainingStage());
       worst_model_data_.truncate(0);
       best_model_data_ = model_data;
     }
     best_error_rate_ = error_rate;
     memcpy(best_error_rates_, error_rates_, sizeof(error_rates_));
     best_iteration_ = iteration;
     best_error_history_.push_back(error_rate);
     best_error_iterations_.push_back(iteration);
     // Compute 2% decay time.
     // Walk the history backwards to the most recent entry whose error was
     // at least error_rate + 2.0; i ends at -1 when no such entry exists.
     double two_percent_more = error_rate + 2.0;
     int i;
     for (i = best_error_history_.size() - 1;
          i >= 0 && best_error_history_[i] < two_percent_more; --i) {
     }
     // With no qualifying entry, iteration 0 and an error of 100.0 are used.
     int old_iteration = i >= 0 ? best_error_iterations_[i] : 0;
     improvement_steps_ = iteration - old_iteration;
     tprintf("2 Percent improvement time=%d, best error was %g @ %d\n",
             improvement_steps_, i >= 0 ? best_error_history_[i] : 100.0,
             old_iteration);
   } else if (error_rate > best_error_rate_) {
     // This is a new (local) maximum.
     if (tester != nullptr) {
       if (!best_model_data_.empty()) {
         mgr_.OverwriteEntry(TESSDATA_LSTM, &best_model_data_[0],
                             best_model_data_.size());
         result = tester->Run(best_iteration_, best_error_rates_, mgr_,
                              CurrentTrainingStage());
       } else if (!worst_model_data_.empty()) {
         // Allow for multiple data points with "worst" error rate.
         mgr_.OverwriteEntry(TESSDATA_LSTM, &worst_model_data_[0],
                             worst_model_data_.size());
         result = tester->Run(worst_iteration_, worst_error_rates_, mgr_,
                              CurrentTrainingStage());
       }
       // The best model is dropped only after the tester returned a
       // non-empty result; the current model always becomes the new worst.
       if (result.length() > 0)
         best_model_data_.truncate(0);
       worst_model_data_ = model_data;
     }
   }
   // The worst_* fields are overwritten with the current sample on every
   // path that reaches this point, including a new minimum and the case
   // error_rate == best_error_rate_ where neither branch above runs.
   worst_error_rate_ = error_rate;
   memcpy(worst_error_rates_, error_rates_, sizeof(error_rates_));
   worst_iteration_ = iteration;
   return result;
 }

◆ UpdateSubtrainer()

SubTrainerResult tesseract::LSTMTrainer::UpdateSubtrainer ( STRING * log_msg )

Definition at line 547 of file lstmtrainer.cpp.

                                                               {
   double training_error = CharError();
   double sub_error = sub_trainer_->CharError();
   double sub_margin = (training_error - sub_error) / sub_error;
   if (sub_margin >= kSubTrainerMarginFraction) {
     log_msg->add_str_double(" sub_trainer=", sub_error);
     log_msg->add_str_double(" margin=", 100.0 * sub_margin);
     *log_msg += "\n";
     // Catch up to current iteration.
     int end_iteration = training_iteration();
     while (sub_trainer_->training_iteration() < end_iteration &&
            sub_margin >= kSubTrainerMarginFraction) {
       int target_iteration =
           sub_trainer_->training_iteration() + kNumPagesPerBatch;
       while (sub_trainer_->training_iteration() < target_iteration) {
         sub_trainer_->TrainOnLine(this, false);
       }
       STRING batch_log = "Sub:";
       sub_trainer_->PrepareLogMsg(&batch_log);
       batch_log += "\n";
       tprintf("UpdateSubtrainer:%s", batch_log.string());
       *log_msg += batch_log;
       sub_error = sub_trainer_->CharError();
       sub_margin = (training_error - sub_error) / sub_error;
     }
     if (sub_error < best_error_rate_ &&
         sub_margin >= kSubTrainerMarginFraction) {
       // The sub_trainer_ has won the race to a new best. Switch to it.
       GenericVector<char> updated_trainer;
       SaveTrainingDump(LIGHT, sub_trainer_, &updated_trainer);
       ReadTrainingDump(updated_trainer, this);
       log_msg->add_str_int(" Sub trainer wins at iteration ",
                            training_iteration());
       *log_msg += "\n";
       return STR_REPLACED;
     }
     return STR_UPDATED;
   }
   return STR_NONE;
 }

Member Data Documentation

◆ align_win_

ScrollView* tesseract::LSTMTrainer::align_win_

protected

Definition at line 397 of file lstmtrainer.h.

◆ best_error_history_

GenericVector<double> tesseract::LSTMTrainer::best_error_history_

protected

Definition at line 457 of file lstmtrainer.h.

◆ best_error_iterations_

GenericVector<int> tesseract::LSTMTrainer::best_error_iterations_

protected

Definition at line 458 of file lstmtrainer.h.

◆ best_error_rate_

double tesseract::LSTMTrainer::best_error_rate_

protected

Definition at line 430 of file lstmtrainer.h.

◆ best_error_rates_

double tesseract::LSTMTrainer::best_error_rates_[ET_COUNT]

protected

Definition at line 432 of file lstmtrainer.h.

◆ best_iteration_

int tesseract::LSTMTrainer::best_iteration_

protected

Definition at line 434 of file lstmtrainer.h.

◆ best_model_data_

GenericVector<char> tesseract::LSTMTrainer::best_model_data_

protected

Definition at line 444 of file lstmtrainer.h.

◆ best_model_name_

STRING tesseract::LSTMTrainer::best_model_name_

protected

Definition at line 416 of file lstmtrainer.h.

◆ best_trainer_

GenericVector<char> tesseract::LSTMTrainer::best_trainer_

protected

Definition at line 447 of file lstmtrainer.h.

◆ checkpoint_iteration_

int tesseract::LSTMTrainer::checkpoint_iteration_

protected

Definition at line 407 of file lstmtrainer.h.

◆ checkpoint_name_

STRING tesseract::LSTMTrainer::checkpoint_name_

protected

Definition at line 411 of file lstmtrainer.h.

◆ checkpoint_reader_

CheckPointReader tesseract::LSTMTrainer::checkpoint_reader_

protected

Definition at line 424 of file lstmtrainer.h.

◆ checkpoint_writer_

CheckPointWriter tesseract::LSTMTrainer::checkpoint_writer_

protected

Definition at line 425 of file lstmtrainer.h.

◆ ctc_win_

ScrollView* tesseract::LSTMTrainer::ctc_win_

protected

Definition at line 401 of file lstmtrainer.h.

◆ debug_interval_

int tesseract::LSTMTrainer::debug_interval_

protected

Definition at line 405 of file lstmtrainer.h.

◆ error_buffers_

GenericVector<double> tesseract::LSTMTrainer::error_buffers_[ET_COUNT]

protected

Definition at line 479 of file lstmtrainer.h.

◆ error_rate_of_last_saved_best_

float tesseract::LSTMTrainer::error_rate_of_last_saved_best_

protected

Definition at line 452 of file lstmtrainer.h.

◆ error_rates_

double tesseract::LSTMTrainer::error_rates_[ET_COUNT]

protected

Definition at line 481 of file lstmtrainer.h.

◆ file_reader_

FileReader tesseract::LSTMTrainer::file_reader_

protected

Definition at line 420 of file lstmtrainer.h.

◆ file_writer_

FileWriter tesseract::LSTMTrainer::file_writer_

protected

Definition at line 421 of file lstmtrainer.h.

◆ improvement_steps_

int32_t tesseract::LSTMTrainer::improvement_steps_

protected

Definition at line 460 of file lstmtrainer.h.

◆ kRollingBufferSize_

const int tesseract::LSTMTrainer::kRollingBufferSize_ = 1000

static protected

Definition at line 478 of file lstmtrainer.h.

◆ last_perfect_training_iteration_

int tesseract::LSTMTrainer::last_perfect_training_iteration_

protected

Definition at line 475 of file lstmtrainer.h.

◆ learning_iteration_

int tesseract::LSTMTrainer::learning_iteration_

protected

Definition at line 464 of file lstmtrainer.h.

◆ mgr_

TessdataManager tesseract::LSTMTrainer::mgr_

protected

Definition at line 483 of file lstmtrainer.h.

◆ model_base_

STRING tesseract::LSTMTrainer::model_base_

protected

Definition at line 409 of file lstmtrainer.h.

◆ num_training_stages_

int tesseract::LSTMTrainer::num_training_stages_

protected

Definition at line 418 of file lstmtrainer.h.

◆ perfect_delay_

int tesseract::LSTMTrainer::perfect_delay_

protected

Definition at line 472 of file lstmtrainer.h.

◆ prev_sample_iteration_

int tesseract::LSTMTrainer::prev_sample_iteration_

protected

Definition at line 466 of file lstmtrainer.h.

◆ randomly_rotate_

bool tesseract::LSTMTrainer::randomly_rotate_

protected

Definition at line 413 of file lstmtrainer.h.

◆ recon_win_

ScrollView* tesseract::LSTMTrainer::recon_win_

protected

Definition at line 403 of file lstmtrainer.h.

◆ stall_iteration_

int tesseract::LSTMTrainer::stall_iteration_

protected

Definition at line 442 of file lstmtrainer.h.

◆ sub_trainer_

LSTMTrainer* tesseract::LSTMTrainer::sub_trainer_

protected

Definition at line 450 of file lstmtrainer.h.

◆ target_win_

ScrollView* tesseract::LSTMTrainer::target_win_

protected

Definition at line 399 of file lstmtrainer.h.

◆ training_data_

DocumentCache tesseract::LSTMTrainer::training_data_

protected

Definition at line 414 of file lstmtrainer.h.

◆ training_stage_

int tesseract::LSTMTrainer::training_stage_

protected

Definition at line 454 of file lstmtrainer.h.

◆ worst_error_rate_

double tesseract::LSTMTrainer::worst_error_rate_

protected

Definition at line 436 of file lstmtrainer.h.

◆ worst_error_rates_

double tesseract::LSTMTrainer::worst_error_rates_[ET_COUNT]

protected

Definition at line 438 of file lstmtrainer.h.

◆ worst_iteration_

int tesseract::LSTMTrainer::worst_iteration_

protected

Definition at line 440 of file lstmtrainer.h.

◆ worst_model_data_

GenericVector<char> tesseract::LSTMTrainer::worst_model_data_

protected

Definition at line 445 of file lstmtrainer.h.

The documentation for this class was generated from the following files:

/usr/src/tesseract-ocr.master/src/lstm/lstmtrainer.h
/usr/src/tesseract-ocr.master/src/lstm/lstmtrainer.cpp

Public Member Functions

Static Public Member Functions

Protected Member Functions

Protected Attributes

Static Protected Attributes

Detailed Description

Constructor & Destructor Documentation

◆ LSTMTrainer() [1/2]

◆ LSTMTrainer() [2/2]

◆ ~LSTMTrainer()

Member Function Documentation

◆ ActivationError()

◆ best_error_rate()

◆ best_iteration()

◆ best_trainer()

◆ CharError()

◆ ComputeCharError()

◆ ComputeCTCTargets()

◆ ComputeErrorRates()

◆ ComputeRMSError()

◆ ComputeTextTargets()

◆ ComputeWinnerError()

◆ ComputeWordError()

◆ CurrentTrainingStage()

◆ DebugLSTMTraining()

◆ DebugNetwork()

◆ DeSerialize()

◆ DisplayTargets()

◆ DumpFilename()

◆ EmptyConstructor()

◆ EncodeString() [1/2]

◆ EncodeString() [2/2]

◆ error_rates()

◆ FillErrorBuffer()

◆ GridSearchDictParams()

◆ improvement_steps()

◆ InitCharSet() [1/3]

◆ InitCharSet() [2/3]

◆ InitCharSet() [3/3]

◆ InitIterations()

◆ InitNetwork()

◆ InitTensorFlowNetwork()

◆ LastSingleError()

◆ learning_iteration()

◆ LoadAllTrainingData()

◆ LogIterations()

◆ MaintainCheckpoints()

◆ MaintainCheckpointsSpecific()

◆ MapRecoder()

◆ mutable_training_data()

◆ NewSingleError()

◆ PrepareForBackward()

◆ PrepareLogMsg()

◆ ReadLocalTrainingDump()

◆ ReadSizedTrainingDump()

◆ ReadTrainingDump()

◆ ReduceLayerLearningRates()

◆ ReduceLearningRates()

◆ RollErrorBuffers()

◆ SaveRecognitionDump()

◆ SaveTraineddata()

◆ SaveTrainingDump()

◆ Serialize()

◆ set_perfect_delay()

◆ SetNullChar()

◆ SetupCheckpointInfo()

◆ StartSubtrainer()

◆ training_data()

◆ TrainOnLine() [1/2]

◆ TrainOnLine() [2/2]

◆ TransitionTrainingStage()

◆ TryLoadingCheckpoint()

◆ UpdateErrorBuffer()

◆ UpdateErrorGraph()

◆ UpdateSubtrainer()

Member Data Documentation

◆ align_win_

◆ best_error_history_

◆ best_error_iterations_

◆ best_error_rate_