tesseract  5.0.0-alpha-619-ge9db
tesseract::ErrorCounter Class Reference

#include <errorcounter.h>

Static Public Member Functions

static double ComputeErrorRate (ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, STRING *fonts_report)
 
static void DebugNewErrors (ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it)
 

Detailed Description

Definition at line 94 of file errorcounter.h.

Member Function Documentation

◆ ComputeErrorRate()

double tesseract::ErrorCounter::ComputeErrorRate ( ShapeClassifier *  classifier,
int  report_level,
CountTypes  boosting_mode,
const FontInfoTable &  fontinfo_table,
const GenericVector< Pix * > &  page_images,
SampleIterator *  it,
double *  unichar_error,
double *  scaled_error,
STRING *  fonts_report 
)
static

Definition at line 39 of file errorcounter.cpp.

43  {
44  const int fontsize = it->sample_set()->NumFonts();
45  ErrorCounter counter(classifier->GetUnicharset(), fontsize);
46  GenericVector<UnicharRating> results;
47 
48  clock_t start = clock();
49  unsigned total_samples = 0;
50  double unscaled_error = 0.0;
51  // Set a number of samples on which to run the classify debug mode.
52  int error_samples = report_level > 3 ? report_level * report_level : 0;
53  // Iterate over all the samples, accumulating errors.
54  for (it->Begin(); !it->AtEnd(); it->Next()) {
55  TrainingSample* mutable_sample = it->MutableSample();
56  int page_index = mutable_sample->page_num();
57  Pix* page_pix = 0 <= page_index && page_index < page_images.size()
58  ? page_images[page_index] : nullptr;
59  // No debug, no keep this.
60  classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
61  INVALID_UNICHAR_ID, &results);
62  bool debug_it = false;
63  int correct_id = mutable_sample->class_id();
64  if (counter.unicharset_.has_special_codes() &&
65  (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
66  correct_id == UNICHAR_BROKEN)) {
67  // This is junk so use the special counter.
68  debug_it = counter.AccumulateJunk(report_level > 3,
69  results,
70  mutable_sample);
71  } else {
72  debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode,
73  fontinfo_table,
74  results, mutable_sample);
75  }
76  if (debug_it && error_samples > 0) {
77  // Running debug, keep the correct answer, and debug the classifier.
78  tprintf("Error on sample %d: %s Classifier debug output:\n",
79  it->GlobalSampleIndex(),
80  it->sample_set()->SampleToString(*mutable_sample).c_str());
81  classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
82  --error_samples;
83  }
84  ++total_samples;
85  }
86  const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
87  // Create the appropriate error report.
88  unscaled_error = counter.ReportErrors(report_level, boosting_mode,
89  fontinfo_table,
90  *it, unichar_error, fonts_report);
91  if (scaled_error != nullptr) *scaled_error = counter.scaled_error_;
92  if (report_level > 1 && total_samples > 0) {
93  // It is useful to know the time in microseconds/char.
94  tprintf("Errors computed in %.2fs at %.1f μs/char\n",
95  total_time, 1000000.0 * total_time / total_samples);
96  }
97  return unscaled_error;
98 }

◆ DebugNewErrors()

void tesseract::ErrorCounter::DebugNewErrors ( ShapeClassifier *  new_classifier,
ShapeClassifier *  old_classifier,
CountTypes  boosting_mode,
const FontInfoTable &  fontinfo_table,
const GenericVector< Pix * > &  page_images,
SampleIterator *  it 
)
static

Definition at line 106 of file errorcounter.cpp.

110  {
111  int fontsize = it->sample_set()->NumFonts();
112  ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
113  ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
114  GenericVector<UnicharRating> results;
115 
116  int total_samples = 0;
117  int error_samples = 25;
118  int total_new_errors = 0;
119  // Iterate over all the samples, accumulating errors.
120  for (it->Begin(); !it->AtEnd(); it->Next()) {
121  TrainingSample* mutable_sample = it->MutableSample();
122  int page_index = mutable_sample->page_num();
123  Pix* page_pix = 0 <= page_index && page_index < page_images.size()
124  ? page_images[page_index] : nullptr;
125  // No debug, no keep this.
126  old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
127  INVALID_UNICHAR_ID, &results);
128  int correct_id = mutable_sample->class_id();
129  if (correct_id != 0 &&
130  !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
131  results, mutable_sample)) {
132  // old classifier was correct, check the new one.
133  new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
134  INVALID_UNICHAR_ID, &results);
135  if (correct_id != 0 &&
136  new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
137  results, mutable_sample)) {
138  tprintf("New Error on sample %d: Classifier debug output:\n",
139  it->GlobalSampleIndex());
140  ++total_new_errors;
141  new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1,
142  correct_id, &results);
143  if (results.size() > 0 && error_samples > 0) {
144  new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
145  --error_samples;
146  }
147  }
148  }
149  ++total_samples;
150  }
151  tprintf("Total new errors = %d\n", total_new_errors);
152 }

The documentation for this class was generated from the following files:
errorcounter.h
errorcounter.cpp
UNICHAR_BROKEN
Definition: unicharset.h:36
UNICHAR_SPACE
Definition: unicharset.h:34
GenericVector< UnicharRating >
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
GenericVector::size
int size() const
Definition: genericvector.h:71
UNICHAR_JOINED
Definition: unicharset.h:35