48 if (char_net_ !=
NULL) {
53 if (net_input_ !=
NULL) {
58 if (net_output_ !=
NULL) {
86 void ConvNetCharClassifier::Fold() {
91 for (
int class_id = 0; class_id < class_cnt; class_id++) {
96 for (
int ch = 0; ch < upper_form32.length(); ch++) {
97 if (iswalpha(static_cast<int>(upper_form32[ch])) != 0) {
98 upper_form32[ch] = towupper(upper_form32[ch]);
105 upper_form32.c_str()));
106 if (upper_class_id != -1 && class_id != upper_class_id) {
107 float max_out =
MAX(net_output_[class_id], net_output_[upper_class_id]);
108 net_output_[class_id] = max_out;
109 net_output_[upper_class_id] = max_out;
118 for (
int fold_set = 0; fold_set <
fold_set_cnt_; fold_set++) {
121 float max_prob = net_output_[
fold_sets_[fold_set][0]];
123 if (net_output_[fold_sets_[fold_set][ch]] > max_prob) {
124 max_prob = net_output_[fold_sets_[fold_set][ch]];
127 for (
int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
128 net_output_[fold_sets_[fold_set][ch]] =
MAX(max_prob * kFoldingRatio,
129 net_output_[fold_sets_[fold_set][ch]]);
138 bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) {
139 if (char_net_ ==
NULL) {
140 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
141 "NeuralNet is NULL\n");
144 int feat_cnt = char_net_->
in_cnt();
148 if (net_input_ ==
NULL) {
149 net_input_ =
new float[feat_cnt];
150 if (net_input_ ==
NULL) {
151 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
152 "unable to allocate memory for input nodes\n");
156 net_output_ =
new float[class_cnt];
157 if (net_output_ ==
NULL) {
158 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
159 "unable to allocate memory for output nodes\n");
166 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
167 "unable to compute features\n");
171 if (char_net_ !=
NULL) {
172 if (char_net_->
FeedForward(net_input_, net_output_) ==
false) {
173 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::RunNets): "
174 "unable to run feed-forward\n");
188 if (RunNets(char_samp) ==
false) {
200 if (RunNets(char_samp) ==
false) {
208 if (alt_list ==
NULL) {
209 fprintf(stderr,
"Cube WARNING (ConvNetCharClassifier::Classify): "
210 "returning emtpy CharAltList\n");
214 for (
int out = 1; out < class_cnt; out++) {
216 alt_list->
Insert(out, cost);
226 if (char_net_ !=
NULL) {
230 char_net_ = char_net;
237 bool ConvNetCharClassifier::LoadFoldingSets(
const string &data_file_path,
241 string fold_file_name;
242 fold_file_name = data_file_path +
lang;
243 fold_file_name +=
".cube.fold";
246 FILE *fp = fopen(fold_file_name.c_str(),
"rb");
252 string fold_sets_str;
259 vector<string> str_vec;
261 fold_set_cnt_ = str_vec.size();
264 if (fold_sets_ ==
NULL) {
268 if (fold_set_len_ ==
NULL) {
273 for (
int fold_set = 0; fold_set <
fold_set_cnt_; fold_set++) {
274 reinterpret_cast<TessLangModel *
>(lang_mod)->RemoveInvalidCharacters(
278 if (str_vec[fold_set].length() <= 1) {
279 fprintf(stderr,
"Cube WARNING (ConvNetCharClassifier::LoadFoldingSets): "
280 "invalidating folding set %d\n", fold_set);
281 fold_set_len_[fold_set] = 0;
282 fold_sets_[fold_set] =
NULL;
288 fold_set_len_[fold_set] = str32.length();
289 fold_sets_[fold_set] =
new int[fold_set_len_[fold_set]];
290 if (fold_sets_[fold_set] ==
NULL) {
291 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): "
292 "could not allocate folding set\n");
293 fold_set_cnt_ = fold_set;
296 for (
int ch = 0; ch < fold_set_len_[fold_set]; ch++) {
306 bool ConvNetCharClassifier::Init(
const string &data_file_path,
308 LangModel *lang_mod) {
315 if (!LoadNets(data_file_path, lang)) {
321 if (!LoadFoldingSets(data_file_path, lang, lang_mod)) {
334 bool ConvNetCharClassifier::LoadNets(
const string &data_file_path,
335 const string &lang) {
336 string char_net_file;
339 char_net_file = data_file_path +
lang;
340 char_net_file +=
".cube.nn";
343 FILE *fp = fopen(char_net_file.c_str(),
"rb");
351 if (char_net_ ==
NULL) {
352 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadNets): "
353 "could not load %s\n", char_net_file.c_str());
359 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadNets): "
360 "could not validate net %s\n", char_net_file.c_str());
365 int feat_cnt = char_net_->
in_cnt();
368 if (char_net_->
out_cnt() != class_cnt) {
369 fprintf(stderr,
"Cube ERROR (ConvNetCharClassifier::LoadNets): "
370 "output count (%d) and class count (%d) are not equal\n",
371 char_net_->
out_cnt(), class_cnt);
376 if (net_input_ ==
NULL) {
377 net_input_ =
new float[feat_cnt];
378 if (net_input_ ==
NULL) {
382 net_output_ =
new float[class_cnt];
383 if (net_output_ ==
NULL) {
virtual int CharCost(CharSamp *char_samp)
virtual bool SetLearnParam(char *var_name, float val)
bool Insert(int class_id, int cost, void *tag=NULL)
static int Prob2Cost(double prob_val)
virtual bool ComputeFeatures(CharSamp *char_samp, float *features)=0
void SetNet(tesseract::NeuralNet *net)
basic_string< char_32 > string_32
virtual bool Train(CharSamp *char_samp, int ClassID)
static bool ReadFileToString(const string &file_name, string *str)
FeatureBase * feat_extract_
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
int ClassID(const char_32 *str) const
bool FeedForward(const Type *inputs, Type *outputs)
virtual ~ConvNetCharClassifier()
ConvNetCharClassifier(CharSet *char_set, TuningParams *params, FeatureBase *feat_extract)
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
virtual CharAltList * Classify(CharSamp *char_samp)
virtual int FeatureCnt()=0
const char_32 * ClassString(int class_id) const
static NeuralNet * FromFile(const string file_name)