tesseract  5.0.0-alpha-619-ge9db
lstmboxrenderer.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: lstmboxrenderer.cpp
3  * Description: Renderer for creating box file for LSTM training.
4  * based on the tsv renderer.
5  *
6  * (C) Copyright 2019, Google Inc.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include <tesseract/baseapi.h> // for TessBaseAPI
20 #include <tesseract/renderer.h>
21 #include "tesseractclass.h" // for Tesseract
22 
23 namespace tesseract {
24 
30 static void AddBoxToLSTM(int right, int bottom, int top, int image_height,
31  int page_num, STRING* text) {
32  text->add_str_int(" ", image_height - bottom);
33  text->add_str_int(" ", right + 5);
34  text->add_str_int(" ", image_height - top);
35  text->add_str_int(" ", page_num);
36 }
37 
38 char* TessBaseAPI::GetLSTMBoxText(int page_number=0) {
39  if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
40  return nullptr;
41 
42  STRING lstm_box_str("");
43  bool first_word = true;
44  int left = 0, top = 0, right = 0, bottom = 0;
45 
46  LTRResultIterator* res_it = GetLTRIterator();
47  while (!res_it->Empty(RIL_BLOCK)) {
48  if (res_it->Empty(RIL_SYMBOL)) {
49  res_it->Next(RIL_SYMBOL);
50  continue;
51  }
52  if (!first_word) {
53  if (!(res_it->IsAtBeginningOf(RIL_TEXTLINE))) {
54  if (res_it->IsAtBeginningOf(RIL_WORD)) {
55  lstm_box_str.add_str_int(" ", left);
56  AddBoxToLSTM(right, bottom, top, image_height_, page_number,
57  &lstm_box_str);
58  lstm_box_str += "\n"; // end of row for word
59  } // word
60  } else {
61  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
62  lstm_box_str.add_str_int("\t ", left);
63  AddBoxToLSTM(right, bottom, top, image_height_, page_number,
64  &lstm_box_str);
65  lstm_box_str += "\n"; // end of row for line
66  } // line
67  }
68  } // not first word
69  first_word = false;
70  // Use bounding box for whole line for everything
71  res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
72  do {
73  lstm_box_str +=
74  std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_SYMBOL)).get();
75  res_it->Next(RIL_SYMBOL);
76  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_SYMBOL));
77  lstm_box_str.add_str_int(" ", left);
78  AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
79  lstm_box_str += "\n"; // end of row for symbol
80  }
81  if (!first_word) { // if first_word is true => empty page
82  lstm_box_str.add_str_int("\t ", left);
83  AddBoxToLSTM(right, bottom, top, image_height_, page_number, &lstm_box_str);
84  lstm_box_str += "\n"; // end of PAGE
85  }
86  char* ret = new char[lstm_box_str.length() + 1];
87  strcpy(ret, lstm_box_str.c_str());
88  delete res_it;
89  return ret;
90 }
91 
92 /**********************************************************************
93  * LSTMBox Renderer interface implementation
94  **********************************************************************/
95 TessLSTMBoxRenderer::TessLSTMBoxRenderer(const char* outputbase)
96  : TessResultRenderer(outputbase, "box") {}
97 
99  const std::unique_ptr<const char[]> lstmbox(api->GetLSTMBoxText(imagenum()));
100  if (lstmbox == nullptr) return false;
101 
102  AppendString(lstmbox.get());
103 
104  return true;
105 }
106 
107 } // namespace tesseract.
tesseract::RIL_WORD
Definition: publictypes.h:220
STRING::add_str_int
void add_str_int(const char *str, int number)
Definition: strngs.cpp:370
tesseract::TessBaseAPI::page_res_
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:893
tesseract::TessBaseAPI::image_height_
int image_height_
Definition: baseapi.h:912
tesseract::TessLSTMBoxRenderer::AddImageHandler
bool AddImageHandler(TessBaseAPI *api) override
Definition: lstmboxrenderer.cpp:111
tesseract::TessBaseAPI::GetLSTMBoxText
char * GetLSTMBoxText(int page_number)
Definition: lstmboxrenderer.cpp:52
tesseractclass.h
tesseract::RIL_BLOCK
Definition: publictypes.h:217
STRING
Definition: strngs.h:45
tesseract::TessResultRenderer::AppendString
void AppendString(const char *s)
Definition: renderer.cpp:101
tesseract::RIL_SYMBOL
Definition: publictypes.h:221
baseapi.h
tesseract::TessLSTMBoxRenderer::TessLSTMBoxRenderer
TessLSTMBoxRenderer(const char *outputbase)
Definition: lstmboxrenderer.cpp:108
tesseract::TessBaseAPI::tesseract_
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:886
tesseract
Definition: baseapi.h:65
tesseract::RIL_TEXTLINE
Definition: publictypes.h:219
tesseract::TessBaseAPI::GetLTRIterator
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1304
renderer.h
tesseract::TessResultRenderer::imagenum
int imagenum() const
Definition: renderer.h:107
TessBaseAPI
struct TessBaseAPI TessBaseAPI
Definition: capi.h:72
TessResultRenderer
struct TessResultRenderer TessResultRenderer
Definition: capi.h:71
tesseract::TessBaseAPI::Recognize
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:827