#include <renderer.h>
Renders Tesseract output into a searchable PDF.
Definition at line 189 of file renderer.h.
◆ TessPDFRenderer()
tesseract::TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly = false)
Definition at line 182 of file pdfrenderer.cpp.
187 textonly_ = textonly;
TessResultRenderer(const char *outputbase, const char *extension)
◆ AddImageHandler()
bool tesseract::TessPDFRenderer::AddImageHandler(TessBaseAPI *api)
[protected, virtual]
Implements tesseract::TessResultRenderer.
Definition at line 839 of file pdfrenderer.cpp.
841 char buf[kBasicBufSize];
842 char buf2[kBasicBufSize];
843 Pix *pix = api->GetInputImage();
844 const char* filename = api->GetInputName();
845 int ppi = api->GetSourceYResolution();
846 if (!pix || ppi <= 0)
848 double width = pixGetWidth(pix) * 72.0 / ppi;
849 double height = pixGetHeight(pix) * 72.0 / ppi;
851 snprintf(buf2,
sizeof(buf2),
"/XObject << /Im1 %ld 0 R >>\n", obj_ + 2);
852 const char *xobject = (textonly_) ?
"" : buf2;
855 n = snprintf(buf,
sizeof(buf),
860 " /MediaBox [0 0 %.2f %.2f]\n" 861 " /Contents %ld 0 R\n" 865 " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n" 866 " /Font << /f-0-0 %ld 0 R >>\n" 876 if (n >=
sizeof(buf))
return false;
878 AppendPDFObject(buf);
881 const std::unique_ptr<char[]> pdftext(GetPDFTextObjects(api, width, height));
882 const size_t pdftext_len = strlen(pdftext.get());
884 unsigned char *comp_pdftext = zlibCompress(
885 reinterpret_cast<unsigned char *>(pdftext.get()), pdftext_len, &len);
886 long comp_pdftext_len = len;
887 n = snprintf(buf,
sizeof(buf),
890 " /Length %ld /Filter /FlateDecode\n" 892 "stream\n", obj_, comp_pdftext_len);
893 if (n >=
sizeof(buf)) {
894 lept_free(comp_pdftext);
898 long objsize = strlen(buf);
899 AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
900 objsize += comp_pdftext_len;
901 lept_free(comp_pdftext);
906 objsize += strlen(b2);
907 AppendPDFObjectDIY(objsize);
910 char *pdf_object =
nullptr;
912 api->GetIntVariable(
"jpg_quality", &jpg_quality);
913 if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize,
918 AppendPDFObjectDIY(objsize);
void AppendData(const char *s, int len)
void AppendString(const char *s)
◆ BeginDocumentHandler()
bool tesseract::TessPDFRenderer::BeginDocumentHandler()
[protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 495 of file pdfrenderer.cpp.
496 char buf[kBasicBufSize];
499 n = snprintf(buf,
sizeof(buf),
502 0xDE, 0xAD, 0xBE, 0xEB);
503 if (n >=
sizeof(buf))
return false;
504 AppendPDFObject(buf);
507 n = snprintf(buf,
sizeof(buf),
515 if (n >=
sizeof(buf))
return false;
516 AppendPDFObject(buf);
524 n = snprintf(buf,
sizeof(buf),
527 " /BaseFont /GlyphLessFont\n" 528 " /DescendantFonts [ %ld 0 R ]\n" 529 " /Encoding /Identity-H\n" 531 " /ToUnicode %ld 0 R\n" 538 if (n >=
sizeof(buf))
return false;
539 AppendPDFObject(buf);
542 n = snprintf(buf,
sizeof(buf),
545 " /BaseFont /GlyphLessFont\n" 546 " /CIDToGIDMap %ld 0 R\n" 549 " /Ordering (Identity)\n" 550 " /Registry (Adobe)\n" 553 " /FontDescriptor %ld 0 R\n" 554 " /Subtype /CIDFontType2\n" 562 if (n >=
sizeof(buf))
return false;
563 AppendPDFObject(buf);
566 const int kCIDToGIDMapSize = 2 * (1 << 16);
567 const std::unique_ptr<unsigned char[]> cidtogidmap(
568 new unsigned char[kCIDToGIDMapSize]);
569 for (
int i = 0; i < kCIDToGIDMapSize; i++) {
570 cidtogidmap[i] = (i % 2) ? 1 : 0;
573 unsigned char *comp = zlibCompress(cidtogidmap.get(), kCIDToGIDMapSize, &len);
574 n = snprintf(buf,
sizeof(buf),
577 " /Length %lu /Filter /FlateDecode\n" 581 if (n >=
sizeof(buf)) {
586 long objsize = strlen(buf);
587 AppendData(reinterpret_cast<char *>(comp), len);
590 const char *endstream_endobj =
594 objsize += strlen(endstream_endobj);
595 AppendPDFObjectDIY(objsize);
598 "/CIDInit /ProcSet findresource begin\n" 603 " /Registry (Adobe)\n" 607 "/CMapName /Adobe-Identify-UCS def\n" 609 "1 begincodespacerange\n" 611 "endcodespacerange\n" 613 "<0000> <FFFF> <0000>\n" 616 "CMapName currentdict /CMap defineresource pop\n" 621 n = snprintf(buf,
sizeof(buf),
623 "<< /Length %lu >>\n" 627 "endobj\n", (
unsigned long) strlen(stream), stream);
628 if (n >=
sizeof(buf))
return false;
629 AppendPDFObject(buf);
632 n = snprintf(buf,
sizeof(buf),
639 " /FontBBox [ 0 0 %d %d ]\n" 640 " /FontFile2 %ld 0 R\n" 641 " /FontName /GlyphLessFont\n" 644 " /Type /FontDescriptor\n" 653 if (n >=
sizeof(buf))
return false;
654 AppendPDFObject(buf);
656 n = snprintf(buf,
sizeof(buf),
"%s/pdf.ttf", datadir_.c_str());
657 if (n >=
sizeof(buf))
return false;
658 FILE *fp = fopen(buf,
"rb");
660 tprintf(
"Can not open file \"%s\"!\n", buf);
663 fseek(fp, 0, SEEK_END);
664 long int size = ftell(fp);
669 fseek(fp, 0, SEEK_SET);
670 const std::unique_ptr<char[]> buffer(
new char[size]);
677 n = snprintf(buf,
sizeof(buf),
683 "stream\n", size, size);
684 if (n >=
sizeof(buf)) {
688 objsize = strlen(buf);
692 objsize += strlen(endstream_endobj);
693 AppendPDFObjectDIY(objsize);
void AppendData(const char *s, int len)
DLLSYM void tprintf(const char *format,...)
void AppendString(const char *s)
bool DeSerialize(FILE *fp, char *data, size_t n)
◆ EndDocumentHandler()
bool tesseract::TessPDFRenderer::EndDocumentHandler()
[protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 925 of file pdfrenderer.cpp.
927 char buf[kBasicBufSize];
936 const long int kPagesObjectNumber = 2;
937 offsets_[kPagesObjectNumber] = offsets_.
back();
938 n = snprintf(buf,
sizeof(buf),
942 " /Kids [ ", kPagesObjectNumber);
943 if (n >=
sizeof(buf))
return false;
945 size_t pages_objsize = strlen(buf);
947 n = snprintf(buf,
sizeof(buf),
948 "%ld 0 R ", pages_[i]);
949 if (n >=
sizeof(buf))
return false;
951 pages_objsize += strlen(buf);
953 n = snprintf(buf,
sizeof(buf),
957 "endobj\n", pages_.
size());
958 if (n >=
sizeof(buf))
return false;
960 pages_objsize += strlen(buf);
961 offsets_.
back() += pages_objsize;
964 STRING utf16_title =
"FEFF";
966 char utf16[kMaxBytesPerCodepoint];
967 for (
char32 code : unicodes) {
968 if (CodepointToUtf16be(code, utf16)) {
969 utf16_title += utf16;
973 char* datestr = l_getFormattedDate();
974 n = snprintf(buf,
sizeof(buf),
977 " /Producer (Tesseract %s)\n" 978 " /CreationDate (D:%s)\n" 983 datestr, utf16_title.
c_str());
985 if (n >=
sizeof(buf))
return false;
986 AppendPDFObject(buf);
987 n = snprintf(buf,
sizeof(buf),
990 "0000000000 65535 f \n", obj_);
991 if (n >=
sizeof(buf))
return false;
993 for (
int i = 1; i < obj_; i++) {
994 n = snprintf(buf,
sizeof(buf),
"%010ld 00000 n \n", offsets_[i]);
995 if (n >=
sizeof(buf))
return false;
998 n = snprintf(buf,
sizeof(buf),
1012 if (n >=
sizeof(buf))
return false;
const char * c_str() const
size_t unsigned_size() const
const char * title() const
static std::vector< char32 > UTF8ToUTF32(const char *utf8_str)
void AppendString(const char *s)
static const char * Version()
The documentation for this class was generated from the following files: