#include <renderer.h>
Renders Tesseract output into a searchable PDF.
Definition at line 168 of file renderer.h.
tesseract::TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir)
Definition at line 164 of file pdfrenderer.cpp.
TessResultRenderer(const char *outputbase, const char *extension)
bool tesseract::TessPDFRenderer::AddImageHandler(TessBaseAPI *api) [protected, virtual]
Implements tesseract::TessResultRenderer.
Definition at line 807 of file pdfrenderer.cpp.
810 Pix *pix =
api->GetInputImage();
812 int ppi =
api->GetSourceYResolution();
813 if (!pix || ppi <= 0)
815 double width = pixGetWidth(pix) * 72.0 / ppi;
816 double height = pixGetHeight(pix) * 72.0 / ppi;
819 n = snprintf(buf,
sizeof(buf),
824 " /MediaBox [0 0 %.2f %.2f]\n"
825 " /Contents %ld 0 R\n"
828 " /XObject << /Im1 %ld 0 R >>\n"
829 " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
830 " /Font << /f-0-0 %ld 0 R >>\n"
841 if (n >=
sizeof(buf))
return false;
843 AppendPDFObject(buf);
846 char* pdftext = GetPDFTextObjects(
api, width, height);
847 long pdftext_len = strlen(pdftext);
848 unsigned char *pdftext_casted =
reinterpret_cast<unsigned char *
>(pdftext);
850 unsigned char *comp_pdftext =
851 zlibCompress(pdftext_casted, pdftext_len, &len);
852 long comp_pdftext_len = len;
853 n = snprintf(buf,
sizeof(buf),
856 " /Length %ld /Filter /FlateDecode\n"
858 "stream\n", obj_, comp_pdftext_len);
859 if (n >=
sizeof(buf)) {
861 lept_free(comp_pdftext);
865 long objsize = strlen(buf);
866 AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
867 objsize += comp_pdftext_len;
868 lept_free(comp_pdftext);
874 objsize += strlen(b2);
875 AppendPDFObjectDIY(objsize);
878 if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
882 AppendPDFObjectDIY(objsize);
void AppendString(const char *s)
void AppendData(const char *s, int len)
bool tesseract::TessPDFRenderer::BeginDocumentHandler() [protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 452 of file pdfrenderer.cpp.
456 n = snprintf(buf,
sizeof(buf),
459 0xDE, 0xAD, 0xBE, 0xEB);
460 if (n >=
sizeof(buf))
return false;
461 AppendPDFObject(buf);
464 n = snprintf(buf,
sizeof(buf),
472 if (n >=
sizeof(buf))
return false;
473 AppendPDFObject(buf);
481 n = snprintf(buf,
sizeof(buf),
484 " /BaseFont /GlyphLessFont\n"
485 " /DescendantFonts [ %ld 0 R ]\n"
486 " /Encoding /Identity-H\n"
488 " /ToUnicode %ld 0 R\n"
495 if (n >=
sizeof(buf))
return false;
496 AppendPDFObject(buf);
499 n = snprintf(buf,
sizeof(buf),
502 " /BaseFont /GlyphLessFont\n"
503 " /CIDToGIDMap %ld 0 R\n"
506 " /Ordering (Identity)\n"
507 " /Registry (Adobe)\n"
510 " /FontDescriptor %ld 0 R\n"
511 " /Subtype /CIDFontType2\n"
519 if (n >=
sizeof(buf))
return false;
520 AppendPDFObject(buf);
523 const int kCIDToGIDMapSize = 2 * (1 << 16);
524 unsigned char *cidtogidmap =
new unsigned char[kCIDToGIDMapSize];
525 for (
int i = 0; i < kCIDToGIDMapSize; i++) {
526 cidtogidmap[i] = (i % 2) ? 1 : 0;
529 unsigned char *comp =
530 zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len);
531 delete[] cidtogidmap;
532 n = snprintf(buf,
sizeof(buf),
535 " /Length %ld /Filter /FlateDecode\n"
538 if (n >=
sizeof(buf)) {
543 long objsize = strlen(buf);
544 AppendData(reinterpret_cast<char *>(comp), len);
547 const char *endstream_endobj =
551 objsize += strlen(endstream_endobj);
552 AppendPDFObjectDIY(objsize);
555 "/CIDInit /ProcSet findresource begin\n"
560 " /Registry (Adobe)\n"
564 "/CMapName /Adobe-Identify-UCS def\n"
566 "1 begincodespacerange\n"
568 "endcodespacerange\n"
570 "<0000> <FFFF> <0000>\n"
573 "CMapName currentdict /CMap defineresource pop\n"
578 n = snprintf(buf,
sizeof(buf),
580 "<< /Length %lu >>\n"
584 "endobj\n", (
unsigned long) strlen(stream), stream);
585 if (n >=
sizeof(buf))
return false;
586 AppendPDFObject(buf);
589 const int kCharHeight = 2;
590 n = snprintf(buf,
sizeof(buf),
597 " /FontBBox [ 0 0 %d %d ]\n"
598 " /FontFile2 %ld 0 R\n"
599 " /FontName /GlyphLessFont\n"
602 " /Type /FontDescriptor\n"
611 if (n >=
sizeof(buf))
return false;
612 AppendPDFObject(buf);
614 n = snprintf(buf,
sizeof(buf),
"%s/pdf.ttf", datadir_);
615 if (n >=
sizeof(buf))
return false;
616 FILE *fp = fopen(buf,
"rb");
618 tprintf(
"Can not open file \"%s\"!\n", buf);
621 fseek(fp, 0, SEEK_END);
622 long int size = ftell(fp);
623 fseek(fp, 0, SEEK_SET);
624 char *buffer =
new char[size];
625 if (fread(buffer, 1, size, fp) != size) {
632 n = snprintf(buf,
sizeof(buf),
638 "stream\n", size, size);
639 if (n >=
sizeof(buf)) {
644 objsize = strlen(buf);
649 objsize += strlen(endstream_endobj);
650 AppendPDFObjectDIY(objsize);
void AppendString(const char *s)
void AppendData(const char *s, int len)
bool tesseract::TessPDFRenderer::EndDocumentHandler() [protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 888 of file pdfrenderer.cpp.
899 const long int kPagesObjectNumber = 2;
900 offsets_[kPagesObjectNumber] = offsets_.
back();
901 n = snprintf(buf,
sizeof(buf),
905 " /Kids [ ", kPagesObjectNumber);
906 if (n >=
sizeof(buf))
return false;
908 size_t pages_objsize = strlen(buf);
909 for (
size_t i = 0; i < pages_.
size(); i++) {
910 n = snprintf(buf,
sizeof(buf),
911 "%ld 0 R ", pages_[i]);
912 if (n >=
sizeof(buf))
return false;
914 pages_objsize += strlen(buf);
916 n = snprintf(buf,
sizeof(buf),
920 "endobj\n", pages_.
size());
921 if (n >=
sizeof(buf))
return false;
923 pages_objsize += strlen(buf);
924 offsets_.
back() += pages_objsize;
927 char* datestr = l_getFormattedDate();
928 n = snprintf(buf,
sizeof(buf),
931 " /Producer (Tesseract %s)\n"
932 " /CreationDate (D:%s)\n"
937 if (n >=
sizeof(buf))
return false;
938 AppendPDFObject(buf);
939 n = snprintf(buf,
sizeof(buf),
942 "0000000000 65535 f \n", obj_);
943 if (n >=
sizeof(buf))
return false;
945 for (
int i = 1; i < obj_; i++) {
946 n = snprintf(buf,
sizeof(buf),
"%010ld 00000 n \n", offsets_[i]);
947 if (n >=
sizeof(buf))
return false;
950 n = snprintf(buf,
sizeof(buf),
964 if (n >=
sizeof(buf))
return false;
void AppendString(const char *s)
const char * title() const
#define TESSERACT_VERSION_STR
The documentation for this class was generated from the following files: