tesseract  5.0.0-alpha-619-ge9db
tesseract::DocumentCache Class Reference

#include <imagedata.h>

Public Member Functions

 DocumentCache (int64_t max_memory)
 
 ~DocumentCache ()
 
void Clear ()
 
bool LoadDocuments (const GenericVector< STRING > &filenames, CachingStrategy cache_strategy, FileReader reader)
 
bool AddToCache (DocumentData *data)
 
DocumentData * FindDocument (const STRING &document_name) const
 
const ImageData * GetPageBySerial (int serial)
 
const PointerVector< DocumentData > & documents () const
 
int TotalPages ()
 

Detailed Description

Definition at line 320 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentCache()

tesseract::DocumentCache::DocumentCache ( int64_t  max_memory)
explicit

Definition at line 560 of file imagedata.cpp.

561  : num_pages_per_doc_(0), max_memory_(max_memory) {}

◆ ~DocumentCache()

tesseract::DocumentCache::~DocumentCache ( )

Definition at line 562 of file imagedata.cpp.

562 {}

Member Function Documentation

◆ AddToCache()

bool tesseract::DocumentCache::AddToCache ( DocumentData *  data)

Definition at line 591 of file imagedata.cpp.

591  {
592  documents_.push_back(data);
593  return true;
594 }

◆ Clear()

void tesseract::DocumentCache::Clear ( )
inline

Definition at line 326 of file imagedata.h.

326  {
327  documents_.clear();
328  num_pages_per_doc_ = 0;
329  }

◆ documents()

const PointerVector<DocumentData>& tesseract::DocumentCache::documents ( ) const
inline

Definition at line 350 of file imagedata.h.

350  {
351  return documents_;
352  }

◆ FindDocument()

DocumentData * tesseract::DocumentCache::FindDocument ( const STRING &  document_name) const

Definition at line 597 of file imagedata.cpp.

597  {
598  for (int i = 0; i < documents_.size(); ++i) {
599  if (documents_[i]->document_name() == document_name)
600  return documents_[i];
601  }
602  return nullptr;
603 }

◆ GetPageBySerial()

const ImageData* tesseract::DocumentCache::GetPageBySerial ( int  serial)
inline

Definition at line 343 of file imagedata.h.

343  {
344  if (cache_strategy_ == CS_SEQUENTIAL)
345  return GetPageSequential(serial);
346  else
347  return GetPageRoundRobin(serial);
348  }

◆ LoadDocuments()

bool tesseract::DocumentCache::LoadDocuments ( const GenericVector< STRING > &  filenames,
CachingStrategy  cache_strategy,
FileReader  reader 
)

Definition at line 566 of file imagedata.cpp.

568  {
569  cache_strategy_ = cache_strategy;
570  int64_t fair_share_memory = 0;
571  // In the round-robin case, each DocumentData handles restricting its content
572  // to its fair share of memory. In the sequential case, DocumentCache
573  // determines which DocumentDatas are held entirely in memory.
574  if (cache_strategy_ == CS_ROUND_ROBIN)
575  fair_share_memory = max_memory_ / filenames.size();
576  for (int arg = 0; arg < filenames.size(); ++arg) {
577  STRING filename = filenames[arg];
578  auto* document = new DocumentData(filename);
579  document->SetDocument(filename.c_str(), fair_share_memory, reader);
580  AddToCache(document);
581  }
582  if (!documents_.empty()) {
583  // Try to get the first page now to verify the list of filenames.
584  if (GetPageBySerial(0) != nullptr) return true;
585  tprintf("Load of page 0 failed!\n");
586  }
587  return false;
588 }

◆ TotalPages()

int tesseract::DocumentCache::TotalPages ( )

Definition at line 607 of file imagedata.cpp.

607  {
608  if (cache_strategy_ == CS_SEQUENTIAL) {
609  // In sequential mode, we assume each doc has the same number of pages
610  // whether it is true or not.
611  if (num_pages_per_doc_ == 0) GetPageSequential(0);
612  return num_pages_per_doc_ * documents_.size();
613  }
614  int total_pages = 0;
615  int num_docs = documents_.size();
616  for (int d = 0; d < num_docs; ++d) {
617  // We have to load a page to make NumPages() valid.
618  documents_[d]->GetPage(0);
619  total_pages += documents_[d]->NumPages();
620  }
621  return total_pages;
622 }

The documentation for this class was generated from the following files:
imagedata.h
imagedata.cpp
tesseract::CS_ROUND_ROBIN
Definition: imagedata.h:53
STRING
Definition: strngs.h:45
tesseract::CS_SEQUENTIAL
Definition: imagedata.h:48
tesseract::DocumentCache::GetPageBySerial
const ImageData * GetPageBySerial(int serial)
Definition: imagedata.h:343
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::DocumentCache::AddToCache
bool AddToCache(DocumentData *data)
Definition: imagedata.cpp:591
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
GenericVector::size
int size() const
Definition: genericvector.h:71