tesseract  5.0.0-alpha-619-ge9db
tesseract::DocumentData Class Reference

#include <imagedata.h>

Public Member Functions

 DocumentData (const STRING &name)
 
 ~DocumentData ()
 
bool LoadDocument (const char *filename, int start_page, int64_t max_memory, FileReader reader)
 
void SetDocument (const char *filename, int64_t max_memory, FileReader reader)
 
bool SaveDocument (const char *filename, FileWriter writer)
 
bool SaveToBuffer (GenericVector< char > *buffer)
 
void AddPageToDocument (ImageData *page)
 
const STRING & document_name () const
 
int NumPages () const
 
size_t PagesSize () const
 
int64_t memory_used () const
 
void LoadPageInBackground (int index)
 
const ImageData * GetPage (int index)
 
bool IsPageAvailable (int index, ImageData **page)
 
ImageData * TakePage (int index)
 
bool IsCached () const
 
int64_t UnCache ()
 
void Shuffle ()
 

Friends

void * ReCachePagesFunc (void *data)
 

Detailed Description

Definition at line 208 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentData()

tesseract::DocumentData::DocumentData ( const STRING & name)
explicit

Definition at line 372 of file imagedata.cpp.

373  : document_name_(name),
374  pages_offset_(-1),
375  total_pages_(-1),
376  memory_used_(0),
377  max_memory_(0),
378  reader_(nullptr) {}

◆ ~DocumentData()

tesseract::DocumentData::~DocumentData ( )

Definition at line 380 of file imagedata.cpp.

380  {
381  std::lock_guard<std::mutex> lock_p(pages_mutex_);
382  std::lock_guard<std::mutex> lock_g(general_mutex_);
383 }

Member Function Documentation

◆ AddPageToDocument()

void tesseract::DocumentData::AddPageToDocument ( ImageData * page)

Definition at line 424 of file imagedata.cpp.

424  {
425  std::lock_guard<std::mutex> lock(pages_mutex_);
426  pages_.push_back(page);
427  set_memory_used(memory_used() + page->MemoryUsed());
428 }

◆ document_name()

const STRING& tesseract::DocumentData::document_name ( ) const
inline

Definition at line 228 of file imagedata.h.

228  {
229  std::lock_guard<std::mutex> lock(general_mutex_);
230  return document_name_;
231  }

◆ GetPage()

const ImageData * tesseract::DocumentData::GetPage ( int  index)

Definition at line 445 of file imagedata.cpp.

445  {
446  ImageData* page = nullptr;
447  while (!IsPageAvailable(index, &page)) {
448  // If there is no background load scheduled, schedule one now.
449  pages_mutex_.lock();
450  bool needs_loading = pages_offset_ != index;
451  pages_mutex_.unlock();
452  if (needs_loading) LoadPageInBackground(index);
453  // We can't directly load the page, or the background load will delete it
454  // while the caller is using it, so give it a chance to work.
455  std::this_thread::yield();
456  }
457  return page;
458 }

◆ IsCached()

bool tesseract::DocumentData::IsCached ( ) const
inline

Definition at line 271 of file imagedata.h.

271 { return NumPages() >= 0; }

◆ IsPageAvailable()

bool tesseract::DocumentData::IsPageAvailable ( int  index,
ImageData **  page 
)

Definition at line 463 of file imagedata.cpp.

463  {
464  std::lock_guard<std::mutex> lock(pages_mutex_);
465  int num_pages = NumPages();
466  if (num_pages == 0 || index < 0) {
467  *page = nullptr; // Empty Document.
468  return true;
469  }
470  if (num_pages > 0) {
471  index = Modulo(index, num_pages);
472  if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
473  *page = pages_[index - pages_offset_]; // Page is available already.
474  return true;
475  }
476  }
477  return false;
478 }

◆ LoadDocument()

bool tesseract::DocumentData::LoadDocument ( const char *  filename,
int  start_page,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 387 of file imagedata.cpp.

388  {
389  SetDocument(filename, max_memory, reader);
390  pages_offset_ = start_page;
391  return ReCachePages();
392 }

◆ LoadPageInBackground()

void tesseract::DocumentData::LoadPageInBackground ( int  index)

Definition at line 432 of file imagedata.cpp.

432  {
433  ImageData* page = nullptr;
434  if (IsPageAvailable(index, &page)) return;
435  std::lock_guard<std::mutex> lock(pages_mutex_);
436  if (pages_offset_ == index) return;
437  pages_offset_ = index;
438  pages_.clear();
439  std::thread t(&tesseract::DocumentData::ReCachePages, this);
440  t.detach();
441 }

◆ memory_used()

int64_t tesseract::DocumentData::memory_used ( ) const
inline

Definition at line 239 of file imagedata.h.

239  {
240  std::lock_guard<std::mutex> lock(general_mutex_);
241  return memory_used_;
242  }

◆ NumPages()

int tesseract::DocumentData::NumPages ( ) const
inline

Definition at line 232 of file imagedata.h.

232  {
233  std::lock_guard<std::mutex> lock(general_mutex_);
234  return total_pages_;
235  }

◆ PagesSize()

size_t tesseract::DocumentData::PagesSize ( ) const
inline

Definition at line 236 of file imagedata.h.

236  {
237  return pages_.size();
238  }

◆ SaveDocument()

bool tesseract::DocumentData::SaveDocument ( const char *  filename,
FileWriter  writer 
)

Definition at line 406 of file imagedata.cpp.

406  {
407  std::lock_guard<std::mutex> lock(pages_mutex_);
408  TFile fp;
409  fp.OpenWrite(nullptr);
410  if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
411  tprintf("Serialize failed: %s\n", filename);
412  return false;
413  }
414  return true;
415 }

◆ SaveToBuffer()

bool tesseract::DocumentData::SaveToBuffer ( GenericVector< char > *  buffer)

Definition at line 416 of file imagedata.cpp.

416  {
417  std::lock_guard<std::mutex> lock(pages_mutex_);
418  TFile fp;
419  fp.OpenWrite(buffer);
420  return pages_.Serialize(&fp);
421 }

◆ SetDocument()

void tesseract::DocumentData::SetDocument ( const char *  filename,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 395 of file imagedata.cpp.

396  {
397  std::lock_guard<std::mutex> lock_p(pages_mutex_);
398  std::lock_guard<std::mutex> lock(general_mutex_);
399  document_name_ = filename;
400  pages_offset_ = -1;
401  max_memory_ = max_memory;
402  reader_ = reader;
403 }

◆ Shuffle()

void tesseract::DocumentData::Shuffle ( )

Definition at line 495 of file imagedata.cpp.

495  {
496  TRand random;
497  // Different documents get shuffled differently, but the same for the same
498  // name.
499  random.set_seed(document_name_.c_str());
500  int num_pages = pages_.size();
501  // Execute one random swap for each page in the document.
502  for (int i = 0; i < num_pages; ++i) {
503  int src = random.IntRand() % num_pages;
504  int dest = random.IntRand() % num_pages;
505  std::swap(pages_[src], pages_[dest]);
506  }
507 }

◆ TakePage()

ImageData* tesseract::DocumentData::TakePage ( int  index)
inline

Definition at line 263 of file imagedata.h.

263  {
264  std::lock_guard<std::mutex> lock(pages_mutex_);
265  ImageData* page = pages_[index];
266  pages_[index] = nullptr;
267  return page;
268  }

◆ UnCache()

int64_t tesseract::DocumentData::UnCache ( )

Definition at line 482 of file imagedata.cpp.

482  {
483  std::lock_guard<std::mutex> lock(pages_mutex_);
484  int64_t memory_saved = memory_used();
485  pages_.clear();
486  pages_offset_ = -1;
487  set_total_pages(-1);
488  set_memory_used(0);
489  tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
490  document_name_.c_str(), memory_saved);
491  return memory_saved;
492 }

Friends And Related Function Documentation

◆ ReCachePagesFunc

void* ReCachePagesFunc ( void *  data)
friend

The documentation for this class was generated from the following files:
imagedata.h
imagedata.cpp
tesseract::DocumentData::IsPageAvailable
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:463
tesseract::DocumentData::NumPages
int NumPages() const
Definition: imagedata.h:232
tesseract::DocumentData::SetDocument
void SetDocument(const char *filename, int64_t max_memory, FileReader reader)
Definition: imagedata.cpp:395
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
tesseract::DocumentData::memory_used
int64_t memory_used() const
Definition: imagedata.h:239
tesseract::DocumentData::LoadPageInBackground
void LoadPageInBackground(int index)
Definition: imagedata.cpp:432
tesstrain_utils.dest
dest
Definition: tesstrain_utils.py:139
Modulo
int Modulo(int a, int b)
Definition: helpers.h:156
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34