tesseract  4.0.0-1-g2a2b
tesseract::DocumentData Class Reference

#include <imagedata.h>

Public Member Functions

 DocumentData (const STRING &name)
 
 ~DocumentData ()
 
bool LoadDocument (const char *filename, int start_page, int64_t max_memory, FileReader reader)
 
void SetDocument (const char *filename, int64_t max_memory, FileReader reader)
 
bool SaveDocument (const char *filename, FileWriter writer)
 
bool SaveToBuffer (GenericVector< char > *buffer)
 
void AddPageToDocument (ImageData *page)
 
const STRING & document_name () const
 
int NumPages () const
 
int64_t memory_used () const
 
void LoadPageInBackground (int index)
 
const ImageData * GetPage (int index)
 
bool IsPageAvailable (int index, ImageData **page)
 
ImageData * TakePage (int index)
 
bool IsCached () const
 
int64_t UnCache ()
 
void Shuffle ()
 

Friends

void * ReCachePagesFunc (void *data)
 

Detailed Description

Definition at line 205 of file imagedata.h.

Constructor & Destructor Documentation

◆ DocumentData()

tesseract::DocumentData::DocumentData ( const STRING & name)
explicit

Definition at line 375 of file imagedata.cpp.

376  : document_name_(name),
377  pages_offset_(-1),
378  total_pages_(-1),
379  memory_used_(0),
380  max_memory_(0),
381  reader_(nullptr) {}

◆ ~DocumentData()

tesseract::DocumentData::~DocumentData ( )

Definition at line 383 of file imagedata.cpp.

383  {
384  SVAutoLock lock_p(&pages_mutex_);
385  SVAutoLock lock_g(&general_mutex_);
386 }

Member Function Documentation

◆ AddPageToDocument()

void tesseract::DocumentData::AddPageToDocument ( ImageData * page)

Definition at line 427 of file imagedata.cpp.

427  {
428  SVAutoLock lock(&pages_mutex_);
429  pages_.push_back(page);
430  set_memory_used(memory_used() + page->MemoryUsed());
431 }
int64_t memory_used() const
Definition: imagedata.h:233

◆ document_name()

const STRING& tesseract::DocumentData::document_name ( ) const
inline

Definition at line 225 of file imagedata.h.

225  {
226  SVAutoLock lock(&general_mutex_);
227  return document_name_;
228  }

◆ GetPage()

const ImageData * tesseract::DocumentData::GetPage ( int  index)

Definition at line 447 of file imagedata.cpp.

447  {
448  ImageData* page = nullptr;
449  while (!IsPageAvailable(index, &page)) {
450  // If there is no background load scheduled, schedule one now.
451  pages_mutex_.Lock();
452  bool needs_loading = pages_offset_ != index;
453  pages_mutex_.Unlock();
454  if (needs_loading) LoadPageInBackground(index);
455  // We can't directly load the page, or the background load will delete it
456  // while the caller is using it, so give it a chance to work.
457 #if defined(__MINGW32__)
458  sleep(1);
459 #else
460  std::this_thread::sleep_for(std::chrono::seconds(1));
461 #endif
462  }
463  return page;
464 }
void Unlock()
Unlocks on a mutex.
Definition: svutil.cpp:78
void Lock()
Locks on a mutex.
Definition: svutil.cpp:70
void LoadPageInBackground(int index)
Definition: imagedata.cpp:435
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:469

◆ IsCached()

bool tesseract::DocumentData::IsCached ( ) const
inline

Definition at line 265 of file imagedata.h.

265 { return NumPages() >= 0; }
int NumPages() const
Definition: imagedata.h:229

◆ IsPageAvailable()

bool tesseract::DocumentData::IsPageAvailable ( int  index,
ImageData **  page 
)

Definition at line 469 of file imagedata.cpp.

469  {
470  SVAutoLock lock(&pages_mutex_);
471  int num_pages = NumPages();
472  if (num_pages == 0 || index < 0) {
473  *page = nullptr; // Empty Document.
474  return true;
475  }
476  if (num_pages > 0) {
477  index = Modulo(index, num_pages);
478  if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) {
479  *page = pages_[index - pages_offset_]; // Page is available already.
480  return true;
481  }
482  }
483  return false;
484 }
int Modulo(int a, int b)
Definition: helpers.h:153
int NumPages() const
Definition: imagedata.h:229

◆ LoadDocument()

bool tesseract::DocumentData::LoadDocument ( const char *  filename,
int  start_page,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 390 of file imagedata.cpp.

391  {
392  SetDocument(filename, max_memory, reader);
393  pages_offset_ = start_page;
394  return ReCachePages();
395 }
void SetDocument(const char *filename, int64_t max_memory, FileReader reader)
Definition: imagedata.cpp:398

◆ LoadPageInBackground()

void tesseract::DocumentData::LoadPageInBackground ( int  index)

Definition at line 435 of file imagedata.cpp.

435  {
436  ImageData* page = nullptr;
437  if (IsPageAvailable(index, &page)) return;
438  SVAutoLock lock(&pages_mutex_);
439  if (pages_offset_ == index) return;
440  pages_offset_ = index;
441  pages_.clear();
442  SVSync::StartThread(ReCachePagesFunc, this);
443 }
friend void * ReCachePagesFunc(void *data)
Definition: imagedata.cpp:369
static void StartThread(void *(*func)(void *), void *arg)
Create new thread.
Definition: svutil.cpp:87
bool IsPageAvailable(int index, ImageData **page)
Definition: imagedata.cpp:469

◆ memory_used()

int64_t tesseract::DocumentData::memory_used ( ) const
inline

Definition at line 233 of file imagedata.h.

233  {
234  SVAutoLock lock(&general_mutex_);
235  return memory_used_;
236  }

◆ NumPages()

int tesseract::DocumentData::NumPages ( ) const
inline

Definition at line 229 of file imagedata.h.

229  {
230  SVAutoLock lock(&general_mutex_);
231  return total_pages_;
232  }

◆ SaveDocument()

bool tesseract::DocumentData::SaveDocument ( const char *  filename,
FileWriter  writer 
)

Definition at line 409 of file imagedata.cpp.

409  {
410  SVAutoLock lock(&pages_mutex_);
411  TFile fp;
412  fp.OpenWrite(nullptr);
413  if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) {
414  tprintf("Serialize failed: %s\n", filename);
415  return false;
416  }
417  return true;
418 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

◆ SaveToBuffer()

bool tesseract::DocumentData::SaveToBuffer ( GenericVector< char > *  buffer)

Definition at line 419 of file imagedata.cpp.

419  {
420  SVAutoLock lock(&pages_mutex_);
421  TFile fp;
422  fp.OpenWrite(buffer);
423  return pages_.Serialize(&fp);
424 }

◆ SetDocument()

void tesseract::DocumentData::SetDocument ( const char *  filename,
int64_t  max_memory,
FileReader  reader 
)

Definition at line 398 of file imagedata.cpp.

399  {
400  SVAutoLock lock_p(&pages_mutex_);
401  SVAutoLock lock(&general_mutex_);
402  document_name_ = filename;
403  pages_offset_ = -1;
404  max_memory_ = max_memory;
405  reader_ = reader;
406 }

◆ Shuffle()

void tesseract::DocumentData::Shuffle ( )

Definition at line 501 of file imagedata.cpp.

501  {
502  TRand random;
503  // Different documents get shuffled differently, but the same for the same
504  // name.
505  random.set_seed(document_name_.string());
506  int num_pages = pages_.size();
507  // Execute one random swap for each page in the document.
508  for (int i = 0; i < num_pages; ++i) {
509  int src = random.IntRand() % num_pages;
510  int dest = random.IntRand() % num_pages;
511  std::swap(pages_[src], pages_[dest]);
512  }
513 }
const char * string() const
Definition: strngs.cpp:196

◆ TakePage()

ImageData* tesseract::DocumentData::TakePage ( int  index)
inline

Definition at line 257 of file imagedata.h.

257  {
258  SVAutoLock lock(&pages_mutex_);
259  ImageData* page = pages_[index];
260  pages_[index] = nullptr;
261  return page;
262  }

◆ UnCache()

int64_t tesseract::DocumentData::UnCache ( )

Definition at line 488 of file imagedata.cpp.

488  {
489  SVAutoLock lock(&pages_mutex_);
490  int64_t memory_saved = memory_used();
491  pages_.clear();
492  pages_offset_ = -1;
493  set_total_pages(-1);
494  set_memory_used(0);
495  tprintf("Unloaded document %s, saving %" PRId64 " memory\n",
496  document_name_.string(), memory_saved);
497  return memory_saved;
498 }
const char * string() const
Definition: strngs.cpp:196
int64_t memory_used() const
Definition: imagedata.h:233
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37

Friends And Related Function Documentation

◆ ReCachePagesFunc

void* ReCachePagesFunc ( void *  data)
friend

Definition at line 369 of file imagedata.cpp.

369  {
370  DocumentData* document_data = static_cast<DocumentData*>(data);
371  document_data->ReCachePages();
372  return nullptr;
373 }
DocumentData(const STRING &name)
Definition: imagedata.cpp:375

The documentation for this class was generated from the following files: