tesseract  5.0.0-alpha-619-ge9db
fixspace.cpp File Reference
#include "fixspace.h"
#include <cstdint>
#include "blobs.h"
#include "boxword.h"
#include "errcode.h"
#include "normalis.h"
#include <tesseract/ocrclass.h>
#include "pageres.h"
#include "params.h"
#include "ratngs.h"
#include "rect.h"
#include "stepblob.h"
#include <tesseract/strngs.h>
#include "tesseractclass.h"
#include "tessvars.h"
#include "tprintf.h"
#include <tesseract/unichar.h>
#include "unicharset.h"
#include "werd.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define PERFECT_WERDS   999
 

Functions

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list, closing any gaps of this size by either inserting new combination words or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)
 
void fixspace_dbg (WERD_RES *word)
 

Macro Definition Documentation

◆ PERFECT_WERDS

#define PERFECT_WERDS   999

Definition at line 43 of file fixspace.cpp.

Function Documentation

◆ fixspace_dbg()

void fixspace_dbg ( WERD_RES *  word)

Definition at line 821 of file fixspace.cpp.

822  {
823  TBOX box = word->word->bounding_box();
824  const bool show_map_detail = false;
825  int16_t i;
826 
827  box.print();
828  tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
829  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
830  word->word->cblob_list()->length(),
831  word->rebuild_word->NumBlobs(),
832  word->box_word->length());
833  word->reject_map.print(debug_fp);
834  tprintf("\n");
835  if (show_map_detail) {
836  tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
837  for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
838  tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
839  word->reject_map[i].full_print(debug_fp);
840  }
841  }
842 
843  tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
844  tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");

◆ initialise_search()

void initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 203 of file fixspace.cpp.

204  {
205  WERD_RES_IT src_it(&src_list);
206  WERD_RES_IT new_it(&new_list);
207  WERD_RES *src_wd;
208  WERD_RES *new_wd;
209 
210  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
211  src_wd = src_it.data();
212  if (!src_wd->combination) {
213  new_wd = WERD_RES::deep_copy(src_wd);
214  new_wd->combination = false;
215  new_wd->part_of_combo = false;
216  new_it.add_after_then_move(new_wd);
217  }
218  }

◆ transform_to_next_perm()

void transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 398 of file fixspace.cpp.

399  {
400  WERD_RES_IT word_it(&words);
401  WERD_RES_IT prev_word_it(&words);
402  WERD_RES *word;
403  WERD_RES *prev_word;
404  WERD_RES *combo;
405  WERD *copy_word;
406  int16_t prev_right = -INT16_MAX;
407  TBOX box;
408  int16_t gap;
409  int16_t min_gap = INT16_MAX;
410 
411  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
412  word = word_it.data();
413  if (!word->part_of_combo) {
414  box = word->word->bounding_box();
415  if (prev_right > -INT16_MAX) {
416  gap = box.left() - prev_right;
417  if (gap < min_gap)
418  min_gap = gap;
419  }
420  prev_right = box.right();
421  }
422  }
423  if (min_gap < INT16_MAX) {
424  prev_right = -INT16_MAX; // back to start
425  word_it.set_to_list(&words);
426  // Note: we can't use cycle_pt due to inserted combos at start of list.
427  for (; (prev_right == -INT16_MAX) || !word_it.at_first();
428  word_it.forward()) {
429  word = word_it.data();
430  if (!word->part_of_combo) {
431  box = word->word->bounding_box();
432  if (prev_right > -INT16_MAX) {
433  gap = box.left() - prev_right;
434  if (gap <= min_gap) {
435  prev_word = prev_word_it.data();
436  if (prev_word->combination) {
437  combo = prev_word;
438  } else {
439  /* Make a new combination and insert before
440  * the first word being joined. */
441  copy_word = new WERD;
442  *copy_word = *(prev_word->word);
443  // deep copy
444  combo = new WERD_RES(copy_word);
445  combo->combination = true;
446  combo->x_height = prev_word->x_height;
447  prev_word->part_of_combo = true;
448  prev_word_it.add_before_then_move(combo);
449  }
450  combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
451  if (word->combination) {
452  combo->word->join_on(word->word);
453  // Move blobs to combo
454  // old combo no longer needed
455  delete word_it.extract();
456  } else {
457  // Copy current wd to combo
458  combo->copy_on(word);
459  word->part_of_combo = true;
460  }
461  combo->done = false;
462  combo->ClearResults();
463  } else {
464  prev_word_it = word_it; // catch up
465  }
466  }
467  prev_right = box.right();
468  }
469  }
470  } else {
471  words.clear(); // signal termination
472  }
WERD_RES::done
bool done
Definition: pageres.h:299
REJMAP::full_print
void full_print(FILE *fp)
Definition: rejctmap.cpp:332
WERD_CHOICE::unichar_string
const STRING & unichar_string() const
Definition: ratngs.h:529
WERD_RES::box_word
tesseract::BoxWord * box_word
Definition: pageres.h:266
WERD::flag
bool flag(WERD_FLAGS mask) const
Definition: werd.h:116
WERD_RES::rebuild_word
TWERD * rebuild_word
Definition: pageres.h:260
WERD::bounding_box
TBOX bounding_box() const
Definition: werd.cpp:147
TBOX::print
void print() const
Definition: rect.h:277
WERD_RES::combination
bool combination
Definition: pageres.h:333
WERD_RES::x_height
float x_height
Definition: pageres.h:310
WERD_RES
Definition: pageres.h:160
WERD::cblob_list
C_BLOB_LIST * cblob_list()
Definition: werd.h:94
WERD_RES::deep_copy
static WERD_RES * deep_copy(const WERD_RES *src)
Definition: pageres.h:643
WERD_RES::best_choice
WERD_CHOICE * best_choice
Definition: pageres.h:235
STRING::c_str
const char * c_str() const
Definition: strngs.cpp:192
WERD::set_flag
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:117
WERD_RES::ClearResults
void ClearResults()
Definition: pageres.cpp:1100
W_EOL
end of line
Definition: werd.h:47
WERD_RES::tess_accepted
bool tess_accepted
Definition: pageres.h:297
WERD_RES::reject_map
REJMAP reject_map
Definition: pageres.h:288
WERD_RES::copy_on
void copy_on(WERD_RES *word_res)
Definition: pageres.h:654
tesseract::BoxWord::length
int length() const
Definition: boxword.h:82
WERD
Definition: werd.h:55
TBOX::left
int16_t left() const
Definition: rect.h:71
debug_fp
FILE * debug_fp
Definition: tessvars.cpp:23
TBOX::right
int16_t right() const
Definition: rect.h:78
WERD_RES::part_of_combo
bool part_of_combo
Definition: pageres.h:334
tprintf
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:34
WERD::join_on
void join_on(WERD *other)
Definition: werd.cpp:198
WERD_RES::word
WERD * word
Definition: pageres.h:180
REJMAP::print
void print(FILE *fp)
Definition: rejctmap.cpp:320
TWERD::NumBlobs
int NumBlobs() const
Definition: blobs.h:446
TBOX
Definition: rect.h:33