tesseract  4.0.0-1-g2a2b
fixspace.cpp File Reference
#include "fixspace.h"
#include <cstdint>
#include "blobs.h"
#include "boxword.h"
#include "errcode.h"
#include "host.h"
#include "normalis.h"
#include "ocrclass.h"
#include "pageres.h"
#include "params.h"
#include "ratngs.h"
#include "rect.h"
#include "stepblob.h"
#include "strngs.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "tprintf.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define PERFECT_WERDS   999
 
#define MAXSPACING   128 /*max expected spacing in pix */
 

Functions

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list, closing any gaps of this size by either inserting new combination words or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)
 
void fixspace_dbg (WERD_RES *word)
 

Macro Definition Documentation

◆ MAXSPACING

#define MAXSPACING   128 /*max expected spacing in pix */

Definition at line 47 of file fixspace.cpp.

◆ PERFECT_WERDS

#define PERFECT_WERDS   999

Definition at line 46 of file fixspace.cpp.

Function Documentation

◆ fixspace_dbg()

void fixspace_dbg ( WERD_RES *  word)

Definition at line 825 of file fixspace.cpp.

// fixspace_dbg(): debug dump of a single WERD_RES.
// Prints, in order: the bounding box of word->word, the best-choice string,
// three blob counts, the reject map, and the tess_accepted / done flags.
// NOTE(review): word, best_choice, rebuild_word and box_word are dereferenced
// without null checks - presumably callers pass a fully populated word; confirm.
825  {
826  TBOX box = word->word->bounding_box();
// Hard-wired to false here, so the per-character dump below never runs
// unless this line is edited.
827  const bool show_map_detail = false;
828  int16_t i;
829 
830  box.print();
831  tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
// Counts come from three different representations of the same word:
// the WERD's cblob list, the rebuilt TWERD, and the BoxWord.
832  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
833  word->word->cblob_list()->length(),
834  word->rebuild_word->NumBlobs(),
835  word->box_word->length());
// The reject map is written to debug_fp rather than through tprintf.
836  word->reject_map.print(debug_fp);
837  tprintf("\n");
838  if (show_map_detail) {
839  tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
// Walks the best-choice string one char at a time up to the terminating NUL
// and prints the reject-map entry at the same index.
840  for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
841  tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
842  word->reject_map[i].full_print(debug_fp);
843  }
844  }
845 
846  tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
847  tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
848 }
TWERD * rebuild_word
Definition: pageres.h:260
FILE * debug_fp
Definition: tessvars.cpp:24
void print() const
Definition: rect.h:278
REJMAP reject_map
Definition: pageres.h:287
const char * string() const
Definition: strngs.cpp:196
void full_print(FILE *fp)
Definition: rejctmap.cpp:335
TBOX bounding_box() const
Definition: werd.cpp:159
TBOX
Definition: rect.h:34
int NumBlobs() const
Definition: blobs.h:432
bool tess_accepted
Definition: pageres.h:296
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:37
C_BLOB_LIST * cblob_list()
Definition: werd.h:98
bool done
Definition: pageres.h:298
int length() const
Definition: boxword.h:83
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE * best_choice
Definition: pageres.h:235
tesseract::BoxWord * box_word
Definition: pageres.h:266
void print(FILE *fp)
Definition: rejctmap.cpp:323
WERD * word
Definition: pageres.h:189

◆ initialise_search()

void initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 207 of file fixspace.cpp.

// initialise_search(): fill new_list with deep copies of every word in
// src_list that is not itself a combination. Combination words are skipped.
// src_list is only read through the iterator; new_list receives the copies
// in source order.
207  {
208  WERD_RES_IT src_it(&src_list);
209  WERD_RES_IT new_it(&new_list);
210  WERD_RES *src_wd;
211  WERD_RES *new_wd;
212 
// One full cycle over the source list.
213  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
214  src_wd = src_it.data();
215  if (!src_wd->combination) {
216  new_wd = WERD_RES::deep_copy(src_wd);
// Reset both combo flags on the copy so it starts as a plain word.
// (combination is already known to be false on the source at this point.)
217  new_wd->combination = false;
218  new_wd->part_of_combo = false;
// Insert after the iterator's current position and advance onto the new item.
219  new_it.add_after_then_move(new_wd);
220  }
221  }
222 }
static WERD_RES * deep_copy(const WERD_RES *src)
Definition: pageres.h:649
bool combination
Definition: pageres.h:334
bool part_of_combo
Definition: pageres.h:335

◆ transform_to_next_perm()

void transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 402 of file fixspace.cpp.

// transform_to_next_perm(): close the smallest inter-word gap(s) in `words`.
// Pass 1 finds min_gap over consecutive words that are not part_of_combo.
// Pass 2 merges each pair whose gap is <= min_gap into a combination word,
// either extending an existing combo or inserting a new one.
// If pass 1 finds no gap at all, the list is cleared to signal termination.
402  {
403  WERD_RES_IT word_it(&words);
// Trails word_it in pass 2; it is only moved by the "catch up" assignment
// or by add_before_then_move() below.
404  WERD_RES_IT prev_word_it(&words);
405  WERD_RES *word;
406  WERD_RES *prev_word;
407  WERD_RES *combo;
408  WERD *copy_word;
// -INT16_MAX is the "no previous word seen yet" sentinel.
409  int16_t prev_right = -INT16_MAX;
410  TBOX box;
411  int16_t gap;
// INT16_MAX is the "no gap found" sentinel tested after pass 1.
412  int16_t min_gap = INT16_MAX;
413 
// Pass 1: measure gaps between successive words, ignoring parts of combos.
414  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
415  word = word_it.data();
416  if (!word->part_of_combo) {
417  box = word->word->bounding_box();
418  if (prev_right > -INT16_MAX) {
419  gap = box.left() - prev_right;
420  if (gap < min_gap)
421  min_gap = gap;
422  }
423  prev_right = box.right();
424  }
425  }
// A gap exists only if at least two words that are not part_of_combo were seen.
426  if (min_gap < INT16_MAX) {
427  prev_right = -INT16_MAX; // back to start
428  word_it.set_to_list(&words);
429  // Note: we can't use cycle_pt due to inserted combos at start of list.
// The sentinel test lets the loop body run while still at the first element;
// once a word has been measured the loop ends when the iterator is at_first() again.
430  for (; (prev_right == -INT16_MAX) || !word_it.at_first();
431  word_it.forward()) {
432  word = word_it.data();
433  if (!word->part_of_combo) {
434  box = word->word->bounding_box();
435  if (prev_right > -INT16_MAX) {
436  gap = box.left() - prev_right;
437  if (gap <= min_gap) {
438  prev_word = prev_word_it.data();
// Reuse the previous word if it is already a combination.
439  if (prev_word->combination) {
440  combo = prev_word;
441  } else {
442  /* Make a new combination and insert before
443  * the first word being joined. */
444  copy_word = new WERD;
445  *copy_word = *(prev_word->word);
446  // deep copy
// NOTE(review): ownership of copy_word presumably passes to the WERD_RES - confirm.
447  combo = new WERD_RES(copy_word);
// NOTE(review): TRUE/FALSE macros are mixed with true/false in this function.
448  combo->combination = TRUE;
449  combo->x_height = prev_word->x_height;
450  prev_word->part_of_combo = true;
// Judging by its name, this leaves prev_word_it on the new combo - confirm in elst.h.
451  prev_word_it.add_before_then_move(combo);
452  }
// The combo takes its end-of-line flag from the word being appended.
453  combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
454  if (word->combination) {
455  combo->word->join_on(word->word);
456  // Move blobs to combo
457  // old combo no longer needed
// After this delete `word` is dangling; the rest of the iteration only uses
// `box`, which was copied by value above.
458  delete word_it.extract();
459  } else {
460  // Copy current wd to combo
461  combo->copy_on(word);
462  word->part_of_combo = true;
463  }
// Mark the enlarged combo as not done and clear its results.
464  combo->done = FALSE;
465  combo->ClearResults();
466  } else {
467  prev_word_it = word_it; // catch up
468  }
469  }
470  prev_right = box.right();
471  }
472  }
473  } else {
474  words.clear(); // signal termination
475  }
476 }
#define TRUE
Definition: capi.h:51
TBOX bounding_box() const
Definition: werd.cpp:159
TBOX
Definition: rect.h:34
Definition: werd.h:35
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:127
int16_t left() const
Definition: rect.h:72
bool flag(WERD_FLAGS mask) const
Definition: werd.h:126
#define FALSE
Definition: capi.h:52
void copy_on(WERD_RES *word_res)
Definition: pageres.h:660
Definition: werd.h:59
bool done
Definition: pageres.h:298
bool combination
Definition: pageres.h:334
float x_height
Definition: pageres.h:311
void ClearResults()
Definition: pageres.cpp:1153
bool part_of_combo
Definition: pageres.h:335
int16_t right() const
Definition: rect.h:79
void join_on(WERD *other)
Definition: werd.cpp:210
WERD * word
Definition: pageres.h:189