34 #include "unicode/uchar.h"   
   48   box_ = boxCreate(x, y, width, height);
 
   53   for (
int i = 0; i < boxes->size(); ++i) {
 
   54     BOX* 
box = (*boxes)[i]->box_;
 
   70   for (
int i = 0; i < boxes->size(); ++i) {
 
   71     if ((*boxes)[i]->box_ == 
NULL) 
tprintf(
"Null box at index %d\n", i);
 
   76   tprintf(
"Rtl = %d ,vertical=%d\n", rtl_rules, vertical_rules);
 
   82                              vector<BoxChar*>* boxes) {
 
   85   for (
int i = 0; i < boxes->size(); ++i) {
 
   86     Box* 
box = (*boxes)[i]->box_;
 
   88       if (prev_i < 0 || prev_i < i - 1 || i + 1 == boxes->size()) {
 
   92           boxes->erase(boxes->begin() + i);
 
   94         } 
while (i >= 0 && i + 1 == boxes->size() && (*boxes)[i]->box_ == 
NULL);
 
   99       Box* prev_box = (*boxes)[prev_i]->box_;
 
  100       int shift = box->x - prev_box->x;
 
  101       if (vertical_rules) {
 
  102         shift = box->y - prev_box->y;
 
  103       } 
else if (rtl_rules) {
 
  106       if (-shift > max_shift) {
 
  108         int width = prev_box->w;
 
  109         int height = prev_box->h;
 
  110         int x = prev_box->x + width;
 
  112         if (vertical_rules) {
 
  114           y = prev_box->y + height;
 
  115         } 
else if (rtl_rules) {
 
  116           x = prev_box->x - width;
 
  118             tprintf(
"prev x = %d, width=%d\n", prev_box->x, width);
 
  122         if (prev_i == i - 1) {
 
  125           new_box->
AddBox(x, y, width, height);
 
  126           new_box->page_ = (*boxes)[i]->page_;
 
  127           boxes->insert(boxes->begin() + i, new_box);
 
  130           (*boxes)[i - 1]->AddBox(x, y, width, height);
 
  131           (*boxes)[i - 1]->ch_ = 
"\t";
 
  134       } 
else if (shift > max_shift) {
 
  145                            vector<BoxChar*>* boxes) {
 
  148   for (
int i = 1; i + 1 < boxes->size(); ++i) {
 
  149     Box* 
box = (*boxes)[i]->box_;
 
  151       Box* prev = (*boxes)[i - 1]->box_;
 
  152       Box* next = (*boxes)[i + 1]->box_;
 
  154       int top = 
MIN(prev->y, next->y);
 
  155       int bottom = 
MAX(prev->y + prev->h, next->y + next->h);
 
  156       int left = prev->x + prev->w;
 
  158       if (vertical_rules) {
 
  159         top = prev->y + prev->h;
 
  161         left = 
MIN(prev->x, next->x);
 
  162         right = 
MAX(prev->x + prev->w, next->x + next->w);
 
  163       } 
else if (rtl_rules) {
 
  168         left = next->x + next->w;
 
  170              j >= 0 && (*boxes)[j]->ch_ != 
" " && (*boxes)[j]->ch_ != 
"\t";
 
  172           prev = (*boxes)[j]->box_;
 
  174           if (prev->x < right) {
 
  180         for (
int j = i + 2; j < boxes->size() && (*boxes)[j]->box_ != 
NULL &&
 
  181                                 (*boxes)[j]->ch_ != 
"\t";
 
  183           next = (*boxes)[j]->box_;
 
  184           if (next->x + next->w > left) {
 
  185             left = next->x + next->w;
 
  191       if (right <= left) right = left + 1;
 
  192       if (bottom <= top) bottom = top + 1;
 
  193       (*boxes)[i]->AddBox(left, top, right - left, bottom - top);
 
  194       (*boxes)[i]->ch_ = 
" ";
 
  206   for (
int start = 0; start < boxes->size(); start = end + 1) {
 
  208     while (end < boxes->size() && (*boxes)[end]->ch_ != 
"\t") ++end;
 
  209     sort(boxes->begin() + start, boxes->begin() + end, sorter);
 
  216   int num_rtl = 0, num_ltr = 0;
 
  217   for (
int i = 0; i < boxes.size(); ++i) {
 
  221       tprintf(
"Illegal utf8 in boxchar %d string:%s = ", i,
 
  222               boxes[i]->ch_.c_str());
 
  223       for (
int c = 0; c < boxes[i]->ch_.size(); ++c) {
 
  224         tprintf(
" 0x%x", boxes[i]->ch_[c]);
 
  229     for (
int j = 0; j < uni_vector.
size(); ++j) {
 
  230       UCharDirection dir = u_charDirection(uni_vector[j]);
 
  231       if (dir == U_RIGHT_TO_LEFT || dir == U_RIGHT_TO_LEFT_ARABIC ||
 
  232           dir == U_ARABIC_NUMBER) {
 
  239   return num_rtl > num_ltr;
 
  245   inT64 total_dx = 0, total_dy = 0;
 
  246   for (
int i = 1; i < boxes.size(); ++i) {
 
  247     if (boxes[i - 1]->box_ != 
NULL && boxes[i]->box_ != 
NULL &&
 
  248         boxes[i - 1]->page_ == boxes[i]->page_) {
 
  249       int dx = boxes[i]->box_->x - boxes[i - 1]->box_->x;
 
  250       int dy = boxes[i]->box_->y - boxes[i - 1]->box_->y;
 
  258   return total_dy > total_dx;
 
  264   int total_length = 0;
 
  265   for (
int i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size();
 
  273                           int start_box, 
int end_box, vector<BoxChar*>* boxes) {
 
  274   Boxa* orig = boxaCreate(0);
 
  275   for (
int i = start_box; i < end_box; ++i) {
 
  276     BOX* 
box = (*boxes)[i]->box_;
 
  277     if (box) boxaAddBox(orig, box, L_CLONE);
 
  279   Boxa* rotated = boxaRotate(orig, xcenter, ycenter, rotation);
 
  281   for (
int i = start_box, box_ind = 0; i < end_box; ++i) {
 
  282     if ((*boxes)[i]->box_) {
 
  283       boxDestroy(&((*boxes)[i]->box_));
 
  284       (*boxes)[i]->box_ = boxaGetBox(rotated, box_ind++, L_CLONE);
 
  287   boxaDestroy(&rotated);
 
  293                                     const vector<BoxChar*>& boxes) {
 
  296   for (
int i = 0; i < boxes.size(); ++i) {
 
  297     const Box* 
box = boxes[i]->box_;
 
  299       tprintf(
"Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n");
 
  303         snprintf(buffer, kMaxLineLength, 
"%s %d %d %d %d %d\n",
 
  304                  boxes[i]->ch_.c_str(), box->x, height - box->y - box->h,
 
  305                  box->x + box->w, height - box->y, boxes[i]->page_);
 
  306     output.append(buffer, nbytes);
 
static void ReorderRTLText(vector< BoxChar * > *boxes)
static void TranslateBoxes(int xshift, int yshift, vector< BoxChar * > *boxes)
static void WriteStringToFileOrDie(const string &str, const string &filename)
BoxChar(const char *utf8_str, int len)
static bool UTF8ToUnicode(const char *utf8_str, GenericVector< int > *unicodes)
static void InsertSpaces(bool rtl_rules, bool vertical_rules, vector< BoxChar * > *boxes)
void AddBox(int x, int y, int width, int height)
static void WriteTesseractBoxFile(const string &name, int height, const vector< BoxChar * > &boxes)
static void RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, vector< BoxChar * > *boxes)
static void PrepareToWrite(vector< BoxChar * > *boxes)
const int kMinNewlineRatio
static void InsertNewlines(bool rtl_rules, bool vertical_rules, vector< BoxChar * > *boxes)
static int TotalByteLength(const vector< BoxChar * > &boxes)
static bool MostlyVertical(const vector< BoxChar * > &boxes)
static bool ContainsMostlyRTL(const vector< BoxChar * > &boxes)