All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
strngs.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: strngs.c (Formerly strings.c)
3  * Description: STRING class functions.
4  * Author: Ray Smith
5  * Created: Fri Feb 15 09:13:30 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "strngs.h"
21 
22 #include <assert.h>
23 
24 #include "genericvector.h"
25 #include "helpers.h"
26 #include "serialis.h"
27 #include "tprintf.h"
28 
29 using tesseract::TFile;
30 
// Size of buffer needed to host the decimal representation of the widest
// possible int (in 64 bits), being -<20 digits><nul> = 22.
const int kMaxIntSize = 22;
// Size of buffer needed to host the widest possible %.8g output.
// The widest form is -1.2345678e+308: 15 characters plus <nul> = 16.
// One extra byte is reserved because the formatting code in this file passes
// (kMaxDoubleSize - 1) as the snprintf size limit; with a smaller value the
// exponent of large-magnitude numbers would be silently truncated.
const int kMaxDoubleSize = 17;
37 
38 /**********************************************************************
39  * STRING_HEADER provides metadata about the allocated buffer,
40  * including total capacity and how much used (strlen with '\0').
41  *
42  * The implementation hides this header at the start of the data
43  * buffer and appends the string on the end to keep sizeof(STRING)
44  * unchanged from earlier versions so serialization is not affected.
45  *
46  * The collection of MACROS provide different implementations depending
47  * on whether the string keeps track of its strlen or not so that this
48  * feature can be added in later when consumers don't modify the string
49  **********************************************************************/
50 
51 // Smallest string to allocate by default
52 const int kMinCapacity = 16;
53 
54 char* STRING::AllocData(int used, int capacity) {
55  data_ = (STRING_HEADER *)alloc_string(capacity + sizeof(STRING_HEADER));
56 
57  // header is the metadata for this memory block
58  STRING_HEADER* header = GetHeader();
59  header->capacity_ = capacity;
60  header->used_ = used;
61  return GetCStr();
62 }
63 
64 void STRING::DiscardData() {
65  free_string((char *)data_);
66 }
67 
68 // This is a private method; ensure FixHeader is called (or used_ is well defined)
69 // beforehand
70 char* STRING::ensure_cstr(inT32 min_capacity) {
71  STRING_HEADER* orig_header = GetHeader();
72  if (min_capacity <= orig_header->capacity_)
73  return ((char *)this->data_) + sizeof(STRING_HEADER);
74 
75  // if we are going to grow bigger, than double our existing
76  // size, but if that still is not big enough then keep the
77  // requested capacity
78  if (min_capacity < 2 * orig_header->capacity_)
79  min_capacity = 2 * orig_header->capacity_;
80 
81  int alloc = sizeof(STRING_HEADER) + min_capacity;
82  STRING_HEADER* new_header = (STRING_HEADER*)(alloc_string(alloc));
83 
84  memcpy(&new_header[1], GetCStr(), orig_header->used_);
85  new_header->capacity_ = min_capacity;
86  new_header->used_ = orig_header->used_;
87 
88  // free old memory, then rebind to new memory
89  DiscardData();
90  data_ = new_header;
91 
92  assert(InvariantOk());
93  return ((char *)data_) + sizeof(STRING_HEADER);
94 }
95 
96 // This is const, but is modifying a mutable field
97 // this way it can be used on const or non-const instances.
98 void STRING::FixHeader() const {
99  const STRING_HEADER* header = GetHeader();
100  if (header->used_ < 0)
101  header->used_ = strlen(GetCStr()) + 1;
102 }
103 
104 
106  // Empty STRINGs contain just the "\0".
107  memcpy(AllocData(1, kMinCapacity), "", 1);
108 }
109 
110 STRING::STRING(const STRING& str) {
111  str.FixHeader();
112  const STRING_HEADER* str_header = str.GetHeader();
113  int str_used = str_header->used_;
114  char *this_cstr = AllocData(str_used, str_used);
115  memcpy(this_cstr, str.GetCStr(), str_used);
116  assert(InvariantOk());
117 }
118 
119 STRING::STRING(const char* cstr) {
120  if (cstr == NULL) {
121  // Empty STRINGs contain just the "\0".
122  memcpy(AllocData(1, kMinCapacity), "", 1);
123  } else {
124  int len = strlen(cstr) + 1;
125  char* this_cstr = AllocData(len, len);
126  memcpy(this_cstr, cstr, len);
127  }
128  assert(InvariantOk());
129 }
130 
131 STRING::STRING(const char *data, int length) {
132  if (data == NULL) {
133  // Empty STRINGs contain just the "\0".
134  memcpy(AllocData(1, kMinCapacity), "", 1);
135  } else {
136  char* this_cstr = AllocData(length + 1, length + 1);
137  memcpy(this_cstr, data, length);
138  this_cstr[length] = '\0';
139  }
140 }
141 
143  DiscardData();
144 }
145 
146 // TODO(rays) Change all callers to use TFile and remove the old functions.
147 // Writes to the given file. Returns false in case of error.
148 bool STRING::Serialize(FILE* fp) const {
149  inT32 len = length();
150  if (fwrite(&len, sizeof(len), 1, fp) != 1) return false;
151  if (static_cast<int>(fwrite(GetCStr(), 1, len, fp)) != len) return false;
152  return true;
153 }
154 // Writes to the given file. Returns false in case of error.
155 bool STRING::Serialize(TFile* fp) const {
156  inT32 len = length();
157  if (fp->FWrite(&len, sizeof(len), 1) != 1) return false;
158  if (fp->FWrite(GetCStr(), 1, len) != len) return false;
159  return true;
160 }
161 // Reads from the given file. Returns false in case of error.
162 // If swap is true, assumes a big/little-endian swap is needed.
163 bool STRING::DeSerialize(bool swap, FILE* fp) {
164  inT32 len;
165  if (fread(&len, sizeof(len), 1, fp) != 1) return false;
166  if (swap)
167  ReverseN(&len, sizeof(len));
168  truncate_at(len);
169  if (static_cast<int>(fread(GetCStr(), 1, len, fp)) != len) return false;
170  return true;
171 }
172 // Reads from the given file. Returns false in case of error.
173 // If swap is true, assumes a big/little-endian swap is needed.
174 bool STRING::DeSerialize(bool swap, TFile* fp) {
175  inT32 len;
176  if (fp->FRead(&len, sizeof(len), 1) != 1) return false;
177  if (swap)
178  ReverseN(&len, sizeof(len));
179  truncate_at(len);
180  if (fp->FRead(GetCStr(), 1, len) != len) return false;
181  return true;
182 }
183 
184 BOOL8 STRING::contains(const char c) const {
185  return (c != '\0') && (strchr (GetCStr(), c) != NULL);
186 }
187 
189  FixHeader();
190  return GetHeader()->used_ - 1;
191 }
192 
193 const char* STRING::string() const {
194  const STRING_HEADER* header = GetHeader();
195  if (header->used_ == 0)
196  return NULL;
197 
198  // mark header length unreliable because tesseract might
199  // cast away the const and mutate the string directly.
200  header->used_ = -1;
201  return GetCStr();
202 }
203 
204 const char* STRING::c_str() const {
205  return string();
206 }
207 
208 /******
209  * The STRING_IS_PROTECTED interface adds additional support to migrate
210  * code that needs to modify the STRING in ways not otherwise supported
211  * without violating encapsulation.
212  *
213  * Also makes the [] operator return a const so it is immutable
214  */
215 #if STRING_IS_PROTECTED
216 const char& STRING::operator[](inT32 index) const {
217  return GetCStr()[index];
218 }
219 
220 void STRING::insert_range(inT32 index, const char* str, int len) {
221  // if index is outside current range, then also grow size of string
222  // to accmodate the requested range.
223  STRING_HEADER* this_header = GetHeader();
224  int used = this_header->used_;
225  if (index > used)
226  used = index;
227 
228  char* this_cstr = ensure_cstr(used + len + 1);
229  if (index < used) {
230  // move existing string from index to '\0' inclusive.
231  memmove(this_cstr + index + len,
232  this_cstr + index,
233  this_header->used_ - index);
234  } else if (len > 0) {
235  // We are going to overwrite previous null terminator, so write the new one.
236  this_cstr[this_header->used_ + len - 1] = '\0';
237 
238  // If the old header did not have the terminator,
239  // then we need to account for it now that we've added it.
240  // Otherwise it was already accounted for; we just moved it.
241  if (this_header->used_ == 0)
242  ++this_header->used_;
243  }
244 
245  // Write new string to index.
246  // The string is already terminated from the conditions above.
247  memcpy(this_cstr + index, str, len);
248  this_header->used_ += len;
249 
250  assert(InvariantOk());
251 }
252 
253 void STRING::erase_range(inT32 index, int len) {
254  char* this_cstr = GetCStr();
255  STRING_HEADER* this_header = GetHeader();
256 
257  memcpy(this_cstr+index, this_cstr+index+len,
258  this_header->used_ - index - len);
259  this_header->used_ -= len;
260  assert(InvariantOk());
261 }
262 
263 #else
265  ASSERT_HOST(index >= 0);
266  FixHeader();
267  char* this_cstr = ensure_cstr(index + 1);
268  this_cstr[index] = '\0';
269  GetHeader()->used_ = index + 1;
270  assert(InvariantOk());
271 }
272 
273 char& STRING::operator[](inT32 index) const {
274  // Code is casting away this const and mutating the string,
275  // so mark used_ as -1 to flag it unreliable.
276  GetHeader()->used_ = -1;
277  return ((char *)GetCStr())[index];
278 }
279 #endif
280 
281 void STRING::split(const char c, GenericVector<STRING> *splited) {
282  int start_index = 0;
283  int len = length();
284  for (int i = 0; i < len; i++) {
285  if ((*this)[i] == c) {
286  if (i != start_index) {
287  (*this)[i] = '\0';
288  splited->push_back(STRING(GetCStr() + start_index, i - start_index));
289  (*this)[i] = c;
290  }
291  start_index = i + 1;
292  }
293  }
294 
295  if (len != start_index) {
296  splited->push_back(STRING(GetCStr() + start_index, len - start_index));
297  }
298 }
299 
300 BOOL8 STRING::operator==(const STRING& str) const {
301  FixHeader();
302  str.FixHeader();
303  const STRING_HEADER* str_header = str.GetHeader();
304  const STRING_HEADER* this_header = GetHeader();
305  int this_used = this_header->used_;
306  int str_used = str_header->used_;
307 
308  return (this_used == str_used)
309  && (memcmp(GetCStr(), str.GetCStr(), this_used) == 0);
310 }
311 
312 BOOL8 STRING::operator!=(const STRING& str) const {
313  FixHeader();
314  str.FixHeader();
315  const STRING_HEADER* str_header = str.GetHeader();
316  const STRING_HEADER* this_header = GetHeader();
317  int this_used = this_header->used_;
318  int str_used = str_header->used_;
319 
320  return (this_used != str_used)
321  || (memcmp(GetCStr(), str.GetCStr(), this_used) != 0);
322 }
323 
324 BOOL8 STRING::operator!=(const char* cstr) const {
325  FixHeader();
326  const STRING_HEADER* this_header = GetHeader();
327 
328  if (cstr == NULL)
329  return this_header->used_ > 1; // either '\0' or NULL
330  else {
331  inT32 length = strlen(cstr) + 1;
332  return (this_header->used_ != length)
333  || (memcmp(GetCStr(), cstr, length) != 0);
334  }
335 }
336 
338  str.FixHeader();
339  const STRING_HEADER* str_header = str.GetHeader();
340  int str_used = str_header->used_;
341 
342  GetHeader()->used_ = 0; // clear since ensure doesnt need to copy data
343  char* this_cstr = ensure_cstr(str_used);
344  STRING_HEADER* this_header = GetHeader();
345 
346  memcpy(this_cstr, str.GetCStr(), str_used);
347  this_header->used_ = str_used;
348 
349  assert(InvariantOk());
350  return *this;
351 }
352 
354  FixHeader();
355  str.FixHeader();
356  const STRING_HEADER* str_header = str.GetHeader();
357  const char* str_cstr = str.GetCStr();
358  int str_used = str_header->used_;
359  int this_used = GetHeader()->used_;
360  char* this_cstr = ensure_cstr(this_used + str_used);
361 
362  STRING_HEADER* this_header = GetHeader(); // after ensure for realloc
363 
364  if (this_used > 1) {
365  memcpy(this_cstr + this_used - 1, str_cstr, str_used);
366  this_header->used_ += str_used - 1; // overwrite '\0'
367  } else {
368  memcpy(this_cstr, str_cstr, str_used);
369  this_header->used_ = str_used;
370  }
371 
372  assert(InvariantOk());
373  return *this;
374 }
375 
376 void STRING::add_str_int(const char* str, int number) {
377  if (str != NULL)
378  *this += str;
379  // Allow space for the maximum possible length of inT64.
380  char num_buffer[kMaxIntSize];
381  snprintf(num_buffer, kMaxIntSize - 1, "%d", number);
382  num_buffer[kMaxIntSize - 1] = '\0';
383  *this += num_buffer;
384 }
385 // Appends the given string and double (as a %.8g) to this.
386 void STRING::add_str_double(const char* str, double number) {
387  if (str != NULL)
388  *this += str;
389  // Allow space for the maximum possible length of %8g.
390  char num_buffer[kMaxDoubleSize];
391  snprintf(num_buffer, kMaxDoubleSize - 1, "%.8g", number);
392  num_buffer[kMaxDoubleSize - 1] = '\0';
393  *this += num_buffer;
394 }
395 
396 STRING & STRING::operator=(const char* cstr) {
397  STRING_HEADER* this_header = GetHeader();
398  if (cstr) {
399  int len = strlen(cstr) + 1;
400 
401  this_header->used_ = 0; // dont bother copying data if need to realloc
402  char* this_cstr = ensure_cstr(len);
403  this_header = GetHeader(); // for realloc
404  memcpy(this_cstr, cstr, len);
405  this_header->used_ = len;
406  } else {
407  // Reallocate to same state as default constructor.
408  DiscardData();
409  // Empty STRINGs contain just the "\0".
410  memcpy(AllocData(1, kMinCapacity), "", 1);
411  }
412 
413  assert(InvariantOk());
414  return *this;
415 }
416 
417 void STRING::assign(const char *cstr, int len) {
418  STRING_HEADER* this_header = GetHeader();
419  this_header->used_ = 0; // dont bother copying data if need to realloc
420  char* this_cstr = ensure_cstr(len + 1); // +1 for '\0'
421 
422  this_header = GetHeader(); // for realloc
423  memcpy(this_cstr, cstr, len);
424  this_cstr[len] = '\0';
425  this_header->used_ = len + 1;
426 
427  assert(InvariantOk());
428 }
429 
430 STRING STRING::operator+(const STRING& str) const {
431  STRING result(*this);
432  result += str;
433 
434  assert(InvariantOk());
435  return result;
436 }
437 
438 
439 STRING STRING::operator+(const char ch) const {
440  STRING result;
441  FixHeader();
442  const STRING_HEADER* this_header = GetHeader();
443  int this_used = this_header->used_;
444  char* result_cstr = result.ensure_cstr(this_used + 1);
445  STRING_HEADER* result_header = result.GetHeader();
446  int result_used = result_header->used_;
447 
448  // copies '\0' but we'll overwrite that
449  memcpy(result_cstr, GetCStr(), this_used);
450  result_cstr[result_used] = ch; // overwrite old '\0'
451  result_cstr[result_used + 1] = '\0'; // append on '\0'
452  ++result_header->used_;
453 
454  assert(InvariantOk());
455  return result;
456 }
457 
458 
459 STRING& STRING::operator+=(const char *str) {
460  if (!str || !*str) // empty string has no effect
461  return *this;
462 
463  FixHeader();
464  int len = strlen(str) + 1;
465  int this_used = GetHeader()->used_;
466  char* this_cstr = ensure_cstr(this_used + len);
467  STRING_HEADER* this_header = GetHeader(); // after ensure for realloc
468 
469  // if we had non-empty string then append overwriting old '\0'
470  // otherwise replace
471  if (this_used > 0) {
472  memcpy(this_cstr + this_used - 1, str, len);
473  this_header->used_ += len - 1;
474  } else {
475  memcpy(this_cstr, str, len);
476  this_header->used_ = len;
477  }
478 
479  assert(InvariantOk());
480  return *this;
481 }
482 
483 
484 STRING& STRING::operator+=(const char ch) {
485  if (ch == '\0')
486  return *this;
487 
488  FixHeader();
489  int this_used = GetHeader()->used_;
490  char* this_cstr = ensure_cstr(this_used + 1);
491  STRING_HEADER* this_header = GetHeader();
492 
493  if (this_used > 0)
494  --this_used; // undo old empty null if there was one
495 
496  this_cstr[this_used++] = ch; // append ch to end
497  this_cstr[this_used++] = '\0'; // append '\0' after ch
498  this_header->used_ = this_used;
499 
500  assert(InvariantOk());
501  return *this;
502 }
~STRING()
Definition: strngs.cpp:142
bool DeSerialize(bool swap, FILE *fp)
Definition: strngs.cpp:163
const int kMinCapacity
Definition: strngs.cpp:52
int push_back(T object)
char * alloc_string(inT32 count)
Definition: memry.cpp:30
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:281
unsigned char BOOL8
Definition: host.h:113
inT32 length() const
Definition: strngs.cpp:188
#define ASSERT_HOST(x)
Definition: errcode.h:84
STRING & operator+=(const char *string)
Definition: strngs.cpp:459
void truncate_at(inT32 index)
Definition: strngs.cpp:264
bool Serialize(FILE *fp) const
Definition: strngs.cpp:148
void free_string(char *string)
Definition: memry.cpp:35
STRING operator+(const STRING &string) const
Definition: strngs.cpp:430
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:177
int FWrite(const void *buffer, int size, int count)
Definition: serialis.cpp:131
void assign(const char *cstr, int len)
Definition: strngs.cpp:417
BOOL8 operator==(const STRING &string) const
Definition: strngs.cpp:300
STRING()
Definition: strngs.cpp:105
Definition: strngs.h:44
void add_str_double(const char *str, double number)
Definition: strngs.cpp:386
#define NULL
Definition: host.h:144
STRING & operator=(const char *string)
Definition: strngs.cpp:396
BOOL8 operator!=(const STRING &string) const
Definition: strngs.cpp:312
const char * string() const
Definition: strngs.cpp:193
const int kMaxDoubleSize
Definition: strngs.cpp:36
char & operator[](inT32 index) const
Definition: strngs.cpp:273
const int kMaxIntSize
Definition: strngs.cpp:33
int FRead(void *buffer, int size, int count)
Definition: serialis.cpp:91
BOOL8 contains(const char c) const
Definition: strngs.cpp:184
int inT32
Definition: host.h:102
const char * c_str() const
Definition: strngs.cpp:204