tesseract  5.0.0-alpha-619-ge9db
strngs.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: strngs.h (Formerly strings.h)
3  * Description: STRING class definition.
4  * Author: Ray Smith
5  *
6  * (C) Copyright 1991, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #ifndef STRNGS_H
20 #define STRNGS_H
21 
22 #include <cassert> // for assert
23 #include <cstdint> // for uint32_t
24 #include <cstdio> // for FILE
25 #include <cstring> // for strncpy
26 
27 #include "platform.h" // for TESS_API
28 
29 namespace tesseract {
30 class TFile;
31 } // namespace tesseract.
32 
33 // STRING_IS_PROTECTED means that string[index] = X is invalid
34 // because you have to go through the STRING interface to modify it.
35 // This allows the string to ensure internal integrity and maintain
36 // its own string length. Unfortunately this is not possible because
37 // STRINGs are used as direct-manipulation data buffers for things
38 // like length arrays, and many places cast away the const on c_str()
39 // to mutate the string. Turning this off means that internally we
40 // cannot assume we know the strlen.
41 #define STRING_IS_PROTECTED 0
42 
43 template <typename T>
44 class GenericVector;
45 
46 class TESS_API STRING {
47  public:
48  STRING();
49  STRING(const STRING& string);
50  STRING(const char* string);
51  STRING(const char* data, int length);
52  ~STRING();
53 
54  // Writes to the given file. Returns false in case of error.
55  bool Serialize(FILE* fp) const;
56  // Reads from the given file. Returns false in case of error.
57  // If swap is true, assumes a big/little-endian swap is needed.
58  bool DeSerialize(bool swap, FILE* fp);
59  // Writes to the given file. Returns false in case of error.
60  bool Serialize(tesseract::TFile* fp) const;
61  // Reads from the given file. Returns false in case of error.
62  // If swap is true, assumes a big/little-endian swap is needed.
63  bool DeSerialize(tesseract::TFile* fp);
64  // As DeSerialize, but only seeks past the data - hence a static method.
65  static bool SkipDeSerialize(tesseract::TFile* fp);
66 
67  bool contains(char c) const;
68  int32_t length() const;
69  int32_t size() const {
70  return length();
71  }
72  // Workaround to avoid g++ -Wsign-compare warnings.
73  uint32_t unsigned_size() const {
74  const int32_t len = length();
75  assert(0 <= len);
76  return static_cast<uint32_t>(len);
77  }
78  const char* c_str() const;
79 
80  inline char* strdup() const {
81  int32_t len = length() + 1;
82  return strncpy(new char[len], GetCStr(), len);
83  }
84 
85 #if STRING_IS_PROTECTED
86  const char& operator[](int32_t index) const;
87  // len is number of chars in s to insert starting at index in this string
88  void insert_range(int32_t index, const char* s, int len);
89  void erase_range(int32_t index, int len);
90 #else
91  char& operator[](int32_t index) const;
92 #endif
93  void split(char c, GenericVector<STRING>* splited);
94  void truncate_at(int32_t index);
95 
96  bool operator==(const STRING& string) const;
97  bool operator!=(const STRING& string) const;
98  bool operator!=(const char* string) const;
99 
100  STRING& operator=(const char* string);
101  STRING& operator=(const STRING& string);
102 
103  STRING operator+(const STRING& string) const;
104  STRING operator+(char ch) const;
105 
106  STRING& operator+=(const char* string);
107  STRING& operator+=(const STRING& string);
108  STRING& operator+=(char ch);
109 
110  // Assignment for strings which are not null-terminated.
111  void assign(const char* cstr, int len);
112 
113  // Appends the given string and int (as a %d) to this.
114  // += cannot be used for ints as there as a char += operator that would
115  // be ambiguous, and ints usually need a string before or between them
116  // anyway.
117  void add_str_int(const char* str, int number);
118  // Appends the given string and double (as a %.8g) to this.
119  void add_str_double(const char* str, double number);
120 
121  // ensure capacity but keep pointer encapsulated
122  inline void ensure(int32_t min_capacity) {
123  ensure_cstr(min_capacity);
124  }
125 
126  private:
127  typedef struct STRING_HEADER {
128  // How much space was allocated in the string buffer for char data.
129  int capacity_;
130 
131  // used_ is how much of the capacity is currently being used,
132  // including a '\0' terminator.
133  //
134  // If used_ is 0 then string is nullptr (not even the '\0')
135  // else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
136  // else strlen is >= 0 (not nullptr) but needs to be computed.
137  // this condition is set when encapsulation is violated because
138  // an API returned a mutable string.
139  //
140  // capacity_ - used_ = excess capacity that the string can grow
141  // without reallocating
142  mutable int used_;
143  } STRING_HEADER;
144 
145  // To preserve the behavior of the old serialization, we only have space
146  // for one pointer in this structure. So we are embedding a data structure
147  // at the start of the storage that will hold additional state variables,
148  // then storing the actual string contents immediately after.
149  STRING_HEADER* data_;
150 
151  // returns the header part of the storage
152  inline STRING_HEADER* GetHeader() {
153  return data_;
154  }
155  inline const STRING_HEADER* GetHeader() const {
156  return data_;
157  }
158 
159  // returns the string data part of storage
160  inline char* GetCStr() {
161  return (reinterpret_cast<char*>(data_)) + sizeof(STRING_HEADER);
162  }
163 
164  inline const char* GetCStr() const {
165  return (reinterpret_cast<const char*>(data_)) + sizeof(STRING_HEADER);
166  }
167  inline bool InvariantOk() const {
168 #if STRING_IS_PROTECTED
169  return (GetHeader()->used_ == 0)
170  ? (c_str() == nullptr)
171  : (GetHeader()->used_ == (strlen(c_str()) + 1));
172 #else
173  return true;
174 #endif
175  }
176 
177  // Ensure string has requested capacity as optimization
178  // to avoid unnecessary reallocations.
179  // The return value is a cstr buffer with at least requested capacity
180  char* ensure_cstr(int32_t min_capacity);
181 
182  void FixHeader() const; // make used_ non-negative, even if const
183 
184  char* AllocData(int used, int capacity);
185  void DiscardData();
186 };
187 
188 #endif
operator+=
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:376
platform.h
STRING
Definition: strngs.h:45
tesseract::TFile
Definition: serialis.h:75
tesseract
Definition: baseapi.h:65
GenericVector
Definition: baseapi.h:40
operator+
ICOORD operator+(const ICOORD &op1, const ICOORD &op2)
Definition: points.h:359
TESS_API
#define TESS_API
Definition: platform.h:54
operator==
bool operator==(const UnicodeText &lhs, const UnicodeText &rhs)
Definition: unicodetext.cc:375
operator!=
bool operator!=(const UnicodeText &lhs, const UnicodeText &rhs)
Definition: unicodetext.h:380
tesseract::DeSerialize
bool DeSerialize(FILE *fp, char *data, size_t n=1)
Definition: serialis.cpp:41
tesseract::Serialize
bool Serialize(FILE *fp, const char *data, size_t n=1)
Definition: serialis.cpp:73