Simulant  21.12-574
A portable game engine for Windows, OSX, Linux, Dreamcast, and PSP
unicode.h
1 /* * Copyright (c) 2011-2017 Luke Benstead https://simulant-engine.appspot.com
2  *
3  * This file is part of Simulant.
4  *
5  * Simulant is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Simulant is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Simulant. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef UNICODE_H
20 #define UNICODE_H
21 
22 #include <cstdint>
23 #include <string>
24 #include <vector>
25 #include <sstream>
26 #include <stdexcept>
27 
// Storage type used throughout this header: a string of UTF-16 code units.
using ustring = std::u16string;
29 
// Exception type for encoding failures. It adds nothing to
// std::runtime_error beyond a distinct type that callers can catch; the
// message passed in is forwarded unchanged and is available through what().
class InvalidEncodingError : public std::runtime_error {
public:
    InvalidEncodingError(const std::string& what)
        : std::runtime_error(what) {
    }
};
35 
// Value-type wrapper around a string of UTF-16 code units (ustring).
//
// Members that have a body in this header are documented from that body.
// Everything else is only declared here and defined out of line, so remarks
// on those members are limited to what the signatures show; consult the
// implementation for their exact behaviour.
class unicode {
public:
    // Sentinel meaning "not found" / "to the end": the largest std::size_t.
    static const std::size_t npos = static_cast<std::size_t>(-1);
    typedef ustring::size_type size_type;
    typedef ustring::value_type value_type;

    // Creates an empty string.
    unicode():
        string_() {}

    // Copies the code units held by rhs.
    unicode(const unicode& rhs):
        string_(rhs.string_) {}

    // Takes over the contents of rhs without copying them. The copy
    // operations are user-declared, so moves must be spelled out or every
    // temporary would be deep-copied. rhs is left empty.
    unicode(unicode&& rhs) noexcept:
        string_() {
        string_.swap(rhs.string_);
    }

    unicode& operator=(const unicode& rhs);

    // Move assignment implemented as a swap: rhs ends up holding the previous
    // contents of *this, which is a valid state for a moved-from object, and
    // self-assignment is harmless.
    unicode& operator=(unicode&& rhs) noexcept {
        string_.swap(rhs.string_);
        return *this;
    }

    // Construction from encoded narrow strings. Note the default encoding
    // argument is "ascii".
    unicode(const char* encoded_string, const std::string& encoding="ascii");

    unicode(int32_t n, char16_t c);
    unicode(int32_t n, char c);

    unicode(const std::string& utf8_string, const std::string &encoding="ascii");
    unicode(const char16_t *utf16_string);

    // Builds the string from the iterator range [begin, end); the elements
    // are handed straight to the underlying ustring range constructor.
    template<class InputIterator>
    unicode(InputIterator begin, InputIterator end):
        string_(begin, end) {}

    // Number of UTF-16 code units stored (not the number of code points).
    size_type length() const {
        return string_.size();
    }

    std::string encode() const;

    unicode capitalize() const;
    unicode title() const;

    unicode lower() const;
    unicode upper() const;
    unicode strip() const;
    unicode lstrip() const;
    unicode rstrip() const;

    unicode substr(std::size_t pos = 0, std::size_t len = npos) const;

    unicode strip(const unicode& things) const;
    unicode lstrip(const unicode& things) const;
    unicode rstrip(const unicode& things) const;
    unicode swap_case() const;
    unicode replace(const unicode& thing, const unicode& replacement) const;

    // NOTE(review): these two are not const-qualified although they return a
    // new value like the other transforming members; changing that needs a
    // matching change to the out-of-line definitions.
    unicode lpad(int32_t indent);
    unicode rpad(int32_t count);

    bool contains(const unicode& thing) const;
    bool contains(const std::string& thing) const;
    bool contains(const char* thing) const;
    // NOTE(review): takes wchar_t while the storage element is char16_t.
    bool contains(const wchar_t ch) const;

    // NOTE(review): takes wchar_t while the storage element is char16_t.
    void push_back(const wchar_t c);
    void pop_back();

    // NOTE(review): the void* overloads presumably stand for an open-ended
    // bound (called with nullptr) - confirm against the implementation.
    unicode slice(int32_t beg, int32_t end) const;
    unicode slice(int32_t beg, void* null) const;
    unicode slice(void* null, int32_t end) const;

    // True when no code units are stored.
    bool empty() const { return string_.empty(); }
    bool starts_with(const unicode& thing) const;
    bool ends_with(const unicode& thing) const;

    std::vector<unicode> split(const unicode& on, int32_t count=-1, bool keep_empty=true) const;

    unicode join(const std::vector<unicode>& parts) const;
    unicode join(const std::vector<std::string>& parts) const;

    // Equality is an exact comparison of the stored code units.
    bool operator==(const unicode& rhs) const {
        return string_ == rhs.string_;
    }

    bool operator!=(const unicode& rhs) const {
        return !(*this == rhs);
    }

    // Converts rhs through the std::string constructor above, i.e. with its
    // default encoding argument ("ascii"), then stores the result.
    unicode& operator=(const std::string& rhs) {
        return *this = unicode(rhs);
    }

    // Converts rhs through the const char* constructor above, i.e. with its
    // default encoding argument ("ascii"), then stores the result.
    unicode& operator=(const char* rhs) {
        return *this = unicode(rhs);
    }

    // Index of the first occurrence of code unit c, or npos if absent.
    std::size_t find(const char16_t c) const {
        return string_.find(c);
    }

    // Index of the first occurrence of what, or npos if absent.
    std::size_t find(const unicode& what) const {
        return string_.find(what.string_);
    }

    // Index of the last occurrence of what, or npos if absent.
    std::size_t rfind(const unicode& what) const {
        return string_.rfind(what.string_);
    }

    // Unchecked access to the code unit at pos (same contract as
    // ustring::operator[]).
    char16_t& operator[](ustring::size_type pos) {
        return string_[pos];
    }

    const char16_t& operator[](ustring::size_type pos) const {
        return string_[pos];
    }

    // Appends rhs in place.
    unicode& operator+=(const unicode& rhs) {
        string_.append(rhs.string_);
        return *this;
    }

    // Returns the concatenation of *this and rhs.
    unicode operator+(const unicode& rhs) const {
        unicode result(*this);
        result += rhs;
        return result;
    }

    // Returns *this repeated rhs times (empty when rhs is 0).
    unicode operator*(const uint32_t rhs) const {
        unicode result;
        // One allocation up front instead of repeated growth in the loop.
        result.string_.reserve(string_.size() * rhs);
        for(uint32_t i = 0; i < rhs; ++i) {
            result.string_.append(string_);
        }
        return result;
    }

    // Strict weak ordering by UTF-16 code unit, compared lexicographically.
    // Works directly on the stored string so it allocates nothing, does not
    // depend on encode(), and agrees with operator== (two values are
    // equivalent under < exactly when they compare equal).
    bool operator<(const unicode& rhs) const {
        return string_ < rhs.string_;
    }

    // Iteration over the stored code units.
    ustring::iterator begin() { return string_.begin(); }
    ustring::iterator end() { return string_.end(); }
    ustring::const_iterator begin() const { return string_.begin(); }
    ustring::const_iterator end() const { return string_.end(); }

    ustring::reverse_iterator rbegin() { return string_.rbegin(); }
    ustring::reverse_iterator rend() { return string_.rend(); }
    ustring::const_reverse_iterator rbegin() const { return string_.rbegin(); }
    ustring::const_reverse_iterator rend() const { return string_.rend(); }

    uint32_t count(const unicode& str) const;

    // Conversion functions
    int32_t to_int() const;
    float to_float() const;
    bool to_boolean() const;
    // Returns a copy of the underlying UTF-16 string.
    ustring to_ustring() const { return string_; }
private:
    ustring string_;
};
197 
198 std::ostream& operator<< (std::ostream& os, const unicode& str);
199 bool operator==(const char* c_str, const unicode& uni_str);
200 bool operator!=(const char* c_str, const unicode& uni_str);
201 unicode operator+(const char* c_str, const unicode& uni_str);
202 
203 namespace std {
204  template<>
205  struct hash<unicode> {
206  size_t operator()(const unicode& str) const {
207  hash<ustring> make_hash;
208  return make_hash(str.to_ustring());
209  }
210  };
211 }
212 
213 typedef unicode _u;
214 
215 unicode humanize(int i);
216 
217 #endif // UNICODE_H
InvalidEncodingError
Definition: unicode.h:30
unicode
Definition: unicode.h:36
std
Extensions to the C++ standard library.
Definition: unique_id.h:200