Simulant  21.12-194
A portable game engine for Windows, OSX, Linux, Dreamcast, and PSP
unicode.h
1 /* * Copyright (c) 2011-2017 Luke Benstead https://simulant-engine.appspot.com
2  *
3  * This file is part of Simulant.
4  *
5  * Simulant is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Simulant is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Simulant. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #ifndef UNICODE_H
20 #define UNICODE_H
21 
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
26 
// Underlying storage type used by the unicode class: a string of UTF-16 code units.
using ustring = std::u16string;
28 
// Exception carrying a textual description; catchable as std::runtime_error.
// NOTE(review): the name suggests it reports encoding/decoding failures, but
// the throw sites are not visible in this header.
class InvalidEncodingError : public std::runtime_error {
public:
    // `message` is forwarded unchanged to std::runtime_error and is what
    // what() will report.
    InvalidEncodingError(const std::string& message)
        : std::runtime_error(message) {
    }
};
34 
35 class unicode {
36 public:
37  static const size_t npos = -1;
38  typedef ustring::size_type size_type;
39  typedef ustring::value_type value_type;
40 
41  unicode():
42  string_(u"") {}
43 
44  unicode(const unicode& rhs):
45  string_(rhs.string_){
46 
47  }
48 
49  unicode& operator=(const unicode& rhs);
50  unicode(const char* encoded_string, const std::string& encoding="ascii");
51 
52  unicode(int32_t n, char16_t c);
53  unicode(int32_t n, char c);
54 
55  unicode(const std::string& utf8_string, const std::string &encoding="ascii");
56  unicode(const char16_t *utf16_string);
57 
58  template<class InputIterator>
59  unicode(InputIterator begin, InputIterator end):
60  string_(begin, end) {
61 
62  }
63 
64  std::wstring::size_type length() const {
65  return string_.size();
66  }
67 
68  std::string encode() const;
69 
70  unicode capitalize() const;
71  unicode title() const;
72 
73  unicode lower() const;
74  unicode upper() const;
75  unicode strip() const;
76  unicode lstrip() const;
77  unicode rstrip() const;
78 
79  unicode strip(const unicode& things) const;
80  unicode lstrip(const unicode& things) const;
81  unicode rstrip(const unicode& things) const;
82  unicode swap_case() const;
83  unicode replace(const unicode& thing, const unicode& replacement) const;
84 
85  unicode lpad(int32_t indent);
86  unicode rpad(int32_t count);
87 
88  bool contains(const unicode& thing) const;
89  bool contains(const std::string& thing) const;
90  bool contains(const char* thing) const;
91  bool contains(const wchar_t ch) const;
92 
93  void push_back(const wchar_t c);
94 
95  unicode slice(int32_t beg, int32_t end) const;
96  unicode slice(int32_t beg, void* null) const;
97  unicode slice(void* null, int32_t end) const;
98 
99  bool empty() const { return string_.empty(); }
100  bool starts_with(const unicode& thing) const;
101  bool ends_with(const unicode& thing) const;
102 
103  std::vector<unicode> split(const unicode& on, int32_t count=-1, bool keep_empty=true) const;
104 
105  unicode join(const std::vector<unicode>& parts) const;
106  unicode join(const std::vector<std::string>& parts) const;
107 
108  bool operator==(const unicode& rhs) const {
109  return string_ == rhs.string_;
110  }
111 
112  bool operator!=(const unicode& rhs) const {
113  return !(*this == rhs);
114  }
115 
116  unicode& operator=(const std::string& rhs) {
117  //Automatically convert UTF-8 strings to unicode
118  *this = unicode(rhs);
119  return *this;
120  }
121 
122  unicode& operator=(const char* rhs) {
123  //Automatically convert UTF-8 strings to unicode
124  *this = unicode(rhs);
125  return *this;
126  }
127 
128  std::size_t find(const char16_t c) const {
129  return string_.find(c);
130  }
131 
132  std::size_t find(const unicode& what) const {
133  return string_.find(what.string_);
134  }
135 
136  std::size_t rfind(const unicode& what) const {
137  return string_.rfind(what.string_);
138  }
139 
140  char16_t& operator[](ustring::size_type pos) {
141  return string_[pos];
142  }
143 
144  const char16_t& operator[](ustring::size_type pos) const {
145  return string_[pos];
146  }
147 
148  unicode& operator+=(const unicode& rhs) {
149  string_.append(rhs.string_);
150  return *this;
151  }
152 
153  unicode operator+(const unicode& rhs) const {
154  unicode result;
155  result += *this;
156  result += rhs;
157  return result;
158  }
159 
160  unicode operator*(const uint32_t rhs) const {
161  unicode result;
162  for(uint32_t i = 0; i < rhs; ++i) {
163  result += *this;
164  }
165  return result;
166  }
167 
168  bool operator<(const unicode& rhs) const {
169  //FIXME: need to do a proper lexigraphical compare - probably
170  return encode() < rhs.encode();
171  }
172 
173  ustring::iterator begin() { return string_.begin(); }
174  ustring::iterator end() { return string_.end(); }
175  ustring::const_iterator begin() const { return string_.begin(); }
176  ustring::const_iterator end() const { return string_.end(); }
177 
178  ustring::reverse_iterator rbegin() { return string_.rbegin(); }
179  ustring::reverse_iterator rend() { return string_.rend(); }
180  ustring::const_reverse_iterator rbegin() const { return string_.rbegin(); }
181  ustring::const_reverse_iterator rend() const { return string_.rend(); }
182 
183  uint32_t count(const unicode& str) const;
184 
185  //Conversion functions
186  int32_t to_int() const;
187  float to_float() const;
188  bool to_boolean() const;
189  ustring to_ustring() const { return string_; }
190 private:
191  ustring string_;
192 };
193 
194 std::ostream& operator<< (std::ostream& os, const unicode& str);
195 bool operator==(const char* c_str, const unicode& uni_str);
196 bool operator!=(const char* c_str, const unicode& uni_str);
197 unicode operator+(const char* c_str, const unicode& uni_str);
198 
199 namespace std {
200  template<>
201  struct hash<unicode> {
202  size_t operator()(const unicode& str) const {
203  hash<ustring> make_hash;
204  return make_hash(str.to_ustring());
205  }
206  };
207 }
208 
209 typedef unicode _u;
210 
211 unicode humanize(int i);
212 
213 #endif // UNICODE_H
InvalidEncodingError
Definition: unicode.h:29
unicode
Definition: unicode.h:35
std
Extensions to the C++ standard library.
Definition: unique_id.h:200