33 static void SkipUtf8Bom(FILE* fp);
41 if ((ch & 0x80) == 0x00) {
43 }
else if ((ch & 0xE0) == 0xC0) {
45 }
else if ((ch & 0xF0) == 0xE0) {
47 }
else if ((ch & 0xF8) == 0xF0) {
49 }
else if ((ch & 0xFC) == 0xF8) {
51 }
else if ((ch & 0xFE) == 0xFC) {
61 size_t length = NextCharLengthNoException(str);
72 for (
size_t i = 1; i <= 6; i++) {
74 size_t length = NextCharLengthNoException(str);
86 return str + NextCharLength(str);
93 return str - PrevCharLength(str);
103 while (!IsLineEndingOrFileEnding(*str) && *str != ch) {
113 return ch ==
'\0' || ch ==
'\n' || ch ==
'\r';
121 newStr.resize(length);
122 strncpy(const_cast<char*>(newStr.c_str()), str, length);
146 if (NotShorterThan(str, maxLength)) {
148 const char* pStr = str;
149 while (len < maxLength) {
150 size_t nextLen = NextCharLength(pStr);
154 wordTrunc = FromSubstr(str, len);
164 static void ReplaceAll(
string& str,
const char* from,
const char* to) {
165 string::size_type pos = 0;
166 string::size_type fromLen = strlen(from);
167 string::size_type toLen = strlen(to);
168 while ((pos = str.find(from, pos)) != string::npos) {
169 str.replace(pos, fromLen, to);
177 static string Join(
const vector<string>& strings,
const string& separator) {
178 std::ostringstream buffer;
180 for (
const auto& str : strings) {
193 static string Join(
const vector<string>& strings) {
194 std::ostringstream buffer;
195 for (
const auto& str : strings) {
static size_t PrevCharLength(const char *str)
Returns the length in bytes of the previous UTF8 character.
Definition: UTF8Util.hpp:71
static string Join(const vector< string > &strings)
Joins a string vector into a string.
Definition: UTF8Util.hpp:193
Definition: Exception.hpp:85
static string Join(const vector< string > &strings, const string &separator)
Joins a string vector into a string with a separator.
Definition: UTF8Util.hpp:177
static bool NotShorterThan(const char *str, size_t length)
Returns true if the given string is at least as long as the given length.
Definition: UTF8Util.hpp:129
static void ReplaceAll(string &str, const char *from, const char *to)
Replaces all patterns in a string in place.
Definition: UTF8Util.hpp:164
static string FromSubstr(const char *str, size_t length)
Copies a substring with given length to a new std::string.
Definition: UTF8Util.hpp:119
Definition: BinaryDict.hpp:24
static const char * PrevChar(const char *str)
Moves the char* pointer back to the start of the previous UTF8 character.
Definition: UTF8Util.hpp:92
static string TruncateUTF8(const char *str, size_t maxLength)
Truncates a string with a maximal length.
Definition: UTF8Util.hpp:144
static const char * NextChar(const char *str)
Returns the char* pointer advanced past the next UTF8 character.
Definition: UTF8Util.hpp:85
UTF8 string utilities.
Definition: UTF8Util.hpp:28
static const char * FindNextInline(const char *str, const char ch)
Finds a character in the same line.
Definition: UTF8Util.hpp:102
static size_t NextCharLength(const char *str)
Returns the length in bytes of the next UTF8 character.
Definition: UTF8Util.hpp:60
static bool IsLineEndingOrFileEnding(const char ch)
Returns true if the character is a line ending or end of file.
Definition: UTF8Util.hpp:112
static size_t NextCharLengthNoException(const char *str)
Returns the length in bytes of the next UTF8 character.
Definition: UTF8Util.hpp:39