#include "unicode.h"
#include <codecvt>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <locale>
#include <sstream>
#include "invariant.h"
Go to the source code of this file.
|
std::string | narrow (const wchar_t *s) |
|
std::wstring | widen (const char *s) |
|
std::string | narrow (const std::wstring &s) |
|
std::wstring | widen (const std::string &s) |
|
static void | utf8_append_code (unsigned int c, std::string &result) |
| Appends a Unicode character to a UTF-8 encoded string. More...
|
|
std::string | utf32_native_endian_to_utf8 (const std::basic_string< unsigned int > &s) |
|
std::vector< std::string > | narrow_argv (int argc, const wchar_t **argv_wide) |
|
static void | utf16_append_code (unsigned int code, std::wstring &result) |
|
std::wstring | utf8_to_utf16_native_endian (const std::string &in) |
| Convert UTF8-encoded string to UTF-16 with architecture-native endianness. More...
|
|
static void | utf16_native_endian_to_java_string (const wchar_t ch, std::ostringstream &result, const std::locale &loc) |
| Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes. More...
|
|
static void | utf16_native_endian_to_java (const wchar_t ch, std::ostringstream &result, const std::locale &loc) |
| Escapes non-printable characters, whitespace except for spaces, double- and single-quotes and backslashes. More...
|
|
std::string | utf16_native_endian_to_java (const char16_t ch) |
|
std::string | utf16_native_endian_to_java_string (const std::wstring &in) |
| Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes. More...
|
|
std::string | utf16_native_endian_to_utf8 (const char16_t utf16_char) |
|
std::string | utf16_native_endian_to_utf8 (const std::u16string &utf16_str) |
|
char16_t | codepoint_hex_to_utf16_native_endian (const std::string &hex) |
|
std::string | codepoint_hex_to_utf8 (const std::string &hex) |
|
◆ codepoint_hex_to_utf16_native_endian()
char16_t codepoint_hex_to_utf16_native_endian |
( |
const std::string & |
hex | ) |
|
- Parameters
-
hex | representation of a BMP codepoint as a four-digit string (e.g. "0041" for \u0041) |
- Returns
- the codepoint encoded as a single UTF-16 character in architecture-native endianness
Definition at line 374 of file unicode.cpp.
◆ codepoint_hex_to_utf8()
std::string codepoint_hex_to_utf8 |
( |
const std::string & |
hex | ) |
|
- Parameters
-
hex | representation of a BMP codepoint as a four-digit string (e.g. "0041" for \u0041) |
- Returns
- UTF-8 encoding of the codepoint
Definition at line 380 of file unicode.cpp.
◆ narrow() [1/2]
std::string narrow |
( |
const std::wstring & |
s | ) |
|
◆ narrow() [2/2]
std::string narrow |
( |
const wchar_t * |
s | ) |
|
◆ narrow_argv()
std::vector<std::string> narrow_argv |
( |
int |
argc, |
|
|
const wchar_t ** |
argv_wide |
|
) |
| |
◆ utf16_append_code()
static void utf16_append_code |
( |
unsigned int |
code, |
|
|
std::wstring & |
result |
|
) |
| |
|
static |
◆ utf16_native_endian_to_java() [1/2]
std::string utf16_native_endian_to_java |
( |
const char16_t |
ch | ) |
|
- Parameters
-
ch | UTF-16 character in architecture-native endianness encoding |
- Returns
- String in US-ASCII format, with \uxxxx escapes for other characters
Definition at line 331 of file unicode.cpp.
◆ utf16_native_endian_to_java() [2/2]
static void utf16_native_endian_to_java |
( |
const wchar_t |
ch, |
|
|
std::ostringstream & |
result, |
|
|
const std::locale & |
loc |
|
) |
| |
|
static |
Escapes non-printable characters, whitespace except for spaces, double- and single-quotes and backslashes.
This should yield a valid Java identifier.
- Parameters
-
ch | UTF-16 character in architecture-native endianness encoding |
result | stream to receive string in US-ASCII format, with \uxxxx escapes for other characters |
loc | locale to check for printable characters |
Definition at line 312 of file unicode.cpp.
◆ utf16_native_endian_to_java_string() [1/2]
std::string utf16_native_endian_to_java_string |
( |
const std::wstring & |
in | ) |
|
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.
This should yield a valid Java string literal. Note that this specifically does not escape single quotes, as these are not required to be escaped for Java string literals.
- Parameters
-
in | String in UTF-16 (native endianness) format |
- Returns
- Valid Java string literal in US-ASCII format, with \uxxxx escapes for other characters
Definition at line 346 of file unicode.cpp.
◆ utf16_native_endian_to_java_string() [2/2]
static void utf16_native_endian_to_java_string |
( |
const wchar_t |
ch, |
|
|
std::ostringstream & |
result, |
|
|
const std::locale & |
loc |
|
) |
| |
|
static |
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.
This should yield a valid Java string literal. Note that this specifically does not escape single quotes, as these are not required to be escaped for Java string literals.
- Parameters
-
ch | UTF-16 character in architecture-native endianness encoding |
result | stream to receive string in US-ASCII format, with \uxxxx escapes for other characters |
loc | locale to check for printable characters |
Definition at line 268 of file unicode.cpp.
◆ utf16_native_endian_to_utf8() [1/2]
std::string utf16_native_endian_to_utf8 |
( |
char16_t |
utf16_char | ) |
|
- Parameters
-
utf16_char | UTF-16 character in architecture-native endianness encoding |
- Returns
- UTF-8 encoding of the same codepoint
Definition at line 355 of file unicode.cpp.
◆ utf16_native_endian_to_utf8() [2/2]
std::string utf16_native_endian_to_utf8 |
( |
const std::u16string & |
utf16_str | ) |
|
- Parameters
-
utf16_str | UTF-16 string in architecture-native endianness encoding |
- Returns
- UTF-8 encoding of the string
Definition at line 360 of file unicode.cpp.
◆ utf32_native_endian_to_utf8()
std::string utf32_native_endian_to_utf8 |
( |
const std::basic_string< unsigned int > & |
s | ) |
|
- Parameters
-
s | UTF-32 encoded wide string |
- Returns
- UTF-8 encoded string with the same Unicode characters as the input.
Definition at line 145 of file unicode.cpp.
◆ utf8_append_code()
static void utf8_append_code |
( |
unsigned int |
c, |
|
|
std::string & |
result |
|
) |
| |
|
static |
Appends a Unicode character to a UTF-8 encoded string.
- Parameters
-
c | character to append |
result | string to append to |
Definition at line 118 of file unicode.cpp.
◆ utf8_to_utf16_native_endian()
std::wstring utf8_to_utf16_native_endian |
( |
const std::string & |
in | ) |
|
Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
- Parameters
-
in | String in UTF-8 format |
- Returns
- String in UTF-16 format, encoded with the endianness of the architecture.
Definition at line 201 of file unicode.cpp.
◆ widen() [1/2]
std::wstring widen |
( |
const char * |
s | ) |
|
◆ widen() [2/2]
std::wstring widen |
( |
const std::string & |
s | ) |
|