13 #ifdef LCF_SUPPORT_ICU
14 # include <unicode/ucsdet.h>
15 # include <unicode/ucnv.h>
16 # include <unicode/normalizer2.h>
17 # include <unicode/unistr.h>
20 # error MSVC builds require ICU
27 # ifndef LCF_SUPPORT_ICU
33 #if defined(__MORPHOS__) || defined(__amigaos4__)
34 #define ICONV_CONST const
// Fragment of the mapping from a numeric codepage to an encoding name
// (the function signature and several lines are not visible in this excerpt).
// Codepage 932: when LCF_SUPPORT_ICU is defined the name below is returned.
54 if (codepage == 932) {
55 #ifdef LCF_SUPPORT_ICU
56 return "ibm-943_P15A-2003";
// Codepage 949: same pattern with its own name under LCF_SUPPORT_ICU.
61 if (codepage == 949) {
62 #ifdef LCF_SUPPORT_ICU
63 return "windows-949-2000";
// Any other codepage visible here: the name is built by appending the number
// to a fixed prefix. The windows- prefix is used under LCF_SUPPORT_ICU; the CP
// prefix presumably belongs to the non-ICU branch (its #else is not visible).
68 std::ostringstream out;
69 #ifdef LCF_SUPPORT_ICU
70 out <<
"windows-" << codepage;
72 out <<
"CP" << codepage;
// The assembled name is copied out of the stream; what happens to outs
// afterwards is not visible in this excerpt.
76 std::string outs = out.str();
// Fragments of two lookups that pick a single encoding from a list of
// candidates (signatures and the bodies of the empty checks are not visible).
// Empty candidate list: handled by lines that are missing from this excerpt.
83 if (encodings.empty()) {
// Non-empty list: the first entry is returned.
87 return encodings.front();
// Second lookup with the same visible shape as the one above.
93 if (encodings.empty()) {
97 return encodings.front();
// Fragment of a function returning a list of encoding names (original lines
// 103-153 are not visible in this excerpt).
101 #ifdef LCF_SUPPORT_ICU
// String stream declared only when LCF_SUPPORT_ICU is defined; how it is
// filled is not visible here.
102 std::ostringstream text;
// Returns an empty list; presumably the path taken without ICU - TODO confirm.
154 return std::vector<std::string>();
// Fragment of the ICU-based detection of candidate encodings for a byte
// string (signature and several lines are not visible in this excerpt).
// encodings collects the resulting names in the order the matches are visited.
159 std::vector<std::string> encodings;
160 #ifdef LCF_SUPPORT_ICU
// status receives the ICU error code of the calls below.
// NOTE(review): no check of status is visible between these calls - confirm
// against the full file.
162 UErrorCode status = U_ZERO_ERROR;
163 UCharsetDetector* detector = ucsdet_open(&status);
// Copy the input and hand its bytes and length to the detector.
165 std::string s =
data;
166 ucsdet_setText(detector, s.c_str(), s.length(), &status);
// matches_count is passed to ucsdet_detectAll as an output argument and is
// only read after the null check on matches below.
168 int32_t matches_count;
169 const UCharsetMatch** matches = ucsdet_detectAll(detector, &matches_count, &status);
171 if (matches != NULL) {
173 for (
int i = 0; i < matches_count; ++i) {
// Name reported by the detector for this match.
174 std::string encoding = ucsdet_getName(matches[i], &status);
// Translate selected detector names into the names stored in encodings.
// Each ISO-8859-x name shares its replacement with the paired windows-125x
// name.
177 if (encoding ==
"Shift_JIS") {
178 encodings.push_back(
"ibm-943_P15A-2003");
179 }
else if (encoding ==
"EUC-KR") {
180 encodings.push_back(
"windows-949-2000");
181 }
else if (encoding ==
"GB18030") {
182 encodings.push_back(
"windows-936-2000");
183 }
else if (encoding ==
"ISO-8859-1" || encoding ==
"windows-1252") {
184 encodings.push_back(
"ibm-5348_P100-1997");
185 }
else if (encoding ==
"ISO-8859-2" || encoding ==
"windows-1250") {
186 encodings.push_back(
"ibm-5346_P100-1998");
187 }
else if (encoding ==
"ISO-8859-5" || encoding ==
"windows-1251") {
188 encodings.push_back(
"ibm-5347_P100-1998");
189 }
else if (encoding ==
"ISO-8859-6" || encoding ==
"windows-1256") {
190 encodings.push_back(
"ibm-9448_X100-2005");
191 }
else if (encoding ==
"ISO-8859-7" || encoding ==
"windows-1253") {
192 encodings.push_back(
"ibm-5349_P100-1998");
193 }
else if (encoding ==
"ISO-8859-8" || encoding ==
"windows-1255") {
194 encodings.push_back(
"ibm-9447_P100-2002");
// Pushes the detector name unchanged; presumably the final else branch for
// names not listed above (original line 195 is not visible) - TODO confirm.
196 encodings.push_back(encoding);
// Release the detector opened above.
200 ucsdet_close(detector);
// Fragments of two lookups of the configured encoding (signatures and the
// construction of ini are not visible in this excerpt).
// Reads the Encoding key of the EasyRPG section, defaulting to an empty string.
210 std::string encoding = ini.
Get(
"EasyRPG",
"Encoding", std::string());
// A non-empty value is handled by lines missing from this excerpt.
211 if (!encoding.empty()) {
// Returns an empty string; presumably the fallback when no encoding was
// configured - TODO confirm.
215 return std::string();
// Second lookup with the same visible shape as the one above.
221 std::string encoding = ini.
Get(
"EasyRPG",
"Encoding", std::string());
222 if (!encoding.empty()) {
226 return std::string();
// Fragment of the derivation of a codepage number from the system settings
// (signature, preprocessor branches and several mapping lines are not visible).
// GetACP is the Windows call for the active ANSI code page; presumably this
// line sits in a Windows-only branch - TODO confirm.
231 int codepage = GetACP();
// Locale constructed from an empty name, i.e. the user-preferred locale.
239 std::locale loc = std::locale(
"");
// loc_full: locale name cut at the first at-sign or dot.
241 std::string loc_full = loc.name().substr(0, loc.name().find_first_of(
"@."));
// loc_lang: locale name cut at the first underscore.
243 std::string loc_lang = loc.name().substr(0, loc.name().find_first_of(
"_"));
// Map language (or language_region for Chinese) to a codepage number.
// Several language comparisons of the longer conditions are missing from
// this excerpt (original lines 253-257, 261-271 and 278).
245 if (loc_lang ==
"th") codepage = 874;
246 else if (loc_lang ==
"ja") codepage = 932;
247 else if (loc_full ==
"zh_CN" ||
248 loc_full ==
"zh_SG") codepage = 936;
249 else if (loc_lang ==
"ko") codepage = 949;
250 else if (loc_full ==
"zh_TW" ||
251 loc_full ==
"zh_HK") codepage = 950;
252 else if (loc_lang ==
"cs" ||
258 loc_lang ==
"sl") codepage = 1250;
259 else if (loc_lang ==
"ru") codepage = 1251;
260 else if (loc_lang ==
"ca" ||
272 loc_lang ==
"eu") codepage = 1252;
273 else if (loc_lang ==
"el") codepage = 1253;
274 else if (loc_lang ==
"tr") codepage = 1254;
275 else if (loc_lang ==
"he") codepage = 1255;
276 else if (loc_lang ==
"ar") codepage = 1256;
277 else if (loc_lang ==
"et" ||
279 loc_lang ==
"lv") codepage = 1257;
280 else if (loc_lang ==
"vi") codepage = 1258;
// Fragments of the two Recode overloads: the two-argument form (its body is
// not visible) and the form taking explicit source and destination encodings.
// Many original lines, including closing braces and cleanup, are missing.
286 std::string
ReaderUtil::Recode(
const std::string& str_to_encode,
const std::string& source_encoding) {
// Remaining parameters of the overload with explicit source and destination.
291 const std::string& src_enc,
292 const std::string& dst_enc) {
// Nothing to convert or no encoding given: the input is returned unchanged.
294 if (src_enc.empty() || dst_enc.empty() || str_to_encode.empty()) {
295 return str_to_encode;
// An encoding given as a positive number is treated specially; the rest of
// each conditional expression is not visible in this excerpt.
298 auto src_cp = atoi(src_enc.c_str());
299 const auto& src_enc_str = src_cp > 0
303 auto dst_cp = atoi(dst_enc.c_str());
304 const auto& dst_enc_str = dst_cp > 0
308 #ifdef LCF_SUPPORT_ICU
// ICU path: open a converter for the source encoding.
309 auto status = U_ZERO_ERROR;
310 auto conv_from = ucnv_open(src_enc_str.c_str(), &status);
// Any status other than success or the ambiguous-alias warning is logged and
// yields an empty string.
312 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
313 fprintf(stderr,
"liblcf: ucnv_open() error for source encoding \"%s\": %s\n", src_enc_str.c_str(), u_errorName(status));
314 return std::string();
316 status = U_ZERO_ERROR;
// Guard whose callback closes conv_from; presumably it fires when the guard
// goes out of scope - TODO confirm against the ScopeGuard definition.
317 auto conv_from_sg =
makeScopeGuard([&]() { ucnv_close(conv_from); });
// Open the destination converter with the same error policy as above.
319 auto conv_to = ucnv_open(dst_enc_str.c_str(), &status);
321 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
322 fprintf(stderr,
"liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", dst_enc_str.c_str(), u_errorName(status));
323 return std::string();
326 status = U_ZERO_ERROR;
// Output buffer of four bytes per input byte, zero filled.
328 std::string result(str_to_encode.size() * 4,
'\0');
// front() is safe here because an empty input was returned early above.
329 auto* src = &str_to_encode.front();
330 auto* dst = &result.front();
// Convert from the source converter to the destination converter; dst and
// src are passed by address so the call can advance them. The trailing
// arguments of the call are not visible in this excerpt.
332 ucnv_convertEx(conv_to, conv_from,
333 &dst, dst + result.size(),
334 &src, src + str_to_encode.size(),
335 nullptr,
nullptr,
nullptr,
nullptr,
// Conversion failure is logged together with the input text and yields an
// empty string.
339 if (U_FAILURE(status)) {
340 fprintf(stderr,
"liblcf: ucnv_convertEx() error when encoding \"%s\": %s\n", str_to_encode.c_str(), u_errorName(status));
341 return std::string();
// Trim the buffer to the number of bytes between its start and dst.
344 result.resize(dst - result.c_str());
345 result.shrink_to_fit();
// iconv path; presumably the non-ICU branch (its #else is not visible).
// Unlike the ICU path, a failure to open the converter returns the input
// unchanged instead of an empty string.
349 iconv_t cd = iconv_open(dst_enc_str.c_str(), src_enc_str.c_str());
350 if (cd == (iconv_t)-1)
351 return str_to_encode;
// iconv takes a non-const input pointer, hence the const_cast.
352 char *src =
const_cast<char *
>(str_to_encode.c_str());
353 size_t src_left = str_to_encode.size();
// Output buffer of five bytes per input byte plus ten spare bytes.
// NOTE(review): the matching release of dst is not visible in this excerpt.
354 size_t dst_size = str_to_encode.size() * 5 + 10;
355 char *dst =
new char[dst_size];
356 size_t dst_left = dst_size;
// p is the input cursor handed to iconv; q (declared in a missing line) is
// presumably the output cursor.
358 char ICONV_CONST *p = src;
363 size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
// An iconv error or unconverted input left over yields an empty string.
365 if (status == (
size_t) -1 || src_left > 0) {
367 return std::string();
// Builds the result from dst as a C string, so dst must be NUL terminated;
// the terminator write is presumably in a missing line - TODO confirm.
370 std::string result(dst);
// Fragment of the string normalization routine (signature, the declaration
// of res and several lines are not visible in this excerpt).
377 #ifdef LCF_SUPPORT_ICU
// Interpret str as utf-8 and lower-case it as a Unicode string.
378 icu::UnicodeString uni = icu::UnicodeString(str.c_str(),
"utf-8").toLower();
379 UErrorCode err = U_ZERO_ERROR;
// Fetch the NFKC normalizer instance.
381 const icu::Normalizer2* norm = icu::Normalizer2::getNFKCInstance(err);
382 if (U_FAILURE(err)) {
// Function-local flag; presumably limits the message below to one report
// (the check that uses it is not visible here).
383 static bool err_reported =
false;
385 fprintf(stderr,
"Normalizer2::getNFKCInstance failed (%s). \"nrm\" is probably missing in the ICU data file. Unicode normalization will not work!\n", u_errorName(err));
// Without a normalizer the lower-cased text is written to res as UTF-8.
388 uni.toUTF8String(res);
// Apply the normalizer to the lower-cased text.
391 icu::UnicodeString f = norm->normalize(uni, err);
392 if (U_FAILURE(err)) {
// Normalization failed: write the lower-cased text to res as UTF-8 instead.
393 uni.toUTF8String(res);
// Copy of the input with tolower applied to every char; presumably the
// non-ICU branch (its #else is not visible).
// NOTE(review): tolower works per char, so multi-byte characters are
// presumably not lower-cased here - confirm.
399 std::string result = str;
400 std::transform(result.begin(), result.end(), result.begin(), tolower);
std::string Get(const std::string &section, const std::string &name, const std::string &default_value) const
std::string battletest_background
std::string gameover_name
std::string health_points
std::string spirit_points
std::string save_game_message
std::string exit_game_message
std::string normal_status
std::string load_game_message
bool Load(const std::string &filename, const std::string &encoding)
std::string DetectEncoding(std::istream &filestream)
std::string CodepageToEncoding(int codepage)
std::vector< std::string > DetectEncodings(std::istream &filestream)
std::string GetLocaleEncoding()
std::string Normalize(const std::string &str)
std::string GetEncoding(const std::string &ini_file)
std::string Recode(const std::string &str_to_encode, const std::string &source_encoding)
ScopeGuard< F > makeScopeGuard(F &&f)