liblcf
Loading...
Searching...
No Matches
encoder.cpp
Go to the documentation of this file.
1/*
2 * This file is part of liblcf. Copyright (c) liblcf authors.
3 * https://github.com/EasyRPG/liblcf - https://easyrpg.org
4 *
5 * liblcf is Free/Libre Open Source Software, released under the MIT License.
6 * For the full copyright and license information, please view the COPYING
7 * file that was distributed with this source code.
8 */
9
10#include "lcf/encoder.h"
11#include "lcf/reader_util.h"
12#include "lcf/scope_guard.h"
13#include "log.h"
14#include <cstdio>
15#include <cstdlib>
16
17#if LCF_SUPPORT_ICU == 1
18# include <unicode/ucsdet.h>
19# include <unicode/ucnv.h>
20#elif LCF_SUPPORT_ICU == 2
21# ifndef _WIN32
22# error "icu.h only supported on Windows"
23# endif
24# include <icu.h>
25#else
26# include <cstdint>
27#endif
28
29#ifdef _WIN32
30# include <windows.h>
31#else
32# include <locale>
33#endif
34
35namespace lcf {
36
/**
 * Collapses every spelling of UTF-8 to the empty string.
 *
 * The rest of this file treats an empty encoding name as "nothing to
 * convert", so any name recognised here turns the encoder into a no-op.
 *
 * Recognised names:
 *  - with ICU: whatever ucnv_compareNames() considers equal to "UTF-8";
 *  - always: the code page number "65001";
 *  - always: "utf8" compared case-insensitively with '-', '_' and ' '
 *    ignored, so builds without ICU accept the same common aliases
 *    ("utf8", "Utf-8", "UTF_8", ...) instead of only two exact spellings.
 *
 * @param enc encoding name or code page number as text.
 * @return empty string when enc denotes UTF-8, otherwise enc unchanged.
 */
static std::string filterUtf8Compatible(std::string enc) {
#if LCF_SUPPORT_ICU
	if (ucnv_compareNames(enc.c_str(), "UTF-8") == 0) {
		return "";
	}
#endif

	if (enc == "65001") {
		return "";
	}

	// Fold the name by hand (ASCII only) so no locale-dependent <cctype>
	// behaviour is involved in the comparison.
	std::string folded;
	folded.reserve(enc.size());
	for (char c : enc) {
		if (c == '-' || c == '_' || c == ' ') {
			continue;
		}
		if (c >= 'A' && c <= 'Z') {
			c = static_cast<char>(c - 'A' + 'a');
		}
		folded.push_back(c);
	}

	if (folded == "utf8") {
		return "";
	}

	return enc;
}
50
51Encoder::Encoder(std::string encoding)
52 : _encoding(filterUtf8Compatible(std::move(encoding)))
53{
54 Init();
55}
56
57Encoder::~Encoder() {
58 Reset();
59}
60
61bool Encoder::IsOk() const {
62 return _encoding.empty() || (_conv_storage && _conv_runtime);
63}
64
65void Encoder::Encode(std::string& str) {
66 if (_encoding.empty() || str.empty()) {
67 return;
68 }
69 Convert(str, _conv_runtime, _conv_storage);
70}
71
72void Encoder::Decode(std::string& str) {
73 if (_encoding.empty() || str.empty()) {
74 return;
75 }
76 Convert(str, _conv_storage, _conv_runtime);
77}
78
79void Encoder::Init() {
80 if (_encoding.empty()) {
81 return;
82 }
83
84 auto code_page = atoi(_encoding.c_str());
85 const auto& storage_encoding = code_page > 0
86 ? ReaderUtil::CodepageToEncoding(code_page)
87 : _encoding;
88
89#if LCF_SUPPORT_ICU
90 auto status = U_ZERO_ERROR;
91 constexpr auto runtime_encoding = "UTF-8";
92 auto conv_runtime = ucnv_open(runtime_encoding, &status);
93
94 if (conv_runtime == nullptr) {
95 Log::Error("ucnv_open() error for encoding \"%s\": %s", runtime_encoding, u_errorName(status));
96 return;
97 }
98 status = U_ZERO_ERROR;
99 auto sg = makeScopeGuard([&]() { ucnv_close(conv_runtime); });
100
101 auto conv_storage = ucnv_open(storage_encoding.c_str(), &status);
102
103 if (conv_storage == nullptr) {
104 Log::Error("ucnv_open() error for dest encoding \"%s\": %s", storage_encoding.c_str(), u_errorName(status));
105 return;
106 }
107
108 sg.Dismiss();
109
110 _conv_runtime = conv_runtime;
111 _conv_storage = conv_storage;
112#else
113 if (storage_encoding != "windows-1252") {
114 return;
115 }
116
117 _conv_runtime = 65001;
118 _conv_storage = 1252;
119#endif
120}
121
122#if LCF_SUPPORT_ICU
123void Encoder::Reset() {
124 if (_conv_runtime) {
125 ucnv_close(_conv_runtime);
126 _conv_runtime = nullptr;
127 }
128
129 if (_conv_storage) {
130 ucnv_close(_conv_storage);
131 _conv_storage = nullptr;
132 }
133}
134
135void Encoder::Convert(std::string& str, UConverter* conv_dst, UConverter* conv_src) {
136 const auto& src = str;
137
138 auto status = U_ZERO_ERROR;
139 _buffer.resize(src.size() * 4);
140
141 const auto* src_p = src.c_str();
142 auto* dst_p = _buffer.data();
143
144 ucnv_convertEx(conv_dst, conv_src,
145 &dst_p, dst_p + _buffer.size(),
146 &src_p, src_p + src.size(),
147 nullptr, nullptr, nullptr, nullptr,
148 true, true,
149 &status);
150
151 if (U_FAILURE(status)) {
152 Log::Error("ucnv_convertEx() error when encoding \"%s\": %s", src.c_str(), u_errorName(status));
153 _buffer.clear();
154 }
155
156 str.assign(_buffer.data(), dst_p);
157}
158#else
159void Encoder::Convert(std::string& str, int conv_dst, int) {
160 if (str.empty()) {
161 return;
162 }
163
164 size_t buf_idx = 0;
165
166 if (conv_dst == 65001) {
167 // From 1252 to UTF-8
168 // Based on https://stackoverflow.com/q/4059775/
169 _buffer.resize(str.size() * 2 + 1);
170
171 for (unsigned char ch: str) {
172 if (ch < 0x80) {
173 _buffer[buf_idx] = static_cast<char>(ch);
174 } else {
175 _buffer[buf_idx] = static_cast<char>(0xC0 | (ch >> 6));
176 ++buf_idx;
177 _buffer[buf_idx] = static_cast<char>(0x80 | (ch & 0x3F));
178 }
179
180 ++buf_idx;
181 }
182 } else {
183 // From UTF-8 to 1252
184 // Based on https://stackoverflow.com/q/23689733/
185 _buffer.resize(str.size() + 1);
186 uint32_t codepoint;
187
188 for (size_t str_idx = 0; str_idx < str.size(); ++str_idx) {
189 unsigned char ch = str[str_idx];
190 if (ch <= 0x7F) {
191 codepoint = ch;
192 } else if (ch <= 0xBF) {
193 codepoint = (codepoint << 6) | (ch & 0x3F);
194 } else if (ch <= 0xDF) {
195 codepoint = ch & 0x1F;
196 } else if (ch <= 0xEF) {
197 codepoint = ch & 0x0F;
198 } else {
199 codepoint = ch & 0x07;
200 }
201 ++str_idx;
202 ch = str[str_idx];
203 if (((ch & 0xC0) != 0x80) && (codepoint <= 0x10ffff)) {
204 if (codepoint <= 255) {
205 _buffer[buf_idx] = static_cast<char>(codepoint);
206 } else {
207 _buffer[buf_idx] = '?';
208 }
209 }
210 ++buf_idx;
211 }
212 }
213
214 str.assign(_buffer.data(), buf_idx);
215}
216#endif
217
218} //namespace lcf
static std::string filterUtf8Compatible(std::string enc)
Definition encoder.cpp:37