From 20ee760990a826b04d0096468a1d66d45ce250be Mon Sep 17 00:00:00 2001
From: Romain Brenguier
Date: Thu, 27 Oct 2016 10:32:29 +0100
Subject: [PATCH 1/2] conversion from utf8 to utf16

---
NOTE(review): angle-bracketed tokens (#include targets, template arguments,
the author's e-mail address) were missing from the copy under review; the
ones shown in the diff below are reconstructed - confirm before applying.

 src/util/unicode.cpp | 8 ++++++++
 src/util/unicode.h   | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp
index 59aea736547..ee675cb8834 100644
--- a/src/util/unicode.cpp
+++ b/src/util/unicode.cpp
@@ -7,6 +7,8 @@ Author: Daniel Kroening, kroening@kroening.com
 \*******************************************************************/
 
 #include <cstring>
+#include <locale>
+#include <codecvt>
 
 #include "unicode.h"
 
@@ -253,3 +255,9 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
 
   return argv_narrow;
 }
+
+std::wstring utf8_to_utf16(const std::string& in)
+{
+  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t> > converter;
+  return converter.from_bytes(in);
+}
diff --git a/src/util/unicode.h b/src/util/unicode.h
index 44038a26c04..05bc84a463d 100644
--- a/src/util/unicode.h
+++ b/src/util/unicode.h
@@ -22,6 +22,8 @@ std::wstring widen(const std::string &s);
 std::string utf32_to_utf8(const std::basic_string<unsigned int> &s);
 std::string utf16_to_utf8(const std::basic_string<unsigned short int> &s);
 
+std::wstring utf8_to_utf16(const std::string&);
+
 const char **narrow_argv(int argc, const wchar_t **argv_wide);
 
 #endif

From 2fe891d09878d53d662f763036fe46cc938eb20d Mon Sep 17 00:00:00 2001
From: Romain Brenguier
Date: Thu, 27 Oct 2016 14:04:41 +0100
Subject: [PATCH 2/2] Using new version of the functions to convert utf8 to
 utf16 that make explicit the use of little and big endian

---
NOTE(review): revised during review. utf16le_to_ascii formatted into a
5-byte buffer with sprintf, which overflows when wchar_t is wider than 16
bits and a value needs more than four hex digits; it now uses a 9-byte
buffer and snprintf. Comments were added to the three functions. The
post-image blob id on the unicode.cpp index line predates these changes.
Angle-bracketed tokens (#include targets, template arguments) were missing
from the copy under review and are reconstructed - confirm before applying.

 src/util/unicode.cpp | 41 ++++++++++++++++++++++++++++++++++++++++-
 src/util/unicode.h   |  4 +++-
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp
index ee675cb8834..cb22f3a5d65 100644
--- a/src/util/unicode.cpp
+++ b/src/util/unicode.cpp
@@ -256,8 +256,47 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide)
   return argv_narrow;
 }
 
-std::wstring utf8_to_utf16(const std::string& in)
+// Converts the UTF-8 encoded string `in` to a UTF-16 wide string using
+// std::wstring_convert; from_bytes throws std::range_error when the input
+// cannot be converted, since no error string is given to the converter.
+std::wstring utf8_to_utf16be(const std::string& in)
 {
   std::wstring_convert<std::codecvt_utf8_utf16<wchar_t> > converter;
   return converter.from_bytes(in);
 }
+
+// Same conversion as utf8_to_utf16be, with the std::codecvt_utf8_utf16 facet
+// instantiated with the std::little_endian mode flag.
+std::wstring utf8_to_utf16le(const std::string& in)
+{
+  typedef std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::little_endian>
+    little_endian_facett;
+  std::wstring_convert<little_endian_facett> converter;
+  return converter.from_bytes(in);
+}
+
+// Renders the wide string `in` as ASCII: a code unit that is at most 255 and
+// printable in the default-constructed locale is appended as a single byte;
+// any other code unit is appended as a backslash, the letter u and its value
+// in lower-case hexadecimal, zero-padded to at least four digits.
+std::string utf16le_to_ascii(const std::wstring& in)
+{
+  std::string result;
+  std::locale loc;
+  for(const auto c : in)
+  {
+    if(c <= 255 && isprint(c,loc))
+      result+=(unsigned char)c;
+    else
+    {
+      result+="\\u";
+      // wchar_t is 32 bits wide on some platforms, so the value may need
+      // up to 8 hex digits; size the buffer for that plus the terminator
+      // and let snprintf enforce the bound.
+      char hex[9];
+      snprintf(hex,sizeof(hex),"%04x",(unsigned int)c);
+      result+=hex;
+    }
+  }
+  return result;
+}
diff --git a/src/util/unicode.h b/src/util/unicode.h
index 05bc84a463d..e22bb10574c 100644
--- a/src/util/unicode.h
+++ b/src/util/unicode.h
@@ -21,8 +21,10 @@ std::wstring widen(const std::string &s);
 
 std::string utf32_to_utf8(const std::basic_string<unsigned int> &s);
 std::string utf16_to_utf8(const std::basic_string<unsigned short int> &s);
+std::string utf16le_to_ascii(const std::wstring&);
 
-std::wstring utf8_to_utf16(const std::string&);
+std::wstring utf8_to_utf16be(const std::string&);
+std::wstring utf8_to_utf16le(const std::string&);
 
 const char **narrow_argv(int argc, const wchar_t **argv_wide);
 