diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 968fa3236e9..07ed54fe927 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -375,7 +375,7 @@ exprt get_numeric_value_from_character( unsigned long radix=36ul); size_t max_printed_string_length(const typet &type, unsigned long radix); unsigned long to_integer_or_default(const exprt &expr, unsigned long def); -std::string utf16_constant_array_to_ascii( +std::string utf16_constant_array_to_java( const array_exprt &arr, unsigned length); #endif diff --git a/src/solvers/refinement/string_constraint_generator_format.cpp b/src/solvers/refinement/string_constraint_generator_format.cpp index 6320a23ad7a..7ad34d7d2ec 100644 --- a/src/solvers/refinement/string_constraint_generator_format.cpp +++ b/src/solvers/refinement/string_constraint_generator_format.cpp @@ -386,7 +386,7 @@ string_exprt string_constraint_generatort::add_axioms_for_format( /// \param length: an unsigned value representing the length of the array /// \return String of length `length` represented by the array assuming each /// field in `arr` represents a character. -std::string utf16_constant_array_to_ascii( +std::string utf16_constant_array_to_java( const array_exprt &arr, unsigned int length) { std::wstring out(length, '?'); @@ -399,7 +399,7 @@ std::string utf16_constant_array_to_ascii( INVARIANT(!conversion_failed, "constant should be convertible to unsigned"); out[i]=c; } - return utf16_little_endian_to_ascii(out); + return utf16_little_endian_to_java(out); } /// Add axioms to specify the Java String.format function. @@ -423,7 +423,7 @@ string_exprt string_constraint_generatort::add_axioms_for_format( s1.content().id()==ID_array && !to_unsigned_integer(to_constant_expr(s1.length()), length)) { - std::string s=utf16_constant_array_to_ascii( + std::string s=utf16_constant_array_to_java( to_array_expr(s1.content()), length); // List of arguments after s std::vector args( diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 218309826b6..95e84d422a4 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -799,7 +799,7 @@ std::string string_refinementt::string_of_array(const array_exprt &arr) exprt size_expr=to_array_type(arr.type()).size(); PRECONDITION(size_expr.id()==ID_constant); to_unsigned_integer(to_constant_expr(size_expr), n); - return utf16_constant_array_to_ascii(arr, n); + return utf16_constant_array_to_java(arr, n); } /// Display part of the current model by mapping the variables created by the diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp index df38d52f8e7..6ea0e3cc364 100644 --- a/src/util/unicode.cpp +++ b/src/util/unicode.cpp @@ -272,7 +272,7 @@ std::wstring utf8_to_utf16(const std::string& in, bool swap_bytes) /// \par parameters: String in UTF-8 format /// \return String in UTF-16BE format -std::wstring utf8_to_utf16_big_endian(const std::string& in) +std::wstring utf8_to_utf16_big_endian(const std::string &in) { bool swap_bytes=is_little_endian_arch(); return utf8_to_utf16(in, swap_bytes); @@ -280,7 +280,7 @@ std::wstring utf8_to_utf16_big_endian(const std::string& in) /// \par parameters: String in UTF-8 format /// \return String in UTF-16LE format -std::wstring utf8_to_utf16_little_endian(const std::string& in) +std::wstring utf8_to_utf16_little_endian(const std::string &in) { bool swap_bytes=!is_little_endian_arch(); return utf8_to_utf16(in, swap_bytes); @@ -288,21 +288,30 @@ std::wstring utf8_to_utf16_little_endian(const std::string& in) /// \par parameters: String in UTF-16LE format /// \return String in US-ASCII format, with \uxxxx escapes for other characters -std::string utf16_little_endian_to_ascii(const std::wstring& in) +std::string utf16_little_endian_to_java(const std::wstring &in) { std::ostringstream result; - std::locale loc; - for(const auto c : in) + const std::locale loc; + for(const auto ch : in) { - if(c<=255 && isprint(c, loc)) - result << (unsigned char)c; + if(ch=='\n') + result << "\\n"; + else if(ch=='\r') + result << "\\r"; + else if(ch<=255 && isprint(ch, loc)) + { + const auto uch=static_cast(ch); + if(uch=='"') + result << '\\'; + result << uch; + } else { result << "\\u" << std::hex << std::setw(4) << std::setfill('0') - << (unsigned int)c; + << static_cast(ch); } } return result.str(); diff --git a/src/util/unicode.h b/src/util/unicode.h index 93e83c5b64f..22b7fadb464 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -24,7 +24,7 @@ std::string utf32_to_utf8(const std::basic_string &s); std::wstring utf8_to_utf16_big_endian(const std::string &); std::wstring utf8_to_utf16_little_endian(const std::string &); -std::string utf16_little_endian_to_ascii(const std::wstring &in); +std::string utf16_little_endian_to_java(const std::wstring &in); const char **narrow_argv(int argc, const wchar_t **argv_wide);