From 06e666b802535bbff421baf31c2686c4dafa954b Mon Sep 17 00:00:00 2001 From: Antonia Lechner Date: Mon, 29 Apr 2019 18:49:53 +0100 Subject: [PATCH] Implement parsing of Unicode characters from JSON Previously, if a JSON file contained a string in hexadecimal Unicode representation, e.g. "\u0001", the JSON parser would discard the "\u" part and store the string as "0001". This commit fixes this so the resulting string is equal to "\u0001". --- src/json/parser.y | 9 ++++++++- unit/json/json_parser.cpp | 27 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/json/parser.y b/src/json/parser.y index 9d17ef51242..e0998fa5448 100644 --- a/src/json/parser.y +++ b/src/json/parser.y @@ -46,8 +46,15 @@ static std::string convert_TOK_STRING() case 'r': result+='\r'; break; case 't': result+='\t'; break; case 'u': + { + // Character in hexadecimal Unicode representation, in the format + // \uABCD, i.e. the following four digits are part of this character. + assert(p + 4 < yyjsontext + len - 1); + std::string hex(++p, 4); + result += std::stoi(hex, nullptr, 16); + p += 3; break; - + } default:; /* an error */ } } diff --git a/unit/json/json_parser.cpp b/unit/json/json_parser.cpp index d91828a3dfe..1da52d01b69 100644 --- a/unit/json/json_parser.cpp +++ b/unit/json/json_parser.cpp @@ -84,4 +84,31 @@ SCENARIO("Loading JSON files") } } } + GIVEN("A JSON file containing a hexadecimal Unicode character") + { + temporary_filet unicode_json_file("cbmc_unit_json_parser_unicode", ".json"); + const std::string unicode_json_path = unicode_json_file(); + { + std::ofstream unicode_json_out(unicode_json_path); + unicode_json_out << "{\n" + << " \"special character\": \"\\u0001\"\n" + << "}\n"; + } + WHEN("Loading the JSON file with the special character") + { + jsont unicode_json; + const auto unicode_parse_error = + parse_json(unicode_json_path, null_message_handler, unicode_json); + THEN("The JSON file should be parsed correctly") + { + REQUIRE_FALSE(unicode_parse_error); + REQUIRE(unicode_json.is_object()); + + const json_objectt &json_object = to_json_object(unicode_json); + REQUIRE(json_object.find("special character") != json_object.end()); + REQUIRE(json_object["special character"].value.size() == 1); + REQUIRE(json_object["special character"].value == "\u0001"); + } + } + } }