diff --git a/regression/strings-smoke-tests/java_format/test.class b/regression/strings-smoke-tests/java_format/test.class new file mode 100644 index 00000000000..a889c41e687 Binary files /dev/null and b/regression/strings-smoke-tests/java_format/test.class differ diff --git a/regression/strings-smoke-tests/java_format/test.desc b/regression/strings-smoke-tests/java_format/test.desc new file mode 100644 index 00000000000..2a200d7918c --- /dev/null +++ b/regression/strings-smoke-tests/java_format/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--refine-strings --string-max-length 20 +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 6.* SUCCESS$ +^\[.*assertion.2\].* line 7.* FAILURE$ +-- diff --git a/regression/strings-smoke-tests/java_format/test.java b/regression/strings-smoke-tests/java_format/test.java new file mode 100644 index 00000000000..9726153fbb3 --- /dev/null +++ b/regression/strings-smoke-tests/java_format/test.java @@ -0,0 +1,10 @@ +public class test +{ + public static String main() + { + String s = String.format("foo %s", "bar"); + assert(s.equals("foo bar")); + assert(!s.equals("foo bar")); + return s; + } +} diff --git a/regression/strings-smoke-tests/java_format2/test.class b/regression/strings-smoke-tests/java_format2/test.class new file mode 100644 index 00000000000..e78ed5bb9df Binary files /dev/null and b/regression/strings-smoke-tests/java_format2/test.class differ diff --git a/regression/strings-smoke-tests/java_format2/test.desc b/regression/strings-smoke-tests/java_format2/test.desc new file mode 100644 index 00000000000..2a200d7918c --- /dev/null +++ b/regression/strings-smoke-tests/java_format2/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--refine-strings --string-max-length 20 +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 6.* SUCCESS$ +^\[.*assertion.2\].* line 7.* FAILURE$ +-- diff --git a/regression/strings-smoke-tests/java_format2/test.java b/regression/strings-smoke-tests/java_format2/test.java new file mode 100644 index 00000000000..c197d5c6793 --- /dev/null +++ b/regression/strings-smoke-tests/java_format2/test.java @@ -0,0 +1,10 @@ +public class test +{ + public static String main(String str) + { + String s = String.format("foo %s", str); + assert(s.equals("foo ".concat(str))); + assert(!s.equals("foo ".concat(str))); + return s; + } +} diff --git a/regression/strings-smoke-tests/java_format3/test.class b/regression/strings-smoke-tests/java_format3/test.class new file mode 100644 index 00000000000..6d235dcccab Binary files /dev/null and b/regression/strings-smoke-tests/java_format3/test.class differ diff --git a/regression/strings-smoke-tests/java_format3/test.desc b/regression/strings-smoke-tests/java_format3/test.desc new file mode 100644 index 00000000000..d9461eb09d3 --- /dev/null +++ b/regression/strings-smoke-tests/java_format3/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--refine-strings --string-max-length 20 +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 7.* SUCCESS$ +^\[.*assertion.2\].* line 9.* FAILURE$ +-- diff --git a/regression/strings-smoke-tests/java_format3/test.java b/regression/strings-smoke-tests/java_format3/test.java new file mode 100644 index 00000000000..fd88ef5c3b5 --- /dev/null +++ b/regression/strings-smoke-tests/java_format3/test.java @@ -0,0 +1,12 @@ +public class test +{ + public static String main(int i) + { + String s = String.format("%3$sar", "a", "r", "b"); + if(i==0) + assert(s.equals("bar")); + else + assert(!s.equals("bar")); + return s; + } +} diff --git a/regression/strings-smoke-tests/java_format4/test.class b/regression/strings-smoke-tests/java_format4/test.class new file mode 100644 index 00000000000..8adb0b41ad8 Binary files /dev/null and b/regression/strings-smoke-tests/java_format4/test.class differ diff --git a/regression/strings-smoke-tests/java_format4/test.desc b/regression/strings-smoke-tests/java_format4/test.desc new file mode 100644 index 00000000000..120bf0cf11f --- /dev/null +++ b/regression/strings-smoke-tests/java_format4/test.desc @@ -0,0 +1,8 @@ +KNOWNBUG +test.class +--refine-strings --string-max-length 20 +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 8.* SUCCESS$ +^\[.*assertion.2\].* line 13.* FAILURE$ +-- diff --git a/regression/strings-smoke-tests/java_format4/test.java b/regression/strings-smoke-tests/java_format4/test.java new file mode 100644 index 00000000000..bc76a9ee416 --- /dev/null +++ b/regression/strings-smoke-tests/java_format4/test.java @@ -0,0 +1,17 @@ +public class test +{ + public static String main(String str) + { + try + { + String s = String.format("%s %s", "a"); + assert(false); + return s; + } + catch(java.util.IllegalFormatException e) + { + assert(false); + return str; + } + } +} diff --git a/regression/strings-smoke-tests/java_format5/test.class b/regression/strings-smoke-tests/java_format5/test.class new file mode 100644 index 00000000000..66f386a8c3e Binary files /dev/null and b/regression/strings-smoke-tests/java_format5/test.class differ diff --git a/regression/strings-smoke-tests/java_format5/test.desc b/regression/strings-smoke-tests/java_format5/test.desc new file mode 100644 index 00000000000..1a77e4be0a5 --- /dev/null +++ b/regression/strings-smoke-tests/java_format5/test.desc @@ -0,0 +1,8 @@ +THOROUGH +test.class +--refine-strings --string-max-length 20 +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 7.* SUCCESS$ +^\[.*assertion.2\].* line 9.* FAILURE$ +-- diff --git a/regression/strings-smoke-tests/java_format5/test.java b/regression/strings-smoke-tests/java_format5/test.java new file mode 100644 index 00000000000..5078791baa2 --- /dev/null +++ b/regression/strings-smoke-tests/java_format5/test.java @@ -0,0 +1,12 @@ +public class test +{ + public static String main(boolean b) + { + String s = String.format("%s%s%s%s%s%s%s%s", "a", "b", "c", "d", "e", "f", "g", "h"); + if(b) + assert(s.equals("abcdefgh")); + else + assert(!s.equals("abcdefgh")); + return s; + } +} diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index 427f76b8753..b4f72d0112d 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -815,6 +815,11 @@ codet java_string_library_preprocesst::code_assign_java_string_to_string_expr( // Assignments code_blockt code; code.add(code_assignt(lhs.length(), rhs_length)); + + // We always assume data of a String is not null + not_exprt data_not_null(equal_exprt( + member_data, null_pointer_exprt(to_pointer_type(member_data.type())))); + code.add(code_assumet(data_not_null)); code.add(code_assignt(lhs.content(), rhs_data)); return code; } @@ -1169,6 +1174,308 @@ codet java_string_library_preprocesst::make_string_to_char_array_code( return code; } +/// Adds to the code an assignment of the form +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// type_name tmp_type_name +/// tmp_type_name = ((Classname*)arg_i)->value +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// and returns `tmp_typename`. +/// In case the class corresponding to `type_name` is not available in +/// `symbol_table`, the variable is declared but not assigned. +/// Used to access the values of the arguments of `String.format`. +/// \param object: an expression representing a reference to an object +/// \param type_name: name of the corresponding primitive type, this can be +/// one of the following: ID_boolean, ID_char, ID_byte, ID_short, ID_int, +/// ID_long, ID_float, ID_double, ID_void +/// \param loc: a location in the source +/// \param symbol_table: the symbol table +/// \param code: code block to which we are adding some assignments +/// \return An expression contaning a symbol `tmp_type_name` where `type_name` +/// is the given argument (ie. boolean, char etc.). Which represents the +/// primitive value contained in the given object. +exprt java_string_library_preprocesst::get_primitive_value_of_object( + const exprt &object, + irep_idt type_name, + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code) +{ + symbol_typet object_type; + typet value_type; + if(type_name==ID_boolean) + { + value_type=java_boolean_type(); + object_type=symbol_typet("java::java.lang.Boolean"); + } + else if(type_name==ID_char) + { + value_type=java_char_type(); + object_type=symbol_typet("java::java.lang.Character"); + } + else if(type_name==ID_byte) + { + value_type=java_byte_type(); + object_type=symbol_typet("java::java.lang.Byte"); + } + else if(type_name==ID_short) + { + value_type=java_short_type(); + object_type=symbol_typet("java::java.lang.Short"); + } + else if(type_name==ID_int) + { + value_type=java_int_type(); + object_type=symbol_typet("java::java.lang.Integer"); + } + else if(type_name==ID_long) + { + value_type=java_long_type(); + object_type=symbol_typet("java::java.lang.Long"); + } + else if(type_name==ID_float) + { + value_type=java_float_type(); + object_type=symbol_typet("java::java.lang.Float"); + } + else if(type_name==ID_double) + { + value_type=java_double_type(); + object_type=symbol_typet("java::java.lang.Double"); + } + else if(type_name==ID_void) + return nil_exprt(); + else + UNREACHABLE; + + // declare tmp_type_name to hold the value + std::string aux_name="tmp_"+id2string(type_name); + symbolt symbol=get_fresh_aux_symbol( + value_type, aux_name, aux_name, loc, ID_java, symbol_table); + exprt value=symbol.symbol_expr(); + + // Check that the type of the object is in the symbol table, + // otherwise there is no safe way of finding its value. + if(symbol_table.has_symbol(object_type.get_identifier())) + { + struct_typet struct_type=to_struct_type( + symbol_table.lookup(object_type.get_identifier()).type); + // Check that the type has a value field + const struct_union_typet::componentt value_comp= + struct_type.get_component("value"); + if(!value_comp.is_nil()) + { + pointer_typet pointer_type(struct_type); + dereference_exprt deref( + typecast_exprt(object, pointer_type), pointer_type.subtype()); + member_exprt deref_value(deref, value_comp.get_name(), value_comp.type()); + code.add(code_assignt(value, deref_value)); + return value; + } + } + + warning() << object_type.get_identifier() + << " not available to format function" << eom; + code.add(code_declt(value)); + return value; +} + +/// Helper for format function. Returns the expression: +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// *((void**)(argv->data)+index ) +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// which corresponds to the object at position `index` in `argv`. +/// \param argv: reference to an array of references +/// \param index: index of the desired object +/// \return An expression representing the object at position `index` of `argv`. +exprt java_string_library_preprocesst::get_object_at_index( + const exprt &argv, + int index) +{ + dereference_exprt deref_objs(argv, argv.type().subtype()); + pointer_typet empty_pointer((empty_typet())); + pointer_typet pointer_of_pointer; + pointer_of_pointer.copy_to_subtypes(empty_pointer); + member_exprt data_member(deref_objs, "data", pointer_of_pointer); + plus_exprt data_pointer_plus_index( + data_member, from_integer(index, java_int_type()), data_member.type()); + dereference_exprt data_at_index( + data_pointer_plus_index, data_pointer_plus_index.type().subtype()); + return data_at_index; +} + +/// Helper for format function. Adds code: +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// string_expr arg_i_string_expr; +/// int tmp_int; +/// float tmp_float; +/// char tmp_char; +/// boolean tmp_boolean; +/// Object* arg_i=get_object_at_index(argv, index) +/// if(arg_i!=NULL) +/// { +/// if(arg_i.@class_identifier=="java::java.lang.String") +/// { +/// arg_i_string_expr = (string_expr)((String*)arg_i_as_string) +/// } +/// tmp_int=((Integer)arg_i)->value +/// tmp_float=((Float)arg_i)->value +/// tmp_char=((Char)arg_i)->value +/// tmp_boolean=((Boolean)arg_i)->value +/// } +/// arg_i_struct = { string_expr=arg_i_string_expr; +/// integer=tmp_int; float=tmp_float; char=tmp_char; boolean=tmp_boolean} +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// and returns `arg_i_struct`. +/// +/// TODO: date_time and hash code are not implemented +/// \param argv: reference to an array of references +/// \param index: index of the desired argument +/// \param structured_type: type for arguments of the internal format function +/// \param loc: a location in the source +/// \param symbol_table: the symbol table +/// \param code: code block to which we are adding some assignments +/// \return An expression of type `structured_type` representing the possible +/// values of the argument at position `index` of `argv`. +exprt java_string_library_preprocesst::make_argument_for_format( + const exprt &argv, + int index, + const struct_typet &structured_type, + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code) +{ + // Declarations of the fields of arg_i_struct + // arg_i_struct is { arg_i_string_expr, tmp_int, tmp_char, ... } + struct_exprt arg_i_struct(structured_type); + std::list field_exprs; + for(const auto & comp : structured_type.components()) + { + const irep_idt &name=comp.get_name(); + const typet &type=comp.type(); + exprt field_expr; + if(name!="string_expr") + { + std::string tmp_name="tmp_"+id2string(name); + symbolt field_symbol=get_fresh_aux_symbol( + type, tmp_name, tmp_name, loc, ID_java, symbol_table); + field_expr=field_symbol.symbol_expr(); + code.add(code_declt(field_expr)); + } + else + field_expr=fresh_string_expr(loc, symbol_table, code); + + field_exprs.push_back(field_expr); + arg_i_struct.copy_to_operands(field_expr); + } + + // arg_i = argv[index] + exprt obj=get_object_at_index(argv, index); + symbolt object_symbol=get_fresh_aux_symbol( + obj.type(), "tmp_object", "tmp_object", loc, ID_java, symbol_table); + symbol_exprt arg_i=object_symbol.symbol_expr(); + (void) allocate_dynamic_object_with_decl( + arg_i, obj.type(), symbol_table, loc, code, false); + code.add(code_assignt(arg_i, obj)); + code.add(code_assumet(not_exprt(equal_exprt( + arg_i, null_pointer_exprt(to_pointer_type(arg_i.type())))))); + + // if arg_i != null then [code_not_null] + code_ifthenelset code_avoid_null_arg; + code_avoid_null_arg.cond()=not_exprt(equal_exprt( + arg_i, null_pointer_exprt(to_pointer_type(arg_i.type())))); + code_blockt code_not_null; + + // Assigning all the fields of arg_i_struct + for(const auto &comp : structured_type.components()) + { + const irep_idt &name=comp.get_name(); + exprt field_expr=field_exprs.front(); + field_exprs.pop_front(); + + if(name=="string_expr") + { + pointer_typet string_pointer(symbol_typet("java::java.lang.String")); + typecast_exprt arg_i_as_string(arg_i, string_pointer); + code_not_null.add(code_assign_java_string_to_string_expr( + to_string_expr(field_expr), arg_i_as_string, symbol_table)); + exprt arg_i_string_expr_sym=fresh_string_expr_symbol( + loc, symbol_table, code_not_null); + code_not_null.add(code_assignt( + arg_i_string_expr_sym, to_string_expr(field_expr))); + } + else if(name==ID_int || name==ID_float || name==ID_char || name==ID_boolean) + { + exprt value=get_primitive_value_of_object( + arg_i, name, loc, symbol_table, code_not_null); + code_not_null.add(code_assignt(field_expr, value)); + } + else + { + // TODO: date_time and hash_code not implemented + } + } + + code.add(code_not_null); + return arg_i_struct; +} + +/// Used to provide code for the Java String.format function. +/// +/// TODO: date_time and hash code are not implemented, and we set a limit of +/// 10 arguments +/// \param type: type of the function call +/// \param loc: location in the program_invocation_name +/// \param symbol_table: symbol table +/// \return Code implementing the Java String.format function. +/// Since the exact class of the arguments is not known, we give as +/// argument to the internal format function a structure containing +/// the different possible types. +codet java_string_library_preprocesst::make_string_format_code( + const code_typet &type, + const source_locationt &loc, + symbol_tablet &symbol_table) +{ + PRECONDITION(type.parameters().size()==2); + code_blockt code; + exprt::operandst args=process_parameters( + type.parameters(), loc, symbol_table, code); + INVARIANT(args.size()==2, "String.format should have two arguments"); + + // The argument can be: + // a string, an integer, a floating point, a character, a boolean, + // an object of which we take the hash code, or a date/time + struct_typet structured_type; + structured_type.components().emplace_back("string_expr", refined_string_type); + structured_type.components().emplace_back(ID_int, java_int_type()); + structured_type.components().emplace_back(ID_float, java_float_type()); + structured_type.components().emplace_back(ID_char, java_char_type()); + structured_type.components().emplace_back(ID_boolean, java_boolean_type()); + // TODO: hash_code not implemented for now + structured_type.components().emplace_back("hashcode", java_int_type()); + // TODO: date_time type not implemented for now + structured_type.components().emplace_back("date_time", java_int_type()); + + // We will process arguments so that each is converted to a `struct_exprt` + // containing each possible type used in format specifiers. + std::vector processed_args; + processed_args.push_back(args[0]); + for(std::size_t i=0; i #include -class string_constraint_generatort +class string_constraint_generatort: messaget { public: // This module keeps a list of axioms. It has methods which generate @@ -95,6 +95,9 @@ class string_constraint_generatort static constant_exprt constant_char(int i, const typet &char_type); + // Used by format function + class format_specifiert; + private: // The integer with the longest string is Integer.MIN_VALUE which is -2^31, // that is -2147483648 so takes 11 characters to write. @@ -170,6 +173,21 @@ class string_constraint_generatort string_exprt add_axioms_for_delete(const function_application_exprt &expr); string_exprt add_axioms_for_delete_char_at( const function_application_exprt &expr); + string_exprt add_axioms_for_format(const function_application_exprt &f); + string_exprt add_axioms_for_format( + const std::string &s, + const exprt::operandst &args, + const refined_string_typet &ref_type); + exprt add_axioms_for_format_specifier_is_correct( + const function_application_exprt &expr); + bool add_axioms_for_format_specifier_is_correct( + const std::string &s); + + string_exprt add_axioms_for_format_specifier( + const format_specifiert &fs, + const struct_exprt &arg, + const refined_string_typet &ref_type); + string_exprt add_axioms_for_insert( const string_exprt &s1, const string_exprt &s2, const exprt &offset); string_exprt add_axioms_for_insert(const function_application_exprt &f); @@ -267,6 +285,8 @@ class string_constraint_generatort const function_application_exprt &expr); string_exprt add_axioms_for_to_upper_case( const function_application_exprt &expr); + string_exprt add_axioms_for_to_upper_case( + const string_exprt &expr); string_exprt add_axioms_for_trim(const function_application_exprt &expr); // Add axioms corresponding to the String.valueOf([CII) function @@ -335,10 +355,13 @@ class string_constraint_generatort exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); bool is_constant_string(const string_exprt &expr) const; + string_exprt empty_string(const refined_string_typet &ref_type); }; exprt is_digit_with_radix(exprt chr, exprt radix); exprt get_numeric_value_from_character( const exprt &chr, const typet &char_type, const typet &type); +std::string utf16_constant_array_to_ascii( + const array_exprt &arr, unsigned length); #endif diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 1351a8839d4..6a68c82acfe 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -57,6 +57,15 @@ string_exprt string_constraint_generatort::add_axioms_for_empty_string( PRECONDITION(f.arguments().empty()); PRECONDITION(refined_string_typet::is_refined_string_type(f.type())); const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return empty_string(ref_type); +} + +/// Generate a string expression representing the empty string +/// \param ref_type: a refined string type +/// \return a string expression +string_exprt string_constraint_generatort::empty_string( + const refined_string_typet &ref_type) +{ exprt size=from_integer(0, ref_type.get_index_type()); const array_typet &content_type=ref_type.get_content_type(); array_of_exprt empty_array( diff --git a/src/solvers/refinement/string_constraint_generator_format.cpp b/src/solvers/refinement/string_constraint_generator_format.cpp new file mode 100644 index 00000000000..a30cae81a7c --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_format.cpp @@ -0,0 +1,440 @@ +/*******************************************************************\ + +Module: Generates string constraints for the Java format function + +Author: Romain Brenguier + +Date: May 2017 + +\*******************************************************************/ + +/// \file +/// Generates string constraints for the Java format function + +#include +#include +#include +#include + +#include +#include + +#include "string_constraint_generator.h" + +// Format specifier describes how a value should be printed. +class string_constraint_generatort::format_specifiert +{ +public: + // Constants describing the meaning of characters in format specifiers. + static const char DECIMAL_INTEGER ='d'; + static const char OCTAL_INTEGER ='o'; + static const char HEXADECIMAL_INTEGER ='x'; + static const char HEXADECIMAL_INTEGER_UPPER='X'; + static const char SCIENTIFIC ='e'; + static const char SCIENTIFIC_UPPER ='E'; + static const char GENERAL ='g'; + static const char GENERAL_UPPER ='G'; + static const char DECIMAL_FLOAT ='f'; + static const char HEXADECIMAL_FLOAT ='a'; + static const char HEXADECIMAL_FLOAT_UPPER ='A'; + static const char CHARACTER ='c'; + static const char CHARACTER_UPPER ='C'; + static const char DATE_TIME ='t'; + static const char DATE_TIME_UPPER ='T'; + static const char BOOLEAN ='b'; + static const char BOOLEAN_UPPER ='B'; + static const char STRING ='s'; + static const char STRING_UPPER ='S'; + static const char HASHCODE ='h'; + static const char HASHCODE_UPPER ='H'; + static const char LINE_SEPARATOR ='n'; + static const char PERCENT_SIGN ='%'; + + int index=-1; + std::string flag; + int width; + int precision; + bool dt=false; + char conversion; + + format_specifiert( + int _index, + std::string _flag, + int _width, + int _precision, + bool _dt, + char c): + index(_index), + flag(_flag), + width(_width), + precision(_precision), + dt(_dt), + conversion(c) + { } +}; + +// Format text represent a constant part of a format string. +class format_textt +{ +public: + explicit format_textt(std::string _content): content(_content) { } + + format_textt(const format_textt &fs): content(fs.content) { } + + std::string get_content() const + { + return content; + } + +private: + std::string content; +}; + +// A format element is either a specifier or text. +class format_elementt +{ +public: + typedef enum {SPECIFIER, TEXT} format_typet; + + explicit format_elementt(format_typet _type): type(_type), fstring("") + { + } + + explicit format_elementt(std::string s): type(TEXT), fstring(s) + { + } + + explicit format_elementt(string_constraint_generatort::format_specifiert fs): + type(SPECIFIER), + fstring("") + { + fspec.push_back(fs); + } + + bool is_format_specifier() const + { + return type==SPECIFIER; + } + + bool is_format_text() const + { + return type==TEXT; + } + + string_constraint_generatort::format_specifiert get_format_specifier() const + { + PRECONDITION(is_format_specifier()); + return fspec.back(); + } + + format_textt &get_format_text() + { + PRECONDITION(is_format_text()); + return fstring; + } + + const format_textt &get_format_text() const + { + PRECONDITION(is_format_text()); + return fstring; + } + +private: + format_typet type; + format_textt fstring; + std::vector fspec; +}; + +#if 0 +// This code is deactivated as it is not used for now, but ultimalety this +// should be used to throw an exception when the format string is not correct +/// Used to check first argument of `String.format` is correct. +/// \param s: a string +/// \return True if the argument is a correct format string. Any '%' found +/// between format specifier means the string is invalid. +static bool check_format_string(std::string s) +{ + std::string format_specifier= + "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])"; + std::regex regex(format_specifier); + std::smatch match; + + while(std::regex_search(s, match, regex)) + { + if(match.position()!= 0) + for(const auto &c : match.str()) + if(c=='%') + return false; + s=match.suffix(); + } + + for(const auto &c : s) + if(c=='%') + return false; + + return true; +} +#endif + +/// Helper function for parsing format strings. +/// This follows the implementation in openJDK of the java.util.Formatter class: +/// http://hg.openjdk.java.net/jdk7/jdk7/jdk/file/9b8c96f96a0f/src/share/classes/java/util/Formatter.java#l2660 +/// \param m: a match in a regular expression +/// \return Format specifier represented by the matched string. The groups in +/// the match should represent: index, flag, width, precision, date and +/// conversion type. +static string_constraint_generatort::format_specifiert + format_specifier_of_match(std::smatch &m) +{ + int index=m[1].str().empty()?-1:std::stoi(m[1].str()); + std::string flag=m[2].str().empty()?"":m[2].str(); + int width=m[3].str().empty()?-1:std::stoi(m[3].str()); + int precision=m[4].str().empty()?-1:std::stoi(m[4].str()); + std::string tT=m[5].str(); + + bool dt=(tT!=""); + if(tT=="T") + flag.push_back( + string_constraint_generatort::format_specifiert::DATE_TIME_UPPER); + + INVARIANT( + m[6].str().length()==1, "format conversion should be one character"); + char conversion=m[6].str()[0]; + + return string_constraint_generatort::format_specifiert( + index, flag, width, precision, dt, conversion); +} + +/// Parse the given string into format specifiers and text. +/// This follows the implementation in openJDK of the java.util.Formatter class: +/// http://hg.openjdk.java.net/jdk7/jdk7/jdk/file/9b8c96f96a0f/src/share/classes/java/util/Formatter.java#l2513 +/// \param s: a string +/// \return A vector of format_elementt. +static std::vector parse_format_string(std::string s) +{ + std::string format_specifier= + "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])"; + std::regex regex(format_specifier); + std::vector al; + std::smatch match; + + while(std::regex_search(s, match, regex)) + { + if(match.position()!=0) + { + std::string pre_match=s.substr(0, match.position()); + al.emplace_back(pre_match); + } + + al.emplace_back(format_specifier_of_match(match)); + s=match.suffix(); + } + + al.emplace_back(s); + return al; +} + +/// Helper for add_axioms_for_format_specifier +/// \param expr: a structured expression +/// \param component_name: name of the desired component +/// \return Expression in the component of `expr` named `component_name`. +static exprt get_component_in_struct( + const struct_exprt &expr, irep_idt component_name) +{ + const struct_typet &type=to_struct_type(expr.type()); + std::size_t number=type.component_number(component_name); + return expr.operands()[number]; +} + +/// Parse `s` and add axioms ensuring the output corresponds to the output of +/// String.format. Assumes the argument is a structured expression which +/// contains the fields: string expr, int, float, char, boolean, hashcode, +/// date_time. The correct component will be fetched depending on the format +/// specifier. +/// \param fs: a format specifier +/// \param arg: a struct containing the possible value of the argument to format +/// \param ref_type: a type for refined string type +/// \return String expression representing the output of String.format. +string_exprt string_constraint_generatort::add_axioms_for_format_specifier( + const format_specifiert &fs, + const struct_exprt &arg, + const refined_string_typet &ref_type) +{ + switch(fs.conversion) + { + case format_specifiert::DECIMAL_INTEGER: + return add_axioms_from_int( + get_component_in_struct(arg, ID_int), MAX_INTEGER_LENGTH, ref_type); + case format_specifiert::HEXADECIMAL_INTEGER: + return add_axioms_from_int_hex( + get_component_in_struct(arg, ID_int), ref_type); + case format_specifiert::SCIENTIFIC: + return add_axioms_from_float_scientific_notation( + get_component_in_struct(arg, ID_float), ref_type); + case format_specifiert::DECIMAL_FLOAT: + return add_axioms_for_string_of_float( + get_component_in_struct(arg, ID_float), ref_type); + case format_specifiert::CHARACTER: + return add_axioms_from_char( + get_component_in_struct(arg, ID_char), ref_type); + case format_specifiert::BOOLEAN: + return add_axioms_from_bool( + get_component_in_struct(arg, ID_boolean), ref_type); + case format_specifiert::STRING: + return get_string_expr(get_component_in_struct(arg, "string_expr")); + case format_specifiert::HASHCODE: + return add_axioms_from_int( + get_component_in_struct(arg, "hashcode"), MAX_INTEGER_LENGTH, ref_type); + case format_specifiert::LINE_SEPARATOR: + // TODO: the constant should depend on the system: System.lineSeparator() + return add_axioms_for_constant("\n", ref_type); + case format_specifiert::PERCENT_SIGN: + return add_axioms_for_constant("%", ref_type); + case format_specifiert::SCIENTIFIC_UPPER: + case format_specifiert::GENERAL_UPPER: + case format_specifiert::HEXADECIMAL_FLOAT_UPPER: + case format_specifiert::CHARACTER_UPPER: + case format_specifiert::DATE_TIME_UPPER: + case format_specifiert::BOOLEAN_UPPER: + case format_specifiert::STRING_UPPER: + case format_specifiert::HASHCODE_UPPER: + { + string_constraint_generatort::format_specifiert fs_lower=fs; + fs_lower.conversion=tolower(fs.conversion); + string_exprt lower_case=add_axioms_for_format_specifier( + fs_lower, arg, ref_type); + return add_axioms_for_to_upper_case(lower_case); + } + case format_specifiert::OCTAL_INTEGER: + // TODO: conversion of octal not implemented + case format_specifiert::GENERAL: + // TODO: general not implemented + case format_specifiert::HEXADECIMAL_FLOAT: + // TODO: hexadecimal float not implemented + case format_specifiert::DATE_TIME: + // TODO: DateTime not implemented + // For all these unimplemented cases we return a non-deterministic string + warning() << "unimplemented format specifier: " << fs.conversion << eom; + return fresh_string(ref_type); + default: + error() << "invalid format specifier: " << fs.conversion << eom; + INVARIANT( + false, "format specifier must belong to [bBhHsScCdoxXeEfgGaAtT%n]"); + throw 0; + } +} + +/// Parse `s` and add axioms ensuring the output corresponds to the output of +/// String.format. +/// \param s: a format string +/// \param args: a vector of arguments +/// \param ref_type: a type for refined string type +/// \return String expression representing the output of String.format. +string_exprt string_constraint_generatort::add_axioms_for_format( + const std::string &s, + const exprt::operandst &args, + const refined_string_typet &ref_type) +{ + const std::vector format_strings=parse_format_string(s); + std::vector intermediary_strings; + std::size_t arg_count=0; + + for(const format_elementt &fe : format_strings) + if(fe.is_format_specifier()) + { + const format_specifiert &fs=fe.get_format_specifier(); + struct_exprt arg; + if(fs.conversion!=format_specifiert::PERCENT_SIGN && + fs.conversion!=format_specifiert::LINE_SEPARATOR) + { + if(fs.index==-1) + { + INVARIANT( + arg_count=0, "index in format should be positive"); + INVARIANT( + static_cast(fs.index)<=args.size(), + "number of format must match specifiers"); + + // first argument `args[0]` corresponds to index 1 + arg=to_struct_expr(args[fs.index-1]); + } + } + intermediary_strings.push_back( + add_axioms_for_format_specifier(fs, arg, ref_type)); + } + else + intermediary_strings.push_back( + add_axioms_for_constant( + fe.get_format_text().get_content(), ref_type)); + + if(intermediary_strings.empty()) + return empty_string(ref_type); + + auto it=intermediary_strings.begin(); + string_exprt str=*(it++); + for(; it!=intermediary_strings.end(); ++it) + str=add_axioms_for_concat(str, *it); + return str; +} + +/// Construct a string from a constant array. +/// \param arr: an array expression containing only constants +/// \param length: an unsigned value representing the length of the array +/// \return String of length `length` represented by the array assuming each +/// field in `arr` represents a character. +std::string utf16_constant_array_to_ascii( + const array_exprt &arr, unsigned int length) +{ + std::wstring out(length, '?'); + unsigned int c; + for(std::size_t i=0; i args( + std::next(f.arguments().begin()), f.arguments().end()); + return add_axioms_for_format(s, args, ref_type); + } + else + { + warning() << "ignoring format function with non constant first argument" + << eom; + return fresh_string(ref_type); + } +} diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index a3add2c74c8..0babb05cd2e 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -499,6 +499,8 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_for_intern(expr); else if(id==ID_cprover_string_array_of_char_pointer_func) res=add_axioms_for_char_pointer(expr); + else if(id==ID_cprover_string_format_func) + res=add_axioms_for_format(expr); else { std::string msg( diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index abbd6d34794..094cf5a0ac8 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -277,9 +277,8 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( /// \par parameters: function application with one string argument /// \return a new string expression string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( - const function_application_exprt &expr) + const string_exprt &str) { - string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -323,6 +322,15 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( return res; } +/// add axioms corresponding to the String.toUpperCase java function +/// \param expr: function application with one string argument +/// \return a new string expression +string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( + const function_application_exprt &expr) +{ + string_exprt str=get_string_expr(args(expr, 1)[0]); + return add_axioms_for_to_upper_case(str); +} /// add axioms corresponding stating that the result is similar to that of the /// StringBuilder.setCharAt java function Warning: this may be underspecified in diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index a8c66d63404..1c876567a2c 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -807,28 +807,7 @@ std::string string_refinementt::string_of_array(const array_exprt &arr) exprt size_expr=to_array_type(arr.type()).size(); PRECONDITION(size_expr.id()==ID_constant); to_unsigned_integer(to_constant_expr(size_expr), n); - std::string str(n, '?'); - - std::ostringstream result; - std::locale loc; - - for(size_t i=0; i=32) - result << (unsigned char) c; - else - { - result << "\\u" << std::hex << std::setw(4) << std::setfill('0') - << (unsigned int) c; - } - } - - return result.str(); + return utf16_constant_array_to_ascii(arr, n); } /// Display part of the current model by mapping the variables created by the