diff --git a/regression/strings-smoke-tests/java_string_printable/Test.class b/regression/strings-smoke-tests/java_string_printable/Test.class new file mode 100644 index 00000000000..67670375b05 Binary files /dev/null and b/regression/strings-smoke-tests/java_string_printable/Test.class differ diff --git a/regression/strings-smoke-tests/java_string_printable/Test.java b/regression/strings-smoke-tests/java_string_printable/Test.java new file mode 100644 index 00000000000..8b16a250ae6 --- /dev/null +++ b/regression/strings-smoke-tests/java_string_printable/Test.java @@ -0,0 +1,21 @@ +public class Test { + + public static String check(String t) { + String s = t.concat("$£¥\u0000\u0008\u0010\u001F"); + // This should be disallowed because "\u0000" is not printable + assert(!s.equals("\u0000$£¥\u0000\u0008\u0010\u001F")); + // This can fail when t="a" which is printable + assert(!s.equals("a$£¥\u0000\u0008\u0010\u001F")); + return s; + } + + public static boolean check_char(String s, char c) { + if(s == null) + return false; + // This should be -1 for all non-printable character + int i = s.indexOf(c); + boolean b = (i == -1 || (c >= ' ' && c <= '~')); + assert(b); + return b; + } +} diff --git a/regression/strings-smoke-tests/java_string_printable/test.desc b/regression/strings-smoke-tests/java_string_printable/test.desc new file mode 100644 index 00000000000..9a91eb68f6e --- /dev/null +++ b/regression/strings-smoke-tests/java_string_printable/test.desc @@ -0,0 +1,8 @@ +CORE +Test.class +--refine-strings --function Test.check --string-max-length 100 --string-printable --java-assume-inputs-non-null +^EXIT=10$ +^SIGNAL=0$ +assertion.* file Test.java line 6 .* SUCCESS +assertion.* file Test.java line 8 .* FAILURE +-- diff --git a/regression/strings-smoke-tests/java_string_printable/test_char.desc b/regression/strings-smoke-tests/java_string_printable/test_char.desc new file mode 100644 index 00000000000..af03ad6fbbe --- /dev/null +++ b/regression/strings-smoke-tests/java_string_printable/test_char.desc @@ -0,0 +1,7 @@ +CORE +Test.class +--refine-strings --function Test.check_char --string-max-length 100 --string-printable +^EXIT=0$ +^SIGNAL=0$ +assertion.* file Test.java line 18 .* SUCCESS +-- diff --git a/src/cbmc/cbmc_solvers.cpp b/src/cbmc/cbmc_solvers.cpp index d98b8b3dcca..d9c245b5b7a 100644 --- a/src/cbmc/cbmc_solvers.cpp +++ b/src/cbmc/cbmc_solvers.cpp @@ -180,7 +180,6 @@ std::unique_ptr cbmc_solverst::get_string_refinement() info.string_max_length=options.get_signed_int_option("string-max-length"); info.string_non_empty=options.get_bool_option("string-non-empty"); info.trace=options.get_bool_option("trace"); - info.string_printable=options.get_bool_option("string-printable"); if(options.get_bool_option("max-node-refinement")) info.max_node_refinement= options.get_unsigned_int_option("max-node-refinement"); diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index a4b15d8b963..e99f9fe0b73 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -51,6 +51,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) if(cmd.isset("string-max-input-length")) object_factory_parameters.max_nondet_string_length= std::stoi(cmd.get_value("string-max-input-length")); + object_factory_parameters.string_printable = cmd.isset("string-printable"); if(cmd.isset("java-max-vla-length")) max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length")); if(cmd.isset("lazy-methods-context-sensitive")) diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index eac76e3aa06..1cd59afeee1 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -75,6 +75,9 @@ struct object_factory_parameterst final /// dereference a pointer using a 'depth counter'. We set a pointer to null if /// such depth becomes >= than this maximum value. size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH; + + /// Force string content to be ASCII printable characters when set to true. + bool string_printable = false; }; typedef std::pair< diff --git a/src/java_bytecode/java_object_factory.cpp b/src/java_bytecode/java_object_factory.cpp index 330c5d19a3c..60460ffcb21 100644 --- a/src/java_bytecode/java_object_factory.cpp +++ b/src/java_bytecode/java_object_factory.cpp @@ -542,7 +542,8 @@ codet initialize_nondet_string_struct( const exprt &obj, const std::size_t &max_nondet_string_length, const source_locationt &loc, - symbol_tablet &symbol_table) + symbol_tablet &symbol_table, + bool printable) { PRECONDITION( java_string_library_preprocesst::implements_java_char_sequence(obj.type())); @@ -605,6 +606,13 @@ codet initialize_nondet_string_struct( add_array_to_length_association( data_expr, length_expr, symbol_table, loc, code); + + // Printable ASCII characters are between ' ' and '~'. + if(printable) + { + add_character_set_constraint( + array_pointer, length_expr, " -~", symbol_table, loc, code); + } } // tmp_object = struct_expr; @@ -616,6 +624,7 @@ codet initialize_nondet_string_struct( /// content and association of its address to the pointer `expr`. /// \param expr: pointer to be affected /// \param max_nondet_string_length: maximum length of strings to initialize +/// \param printable: Boolean, true to force content to be printable /// \param symbol_table: the symbol table /// \param loc: location in the source /// \param [out] code: code block in which initialization code is added @@ -625,6 +634,7 @@ codet initialize_nondet_string_struct( static bool add_nondet_string_pointer_initialization( const exprt &expr, const std::size_t &max_nondet_string_length, + bool printable, symbol_tablet &symbol_table, const source_locationt &loc, code_blockt &code) @@ -644,7 +654,8 @@ static bool add_nondet_string_pointer_initialization( dereference_exprt(expr, struct_type), max_nondet_string_length, loc, - symbol_table)); + symbol_table, + printable)); return false; } @@ -784,6 +795,7 @@ void java_object_factoryt::gen_nondet_pointer_init( add_nondet_string_pointer_initialization( expr, object_factory_parameters.max_nondet_string_length, + object_factory_parameters.string_printable, symbol_table, loc, assignments); diff --git a/src/java_bytecode/java_object_factory.h b/src/java_bytecode/java_object_factory.h index e731c70998b..c6aeac7d3d3 100644 --- a/src/java_bytecode/java_object_factory.h +++ b/src/java_bytecode/java_object_factory.h @@ -156,6 +156,7 @@ codet initialize_nondet_string_struct( const exprt &obj, const std::size_t &max_nondet_string_length, const source_locationt &loc, - symbol_tablet &symbol_table); + symbol_tablet &symbol_table, + bool printable); #endif // CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index eaac8aaf6fc..9135480da20 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -704,6 +704,38 @@ void add_array_to_length_association( symbol_table)); } +/// Add a call to a primitive of the string solver which ensures all characters +/// belong to the character set. +/// \param pointer: a character pointer expression +/// \param length: length of the character sequence pointed by `pointer` +/// \param char_set: character set given by a range expression consisting of +/// two characters separated by an hyphen. +/// For instance "a-z" denotes all lower case ascii letters. +/// \param symbol_table: the symbol table +/// \param loc: source location +/// \param code [out] : code block to which declaration and calls get added +void add_character_set_constraint( + const exprt &pointer, + const exprt &length, + const irep_idt &char_set, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code) +{ + PRECONDITION(pointer.type().id() == ID_pointer); + symbolt &return_sym = get_fresh_aux_symbol( + java_int_type(), "cnstr_added", "cnstr_added", loc, ID_java, symbol_table); + const exprt return_expr = return_sym.symbol_expr(); + code.add(code_declt(return_expr)); + const constant_exprt char_set_expr(char_set, string_typet()); + code.add( + code_assign_function_application( + return_expr, + ID_cprover_string_constrain_characters_func, + {length, pointer, char_set_expr}, + symbol_table)); +} + /// Create a refined_string_exprt `str` whose content and length are fresh /// symbols, calls the string primitive with name `function_name`. /// In the arguments of the primitive `str` takes the place of the result and diff --git a/src/java_bytecode/java_string_library_preprocess.h b/src/java_bytecode/java_string_library_preprocess.h index b02af6bfbcd..d9915655fd4 100644 --- a/src/java_bytecode/java_string_library_preprocess.h +++ b/src/java_bytecode/java_string_library_preprocess.h @@ -349,4 +349,12 @@ void add_array_to_length_association( const source_locationt &loc, code_blockt &code); +void add_character_set_constraint( + const exprt &pointer, + const exprt &length, + const irep_idt &char_set, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code); + #endif // CPROVER_JAVA_BYTECODE_JAVA_STRING_LIBRARY_PREPROCESS_H diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 8d1c9c75546..41e97131662 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -42,8 +42,6 @@ class string_constraint_generatort final { /// Max length of non-deterministic strings size_t string_max_length=std::numeric_limits::max(); - /// Prefer printable characters in non-deterministic strings - bool string_printable=false; }; string_constraint_generatort(const infot& info, const namespacet& ns); @@ -110,6 +108,14 @@ class string_constraint_generatort final void add_default_axioms(const array_string_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); + void add_constraint_on_characters( + const array_string_exprt &s, + const exprt &start, + const exprt &end, + const std::string &char_set); + exprt + add_axioms_for_constrain_characters(const function_application_exprt &f); + // The following functions add axioms for the returned value // to be equal to the result of the function given as argument. // They are not accessed directly from other classes: they call @@ -339,7 +345,6 @@ class string_constraint_generatort final std::set created_strings; unsigned symbol_count=0; const messaget message; - const bool force_printable_characters; std::vector axioms; std::vector boolean_symbols; diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index d7ff6d98c86..730e0877a2b 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -31,7 +31,6 @@ string_constraint_generatort::string_constraint_generatort( const string_constraint_generatort::infot &info, const namespacet &ns) : max_string_length(info.string_max_length), - force_printable_characters(info.string_printable), ns(ns) { } @@ -319,21 +318,71 @@ void string_constraint_generatort::add_default_axioms( if(!created_strings.insert(s).second) return; - axioms.push_back( - s.axiom_for_length_ge(from_integer(0, s.length().type()))); + const exprt index_zero = from_integer(0, s.length().type()); + axioms.push_back(s.axiom_for_length_ge(index_zero)); + if(max_string_length!=std::numeric_limits::max()) axioms.push_back(s.axiom_for_length_le(max_string_length)); +} - if(force_printable_characters) - { - symbol_exprt qvar=fresh_univ_index("printable", s.length().type()); - exprt chr=s[qvar]; - and_exprt printable( - binary_relation_exprt(chr, ID_ge, from_integer(' ', chr.type())), - binary_relation_exprt(chr, ID_le, from_integer('~', chr.type()))); - string_constraintt sc(qvar, s.length(), printable); - axioms.push_back(sc); - } +/// Add constraint on characters of a string. +/// +/// This constraint is +/// \f$ \forall i \in [start, end), low\_char \le s[i] \le high\_char \f$ +/// \param s: a string expression +/// \param start: index of the first character to constrain +/// \param end: index at which we stop adding constraints +/// \param char_set: a string of the form "-" where +/// `` and `` are two characters, which represents +/// the set of characters that are between `low_char` and `high_char`. +/// \return a string expression that is linked to the argument through axioms +/// that are added to the list +void string_constraint_generatort::add_constraint_on_characters( + const array_string_exprt &s, + const exprt &start, + const exprt &end, + const std::string &char_set) +{ + // Parse char_set + PRECONDITION(char_set.length() == 3); + PRECONDITION(char_set[1] == '-'); + const char &low_char = char_set[0]; + const char &high_char = char_set[2]; + + // Add constraint + const symbol_exprt qvar = fresh_univ_index("char_constr", s.length().type()); + const exprt chr = s[qvar]; + const and_exprt char_in_set( + binary_relation_exprt(chr, ID_ge, from_integer(low_char, chr.type())), + binary_relation_exprt(chr, ID_le, from_integer(high_char, chr.type()))); + const string_constraintt sc(qvar, start, end, true_exprt(), char_in_set); + axioms.push_back(sc); +} + +/// Add axioms to ensure all characters of a string belong to a given set. +/// +/// The axiom is: \f$\forall i \in [start, end).\ s[i] \in char_set \f$, where +/// `char_set` is given by the string `char_set_string` composed of three +/// characters `low_char`, `-` and `high_char`. Character `c` belongs to +/// `char_set` if \f$low_char \le c \le high_char\f$. +/// \param f: a function application with arguments: integer `|s|`, character +/// pointer `&s[0]`, string `char_set_string`, +/// optional integers `start` and `end` +/// \return integer expression whose value is zero +exprt string_constraint_generatort::add_axioms_for_constrain_characters( + const function_application_exprt &f) +{ + const auto &args = f.arguments(); + PRECONDITION(3 <= args.size() && args.size() <= 5); + PRECONDITION(args[2].type().id() == ID_string); + PRECONDITION(args[2].id() == ID_constant); + const array_string_exprt s = char_array_of_pointer(args[1], args[0]); + const irep_idt &char_set_string = to_constant_expr(args[2]).get_value(); + const exprt &start = + args.size() >= 4 ? args[3] : from_integer(0, s.length().type()); + const exprt &end = args.size() >= 5 ? args[4] : s.length(); + add_constraint_on_characters(s, start, end, char_set_string.c_str()); + return from_integer(0, get_return_code_type()); } /// Adds creates a new array if it does not already exists @@ -472,6 +521,8 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res = associate_array_to_pointer(expr); else if(id == ID_cprover_associate_length_to_array_func) res = associate_length_to_array(expr); + else if(id == ID_cprover_string_constrain_characters_func) + res = add_axioms_for_constrain_characters(expr); else { std::string msg( diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index ac1f75b7f60..4d4ea1838be 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -772,6 +772,7 @@ IREP_ID_ONE(cprover_string_concat_bool_func) IREP_ID_ONE(cprover_string_concat_double_func) IREP_ID_ONE(cprover_string_concat_float_func) IREP_ID_ONE(cprover_string_concat_code_point_func) +IREP_ID_ONE(cprover_string_constrain_characters_func) IREP_ID_ONE(cprover_string_contains_func) IREP_ID_ONE(cprover_string_copy_func) IREP_ID_ONE(cprover_string_delete_func) diff --git a/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp b/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp index 1225263b824..3d2dc659b3c 100644 --- a/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp +++ b/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp @@ -49,7 +49,7 @@ SCENARIO( WHEN("Initialisation code for a string is generated") { const codet code = - initialize_nondet_string_struct(expr, 20, loc, symbol_table); + initialize_nondet_string_struct(expr, 20, loc, symbol_table, false); THEN("Code is produced") {