diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index 1f49653c644..5af90c00b79 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -320,6 +320,11 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) if(cmdline.isset("refine-strings")) { options.set_option("refine-strings", true); + options.set_option("string-non-empty", cmdline.isset("string-non-empty")); + options.set_option("string-printable", cmdline.isset("string-printable")); + if(cmdline.isset("string-max-length")) + options.set_option( + "string-max-length", cmdline.get_value("string-max-length")); } if(cmdline.isset("max-node-refinement")) @@ -1207,6 +1212,9 @@ void cbmc_parse_optionst::help() " --z3 use Z3\n" " --refine use refinement procedure (experimental)\n" " --refine-strings use string refinement (experimental)\n" + " --string-non-empty add constraint that strings are non empty (experimental)\n" // NOLINT(*) + " --string-printable add constraint that strings are printable (experimental)\n" // NOLINT(*) + " --string-max-length add constraint on the length of strings (experimental)\n" // NOLINT(*) " --outfile filename output formula to given file\n" " --arrays-uf-never never turn arrays into uninterpreted functions\n" // NOLINT(*) " --arrays-uf-always always turn arrays into uninterpreted functions\n" // NOLINT(*) diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index b3225951b3b..2c54dfc7353 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -39,6 +39,9 @@ class optionst; "(no-pretty-names)(beautify)" \ "(dimacs)(refine)(max-node-refinement):(refine-arrays)(refine-arithmetic)"\ "(refine-strings)" \ + "(string-non-empty)" \ + "(string-printable)" \ + "(string-max-length):" \ "(aig)(16)(32)(64)(LP64)(ILP64)(LLP64)(ILP32)(LP32)" \ "(little-endian)(big-endian)" \ "(show-goto-functions)(show-loops)" \ diff --git a/src/cbmc/cbmc_solvers.cpp b/src/cbmc/cbmc_solvers.cpp index a72546a8d0c..dace046cc17 100644 --- a/src/cbmc/cbmc_solvers.cpp +++ b/src/cbmc/cbmc_solvers.cpp @@ -232,6 +232,16 @@ cbmc_solverst::solvert* cbmc_solverst::get_string_refinement() string_refinementt *string_refinement=new string_refinementt( ns, *prop, MAX_NB_REFINEMENT); string_refinement->set_ui(ui); + + string_refinement->do_concretizing=options.get_bool_option("trace"); + if(options.get_bool_option("string-max-length")) + string_refinement->set_max_string_length( + options.get_signed_int_option("string-max-length")); + if(options.get_bool_option("string-non-empty")) + string_refinement->enforce_non_empty_string(); + if(options.get_bool_option("string-printable")) + string_refinement->enforce_printable_characters(); + return new solvert(string_refinement, prop); } diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 12447e0cf04..2c5216ddd6c 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -13,6 +13,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H +#include #include #include #include @@ -22,21 +23,19 @@ class string_constraint_generatort { public: // This module keeps a list of axioms. It has methods which generate - // string constraints for different string funcitons and add them + // string constraints for different string functions and add them // to the axiom list. string_constraint_generatort(): - mode(ID_unknown) + max_string_length(std::numeric_limits::max()), + force_printable_characters(false) { } - void set_mode(irep_idt _mode) - { - // only C and java modes supported - assert((_mode==ID_java) || (_mode==ID_C)); - mode=_mode; - } + // Constraints on the maximal length of strings + unsigned long max_string_length; - irep_idt &get_mode() { return mode; } + // Should we add constraints on the characters + bool force_printable_characters; // Axioms are of three kinds: universally quantified string constraint, // not contains string constraints and simple formulas. @@ -74,6 +73,8 @@ class string_constraint_generatort // Maps unresolved symbols to the string_exprt that was created for them std::map unresolved_symbols; + // Set of strings that have been created by the generator + std::set created_strings; string_exprt find_or_add_string_of_symbol( const symbol_exprt &sym, @@ -100,6 +101,7 @@ class string_constraint_generatort static irep_idt extract_java_string(const symbol_exprt &s); + void add_default_axioms(const string_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); // The following functions add axioms for the returned value diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index fb8e24d5320..e6f360585bb 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -35,7 +35,9 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( // a4 : forall i<|s1|. res[i]=s1[i] // a5 : forall i<|s2|. res[i+|s1|]=s2[i] - res.length()=plus_exprt_with_overflow_check(s1.length(), s2.length()); + equal_exprt a1( + res.length(), plus_exprt_with_overflow_check(s1.length(), s2.length())); + axioms.push_back(a1); axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index cd5f4376a93..7a63279699e 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -40,17 +40,32 @@ Function: string_constraint_generatort::add_axioms_for_insert Outputs: a new string expression - Purpose: add axioms corresponding to the StringBuilder.insert(String) java - function + Purpose: add axioms corresponding to the + StringBuilder.insert(int, CharSequence) + and StringBuilder.insert(int, CharSequence, int, int) + java functions \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 3)[0]); - string_exprt s2=get_string_expr(args(f, 3)[2]); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); + assert(f.arguments().size()>=3); + string_exprt s1=get_string_expr(f.arguments()[0]); + string_exprt s2=get_string_expr(f.arguments()[2]); + const exprt &offset=f.arguments()[1]; + if(f.arguments().size()==5) + { + const exprt &start=f.arguments()[3]; + const exprt &end=f.arguments()[4]; + string_exprt substring=add_axioms_for_substring(s2, start, end); + return add_axioms_for_insert(s1, substring, offset); + } + else + { + assert(f.arguments().size()==3); + return add_axioms_for_insert(s1, s2, offset); + } } /*******************************************************************\ diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index e4292da542b..9e09f5ef7f3 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -173,10 +173,11 @@ Function: string_constraint_generatort::fresh_string string_exprt string_constraint_generatort::fresh_string( const refined_string_typet &type) { - symbol_exprt length= - fresh_symbol("string_length", type.get_index_type()); + symbol_exprt length=fresh_symbol("string_length", type.get_index_type()); symbol_exprt content=fresh_symbol("string_content", type.get_content_type()); - return string_exprt(length, content, type); + string_exprt str(length, content, type); + created_strings.insert(str); + return str; } /*******************************************************************\ @@ -223,7 +224,6 @@ Function: string_constraint_generatort::convert_java_string_to_string_exprt string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( const exprt &jls) { - assert(get_mode()==ID_java); assert(jls.id()==ID_struct); exprt length(to_struct_expr(jls).op1()); @@ -246,6 +246,45 @@ string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( /*******************************************************************\ +Function: string_constraint_generatort::add_default_constraints + + Inputs: + s - a string expression + + Outputs: a string expression that is linked to the argument through + axioms that are added to the list + + Purpose: adds standard axioms about the length of the string and + its content: + * its length should be positive + * it should not exceed max_string_length + * if force_printable_characters is true then all characters + should belong to the range of ASCII characters between ' ' and '~' + + +\*******************************************************************/ + +void string_constraint_generatort::add_default_axioms( + const string_exprt &s) +{ + s.axiom_for_is_longer_than(from_integer(0, s.length().type())); + if(max_string_length!=std::numeric_limits::max()) + axioms.push_back(s.axiom_for_is_shorter_than(max_string_length)); + + if(force_printable_characters) + { + symbol_exprt qvar=fresh_univ_index("printable", s.length().type()); + exprt chr=s[qvar]; + and_exprt printable( + binary_relation_exprt(chr, ID_ge, from_integer(' ', chr.type())), + binary_relation_exprt(chr, ID_le, from_integer('~', chr.type()))); + string_constraintt sc(qvar, s.length(), printable); + axioms.push_back(sc); + } +} + +/*******************************************************************\ + Function: string_constraint_generatort::add_axioms_for_refined_string Inputs: an expression of refined string type @@ -258,7 +297,6 @@ Function: string_constraint_generatort::add_axioms_for_refined_string \*******************************************************************/ - string_exprt string_constraint_generatort::add_axioms_for_refined_string( const exprt &string) { @@ -272,15 +310,13 @@ string_exprt string_constraint_generatort::add_axioms_for_refined_string( { const symbol_exprt &sym=to_symbol_expr(string); string_exprt s=find_or_add_string_of_symbol(sym, type); - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + add_default_axioms(s); return s; } else if(string.id()==ID_nondet_symbol) { string_exprt s=fresh_string(type); - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + add_default_axioms(s); return s; } else if(string.id()==ID_if) @@ -290,8 +326,7 @@ string_exprt string_constraint_generatort::add_axioms_for_refined_string( else if(string.id()==ID_struct) { const string_exprt &s=to_string_expr(string); - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + add_default_axioms(s); return s; } else @@ -397,7 +432,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( // TODO: This part needs some improvement. // Stripping the symbol name is not a very robust process. new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); - assert(get_mode()==ID_java); new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); auto res_it=function_application_cache.insert(std::make_pair(new_expr, diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 1391ffd2315..e792537cb8d 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -43,24 +43,56 @@ string_refinementt::string_refinementt( supert(_ns, _prop), use_counter_example(false), do_concretizing(false), - initial_loop_bound(refinement_bound) + initial_loop_bound(refinement_bound), + non_empty_string(false) { } /*******************************************************************\ -Function: string_refinementt::set_mode +Function: string_refinementt::set_max_string_length - Purpose: determine which language should be used + Inputs: + i - maximum length which is allowed for strings. + by default the strings length has no other limit + than the maximal integer according to the type of their + length, for instance 2^31-1 for Java. + + Purpose: Add constraints on the size of strings used in the + program. + +\*******************************************************************/ + +void string_refinementt::set_max_string_length(unsigned int i) +{ + generator.max_string_length=i; +} + +/*******************************************************************\ + +Function: string_refinementt::set_max_string_length + + Purpose: Add constraints on the size of nondet character arrays + to ensure they have length at least 1 + +\*******************************************************************/ + +void string_refinementt::enforce_non_empty_string() +{ + non_empty_string=true; +} + +/*******************************************************************\ + +Function: string_refinementt::enforce_printable_characters + + Purpose: Add constraints on characters used in the program + to ensure they are printable \*******************************************************************/ -void string_refinementt::set_mode() +void string_refinementt::enforce_printable_characters() { - debug() << "initializing mode" << eom; - symbolt init=ns.lookup(irep_idt(CPROVER_PREFIX"initialize")); - irep_idt mode=init.mode; - debug() << "mode detected as " << mode << eom; - generator.set_mode(mode); + generator.force_printable_characters=true; } /*******************************************************************\ @@ -283,52 +315,71 @@ bool string_refinementt::add_axioms_for_string_assigns(const exprt &lhs, /*******************************************************************\ -Function: string_refinementt::concretize_results +Function: string_refinementt::concretize_string - Purpose: For each string whose length has been solved, add constants + Input: + expr - an expression + + Purpose: If the expression is of type string, then adds constants to the index set to force the solver to pick concrete values for each character, and fill the map `found_length` \*******************************************************************/ -void string_refinementt::concretize_results() +void string_refinementt::concretize_string(const exprt &expr) { - for(const auto& it : symbol_resolve) + if(refined_string_typet::is_refined_string_type(expr.type())) { - if(refined_string_typet::is_refined_string_type(it.second.type())) + string_exprt str=to_string_expr(expr); + exprt length=get(str.length()); + add_lemma(equal_exprt(str.length(), length)); + exprt content=str.content(); + replace_expr(symbol_resolve, content); + found_length[content]=length; + mp_integer found_length; + if(!to_integer(length, found_length)) { - string_exprt str=to_string_expr(it.second); - exprt length=current_model[str.length()]; - exprt content=str.content(); - replace_expr(symbol_resolve, content); - found_length[content]=length; - mp_integer found_length; - if(!to_integer(length, found_length)) + assert(found_length.is_long()); + if(found_length < 0) { - assert(found_length.is_long()); - if(found_length < 0) - { - debug() << "concretize_results: WARNING found length is negative" - << eom; - } - else + debug() << "concretize_results: WARNING found length is negative" + << eom; + } + else + { + unsigned long concretize_limit=found_length.to_long(); + concretize_limit=concretize_limit>generator.max_string_length? + generator.max_string_length:concretize_limit; + exprt content_expr=str.content(); + for(unsigned long i=0; iMAX_CONCRETE_STRING_SIZE? - MAX_CONCRETE_STRING_SIZE:concretize_limit; - exprt content_expr=str.content(); - replace_expr(current_model, content_expr); - for(size_t i=0; iMAX_CONCRETE_STRING_SIZE) + if(n>generator.max_string_length) { #if 0 debug() << "(sr::get_array) long string (size=" << n << ")" << eom; @@ -869,6 +925,125 @@ void string_refinementt::fill_model() } } + +/*******************************************************************\ + +Function: string_refinementt::substitute_array_with_expr() + + Inputs: + expr - A (possibly nested) 'with' expression on an `array_of` + expression + index - An index with which to build the equality condition + + Outputs: An expression containing no 'with' expression + + Purpose: Create a new expression where 'with' expressions on arrays + are replaced by 'if' expressions. + e.g. for an array access arr[x], where: + `arr := array_of(12) with {0:=24} with {2:=42}` + the constructed expression will be: + `index==0 ? 24 : index==2 ? 42 : 12` + +\*******************************************************************/ + +exprt string_refinementt::substitute_array_with_expr( + const exprt &expr, const exprt &index) const +{ + if(expr.id()==ID_with) + { + const with_exprt &with_expr=to_with_expr(expr); + const exprt &then_expr=with_expr.new_value(); + exprt else_expr=substitute_array_with_expr(with_expr.old(), index); + const typet &type=then_expr.type(); + assert(else_expr.type()==type); + return if_exprt( + equal_exprt(index, with_expr.where()), then_expr, else_expr, type); + } + else + { + // Only handle 'with' expressions on 'array_of' expressions. + assert(expr.id()==ID_array_of); + return to_array_of_expr(expr).what(); + } +} + +/*******************************************************************\ + +Function: string_refinementt::substitute_array_access() + + Inputs: + expr - an expression containing array accesses + + Outputs: an expression containing no array access + + Purpose: create an equivalent expression where array accesses and + 'with' expressions are replaced by 'if' expressions. + e.g. for an array access arr[x], where: + `arr := {12, 24, 48}` + the constructed expression will be: + `index==0 ? 12 : index==1 ? 24 : 48` + +\*******************************************************************/ + +void string_refinementt::substitute_array_access(exprt &expr) const +{ + for(auto &op : expr.operands()) + substitute_array_access(op); + + if(expr.id()==ID_index) + { + index_exprt &index_expr=to_index_expr(expr); + + if(index_expr.array().id()==ID_symbol) + { + expr=index_expr; + return; + } + + if(index_expr.array().id()==ID_with) + { + expr=substitute_array_with_expr( + index_expr.array(), index_expr.index()); + return; + } + + if(index_expr.array().id()==ID_array_of) + { + expr=to_array_of_expr(index_expr.array()).op(); + return; + } + + assert(index_expr.array().id()==ID_array); + array_exprt &array_expr=to_array_expr(index_expr.array()); + + size_t last_index=array_expr.operands().size()-1; + assert(last_index>=0); + + const typet &char_type=index_expr.array().type().subtype(); + exprt ite=array_expr.operands()[last_index]; + + if(ite.type()!=char_type) + { + // We have to manualy set the type for unknown values + assert(ite.id()==ID_unknown); + ite.type()=char_type; + } + + for(long i=last_index-1; i>=0; --i) + { + exprt op=array_expr.operands()[i]; + if(op.type()!=char_type) + { + assert(op.id()==ID_unknown); + op.type()=char_type; + } + equal_exprt equals(index_expr.index(), from_integer(i, java_int_type())); + ite=if_exprt(equals, op, ite, char_type); + } + expr=ite; + } +} + /*******************************************************************\ Function: string_refinementt::add_negation_of_constraint_to_solver @@ -913,6 +1088,7 @@ void string_refinementt::add_negation_of_constraint_to_solver( and_exprt negaxiom(premise, not_exprt(axiom.body())); debug() << "(sr::check_axioms) negated axiom: " << from_expr(negaxiom) << eom; + substitute_array_access(negaxiom); solver << negaxiom; } @@ -953,6 +1129,7 @@ bool string_refinementt::check_axioms() satcheck_no_simplifiert sat_check; supert solver(ns, sat_check); + solver.set_ui(ui); add_negation_of_constraint_to_solver(axiom_in_model, solver); switch(solver()) @@ -1007,6 +1184,7 @@ bool string_refinementt::check_axioms() exprt premise(axiom.premise()); exprt body(axiom.body()); implies_exprt instance(premise, body); + replace_expr(symbol_resolve, instance); replace_expr(axiom.univ_var(), val, instance); debug() << "adding counter example " << from_expr(instance) << eom; add_lemma(instance); diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index 0a62a6abe7d..cc3f530039e 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -2,7 +2,7 @@ Module: String support via creating string constraints and progressively instantiating the universal constraints as needed. - The procedure is described in the PASS paper at HVC'13: + The procedure is described in the PASS paper at HVC'13: "PASS: String Solving with Parameterized Array and Interval Automaton" by Guodong Li and Indradeep Ghosh @@ -18,11 +18,6 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include -// Defines a limit on the string witnesses we will output. -// Longer strings are still concidered possible by the solver but -// it will not output them. -#define MAX_CONCRETE_STRING_SIZE 500 - #define MAX_NB_REFINEMENT 100 class string_refinementt: public bv_refinementt @@ -38,8 +33,13 @@ class string_refinementt: public bv_refinementt // Should we use counter examples at each iteration? bool use_counter_example; + // Should we concretize strings when the solver finished bool do_concretizing; + void set_max_string_length(unsigned int i); + void enforce_non_empty_string(); + void enforce_printable_characters(); + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); @@ -65,6 +65,9 @@ class string_refinementt: public bv_refinementt string_constraint_generatort generator; + bool non_empty_string; + expr_sett nondet_arrays; + // Simple constraints that have been given to the solver expr_sett seen_instances; @@ -98,6 +101,8 @@ class string_refinementt: public bv_refinementt exprt substitute_function_applications(exprt expr); typet substitute_java_string_types(typet type); exprt substitute_java_strings(exprt expr); + exprt substitute_array_with_expr(const exprt &expr, const exprt &index) const; + void substitute_array_access(exprt &expr) const; void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); bool is_char_array(const typet &type) const; bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); @@ -134,8 +139,10 @@ class string_refinementt: public bv_refinementt exprt simplify_sum(const exprt &f) const; + void concretize_string(const exprt &expr); void concretize_results(); void concretize_lengths(); + // Length of char arrays found during concretization std::map found_length; diff --git a/src/util/string_expr.h b/src/util/string_expr.h index 317e3c1ee6e..ff19a8c8259 100644 --- a/src/util/string_expr.h +++ b/src/util/string_expr.h @@ -74,7 +74,7 @@ class string_exprt: public struct_exprt return binary_relation_exprt(rhs.length(), ID_lt, length()); } - binary_relation_exprt axiom_for_is_strictly_longer_than(int i) const + binary_relation_exprt axiom_for_is_strictly_longer_than(mp_integer i) const { return axiom_for_is_strictly_longer_than(from_integer(i, length().type())); } @@ -91,7 +91,7 @@ class string_exprt: public struct_exprt return binary_relation_exprt(length(), ID_le, rhs); } - binary_relation_exprt axiom_for_is_shorter_than(int i) const + binary_relation_exprt axiom_for_is_shorter_than(mp_integer i) const { return axiom_for_is_shorter_than(from_integer(i, length().type())); } @@ -119,7 +119,7 @@ class string_exprt: public struct_exprt return equal_exprt(length(), rhs); } - equal_exprt axiom_for_has_length(int i) const + equal_exprt axiom_for_has_length(mp_integer i) const { return axiom_for_has_length(from_integer(i, length().type())); }