From bc24e2cb9dfc58dbd44527031ba449cd9ef133fc Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Sat, 31 Dec 2016 15:27:04 +0300 Subject: [PATCH 01/17] Preprocessing of goto programs for string refinement --- .../string_refine_preprocess.cpp | 1271 +++++++++++++++++ src/goto-programs/string_refine_preprocess.h | 212 +++ src/util/std_expr.h | 12 + 3 files changed, 1495 insertions(+) create mode 100644 src/goto-programs/string_refine_preprocess.cpp create mode 100644 src/goto-programs/string_refine_preprocess.h diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp new file mode 100644 index 00000000000..ac7446de7d5 --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -0,0 +1,1271 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the string solver + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +// TODO: refined_string_type should be moved to util +#include +#include + +#include "string_refine_preprocess.h" + +/*******************************************************************\ + +Function: string_refine_preprocesst::new_tmp_symbol + + Inputs: a name and a type + + Outputs: a new symbol + + Purpose: add a temporary symbol with the given name and type to the + symbol table and returns it + +\*******************************************************************/ + +symbol_exprt string_refine_preprocesst::new_tmp_symbol( + const std::string &prefix, const typet &type) +{ + // TODO : this method should be replaced by a call to get_fresh_aux_symbol + // once fresh symbol generation has been factored out + auxiliary_symbolt tmp_symbol; + std::ostringstream buf; + buf << "string_preprocess_tmp" << (next_symbol_id++) << "$" << prefix; + std::string name=buf.str(); + tmp_symbol.base_name=name; + tmp_symbol.is_static_lifetime=false; + tmp_symbol.mode=ID_java; + tmp_symbol.name=name; + tmp_symbol.type=type; + symbol_table.add(tmp_symbol); + return tmp_symbol.symbol_expr(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::new_symbol + + Inputs: a name and a type + + Outputs: a new symbol + + Purpose: add a symbol with the given name and type to the symbol table + and returns it + +\*******************************************************************/ + +symbol_exprt string_refine_preprocesst::new_symbol( + const std::string &prefix, const typet &type, bool static_lifetime) +{ + symbolt tmp_symbol; + std::ostringstream buf; + buf << "string_preprocess" << (next_symbol_id++) << "$" << prefix; + std::string name=buf.str(); + tmp_symbol.base_name=name; + tmp_symbol.is_static_lifetime=static_lifetime; + tmp_symbol.is_state_var=static_lifetime; + tmp_symbol.is_type=false; + tmp_symbol.is_lvalue=true; + tmp_symbol.mode=ID_java; + tmp_symbol.name=name; + tmp_symbol.type=type; + symbol_table.add(tmp_symbol); + return tmp_symbol.symbol_expr(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::declare_function + + Inputs: a name and a type + + Purpose: declare a function with the given name and type + +\*******************************************************************/ + +void string_refine_preprocesst::declare_function( + irep_idt function_name, const typet &type) +{ + auxiliary_symbolt func_symbol; + func_symbol.base_name=function_name; + func_symbol.is_static_lifetime=false; + func_symbol.mode=ID_java; + func_symbol.name=function_name; + func_symbol.type=type; + symbol_table.add(func_symbol); + goto_functions.function_map[function_name]; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::get_data_and_length_type_of_string + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a java string, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_string( + const exprt &expr, typet &data_type, typet &length_type) +{ + assert(refined_string_typet::is_java_string_type(expr.type()) || + refined_string_typet::is_java_string_builder_type(expr.type())); + typet object_type=ns.follow(expr.type()); + assert(object_type.id()==ID_struct); + const struct_typet &struct_type=to_struct_type(object_type); + for(auto component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_cprover_string_assign + + Inputs: a goto_program, a position in this program, an expression and a + location + + Outputs: an expression + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs if it is a string + pointer and the original rhs otherwise. + +\*******************************************************************/ + +exprt string_refine_preprocesst::make_cprover_string_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &rhs, + const source_locationt &location) +{ + if(refined_string_typet::is_java_string_pointer_type(rhs.type())) + { + auto pair=java_to_cprover_strings.insert( + std::pair(rhs, nil_exprt())); + + if(pair.second) + { + // We do the following assignments: + // cprover_string_array = *(rhs->data) + // cprover_string = { rhs->length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + member_exprt length(deref, "length", length_type); + symbol_exprt array_lhs=new_symbol( + "cprover_string_array", data_type.subtype()); + + // string expression for the rhs of the second assignment + string_exprt new_rhs( + length, array_lhs, refined_string_typet(length_type, data_type)); + + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + + symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); + + std::list assignments; + assignments.emplace_back(array_lhs, deref_data); + assignments.emplace_back(lhs, new_rhs); + insert_assignments(goto_program, i_it, assignments); + i_it=goto_program.insert_after(i_it); + pair.first->second=lhs; + } + return pair.first->second; + } + else if(rhs.id()==ID_typecast && + refined_string_typet::is_java_string_pointer_type(rhs.op0().type())) + { + exprt new_rhs=make_cprover_string_assign( + goto_program, i_it, rhs.op0(), location); + return typecast_exprt(new_rhs, rhs.type()); + } + else + return rhs; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_normal_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: replace the current instruction by: + > lhs=function_name(arguments) : return_type @ location + If given, signature can force String conversion of given arguments. + The convention for signature is one character by argument + and 'S' denotes string. + +\*******************************************************************/ + +void string_refine_preprocesst::make_normal_assign( + goto_programt &goto_program, + goto_programt::targett i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + if(function_name==ID_cprover_string_copy_func) + make_string_copy(goto_program, i_it, lhs, arguments[0], location); + else + { + function_application_exprt rhs( + symbol_exprt(function_name), function_type.return_type()); + rhs.add_source_location()=location; + declare_function(function_name, function_type); + + exprt::operandst processed_arguments=process_arguments( + goto_program, i_it, arguments, location, signature); + rhs.arguments()=processed_arguments; + + code_assignt assignment(lhs, rhs); + assignment.add_source_location()=location; + i_it->make_assignment(); + i_it->code=assignment; + } +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::insert_assignments + + Inputs: a goto_program, a position in this program, a list of assignments + + Purpose: add the assignments to the program in the order they are given + +\*******************************************************************/ + +void string_refine_preprocesst::insert_assignments( + goto_programt &goto_program, + goto_programt::targett &i_it, + const std::list &va) +{ + auto i=va.begin(); + i_it->make_assignment(); + i_it->code=*i; + for(i++; i!=va.end(); i++) + { + i_it=goto_program.insert_after(i_it); + i_it->make_assignment(); + i_it->code=*i; + } +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: replace the current instruction by: + > lhs=malloc(String *) + > lhs->length=function_name_length(arguments) + > tmp_data=function_name_data(arguments) + > lhs->data=&tmp_data + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + assert(refined_string_typet::is_java_string_pointer_type( + function_type.return_type())); + dereference_exprt deref(lhs, lhs.type().subtype()); + typet object_type=ns.follow(deref.type()); + exprt object_size=size_of_expr(object_type, ns); + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + + std::string fnamel=id2string(function_name)+"_length"; + std::string fnamed=id2string(function_name)+"_data"; + declare_function(fnamel, length_type); + declare_function(fnamed, data_type); + function_application_exprt rhs_length(symbol_exprt(fnamel), length_type); + function_application_exprt rhs_data( + symbol_exprt(fnamed), data_type.subtype()); + + exprt::operandst processed_arguments=process_arguments( + goto_program, i_it, arguments, location, signature); + rhs_length.arguments()=processed_arguments; + rhs_data.arguments()=processed_arguments; + + symbol_exprt tmp_length=new_tmp_symbol("tmp_length", length_type); + symbol_exprt tmp_array=new_tmp_symbol("tmp_array", data_type.subtype()); + member_exprt lhs_length(deref, "length", length_type); + member_exprt lhs_data(deref, "data", tmp_array.type()); + + // lhs=malloc(String *) + + if(object_size.is_nil()) + debug() << "string_refine_preprocesst::make_string_assign " + << "got nil object_size" << eom; + + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + + std::list assigns; + assigns.emplace_back(lhs, malloc_expr); + assigns.emplace_back(tmp_length, rhs_length); + assigns.emplace_back(lhs_length, tmp_length); + assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); + insert_assignments(goto_program, i_it, assigns); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_assign + + Inputs: a goto_program, a position in this program, an expression lhs, + a function type, a function name, a vector of arguments, a location + and a signature + + Purpose: assign the result of the function application to lhs, + in case the function type is string, it does a special assignment + using `make_string_assign` + +\*******************************************************************/ + +void string_refine_preprocesst::make_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arg, + const source_locationt &loc, + const std::string &sig) +{ + if(refined_string_typet::is_java_string_pointer_type( + function_type.return_type())) + make_string_assign( + goto_program, i_it, lhs, function_type, function_name, arg, loc, sig); + else + make_normal_assign( + goto_program, i_it, lhs, function_type, function_name, arg, loc, sig); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_copy + + Inputs: a goto_program, a position in this program, a lhs expression, + an argument expression and a location + + Outputs: an expression + + Purpose: replace the current instruction by: + > lhs->length=argument->length + > tmp_data=*(argument->data) + > lhs->data=&tmp_data + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_copy( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const exprt &argument, + const source_locationt &location) +{ + // TODO : treat CharSequence and StringBuffer + assert(refined_string_typet::is_java_string_pointer_type(lhs.type()) || + refined_string_typet::is_java_string_builder_pointer_type(lhs.type())); + exprt deref=dereference_exprt(lhs, lhs.type().subtype()); + + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + + dereference_exprt deref_arg(argument, argument.type().subtype()); + std::list assignments; + + exprt lhs_length=get_length(deref, length_type); + exprt rhs_length=get_length(deref_arg, length_type); + assignments.emplace_back(lhs_length, rhs_length); + + symbol_exprt tmp_data=new_tmp_symbol("tmp_data", data_type.subtype()); + exprt rhs_data=get_data(deref_arg, data_type); + exprt lhs_data=get_data(deref, data_type); + assignments.emplace_back( + tmp_data, dereference_exprt(rhs_data, data_type.subtype())); + assignments.emplace_back(lhs_data, address_of_exprt(tmp_data)); + + insert_assignments(goto_program, i_it, assignments); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_function + + Inputs: a position in a goto program, a function name, an expression lhs, + a function type, name, arguments, a location and a signature string + + Purpose: at the current position replace `lhs=s.some_function(x,...)` + by `lhs=function_name(s,x,...)`; + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + if(refined_string_typet::is_java_string_pointer_type( + function_type.return_type())) + make_string_assign( + goto_program, + i_it, + lhs, + function_type, + function_name, + arguments, + location, + signature); + else + make_normal_assign( + goto_program, + i_it, + lhs, + function_type, + function_name, + arguments, + location, + signature); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_string_function + + Inputs: a position in a goto program, a function name and two Boolean options + + Purpose: at the current position replace `lhs=s.some_function(x,...)` + by `lhs=function_name(s,x,...)`; + option `assign_first_arg` uses `s` instead of `lhs` in the resulting + expression; + option `skip_first_arg`, removes `s` from the arguments, ie `x` is + the first one + +\*******************************************************************/ + +void string_refine_preprocesst::make_string_function( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature, + bool assign_first_arg, + bool skip_first_arg) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + code_typet function_type=to_code_type(function_call.function().type()); + code_typet new_type; + const source_locationt &loc=function_call.source_location(); + declare_function(function_name, function_type); + function_application_exprt rhs; + std::vector args; + if(assign_first_arg) + rhs.type()=function_call.arguments()[0].type(); + else + rhs.type()=function_type.return_type(); + rhs.add_source_location()=function_call.source_location(); + rhs.function()=symbol_exprt(function_name); + + std::size_t start_index=skip_first_arg?1:0; + for(std::size_t i=start_index; icode); + string_builders[function_call.lhs()]=function_call.arguments()[0]; + make_string_function( + goto_program, i_it, function_name, signature, true, false); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::build_function_application + + Inputs: a function name, a type, a location and a vector of arguments + + Outputs: a function application expression + + Purpose: declare a function and construct an function application expression + with the given function name, type, location and arguments + +\*******************************************************************/ + +function_application_exprt + string_refine_preprocesst::build_function_application( + const irep_idt &function_name, + const typet &type, + const source_locationt &location, + const exprt::operandst &arguments) +{ + declare_function(function_name, type); + function_application_exprt function_app(symbol_exprt(function_name), type); + function_app.add_source_location()=location; + for(const auto &arg : arguments) + function_app.arguments().push_back(arg); + + return function_app; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_to_char_array_function + + Inputs: a goto program and a position in that goto program + + Purpose: at the given position replace `return_tmp0=s.toCharArray()` with: + > return_tmp0->data=&((s->data)[0]) + > return_tmp0->length=s->length + +\*******************************************************************/ + +void string_refine_preprocesst::make_to_char_array_function( + goto_programt &goto_program, goto_programt::targett &i_it) +{ + const code_function_callt &function_call=to_code_function_call(i_it->code); + + assert(function_call.arguments().size()>=1); + const exprt &string_argument=function_call.arguments()[0]; + assert(refined_string_typet::is_java_string_pointer_type( + string_argument.type())); + + dereference_exprt deref(string_argument, string_argument.type().subtype()); + typet length_type, data_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; + + // &((s->data)[0]) + exprt rhs_data=get_data(deref, data_type); + dereference_exprt rhs_array(rhs_data, data_type.subtype()); + exprt first_index=from_integer(0, java_int_type()); + index_exprt first_element(rhs_array, first_index, java_char_type()); + address_of_exprt rhs_pointer(first_element); + + // return_tmp0->data=&((s->data)[0]) + typet deref_type=function_call.lhs().type().subtype(); + dereference_exprt deref_lhs(function_call.lhs(), deref_type); + exprt lhs_data=get_data(deref_lhs, data_type); + assignments.emplace_back(lhs_data, rhs_pointer); + + // return_tmp0->length=s->length + exprt rhs_length=get_length(deref, length_type); + exprt lhs_length=get_length(deref_lhs, length_type); + assignments.emplace_back(lhs_length, rhs_length); + insert_assignments(goto_program, i_it, assignments); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_cprover_char_array_assign + + Inputs: a goto_program, a position in this program, an expression and a + location + + Outputs: a char array expression (not a pointer) + + Purpose: Introduce a temporary variable for cprover strings; + returns the cprover_string corresponding to rhs + +\*******************************************************************/ + +exprt string_refine_preprocesst::make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &rhs, + const source_locationt &location) +{ + // TODO : add an assertion on the type of rhs + + // We do the following assignments: + // cprover_string_array = rhs.data + // cprover_string = { rhs.length; cprover_string_array } + + // string expression for the rhs of the second assignment + string_exprt new_rhs(java_char_type()); + + typet data_type=new_rhs.content().type(); + typet length_type=java_int_type(); + + symbol_exprt array_lhs=new_symbol("cprover_string_array", data_type); + exprt array_rhs=get_data(rhs, new_rhs.content().type()); + symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); + new_rhs.length()=get_length(rhs, length_type); + new_rhs.content()=array_lhs; + + std::list assignments; + assignments.emplace_back(array_lhs, array_rhs); + assignments.emplace_back(lhs, new_rhs); + insert_assignments(goto_program, i_it, assignments); + i_it=goto_program.insert_after(i_it); + return lhs; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::make_char_array_function + + Inputs: a position in a goto program, a function name, two Boolean options, + and the index of the char array argument in the function + + Purpose: at the given position replace + `lhs=s.some_function(...,char_array,...)` by + > cprover_string = { char_array->length, *char_array } + > tmp_string=function_name(s, cprover_string, ...) + option `assign_first_arg` uses `s` instead of `lhs` in the second + assignment; + option `skip_first_arg`, removes `s` from the arguments, ie `x` is + the first one; + argument index gives the index of the argument containing char_array + +\*******************************************************************/ + +void string_refine_preprocesst::make_char_array_function( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature, + size_t index, + bool assign_first_arg, + bool skip_first_arg) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + code_typet function_type=to_code_type(function_call.function().type()); + code_typet new_function_type; + const source_locationt &location=function_call.source_location(); + assert(function_call.arguments().size()>index); + const std::vector &args=function_call.arguments(); + std::vector new_args; + + exprt lhs; + if(assign_first_arg) + lhs=function_call.arguments()[0]; + else + lhs=function_call.lhs(); + + if(lhs.id()==ID_typecast) + lhs=to_typecast_expr(lhs).op(); + + exprt char_array=dereference_exprt( + function_call.arguments()[index], + function_call.arguments()[index].type().subtype()); + exprt string=make_cprover_char_array_assign( + goto_program, i_it, char_array, location); + + std::size_t start_index=skip_first_arg?1:0; + for(std::size_t i=start_index; icode); + string_builders[function_call.lhs()]=function_call.arguments()[0]; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::process_arguments + + Inputs: a goto program, a position, a list of expressions, a location and a + signature + + Outputs: a list of expressions + + Purpose: for each expression that is a string or that is at a position with + an 'S' character in the signature, we declare a new `cprover_string` + whose contents is deduced from the expression and replace the + expression by this cprover_string in the output list; + in the other case the expression is kept as is for the output list. + +\*******************************************************************/ + +exprt::operandst string_refine_preprocesst::process_arguments( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature) +{ + exprt::operandst new_arguments; + + for(std::size_t i=0; isecond); + else + { + if(isecond; + else return ""; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::replace_string_calls + + Inputs: a function in a goto_program + + Purpose: goes through the instructions, replace function calls to string + function by equivalent instructions using functions defined + for the string solver, replace string literals by string + expressions of the type used by the string solver + +\*******************************************************************/ + +void string_refine_preprocesst::replace_string_calls( + goto_functionst::function_mapt::iterator f_it) +{ + goto_programt &goto_program=f_it->second.body; + + Forall_goto_program_instructions(i_it, goto_program) + { + if(i_it->is_function_call()) + { + code_function_callt &function_call=to_code_function_call(i_it->code); + for(auto arg : function_call.arguments()) + { + auto sb_it=string_builders.find(arg); + if(sb_it!=string_builders.end()) + arg=sb_it->second; + } + + if(function_call.function().id()==ID_symbol) + { + const irep_idt &function_id= + to_symbol_expr(function_call.function()).get_identifier(); + std::string signature=function_signature(function_id); + + auto it=string_functions.find(function_id); + if(it!=string_functions.end()) + make_string_function( + goto_program, i_it, it->second, signature, false, false); + + it=side_effect_functions.find(function_id); + if(it!=side_effect_functions.end()) + make_string_function_side_effect( + goto_program, i_it, it->second, signature); + + it=string_function_calls.find(function_id); + if(it!=string_function_calls.end()) + make_string_function_call(goto_program, i_it, it->second, signature); + + it=string_of_char_array_functions.find(function_id); + if(it!=string_of_char_array_functions.end()) + make_char_array_function( + goto_program, i_it, it->second, signature, 0); + + it=string_of_char_array_function_calls.find(function_id); + if(it!=string_of_char_array_function_calls.end()) + make_char_array_function_call( + goto_program, i_it, it->second, signature); + + it=side_effect_char_array_functions.find(function_id); + if(it!=side_effect_char_array_functions.end()) + make_char_array_side_effect( + goto_program, i_it, it->second, signature); + + if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) + make_to_char_array_function(goto_program, i_it); + } + } + else + { + if(i_it->is_assign()) + { + // In assignments we replace string literals and C string functions + code_assignt assignment=to_code_assign(i_it->code); + + exprt new_rhs=assignment.rhs(); + code_assignt new_assignment(assignment.lhs(), new_rhs); + + if(new_rhs.id()==ID_function_application) + { + function_application_exprt f=to_function_application_expr(new_rhs); + const exprt &name=f.function(); + assert(name.id()==ID_symbol); + const irep_idt &id=to_symbol_expr(name).get_identifier(); + auto it=c_string_functions.find(id); + if(it!=c_string_functions.end()) + { + declare_function(it->second, f.type()); + f.function()=symbol_exprt(it->second); + new_assignment=code_assignt(assignment.lhs(), f); + } + } + + new_assignment.add_source_location()=assignment.source_location(); + i_it->make_assignment(); + i_it->code=new_assignment; + } + } + } + return; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::initialize_string_function_table + + Purpose: fill maps with correspondance from java method names to cprover + functions + +\*******************************************************************/ + +void string_refine_preprocesst::initialize_string_function_table() +{ + string_functions["java::java.lang.String.codePointAt:(I)I"]= + ID_cprover_string_code_point_at_func; + string_functions["java::java.lang.String.codePointBefore:(I)I"]= + ID_cprover_string_code_point_before_func; + string_functions["java::java.lang.String.codePointCount:(II)I"]= + ID_cprover_string_code_point_count_func; + string_functions["java::java.lang.String.offsetByCodePoints:(II)I"]= + ID_cprover_string_offset_by_code_point_func; + string_functions["java::java.lang.String.hashCode:()I"]= + ID_cprover_string_hash_code_func; + string_functions["java::java.lang.String.indexOf:(I)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(II)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(Ljava/lang/String;)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.indexOf:(Ljava/lang/String;I)I"]= + ID_cprover_string_index_of_func; + string_functions["java::java.lang.String.lastIndexOf:(I)I"]= + ID_cprover_string_last_index_of_func; + string_functions["java::java.lang.String.lastIndexOf:(II)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.lastIndexOf:(Ljava/lang/String;)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.lastIndexOf:(Ljava/lang/String;I)I"]= + ID_cprover_string_last_index_of_func; + string_functions + ["java::java.lang.String.concat:(Ljava/lang/String;)Ljava/lang/String;"]= + ID_cprover_string_concat_func; + string_functions["java::java.lang.String.length:()I"]= + ID_cprover_string_length_func; + string_functions["java::java.lang.StringBuilder.length:()I"]= + ID_cprover_string_length_func; + string_functions["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]= + ID_cprover_string_equal_func; + string_functions + ["java::java.lang.String.equalsIgnoreCase:(Ljava/lang/String;)Z"]= + ID_cprover_string_equals_ignore_case_func; + string_functions["java::java.lang.String.startsWith:(Ljava/lang/String;)Z"]= + ID_cprover_string_startswith_func; + string_functions + ["java::java.lang.String.startsWith:(Ljava/lang/String;I)Z"]= + ID_cprover_string_startswith_func; + string_functions["java::java.lang.String.endsWith:(Ljava/lang/String;)Z"]= + ID_cprover_string_endswith_func; + string_functions["java::java.lang.String.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.substring:(I)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.StringBuilder.substring:(II)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.StringBuilder.substring:(I)Ljava/lang/String;"]= + ID_cprover_string_substring_func; + string_functions + ["java::java.lang.String.subSequence:(II)Ljava/lang/CharSequence;"]= + ID_cprover_string_substring_func; + string_functions["java::java.lang.String.trim:()Ljava/lang/String;"]= + ID_cprover_string_trim_func; + string_functions["java::java.lang.String.toLowerCase:()Ljava/lang/String;"]= + ID_cprover_string_to_lower_case_func; + string_functions["java::java.lang.String.toUpperCase:()Ljava/lang/String;"]= + ID_cprover_string_to_upper_case_func; + string_functions["java::java.lang.String.replace:(CC)Ljava/lang/String;"]= + ID_cprover_string_replace_func; + string_functions + ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + ID_cprover_string_contains_func; + string_functions["java::java.lang.String.compareTo:(Ljava/lang/String;)I"]= + ID_cprover_string_compare_to_func; + string_functions["java::java.lang.String.intern:()Ljava/lang/String;"]= + ID_cprover_string_intern_func; + string_functions["java::java.lang.String.isEmpty:()Z"]= + ID_cprover_string_is_empty_func; + string_functions["java::java.lang.String.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions["java::java.lang.StringBuilder.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions["java::java.lang.CharSequence.charAt:(I)C"]= + ID_cprover_string_char_at_func; + string_functions + ["java::java.lang.StringBuilder.toString:()Ljava/lang/String;"]= + ID_cprover_string_copy_func; + + string_functions["java::java.lang.String.valueOf:(F)Ljava/lang/String;"]= + ID_cprover_string_of_float_func; + string_functions["java::java.lang.Float.toString:(F)Ljava/lang/String;"]= + ID_cprover_string_of_float_func; + string_functions["java::java.lang.Integer.toString:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_func; + string_functions["java::java.lang.String.valueOf:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_func; + string_functions["java::java.lang.Integer.toHexString:(I)Ljava/lang/String;"]= + ID_cprover_string_of_int_hex_func; + string_functions["java::java.lang.String.valueOf:(L)Ljava/lang/String;"]= + ID_cprover_string_of_long_func; + string_functions["java::java.lang.String.valueOf:(D)Ljava/lang/String;"]= + ID_cprover_string_of_double_func; + string_functions["java::java.lang.String.valueOf:(Z)Ljava/lang/String;"]= + ID_cprover_string_of_bool_func; + string_functions["java::java.lang.String.valueOf:(C)Ljava/lang/String;"]= + ID_cprover_string_of_char_func; + string_functions["java::java.lang.Integer.parseInt:(Ljava/lang/String;)I"]= + ID_cprover_string_parse_int_func; + + side_effect_functions + ["java::java.lang.StringBuilder.append:(Ljava/lang/String;)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_func; + side_effect_functions["java::java.lang.StringBuilder.setCharAt:(IC)V"]= + ID_cprover_string_char_set_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(I)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_int_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(J)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_long_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(Z)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_bool_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(C)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_char_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(D)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_double_func; + side_effect_functions + ["java::java.lang.StringBuilder.append:(F)Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_float_func; + side_effect_functions + ["java::java.lang.StringBuilder.appendCodePoint:(I)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_concat_code_point_func; + side_effect_functions + ["java::java.lang.StringBuilder.delete:(II)Ljava/lang/StringBuilder;"]= + ID_cprover_string_delete_func; + side_effect_functions + ["java::java.lang.StringBuilder.deleteCharAt:(I)Ljava/lang/StringBuilder;"]= + ID_cprover_string_delete_char_at_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(II)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_int_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IJ)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_long_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IC)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_func; + side_effect_functions + ["java::java.lang.StringBuilder.insert:(IZ)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_bool_func; + side_effect_functions + ["java::java.lang.StringBuilder.setLength:(I)V"]= + ID_cprover_string_set_length_func; + + + side_effect_char_array_functions + ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_array_func; + side_effect_char_array_functions + ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= + ID_cprover_string_insert_char_array_func; + + string_function_calls + ["java::java.lang.String.:(Ljava/lang/String;)V"]= + ID_cprover_string_copy_func; + string_function_calls + ["java::java.lang.String.:(Ljava/lang/StringBuilder;)V"]= + ID_cprover_string_copy_func; + string_function_calls + ["java::java.lang.StringBuilder.:(Ljava/lang/String;)V"]= + ID_cprover_string_copy_func; + string_function_calls["java::java.lang.String.:()V"]= + ID_cprover_string_empty_string_func; + string_function_calls["java::java.lang.StringBuilder.:()V"]= + ID_cprover_string_empty_string_func; + + string_of_char_array_function_calls["java::java.lang.String.:([C)V"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_function_calls["java::java.lang.String.:([CII)V"]= + ID_cprover_string_of_char_array_func; + + string_of_char_array_functions + ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= + ID_cprover_string_of_char_array_func; + + c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= + ID_cprover_string_literal_func; + c_string_functions["__CPROVER_uninterpreted_string_char_at_func"]= + ID_cprover_string_char_at_func; + c_string_functions["__CPROVER_uninterpreted_string_equal_func"]= + ID_cprover_string_equal_func; + c_string_functions["__CPROVER_uninterpreted_string_concat_func"]= + ID_cprover_string_concat_func; + c_string_functions["__CPROVER_uninterpreted_string_length_func"]= + ID_cprover_string_length_func; + c_string_functions["__CPROVER_uninterpreted_string_substring_func"]= + ID_cprover_string_substring_func; + c_string_functions["__CPROVER_uninterpreted_string_is_prefix_func"]= + ID_cprover_string_is_prefix_func; + c_string_functions["__CPROVER_uninterpreted_string_is_suffix_func"]= + ID_cprover_string_is_suffix_func; + c_string_functions["__CPROVER_uninterpreted_string_contains_func"]= + ID_cprover_string_contains_func; + c_string_functions["__CPROVER_uninterpreted_string_index_of_func"]= + ID_cprover_string_index_of_func; + c_string_functions["__CPROVER_uninterpreted_string_last_index_of_func"]= + ID_cprover_string_last_index_of_func; + c_string_functions["__CPROVER_uninterpreted_string_char_set_func"]= + ID_cprover_string_char_set_func; + c_string_functions["__CPROVER_uninterpreted_string_copy_func"]= + ID_cprover_string_copy_func; + c_string_functions["__CPROVER_uninterpreted_string_parse_int_func"]= + ID_cprover_string_parse_int_func; + c_string_functions["__CPROVER_uninterpreted_string_of_int_func"]= + ID_cprover_string_of_int_func; + + signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; + signatures + ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; +} + +/*******************************************************************\ + +Constructor: string_refine_preprocesst::string_refine_preprocesst + + Inputs: a symbol table, goto functions, a message handler + + Purpose: process the goto function by replacing calls to string functions + +\*******************************************************************/ + +string_refine_preprocesst::string_refine_preprocesst( + symbol_tablet &_symbol_table, + goto_functionst &_goto_functions, + message_handlert &_message_handler) + :messaget(_message_handler), + symbol_table(_symbol_table), + ns(_symbol_table), + goto_functions(_goto_functions), + jls_ptr(symbol_typet("java::java.lang.String")), + next_symbol_id(0) +{ + initialize_string_function_table(); + Forall_goto_functions(it, goto_functions) + replace_string_calls(it); +} diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h new file mode 100644 index 00000000000..ae282f7aed6 --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.h @@ -0,0 +1,212 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the PASS algorithm + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H +#define CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H + +#include +#include + +class string_refine_preprocesst:public messaget +{ + private: + namespacet ns; + symbol_tablet & symbol_table; + goto_functionst & goto_functions; + + // String builders maps the different names of a same StringBuilder object + // to a unique expression. + std::map string_builders; + + // Map name of Java string functions to there equivalent in the solver + std::map side_effect_functions; + std::map string_functions; + std::map c_string_functions; + std::map string_function_calls; + std::map string_of_char_array_functions; + std::map string_of_char_array_function_calls; + std::map side_effect_char_array_functions; + + std::map signatures; + std::map hidden_strings; + std::map java_to_cprover_strings; + + // unique id for each newly created symbols + int next_symbol_id; + + public: + string_refine_preprocesst( + symbol_tablet &, goto_functionst &, message_handlert &); + + private: + void initialize_string_function_table(); + + symbol_exprt new_tmp_symbol(const std::string &name, const typet &type); + + symbol_exprt new_symbol( + const std::string &prefix, const typet &type, bool static_lifetime=true); + + // get the data member of a java string + static exprt get_data(const exprt &string, const typet &data_type) + { + return member_exprt(string, "data", data_type); + } + + // get the length member of a java string + exprt get_length(const exprt &string, const typet &length_type) + { + return member_exprt(string, "length", length_type); + } + + + // type of pointers to string + pointer_typet jls_ptr; + exprt replace_string(const exprt &in); + exprt replace_string_in_assign(const exprt &in); + + void insert_assignments( + goto_programt &goto_program, + goto_programt::targett &i_it, + const std::list &va); + + exprt replace_string_pointer(const exprt &in); + + // Replace string builders by expression of the mapping and make + // assignments for strings as string_exprt + exprt::operandst process_arguments( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature=""); + + // Signature of the named function if it is defined in the signature map, + // empty string otherwise + std::string function_signature(const irep_idt &function_id); + + void declare_function(irep_idt function_name, const typet &type); + + void get_data_and_length_type_of_string( + const exprt &expr, typet &data_type, typet &length_type); + + function_application_exprt build_function_application( + const irep_idt &function_name, + const typet &type, + const source_locationt &location, + const exprt::operandst &arguments); + + void make_normal_assign( + goto_programt &goto_program, + goto_programt::targett i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature=""); + + void make_string_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature); + + void make_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arg, + const source_locationt &loc, + const std::string &sig); + + exprt make_cprover_string_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &rhs, + const source_locationt &location); + + void make_string_copy( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const exprt &argument, + const source_locationt &location); + + void make_string_function( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature, + bool assign_first_arg=false, + bool skip_first_arg=false); + + void make_string_function( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &lhs, + const code_typet &function_type, + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &location, + const std::string &signature); + + void make_string_function_call( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature); + + void make_string_function_side_effect( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature); + + void make_to_char_array_function( + goto_programt &goto_program, goto_programt::targett &); + + exprt make_cprover_char_array_assign( + goto_programt &goto_program, + goto_programt::targett &i_it, + const exprt &rhs, + const source_locationt &location); + + void make_char_array_function( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature, + size_t index, + bool assign_first_arg=false, + bool skip_first_arg=false); + + void make_char_array_function_call( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature); + + void make_char_array_side_effect( + goto_programt &goto_program, + goto_programt::targett &i_it, + const irep_idt &function_name, + const std::string &signature); + + void replace_string_calls(goto_functionst::function_mapt::iterator f_it); +}; + +#endif diff --git a/src/util/std_expr.h b/src/util/std_expr.h index a2f78ab25e8..b50413e8598 100644 --- a/src/util/std_expr.h +++ b/src/util/std_expr.h @@ -3450,6 +3450,18 @@ class function_application_exprt:public exprt operands().resize(2); } + function_application_exprt(const typet &_type): + exprt(ID_function_application, _type) + { + operands().resize(2); + } + + function_application_exprt(const symbol_exprt &_function, const typet &_type): + function_application_exprt(_type) + { + function()=_function; + } + exprt &function() { return op0(); From c1c29c381a0043c862d8ad884e2e7fda1c27c18d Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 17 Feb 2017 13:10:40 +0000 Subject: [PATCH 02/17] Moving is_java_string_type functions to string_preprocess This avoid dependencies between goto-programs and solver. We also removed function is_unrefined_string_type which should not be needed in the string solver. Removed also mention of java string types and unrefined types in the solver. These mentions should not be necessary, since we are not supposed to do anything java specific. The solver should only have to deal with refined string types and possibly char arrays. --- .../string_refine_preprocess.cpp | 144 ++++++++++++++++-- src/goto-programs/string_refine_preprocess.h | 10 ++ .../refinement/refined_string_type.cpp | 95 ------------ src/solvers/refinement/refined_string_type.h | 36 +---- .../string_constraint_generator_constants.cpp | 2 +- .../string_constraint_generator_indexof.cpp | 20 ++- .../string_constraint_generator_main.cpp | 4 +- src/solvers/refinement/string_refinement.cpp | 6 +- 8 files changed, 165 insertions(+), 152 deletions(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index ac7446de7d5..8724202e9a6 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -23,6 +23,124 @@ Date: September 2016 /*******************************************************************\ +Function: string_refine_preprocesst::is_java_string_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string pointers + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_pointer_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_type(const typet &type) +{ + if(type.id()==ID_symbol) + { + irep_idt tag=to_symbol_type(type).get_identifier(); + return tag=="java::java.lang.String"; + } + else if(type.id()==ID_struct) + { + irep_idt tag=to_struct_type(type).get_tag(); + return tag=="java.lang.String"; + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_builder_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string builder + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + irep_idt tag=to_struct_type(subtype).get_tag(); + return tag=="java.lang.StringBuilder"; + } + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_string_builder_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java StringBuilder + pointers + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_string_builder_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_builder_type(subtype); + } + return false; +} +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + const irep_idt &tag=to_struct_type(subtype).get_tag(); + return tag=="java.lang.CharSequence"; + } + } + return false; +} + +/******************************************************************* \ + Function: string_refine_preprocesst::new_tmp_symbol Inputs: a name and a type @@ -123,8 +241,8 @@ Function: string_refine_preprocesst::get_data_and_length_type_of_string void string_refine_preprocesst::get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type) { - assert(refined_string_typet::is_java_string_type(expr.type()) || - refined_string_typet::is_java_string_builder_type(expr.type())); + assert(is_java_string_type(expr.type()) || + is_java_string_builder_type(expr.type())); typet object_type=ns.follow(expr.type()); assert(object_type.id()==ID_struct); const struct_typet &struct_type=to_struct_type(object_type); @@ -156,7 +274,7 @@ exprt string_refine_preprocesst::make_cprover_string_assign( const exprt &rhs, const source_locationt &location) { - if(refined_string_typet::is_java_string_pointer_type(rhs.type())) + if(is_java_string_pointer_type(rhs.type())) { auto pair=java_to_cprover_strings.insert( std::pair(rhs, nil_exprt())); @@ -194,7 +312,7 @@ exprt string_refine_preprocesst::make_cprover_string_assign( return pair.first->second; } else if(rhs.id()==ID_typecast && - refined_string_typet::is_java_string_pointer_type(rhs.op0().type())) + is_java_string_pointer_type(rhs.op0().type())) { exprt new_rhs=make_cprover_string_assign( goto_program, i_it, rhs.op0(), location); @@ -302,8 +420,7 @@ void string_refine_preprocesst::make_string_assign( const source_locationt &location, const std::string &signature) { - assert(refined_string_typet::is_java_string_pointer_type( - function_type.return_type())); + assert(is_java_string_pointer_type(function_type.return_type())); dereference_exprt deref(lhs, lhs.type().subtype()); typet object_type=ns.follow(deref.type()); exprt object_size=size_of_expr(object_type, ns); @@ -372,8 +489,7 @@ void string_refine_preprocesst::make_assign( const source_locationt &loc, const std::string &sig) { - if(refined_string_typet::is_java_string_pointer_type( - function_type.return_type())) + if(is_java_string_pointer_type(function_type.return_type())) make_string_assign( goto_program, i_it, lhs, function_type, function_name, arg, loc, sig); else @@ -405,8 +521,8 @@ void string_refine_preprocesst::make_string_copy( const source_locationt &location) { // TODO : treat CharSequence and StringBuffer - assert(refined_string_typet::is_java_string_pointer_type(lhs.type()) || - refined_string_typet::is_java_string_builder_pointer_type(lhs.type())); + assert(is_java_string_pointer_type(lhs.type()) || + is_java_string_builder_pointer_type(lhs.type())); exprt deref=dereference_exprt(lhs, lhs.type().subtype()); typet length_type, data_type; @@ -451,8 +567,7 @@ void string_refine_preprocesst::make_string_function( const source_locationt &location, const std::string &signature) { - if(refined_string_typet::is_java_string_pointer_type( - function_type.return_type())) + if(is_java_string_pointer_type(function_type.return_type())) make_string_assign( goto_program, i_it, @@ -626,8 +741,7 @@ void string_refine_preprocesst::make_to_char_array_function( assert(function_call.arguments().size()>=1); const exprt &string_argument=function_call.arguments()[0]; - assert(refined_string_typet::is_java_string_pointer_type( - string_argument.type())); + assert(is_java_string_pointer_type(string_argument.type())); dereference_exprt deref(string_argument, string_argument.type().subtype()); typet length_type, data_type; @@ -857,7 +971,7 @@ exprt::operandst string_refine_preprocesst::process_arguments( { while(arg.id()==ID_typecast) arg=arg.op0(); - if(!refined_string_typet::is_java_string_type(arg.type())) + if(!is_java_string_type(arg.type())) arg=typecast_exprt(arg, jls_ptr); } exprt arg2=make_cprover_string_assign(goto_program, i_it, arg, location); diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h index ae282f7aed6..d235f4a4b40 100644 --- a/src/goto-programs/string_refine_preprocess.h +++ b/src/goto-programs/string_refine_preprocess.h @@ -49,6 +49,16 @@ class string_refine_preprocesst:public messaget private: void initialize_string_function_table(); + static bool is_java_string_pointer_type(const typet &type); + + static bool is_java_string_type(const typet &type); + + static bool is_java_string_builder_type(const typet &type); + + static bool is_java_string_builder_pointer_type(const typet &type); + + static bool is_java_char_sequence_type(const typet &type); + symbol_exprt new_tmp_symbol(const std::string &name, const typet &type); symbol_exprt new_symbol( diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp index 012d30eb8de..3362c71e0db 100644 --- a/src/solvers/refinement/refined_string_type.cpp +++ b/src/solvers/refinement/refined_string_type.cpp @@ -48,98 +48,3 @@ bool refined_string_typet::is_c_string_type(const typet &type) to_struct_type(type).get_tag()==CPROVER_PREFIX"string"; } -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_pointer_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string pointers - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_pointer_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - return is_java_string_type(subtype); - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_type(const typet &type) -{ - if(type.id()==ID_symbol) - { - irep_idt tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - irep_idt tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_string_builder_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java string builder - -\*******************************************************************/ - -bool refined_string_typet::is_java_string_builder_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - irep_idt tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; -} - -/*******************************************************************\ - -Function: refined_string_typet::is_java_char_sequence_type - - Inputs: a type - - Outputs: Boolean telling whether the type is that of java char sequence - -\*******************************************************************/ - -bool refined_string_typet::is_java_char_sequence_type(const typet &type) -{ - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } - } - return false; -} diff --git a/src/solvers/refinement/refined_string_type.h b/src/solvers/refinement/refined_string_type.h index 3ecc0ac3a9f..3b36b19e826 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/solvers/refinement/refined_string_type.h @@ -33,41 +33,20 @@ class refined_string_typet: public struct_typet const typet &get_char_type() const { - assert(components().size()==2); - return components()[0].type(); + return get_content_type().subtype(); } const typet &get_index_type() const { - return get_content_type().size().type(); + assert(components().size()==2); + return components()[0].type(); } - // For C the unrefined string type is __CPROVER_string, for java it is a - // pointer to a struct with tag java.lang.String + // For C the unrefined string type is __CPROVER_string static bool is_c_string_type(const typet &type); - static bool is_java_string_pointer_type(const typet &type); - - static bool is_java_string_type(const typet &type); - - static bool is_java_string_builder_type(const typet &type); - - static bool is_java_char_sequence_type(const typet &type); - - static bool is_unrefined_string_type(const typet &type) - { - return ( - is_c_string_type(type) || - is_java_string_pointer_type(type) || - is_java_string_builder_type(type) || - is_java_char_sequence_type(type)); - } - - static bool is_unrefined_string(const exprt &expr) - { - return (is_unrefined_string_type(expr.type())); - } + static bool is_refined_string_type(const typet &type); constant_exprt index_of_int(int i) const { @@ -75,9 +54,10 @@ class refined_string_typet: public struct_typet } }; -const refined_string_typet &to_refined_string_type(const typet &type) +extern inline const refined_string_typet &to_refined_string_type( + const typet &type) { - assert(type.id()==ID_struct); + assert(refined_string_typet::is_refined_string_type(type)); return static_cast(type); } diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 90769747949..64ffabfd266 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -132,7 +132,7 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant - assert(refined_string_typet::is_unrefined_string_type(arg.type())); + assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); // It seems the value of the string is lost, diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 93db048458b..283de683bd6 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -186,14 +186,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(refined_string_typet::is_java_string_pointer_type(c.type())) + if(c.type().id()==ID_unsignedbv) + { + return add_axioms_for_index_of( + str, typecast_exprt(c, ref_type.get_char_type()), from_index); + } + else { string_exprt sub=add_axioms_for_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } - else - return add_axioms_for_index_of( - str, typecast_exprt(c, ref_type.get_char_type()), from_index); } exprt string_constraint_generatort::add_axioms_for_last_index_of( @@ -280,12 +282,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(refined_string_typet::is_java_string_pointer_type(c.type())) + if(c.type().id()==ID_unsignedbv) + { + return add_axioms_for_last_index_of( + str, typecast_exprt(c, ref_type.get_char_type()), from_index); + } + else { string_exprt sub=add_axioms_for_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } - else - return add_axioms_for_last_index_of( - str, typecast_exprt(c, ref_type.get_char_type()), from_index); } diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 5a269e5ad3b..0179a2fcb61 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -215,10 +215,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { assert( - refined_string_typet::is_unrefined_string_type(expr.true_case().type())); + refined_string_typet::is_c_string_type(expr.true_case().type())); string_exprt t=add_axioms_for_string_expr(expr.true_case()); assert( - refined_string_typet::is_unrefined_string_type(expr.false_case().type())); + refined_string_typet::is_c_string_type(expr.false_case().type())); string_exprt f=add_axioms_for_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 16b89bc43c8..db9a2c460cc 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -146,7 +146,7 @@ bvt string_refinementt::convert_symbol(const exprt &expr) const irep_idt &identifier=expr.get(ID_identifier); assert(!identifier.empty()); - if(refined_string_typet::is_unrefined_string_type(type)) + if(refined_string_typet::is_refined_string_type(type)) { string_exprt str= generator.find_or_add_string_of_symbol(to_symbol_expr(expr)); @@ -216,7 +216,7 @@ bool string_refinementt::boolbv_set_equality_to_true(const equal_exprt &expr) if(expr.rhs().id()==ID_typecast) { exprt uncast=to_typecast_expr(expr.rhs()).op(); - if(refined_string_typet::is_unrefined_string_type(uncast.type())) + if(refined_string_typet::is_refined_string_type(uncast.type())) { debug() << "(sr) detected casted string" << eom; symbol_exprt sym=to_symbol_expr(expr.lhs()); @@ -225,7 +225,7 @@ bool string_refinementt::boolbv_set_equality_to_true(const equal_exprt &expr) } } - if(refined_string_typet::is_unrefined_string_type(type)) + if(refined_string_typet::is_refined_string_type(type)) { symbol_exprt sym=to_symbol_expr(expr.lhs()); generator.set_string_symbol_equal_to_expr(sym, expr.rhs()); From 30edf8f8e642041d6bca96f83a2cb2aae278c52c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 24 Feb 2017 11:02:55 +0000 Subject: [PATCH 03/17] Moving refined_string_type to util This is a more appropriate location for this module since it's used both in the preprocessing of goto-programs and the string solver. --- src/goto-programs/string_refine_preprocess.cpp | 3 +-- src/solvers/refinement/string_constraint.h | 2 +- src/solvers/refinement/string_constraint_generator.h | 2 +- src/{solvers/refinement => util}/refined_string_type.cpp | 0 src/{solvers/refinement => util}/refined_string_type.h | 4 ++-- 5 files changed, 5 insertions(+), 6 deletions(-) rename src/{solvers/refinement => util}/refined_string_type.cpp (100%) rename src/{solvers/refinement => util}/refined_string_type.h (93%) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index 8724202e9a6..01b8f95988b 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -14,9 +14,8 @@ Date: September 2016 #include #include #include +#include #include -// TODO: refined_string_type should be moved to util -#include #include #include "string_refine_preprocess.h" diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 61a6f3ebdd6..4b8d1229e09 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -15,7 +15,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include -#include +#include class string_constraintt: public exprt { diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 2a56f09fddd..ed898cb9a18 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,7 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include -#include +#include #include class string_constraint_generatort diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/util/refined_string_type.cpp similarity index 100% rename from src/solvers/refinement/refined_string_type.cpp rename to src/util/refined_string_type.cpp diff --git a/src/solvers/refinement/refined_string_type.h b/src/util/refined_string_type.h similarity index 93% rename from src/solvers/refinement/refined_string_type.h rename to src/util/refined_string_type.h index 3b36b19e826..477b00139ed 100644 --- a/src/solvers/refinement/refined_string_type.h +++ b/src/util/refined_string_type.h @@ -10,8 +10,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ -#ifndef CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H -#define CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H +#ifndef CPROVER_UTIL_REFINED_STRING_TYPE_H +#define CPROVER_UTIL_REFINED_STRING_TYPE_H #include #include From 94069c0e550e770717284716e025d79fefd0f342 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 27 Feb 2017 11:14:11 +0000 Subject: [PATCH 04/17] Removed distinction between c string type and refined string type --- src/util/refined_string_type.cpp | 9 +++++---- src/util/refined_string_type.h | 4 ---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/util/refined_string_type.cpp b/src/util/refined_string_type.cpp index 3362c71e0db..0625149cf83 100644 --- a/src/util/refined_string_type.cpp +++ b/src/util/refined_string_type.cpp @@ -29,22 +29,23 @@ refined_string_typet::refined_string_typet( array_typet char_array(char_type, infinite_index); components().emplace_back("length", index_type); components().emplace_back("content", char_array); + set_tag(CPROVER_PREFIX"refined_string_type"); } /*******************************************************************\ -Function: refined_string_typet::is_c_string_type +Function: refined_string_typet::is_refined_string_type Inputs: a type - Outputs: Boolean telling whether the type is that of C strings + Outputs: Boolean telling whether the input is a refined string type \*******************************************************************/ -bool refined_string_typet::is_c_string_type(const typet &type) +bool refined_string_typet::is_refined_string_type(const typet &type) { return type.id()==ID_struct && - to_struct_type(type).get_tag()==CPROVER_PREFIX"string"; + to_struct_type(type).get_tag()==CPROVER_PREFIX"refined_string_type"; } diff --git a/src/util/refined_string_type.h b/src/util/refined_string_type.h index 477b00139ed..a0cb7500e7f 100644 --- a/src/util/refined_string_type.h +++ b/src/util/refined_string_type.h @@ -42,10 +42,6 @@ class refined_string_typet: public struct_typet return components()[0].type(); } - // For C the unrefined string type is __CPROVER_string - - static bool is_c_string_type(const typet &type); - static bool is_refined_string_type(const typet &type); constant_exprt index_of_int(int i) const From d87a2f545db7959313680975f9dfc53af22d59ac Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Mon, 27 Feb 2017 11:14:21 +0000 Subject: [PATCH 05/17] Removed mention of c string type Refined string type should be used instead as we are trying to be language independent. --- src/solvers/refinement/string_constraint_generator_main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 0179a2fcb61..7fe510c2e35 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -215,10 +215,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( const if_exprt &expr) { assert( - refined_string_typet::is_c_string_type(expr.true_case().type())); + refined_string_typet::is_refined_string_type(expr.true_case().type())); string_exprt t=add_axioms_for_string_expr(expr.true_case()); assert( - refined_string_typet::is_c_string_type(expr.false_case().type())); + refined_string_typet::is_refined_string_type(expr.false_case().type())); string_exprt f=add_axioms_for_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); From 03a5fff199f7ab1a7c003081ce0d87c4475ecb45 Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Tue, 28 Feb 2017 17:33:36 +0000 Subject: [PATCH 06/17] Linting corrections on /util/ --- src/util/std_expr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/util/std_expr.h b/src/util/std_expr.h index b50413e8598..da11c4345c7 100644 --- a/src/util/std_expr.h +++ b/src/util/std_expr.h @@ -3450,14 +3450,14 @@ class function_application_exprt:public exprt operands().resize(2); } - function_application_exprt(const typet &_type): + explicit function_application_exprt(const typet &_type): exprt(ID_function_application, _type) { operands().resize(2); } - function_application_exprt(const symbol_exprt &_function, const typet &_type): - function_application_exprt(_type) + function_application_exprt(const symbol_exprt &_function, const typet &_type) + :function_application_exprt(_type) { function()=_function; } From f139dd2baa05cf191e455768a0079260b01a5544 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 7 Feb 2017 11:11:47 +0000 Subject: [PATCH 07/17] Making add_string_type more generic The class_name is now passed as argument so that we can reuse this function for StringBuilder and other "String-like" classes. --- .../java_bytecode_convert_class.cpp | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index f21b0bb1ce2..7f52c6ef77b 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -50,7 +50,16 @@ class java_bytecode_convert_classt:public messaget convert(parse_tree.parsed_class); else if(string_refinement_enabled && parse_tree.parsed_class.name=="java.lang.String") - add_string_type(); + add_string_type("java.lang.String"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuilder") + add_string_type("java.lang.StringBuilder"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.CharSequence") + add_string_type("java.lang.CharSequence"); + else if(string_refinement_enabled && + parse_tree.parsed_class.name=="java.lang.StringBuffer") + add_string_type("java.lang.StringBuffer"); else generate_class_stub(parse_tree.parsed_class.name); } @@ -72,7 +81,7 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); - void add_string_type(); + void add_string_type(const irep_idt &class_name); }; /*******************************************************************\ @@ -406,15 +415,17 @@ bool java_bytecode_convert_class( Function: java_bytecode_convert_classt::add_string_type + Inputs: a name for the class such as "java.lang.String" + Purpose: Implements the java.lang.String type in the case that we provide an internal implementation. \*******************************************************************/ -void java_bytecode_convert_classt::add_string_type() +void java_bytecode_convert_classt::add_string_type(const irep_idt &class_name) { class_typet string_type; - string_type.set_tag("java.lang.String"); + string_type.set_tag(class_name); string_type.components().resize(3); string_type.components()[0].set_name("@java.lang.Object"); string_type.components()[0].set_pretty_name("@java.lang.Object"); @@ -432,8 +443,8 @@ void java_bytecode_convert_classt::add_string_type() string_type.add_base(symbol_typet("java::java.lang.Object")); symbolt string_symbol; - string_symbol.name="java::java.lang.String"; - string_symbol.base_name="java.lang.String"; + string_symbol.name="java::"+id2string(class_name); + string_symbol.base_name=id2string(class_name); string_symbol.type=string_type; string_symbol.is_type=true; @@ -445,8 +456,8 @@ void java_bytecode_convert_classt::add_string_type() symbolt string_equals_symbol; string_equals_symbol.name= "java::java.lang.String.equals:(Ljava/lang/Object;)Z"; - string_equals_symbol.base_name="java.lang.String.equals"; - string_equals_symbol.pretty_name="java.lang.String.equals"; + string_equals_symbol.base_name=id2string(class_name)+".equals"; + string_equals_symbol.pretty_name=id2string(class_name)+".equals"; string_equals_symbol.mode=ID_java; code_typet string_equals_type; From 5bedcd67b657367981a81790dd93e62d028a1130 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 17 Feb 2017 13:18:54 +0000 Subject: [PATCH 08/17] Corrections in preprocessing, better signatures and string detection Better detection of string-like types and handling of char arrays Changing the way we deal with string initialisation from array Factorized part of type recognition and added StringBuilder and CharSequence to the list of java classes that should be considered as string. Changed the way we deal with StringBuilders: instead of having a map we add an assignment to the instructions. We also detect char arrays and handle them better. We now use substring and copy functions for initialisation from char array since the argument are always transformed into refined strings. For each string returned by a string function we also add into the java_string_to_cprover_string map a string_exprt. Corrected detection of typecast in make_cprover_string_assign Ensuring refined_string arguments of function applications are string_exprt Correct string_refine_preprocesst constructor. Order of initialisation is now the same as the order of declaration. This was picked up by g++ and clang. Added signatures for some StringBuilder functions. Removed map java_to_cprover_string and adapt signature for side effects. The usage of a map is not correct since strings can be modified by side effects. Signature is necessary for StringBuilders to be assigned in the right way with methods with side effects. Assign all string_exprt to refined string symbols in preprocessing. This makes it then easier to debug and to find witnesses for counter examples in the solver. Make signatures take priority over actual type and add signature for intern. --- .../string_refine_preprocess.cpp | 434 +++++++++++++----- src/goto-programs/string_refine_preprocess.h | 26 +- 2 files changed, 338 insertions(+), 122 deletions(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index 01b8f95988b..e8449c9e55a 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -22,6 +22,32 @@ Date: September 2016 /*******************************************************************\ +Function: string_refine_preprocesst::check_java_type + + Inputs: a type and a string + + Outputs: Boolean telling whether the type is a struct with the given + tag or a symbolic type with the tag prefixed by "java::" + +\*******************************************************************/ + +bool string_refine_preprocesst::check_java_type( + const typet &type, const std::string &tag) +{ + if(type.id()==ID_symbol) + { + irep_idt tag_id=to_symbol_type(type).get_identifier(); + return tag_id=="java::"+tag; + } + else if(type.id()==ID_struct) + { + irep_idt tag_id=to_struct_type(type).get_tag(); + return tag_id==tag; + } + return false; +} +/*******************************************************************\ + Function: string_refine_preprocesst::is_java_string_pointer_type Inputs: a type @@ -53,17 +79,7 @@ Function: string_refine_preprocesst::is_java_string_type bool string_refine_preprocesst::is_java_string_type(const typet &type) { - if(type.id()==ID_symbol) - { - irep_idt tag=to_symbol_type(type).get_identifier(); - return tag=="java::java.lang.String"; - } - else if(type.id()==ID_struct) - { - irep_idt tag=to_struct_type(type).get_tag(); - return tag=="java.lang.String"; - } - return false; + return check_java_type(type, "java.lang.String"); } /*******************************************************************\ @@ -78,17 +94,7 @@ Function: string_refine_preprocesst::is_java_string_builder_type bool string_refine_preprocesst::is_java_string_builder_type(const typet &type) { - if(type.id()==ID_pointer) - { - const pointer_typet &pt=to_pointer_type(type); - const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - irep_idt tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.StringBuilder"; - } - } - return false; + return check_java_type(type, "java.lang.StringBuilder"); } /*******************************************************************\ @@ -113,6 +119,7 @@ bool string_refine_preprocesst::is_java_string_builder_pointer_type( } return false; } + /*******************************************************************\ Function: string_refine_preprocesst::is_java_char_sequence_type @@ -124,21 +131,72 @@ Function: string_refine_preprocesst::is_java_char_sequence_type \*******************************************************************/ bool string_refine_preprocesst::is_java_char_sequence_type(const typet &type) +{ + return check_java_type(type, "java.lang.CharSequence"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_sequence_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char sequence + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_sequence_pointer_type( + const typet &type) { if(type.id()==ID_pointer) { const pointer_typet &pt=to_pointer_type(type); const typet &subtype=pt.subtype(); - if(subtype.id()==ID_struct) - { - const irep_idt &tag=to_struct_type(subtype).get_tag(); - return tag=="java.lang.CharSequence"; - } + return is_java_char_sequence_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_type(const typet &type) +{ + return check_java_type(type, "array[char]"); +} + +/*******************************************************************\ + +Function: string_refine_preprocesst::is_java_char_array_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of a pointer + to a java char array + +\*******************************************************************/ + +bool string_refine_preprocesst::is_java_char_array_pointer_type( + const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_char_array_type(subtype); } return false; } -/******************************************************************* \ +/*******************************************************************\ Function: string_refine_preprocesst::new_tmp_symbol @@ -226,6 +284,32 @@ void string_refine_preprocesst::declare_function( /*******************************************************************\ +Function: string_refine_preprocesst::get_data_and_length_type_of_char_array + + Inputs: an expression, a reference to a data type and a reference to a + length type + + Purpose: assuming the expression is a char array, figure out what + the types for length and data are and put them into the references + given as argument + +\*******************************************************************/ + +void string_refine_preprocesst::get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type) +{ + typet object_type=ns.follow(expr.type()); + assert(object_type.id()==ID_struct); + const struct_typet &struct_type=to_struct_type(object_type); + for(auto component : struct_type.components()) + if(component.get_name()=="length") + length_type=component.type(); + else if(component.get_name()=="data") + data_type=component.type(); +} + +/*******************************************************************\ + Function: string_refine_preprocesst::get_data_and_length_type_of_string Inputs: an expression, a reference to a data type and a reference to a @@ -241,7 +325,9 @@ void string_refine_preprocesst::get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type) { assert(is_java_string_type(expr.type()) || - is_java_string_builder_type(expr.type())); + is_java_string_builder_type(expr.type()) || + is_java_char_sequence_type(expr.type()) + ); typet object_type=ns.follow(expr.type()); assert(object_type.id()==ID_struct); const struct_typet &struct_type=to_struct_type(object_type); @@ -273,45 +359,61 @@ exprt string_refine_preprocesst::make_cprover_string_assign( const exprt &rhs, const source_locationt &location) { - if(is_java_string_pointer_type(rhs.type())) + if(implements_java_char_sequence(rhs.type())) { + #if 0 auto pair=java_to_cprover_strings.insert( - std::pair(rhs, nil_exprt())); + std::make_pair(rhs, nil_exprt())); if(pair.second) { +#endif // We do the following assignments: - // cprover_string_array = *(rhs->data) - // cprover_string = { rhs->length; cprover_string_array } + // 1 cprover_string_length= *(rhs->length) + // 2 cprover_string_array = *(rhs->data) + // 3 cprover_string = { cprover_string_length; cprover_string_array } dereference_exprt deref(rhs, rhs.type().subtype()); - typet data_type, length_type; get_data_and_length_type_of_string(deref, data_type, length_type); - member_exprt length(deref, "length", length_type); - symbol_exprt array_lhs=new_symbol( - "cprover_string_array", data_type.subtype()); + std::list assignments; + + // 1) cprover_string_length= *(rhs->length) + symbol_exprt length_lhs=new_symbol( + "cprover_string_length", length_type); - // string expression for the rhs of the second assignment - string_exprt new_rhs( - length, array_lhs, refined_string_typet(length_type, data_type)); + member_exprt deref_length(deref, "length", length_type); + assignments.emplace_back(length_lhs, deref_length); + // 2) cprover_string_array = *(rhs->data) + symbol_exprt array_lhs=new_symbol( + "cprover_string_array", data_type.subtype()); member_exprt data(deref, "data", data_type); dereference_exprt deref_data(data, data_type.subtype()); + assignments.emplace_back(array_lhs, deref_data); - symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); + // 3) cprover_string = { cprover_string_length; cprover_string_array } + // This assignment is useful for finding witnessing strings for counter + // examples + refined_string_typet ref_type(length_type, java_char_type()); + string_exprt new_rhs(length_lhs, array_lhs, ref_type); - std::list assignments; - assignments.emplace_back(array_lhs, deref_data); + symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); assignments.emplace_back(lhs, new_rhs); + insert_assignments(goto_program, i_it, assignments); i_it=goto_program.insert_after(i_it); - pair.first->second=lhs; + return new_rhs; + + #if 0 + //pair.first->second=lhs; + pair.first->second=new_rhs; } return pair.first->second; +#endif } else if(rhs.id()==ID_typecast && - is_java_string_pointer_type(rhs.op0().type())) + implements_java_char_sequence(rhs.op0().type())) { exprt new_rhs=make_cprover_string_assign( goto_program, i_it, rhs.op0(), location); @@ -419,7 +521,7 @@ void string_refine_preprocesst::make_string_assign( const source_locationt &location, const std::string &signature) { - assert(is_java_string_pointer_type(function_type.return_type())); + assert(implements_java_char_sequence(function_type.return_type())); dereference_exprt deref(lhs, lhs.type().subtype()); typet object_type=ns.follow(deref.type()); exprt object_size=size_of_expr(object_type, ns); @@ -455,11 +557,20 @@ void string_refine_preprocesst::make_string_assign( malloc_expr.type()=pointer_typet(object_type); malloc_expr.add_source_location()=location; + // Adding a string expr in the map + refined_string_typet ref_type(length_type, data_type.subtype().subtype()); + string_exprt str(tmp_length, tmp_array, ref_type); + symbol_exprt cprover_string_sym=new_tmp_symbol("tmp_cprover_string", ref_type); +#if 0 + java_to_cprover_strings[lhs]=cprover_string_sym; +#endif + std::list assigns; assigns.emplace_back(lhs, malloc_expr); assigns.emplace_back(tmp_length, rhs_length); assigns.emplace_back(lhs_length, tmp_length); assigns.emplace_back(tmp_array, rhs_data); + assigns.emplace_back(cprover_string_sym, str); assigns.emplace_back(lhs_data, address_of_exprt(tmp_array)); insert_assignments(goto_program, i_it, assigns); } @@ -488,7 +599,7 @@ void string_refine_preprocesst::make_assign( const source_locationt &loc, const std::string &sig) { - if(is_java_string_pointer_type(function_type.return_type())) + if(implements_java_char_sequence(function_type.return_type())) make_string_assign( goto_program, i_it, lhs, function_type, function_name, arg, loc, sig); else @@ -519,9 +630,7 @@ void string_refine_preprocesst::make_string_copy( const exprt &argument, const source_locationt &location) { - // TODO : treat CharSequence and StringBuffer - assert(is_java_string_pointer_type(lhs.type()) || - is_java_string_builder_pointer_type(lhs.type())); + assert(implements_java_char_sequence(lhs.type())); exprt deref=dereference_exprt(lhs, lhs.type().subtype()); typet length_type, data_type; @@ -566,7 +675,36 @@ void string_refine_preprocesst::make_string_function( const source_locationt &location, const std::string &signature) { - if(is_java_string_pointer_type(function_type.return_type())) + if(signature.length()>0) + { + if(signature.back()=='S') + { + code_typet ft=function_type; + ft.return_type()=jls_ptr; + typecast_exprt lhs2(lhs, jls_ptr); + + make_string_assign( + goto_program, + i_it, + lhs2, + ft, + function_name, + arguments, + location, + signature); + } + else + make_normal_assign( + goto_program, + i_it, + lhs, + function_type, + function_name, + arguments, + location, + signature); + } + else if(implements_java_char_sequence(function_type.return_type())) make_string_assign( goto_program, i_it, @@ -597,7 +735,7 @@ Function: string_refine_preprocesst::make_string_function Purpose: at the current position replace `lhs=s.some_function(x,...)` by `lhs=function_name(s,x,...)`; option `assign_first_arg` uses `s` instead of `lhs` in the resulting - expression; + expression, Warning : it assumes that `s` is string-like option `skip_first_arg`, removes `s` from the arguments, ie `x` is the first one @@ -632,19 +770,28 @@ void string_refine_preprocesst::make_string_function( new_type.parameters().push_back(function_type.parameters()[i]); } + std::string new_sig=signature; exprt lhs; if(assign_first_arg) + { lhs=function_call.arguments()[0]; + if(signature.length()>0) + new_sig.replace(signature.length()-1, 1, "S"); + else + new_sig="S"; + } else lhs=function_call.lhs(); + // TODO: not sure we have to do that if(lhs.id()==ID_typecast) lhs=to_typecast_expr(lhs).op(); + // ---------------- new_type.return_type()=lhs.type(); make_string_function( - goto_program, i_it, lhs, new_type, function_name, args, loc, signature); + goto_program, i_it, lhs, new_type, function_name, args, loc, new_sig); } /*******************************************************************\ @@ -674,9 +821,13 @@ Function: string_refine_preprocesst::make_string_function_side_effect Inputs: a position in a goto program and a function name - Purpose: at the current position, replace `r=s.some_function(x,...)` - by `s=function_name(s,x,...)` and add a correspondance from r - to s in the `string_builders` map + Purpose: at the current position, replace `r=s.some_function(x,...)` by + > s=function_name(s,x,...) + and if `r` is not a nil expression: + > r=s + + // TODO : the second assignment should be first and should be: + > r=function_name(s,x,...) \*******************************************************************/ @@ -687,9 +838,14 @@ void string_refine_preprocesst::make_string_function_side_effect( const std::string &signature) { const code_function_callt &function_call=to_code_function_call(i_it->code); - string_builders[function_call.lhs()]=function_call.arguments()[0]; + code_assignt assign(function_call.lhs(),function_call.arguments()[0]); make_string_function( goto_program, i_it, function_name, signature, true, false); + if(assign.lhs().is_not_nil()) + { + i_it=goto_program.insert_after(i_it); + insert_assignments(goto_program, i_it, {assign}); + } } /*******************************************************************\ @@ -771,8 +927,8 @@ void string_refine_preprocesst::make_to_char_array_function( Function: string_refine_preprocesst::make_cprover_char_array_assign - Inputs: a goto_program, a position in this program, an expression and a - location + Inputs: a goto_program, a position in this program, an expression of + type char array and a location Outputs: a char array expression (not a pointer) @@ -787,21 +943,29 @@ exprt string_refine_preprocesst::make_cprover_char_array_assign( const exprt &rhs, const source_locationt &location) { - // TODO : add an assertion on the type of rhs + typet type=ns.follow(rhs.type()); + assert(type.id()==ID_struct && + to_struct_type(type).get_tag()=="java::array[char]"); // We do the following assignments: // cprover_string_array = rhs.data // cprover_string = { rhs.length; cprover_string_array } + typet length_type, data_type; + get_data_and_length_type_of_char_array(rhs, data_type, length_type); + assert(data_type.id()==ID_pointer); + typet char_type=to_pointer_type(data_type).subtype(); + + refined_string_typet ref_type(length_type, char_type); + typet content_type=ref_type.get_content_type(); + exprt array_rhs=typecast_exprt(rhs, content_type); + // string expression for the rhs of the second assignment - string_exprt new_rhs(java_char_type()); + string_exprt new_rhs(ref_type); - typet data_type=new_rhs.content().type(); - typet length_type=java_int_type(); + symbol_exprt array_lhs=new_symbol("cprover_string_array", content_type); - symbol_exprt array_lhs=new_symbol("cprover_string_array", data_type); - exprt array_rhs=get_data(rhs, new_rhs.content().type()); - symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); + symbol_exprt lhs=new_symbol("cprover_string", ref_type); new_rhs.length()=get_length(rhs, length_type); new_rhs.content()=array_lhs; @@ -817,18 +981,20 @@ exprt string_refine_preprocesst::make_cprover_char_array_assign( Function: string_refine_preprocesst::make_char_array_function - Inputs: a position in a goto program, a function name, two Boolean options, - and the index of the char array argument in the function + Inputs: a position in a goto program, a function name, two Boolean options Purpose: at the given position replace `lhs=s.some_function(...,char_array,...)` by > cprover_string = { char_array->length, *char_array } - > tmp_string=function_name(s, cprover_string, ...) + > lhs=function_name(s, cprover_string, ...) option `assign_first_arg` uses `s` instead of `lhs` in the second assignment; option `skip_first_arg`, removes `s` from the arguments, ie `x` is the first one; - argument index gives the index of the argument containing char_array + If the function name is ID_cprover_copy_func we simply do + > cprover_string = { char_array->length, *char_array } + > lhs=cprover_string + \*******************************************************************/ @@ -837,7 +1003,6 @@ void string_refine_preprocesst::make_char_array_function( goto_programt::targett &i_it, const irep_idt &function_name, const std::string &signature, - size_t index, bool assign_first_arg, bool skip_first_arg) { @@ -845,8 +1010,9 @@ void string_refine_preprocesst::make_char_array_function( code_typet function_type=to_code_type(function_call.function().type()); code_typet new_function_type; const source_locationt &location=function_call.source_location(); - assert(function_call.arguments().size()>index); - const std::vector &args=function_call.arguments(); + + // We need a copy here because this function call may be overwritten + const std::vector args=function_call.arguments(); std::vector new_args; exprt lhs; @@ -858,20 +1024,42 @@ void string_refine_preprocesst::make_char_array_function( if(lhs.id()==ID_typecast) lhs=to_typecast_expr(lhs).op(); - exprt char_array=dereference_exprt( - function_call.arguments()[index], - function_call.arguments()[index].type().subtype()); - exprt string=make_cprover_char_array_assign( - goto_program, i_it, char_array, location); - std::size_t start_index=skip_first_arg?1:0; + + if(function_name==ID_cprover_string_copy_func) + { + assert(is_java_char_array_pointer_type(args[start_index].type())); + dereference_exprt char_array(args[start_index], args[start_index].type().subtype()); + exprt string=make_cprover_char_array_assign( + goto_program, i_it, char_array, location); + + std::list assignments; + assignments.emplace_back(lhs, string); + insert_assignments(goto_program, i_it, assignments); + return; + } + for(std::size_t i=start_index; icode); - string_builders[function_call.lhs()]=function_call.arguments()[0]; + goto_program, i_it, function_name, signature, true, false); } /*******************************************************************\ @@ -961,21 +1146,15 @@ exprt::operandst string_refine_preprocesst::process_arguments( for(std::size_t i=0; isecond); - else + if(iis_function_call()) { code_function_callt &function_call=to_code_function_call(i_it->code); - for(auto arg : function_call.arguments()) - { - auto sb_it=string_builders.find(arg); - if(sb_it!=string_builders.end()) - arg=sb_it->second; - } if(function_call.function().id()==ID_symbol) { @@ -1081,6 +1254,20 @@ void string_refine_preprocesst::replace_string_calls( exprt new_rhs=assignment.rhs(); code_assignt new_assignment(assignment.lhs(), new_rhs); + exprt uncasted=new_rhs; + + if(uncasted.id()==ID_typecast) + uncasted=to_typecast_expr(uncasted).op0(); + + #if 0 + if(implements_java_char_sequence(uncasted.type())) + { + auto it=java_to_cprover_strings.find(uncasted); + if(it!=java_to_cprover_strings.end()) + java_to_cprover_strings.insert(std::make_pair(assignment.lhs(), it->second)); + } +#endif + if(new_rhs.id()==ID_function_application) { function_application_exprt f=to_function_application_expr(new_rhs); @@ -1303,22 +1490,22 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_empty_string_func; string_of_char_array_function_calls["java::java.lang.String.:([C)V"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_copy_func; string_of_char_array_function_calls["java::java.lang.String.:([CII)V"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_substring_func; string_of_char_array_functions ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_substring_func; string_of_char_array_functions ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_copy_func; string_of_char_array_functions ["java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_substring_func; string_of_char_array_functions ["java::java.lang.String.copyValueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_of_char_array_func; + ID_cprover_string_copy_func; c_string_functions["__CPROVER_uninterpreted_string_literal_func"]= ID_cprover_string_literal_func; @@ -1352,9 +1539,22 @@ void string_refine_preprocesst::initialize_string_function_table() ID_cprover_string_of_int_func; signatures["java::java.lang.String.equals:(Ljava/lang/Object;)Z"]="SSZ"; - signatures - ["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= - "SSZ"; + signatures["java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z"]= + "SSZ"; + signatures["java::java.lang.StringBuilder.insert:(IZ)" + "Ljava/lang/StringBuilder;"]="SIZS"; + signatures["java::java.lang.StringBuilder.insert:(IJ)" + "Ljava/lang/StringBuilder;"]="SIJS"; + signatures["java::java.lang.StringBuilder.insert:(II)" + "Ljava/lang/StringBuilder;"]="SIIS"; + signatures["java::java.lang.StringBuilder.insert:(IC)" + "Ljava/lang/StringBuilder;"]="SICS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;"]="SISS"; + signatures["java::java.lang.String.intern:()Ljava/lang/String;"]="SV"; + } /*******************************************************************\ @@ -1372,11 +1572,11 @@ string_refine_preprocesst::string_refine_preprocesst( goto_functionst &_goto_functions, message_handlert &_message_handler) :messaget(_message_handler), - symbol_table(_symbol_table), ns(_symbol_table), + symbol_table(_symbol_table), goto_functions(_goto_functions), - jls_ptr(symbol_typet("java::java.lang.String")), - next_symbol_id(0) + next_symbol_id(0), + jls_ptr(symbol_typet("java::java.lang.String")) { initialize_string_function_table(); Forall_goto_functions(it, goto_functions) diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h index d235f4a4b40..e0c0c163508 100644 --- a/src/goto-programs/string_refine_preprocess.h +++ b/src/goto-programs/string_refine_preprocess.h @@ -22,10 +22,6 @@ class string_refine_preprocesst:public messaget symbol_tablet & symbol_table; goto_functionst & goto_functions; - // String builders maps the different names of a same StringBuilder object - // to a unique expression. - std::map string_builders; - // Map name of Java string functions to there equivalent in the solver std::map side_effect_functions; std::map string_functions; @@ -37,7 +33,9 @@ class string_refine_preprocesst:public messaget std::map signatures; std::map hidden_strings; +#if 0 // We cannot use this map since side effects may modify strings std::map java_to_cprover_strings; +#endif // unique id for each newly created symbols int next_symbol_id; @@ -49,6 +47,8 @@ class string_refine_preprocesst:public messaget private: void initialize_string_function_table(); + static bool check_java_type(const typet &type, const std::string &tag); + static bool is_java_string_pointer_type(const typet &type); static bool is_java_string_type(const typet &type); @@ -59,6 +59,20 @@ class string_refine_preprocesst:public messaget static bool is_java_char_sequence_type(const typet &type); + static bool is_java_char_sequence_pointer_type(const typet &type); + + static bool is_java_char_array_type(const typet &type); + + static bool is_java_char_array_pointer_type(const typet &type); + + static bool implements_java_char_sequence(const typet &type) + { + return + is_java_char_sequence_pointer_type(type) || + is_java_string_builder_pointer_type(type) || + is_java_string_pointer_type(type); + } + symbol_exprt new_tmp_symbol(const std::string &name, const typet &type); symbol_exprt new_symbol( @@ -107,6 +121,9 @@ class string_refine_preprocesst:public messaget void get_data_and_length_type_of_string( const exprt &expr, typet &data_type, typet &length_type); + void get_data_and_length_type_of_char_array( + const exprt &expr, typet &data_type, typet &length_type); + function_application_exprt build_function_application( const irep_idt &function_name, const typet &type, @@ -200,7 +217,6 @@ class string_refine_preprocesst:public messaget goto_programt::targett &i_it, const irep_idt &function_name, const std::string &signature, - size_t index, bool assign_first_arg=false, bool skip_first_arg=false); From 4576899529d523f25435d606854fb01629bcef9b Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 28 Feb 2017 13:50:25 +0000 Subject: [PATCH 09/17] Overhaul of string solver contributions The code adding lemmas to the solver for each equality has been completely remastered. It has been simplified and should now cover all foreseable cases. Add overflow constraints on sums for string solver When creating sum expressions during string refinement, e.g. when adding string lengths, we shall add axioms to prevent the solver from finding values that actually come from an integer overflow. Therefore, we introduce a new method plus_exprt_with_overflow_check() that wraps around plus_exprt and generates the relevant axioms. Cache function applications Cache converted function applications to avoid re-generating the axioms if it has already been done for a given function application. This seems to happen for function applications that are in an assertion, and thus are treated separately by convert_bitvector. Remove string_refinement::convert_symbol: the behaviour of this function was to convert java strings to strings of symbols. In hindsight, this was not a good idea, as we still refer to the actual java string fields in some cases, e.g. to read its length. Replace add_axioms_for_string_expr In all add_axioms_... methods, we replace all calls to add_axioms_for_string_expr by the newly created function get_string_expr, which simply returns a string_exprt when we give it an refined expr (such as a string_exprt or a symbol). Update doc in add_axioms_for_index_string Simplify constraint for is_prefix: apply distribution law and update (and refactor) documentation. Remove set_string_symbol_equal_to_expr() Removed mentions of java string type in the string solver Removing mentions of is_c_string_type To use the string solver in C, we should use a struct type with tag __CPROVER_refined_string_type Keep non-string axioms in a list Instead of giving axioms to super::boolbv_set_equality_to_true, which may contain un-substituted symbols, we keep them in a list and substitute them in dec_solve() before giving them to the function. Cleaning the add_axioms_for_string_expr method and renamed it to add_axioms_for_refined_string to signify that it should only be called on expression of type refined string. An assertion was added at the beginning to ensure that. Better get_array and string_of_array functions. Cleaned the implementation of get_array and factorized part of it. Made string_of_array directly take a array_exprt, which can be obtained from get_array. Improved string_of_array for non printable characters Resolve symbol to char arrays in non-string axioms. We introduce a mapping that expresses the maximal traversal of symbols of char array type. We use that map to, either obtain a char array from a symbol, either get a unique symbol the aliases equivalence class. This map is used to replace all symbols of char array type in the axioms that are added using the parent method: supert::boolbv_set_equality_to_true. Defining several intermediary functions for check_axioms and cleaning. Check axioms is quite complicated so we splitted it in several parts. Avoid using same universal variable name in string comparison function. Corrected test in index_of as characters could be signed Corrected return type retrieval in from_float functions. The return type should be given as argument of the helper function add_axioms_from_float. Fixed type of contains in constraint generation Introduce string_refinementt::set_to. We now override the set_to() method instead of set_equality_to_true(). This solves a problem where many lemmas were being dropped. Handle array_of in set_char_array_equality. Java assignments such as char[] str=new char[10] involves assigning the char array to 0, which is done using the array_of operator. This operator is now handled when found on the rhs of a char array assignment. Optimized string hash code and intern functions to only look at seen strings. We only compare the argument to the strings on which hash_code (resp. intern) was already called. Add missing override keyword in declarations Integrate strings into symbol resolution. We now use for strings the mechanism that was made for resolving symbols to char arrays. As a result, the symbol_to_string map has been removed. We introduce an unresolved_symbol map to associate symbols to string expressions that were introduced during constraint generation. Preventing return of nil exprt in sum_over_map Enforce -1 for witnesses when strings are unequal. This makes it easier to see when models for string have different length. --- src/solvers/refinement/string_constraint.h | 1 - .../refinement/string_constraint_generator.h | 41 +- ...tring_constraint_generator_code_points.cpp | 18 +- ...string_constraint_generator_comparison.cpp | 101 +-- .../string_constraint_generator_concat.cpp | 32 +- .../string_constraint_generator_constants.cpp | 2 + .../string_constraint_generator_indexof.cpp | 27 +- .../string_constraint_generator_insert.cpp | 24 +- .../string_constraint_generator_main.cpp | 368 +++++++---- .../string_constraint_generator_testing.cpp | 41 +- ...ng_constraint_generator_transformation.cpp | 37 +- .../string_constraint_generator_valueof.cpp | 15 +- src/solvers/refinement/string_refinement.cpp | 590 ++++++++++++------ src/solvers/refinement/string_refinement.h | 35 +- 14 files changed, 862 insertions(+), 470 deletions(-) diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index 4b8d1229e09..1bb460349b7 100644 --- a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -48,7 +48,6 @@ class string_constraintt: public exprt return operands()[4]; } - private: string_constraintt(); diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index ed898cb9a18..3f0db68738f 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -14,6 +14,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H #include +#include #include #include @@ -65,21 +66,20 @@ class string_constraint_generatort symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); symbol_exprt fresh_boolean(const irep_idt &prefix); string_exprt fresh_string(const refined_string_typet &type); + string_exprt get_string_expr(const exprt &expr); + string_exprt convert_java_string_to_string_exprt( + const exprt &underlying); + plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - // We maintain a map from symbols to strings. - std::map symbol_to_string; + // Maps unresolved symbols to the string_exprt that was created for them + std::map unresolved_symbols; - string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); - void assign_to_symbol( - const symbol_exprt &sym, const string_exprt &expr) - { - symbol_to_string[sym.get_identifier()]=expr; - } + string_exprt find_or_add_string_of_symbol( + const symbol_exprt &sym, + const refined_string_typet &ref_type); - string_exprt add_axioms_for_string_expr(const exprt &expr); - void set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str); + string_exprt add_axioms_for_refined_string(const exprt &expr); exprt add_axioms_for_function_application( const function_application_exprt &expr); @@ -96,6 +96,8 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; + std::map function_application_cache; + static irep_idt extract_java_string(const symbol_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); @@ -117,6 +119,9 @@ class string_constraint_generatort // The specification is partial: the actual value is not actually computed // but we ensure that hash codes of equal strings are equal. exprt add_axioms_for_hash_code(const function_application_exprt &f); + // To each string on which hash_code was called we associate a symbol + // representing the return value of the hash_code function. + std::map hash_code_of_string; exprt add_axioms_for_is_empty(const function_application_exprt &f); exprt add_axioms_for_is_prefix( @@ -224,7 +229,9 @@ class string_constraint_generatort // the start for negative number string_exprt add_axioms_from_float(const function_application_exprt &f); string_exprt add_axioms_from_float( - const exprt &f, bool double_precision=false); + const exprt &f, + const refined_string_typet &ref_type, + bool double_precision); // Add axioms corresponding to the String.valueOf(D) java function // TODO: the specifications is only partial @@ -278,6 +285,9 @@ class string_constraint_generatort // string pointers symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + // Pool used for the intern method + std::map intern_of_string; + // Tells which language is used. C and Java are supported irep_idt mode; @@ -293,14 +303,9 @@ class string_constraint_generatort exprt int_of_hex_char(const exprt &chr) const; exprt is_high_surrogate(const exprt &chr) const; exprt is_low_surrogate(const exprt &chr) const; - static exprt character_equals_ignore_case( + exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - // Pool used for the intern method - std::map pool; - - // Used to determine whether hashcode should be equal - std::map hash; }; #endif diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 7c035a0d3e4..16763dfc97c 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at( { typet return_type=f.type(); assert(return_type.id()==ID_signedbv); - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const exprt &pos=args(f, 2)[1]; symbol_exprt result=fresh_symbol("char", return_type); exprt index1=from_integer(1, str.length().type()); const exprt &char1=str[pos]; - const exprt &char2=str[plus_exprt(pos, index1)]; + const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; exprt char1_as_int=typecast_exprt(char1, return_type); exprt char2_as_int=typecast_exprt(char2, return_type); exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt is_low=is_low_surrogate( + str[plus_exprt_with_overflow_check(pos, index1)]); exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); @@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( typet return_type=f.type(); assert(return_type.id()==ID_signedbv); symbol_exprt result=fresh_symbol("char", return_type); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &char1= str[minus_exprt(args[1], from_integer(2, str.length().type()))]; @@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count exprt string_constraint_generatort::add_axioms_for_code_point_count( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &begin=args(f, 3)[1]; const exprt &end=args(f, 3)[2]; const typet &return_type=f.type(); @@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const exprt &index=args(f, 3)[1]; const exprt &offset=args(f, 3)[2]; const typet &return_type=f.type(); symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); - exprt minimum=plus_exprt(index, offset); - exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + exprt minimum=plus_exprt_with_overflow_check(index, offset); + exprt maximum=plus_exprt_with_overflow_check( + index, plus_exprt_with_overflow_check(offset, offset)); axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 321282ee9c1..7dab98567da 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -18,7 +18,9 @@ Function: string_constraint_generatort::add_axioms_for_equals Outputs: a expression of Boolean type Purpose: add axioms stating that the result is true exactly when the strings - represented by the arguments are equal + represented by the arguments are equal. + the variable ending in `witness_unequal` is -1 if the length differs + or an index at which the strings are different \*******************************************************************/ @@ -29,8 +31,8 @@ exprt string_constraint_generatort::add_axioms_for_equals( symbol_exprt eq=fresh_boolean("equal"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet index_type=s1.length().type(); // We want to write: @@ -54,9 +56,10 @@ exprt string_constraint_generatort::add_axioms_for_equals( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness])); - implies_exprt a3( - not_exprt(eq), - or_exprt(notequal_exprt(s1.length(), s2.length()), witnessing)); + and_exprt diff_length( + notequal_exprt(s1.length(), s2.length()), + equal_exprt(witness, from_integer(-1, index_type))); + implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing)); axioms.push_back(a3); return tc_eq; @@ -92,8 +95,13 @@ exprt string_constraint_generatort::character_equals_ignore_case( // p3 : (is_up2&&'a'-'A'+char2=char1) equal_exprt p1(char1, char2); minus_exprt diff=minus_exprt(char_a, char_A); - and_exprt p2(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2)); - and_exprt p3(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1)); + + // Overflow is not a problem here because is_upper_case conditions + // ensure that we are within a safe range. + and_exprt p2(is_upper_case_1, + equal_exprt(plus_exprt(diff, char1), char2)); + and_exprt p3(is_upper_case_2, + equal_exprt(plus_exprt(diff, char2), char1)); return or_exprt(or_exprt(p1, p2), p3); } @@ -116,8 +124,8 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( symbol_exprt eq=fresh_boolean("equal_ignore_case"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); typet char_type=to_refined_string_type(s1.type()).get_char_type(); exprt char_a=constant_char('a', char_type); exprt char_A=constant_char('A', char_type); @@ -174,37 +182,35 @@ Function: string_constraint_generatort::add_axioms_for_hash_code exprt string_constraint_generatort::add_axioms_for_hash_code( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); typet return_type=f.type(); typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(hash.find(it->second)==hash.end()) - hash[it->second]=fresh_symbol("hash", return_type); + auto pair=hash_code_of_string.insert( + std::make_pair(str, fresh_symbol("hash", return_type))); + exprt hash=pair.first->second; // for each string s. either: // c1: hash(str)=hash(s) // c2: |str|!=|s| - // c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + // c3: (|str|==|s| && exists i<|s|. s[i]!=str[i]) // WARNING: the specification may be incomplete - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : hash_code_of_string) { symbol_exprt i=fresh_exist_index("index_hash", index_type); - equal_exprt c1(hash[it->second], hash[str]); - not_exprt c2(equal_exprt(it->second.length(), str.length())); + equal_exprt c1(it.second, hash); + not_exprt c2(equal_exprt(it.first.length(), str.length())); and_exprt c3( - equal_exprt(it->second.length(), str.length()), + equal_exprt(it.first.length(), str.length()), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt( str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i)))); axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); } - return hash[str]; + return hash; } /*******************************************************************\ @@ -222,8 +228,8 @@ Function: string_constraint_generatort::add_axioms_for_compare_to exprt string_constraint_generatort::add_axioms_for_compare_to( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + string_exprt s1=get_string_expr(args(f, 2)[0]); + string_exprt s2=get_string_expr(args(f, 2)[1]); const typet &return_type=f.type(); symbol_exprt res=fresh_symbol("compare_to", return_type); typet index_type=s1.length().type(); @@ -234,12 +240,12 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( // a1 : res==0 => |s1|=|s2| // a2 : forall i<|s1|. s1[i]==s2[i] // a3 : exists x. - // res!=0 ==> x> 0 && - // ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|) - // &&res=s1[x]-s2[x] ) - // || cond2: - // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) - // a4 : forall i s1[i]=s2[i] + // res!=0 ==> x > 0 + // && ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| &&x<|s2|) + // && res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) + // a4 : forall i' s1[i]=s2[i] assert(return_type.id()==ID_signedbv); @@ -282,7 +288,8 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( or_exprt(cond1, cond2))); axioms.push_back(a3); - string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); + string_constraintt a4(i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); axioms.push_back(a4); return res; @@ -304,15 +311,15 @@ Function: string_constraint_generatort::add_axioms_for_intern symbol_exprt string_constraint_generatort::add_axioms_for_intern( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); + // For now we only enforce content equality and not pointer equality const typet &return_type=f.type(); + typet index_type=str.length().type(); - // initialisation of the missing pool variable - std::map::iterator it; - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(pool.find(it->second)==pool.end()) - pool[it->second]=fresh_symbol("pool", return_type); + auto pair=intern_of_string.insert( + std::make_pair(str, fresh_symbol("pool", return_type))); + symbol_exprt intern=pair.first->second; // intern(str)=s_0 || s_1 || ... // for each string s. @@ -320,30 +327,30 @@ symbol_exprt string_constraint_generatort::add_axioms_for_intern( // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) exprt disj=false_exprt(); - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + for(auto it : intern_of_string) disj=or_exprt( - disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + disj, equal_exprt(intern, it.second)); axioms.push_back(disj); // WARNING: the specification may be incomplete or incorrect - for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) - if(it->second!=str) + for(auto it : intern_of_string) + if(it.second!=str) { symbol_exprt i=fresh_exist_index("index_intern", index_type); axioms.push_back( or_exprt( - equal_exprt(pool[it->second], pool[str]), + equal_exprt(it.second, intern), or_exprt( - not_exprt(str.axiom_for_has_same_length_as(it->second)), + not_exprt(str.axiom_for_has_same_length_as(it.first)), and_exprt( - str.axiom_for_has_same_length_as(it->second), + str.axiom_for_has_same_length_as(it.first), and_exprt( - not_exprt(equal_exprt(str[i], it->second[i])), + not_exprt(equal_exprt(str[i], it.first[i])), and_exprt(str.axiom_for_is_strictly_longer_than(i), axiom_for_is_positive_index(i))))))); } - return pool[str]; + return intern; } diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index a7d9c4921c4..247bb19bc84 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -36,7 +36,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( // a5 : forall i<|s2|. res[i+|s1|]=s2[i] exprt a1=res.axiom_for_has_length( - plus_exprt(s1.length(), s2.length())); + plus_exprt_with_overflow_check(s1.length(), s2.length())); axioms.push_back(a1); axioms.push_back(s1.axiom_for_is_shorter_than(res)); axioms.push_back(s2.axiom_for_is_shorter_than(res)); @@ -46,7 +46,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( axioms.push_back(a4); symbol_exprt idx2=fresh_univ_index("QA_index_concat2", res.length().type()); - equal_exprt res_eq(s2[idx2], res[plus_exprt(idx2, s1.length())]); + equal_exprt res_eq(s2[idx2], + res[plus_exprt_with_overflow_check(idx2, s1.length())]); string_constraintt a5(idx2, s2.length(), res_eq); axioms.push_back(a5); @@ -73,8 +74,8 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()==2); - string_exprt s1=add_axioms_for_string_expr(args[0]); - string_exprt s2=add_axioms_for_string_expr(args[1]); + string_exprt s1=get_string_expr(args[0]); + string_exprt s2=get_string_expr(args[1]); return add_axioms_for_concat(s1, s2); } @@ -95,7 +96,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int( args(f, 2)[1], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); @@ -118,7 +119,7 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH, ref_type); return add_axioms_for_concat(s1, s2); } @@ -138,7 +139,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_bool string_exprt string_constraint_generatort::add_axioms_for_concat_bool( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -159,7 +160,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_char string_exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); @@ -180,9 +181,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_double string_exprt string_constraint_generatort::add_axioms_for_concat_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float( - args(f, 2)[1], MAX_DOUBLE_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, true); return add_axioms_for_concat(s1, s2); } @@ -201,8 +203,10 @@ Function: string_constraint_generatort::add_axioms_for_concat_float string_exprt string_constraint_generatort::add_axioms_for_concat_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_float(args(f, 2)[1], MAX_FLOAT_LENGTH); + string_exprt s1=get_string_expr(args(f, 2)[0]); + assert(refined_string_typet::is_refined_string_type(f.type())); + refined_string_typet ref_type=to_refined_string_type(f.type()); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], ref_type, false); return add_axioms_for_concat(s1, s2); } @@ -222,7 +226,7 @@ Function: string_constraint_generatort::add_axioms_for_concat_code_point string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); return add_axioms_for_concat(s1, s2); diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index 64ffabfd266..ef766dd9b8d 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -93,6 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_empty_string( const function_application_exprt &f) { assert(f.arguments().empty()); + assert(refined_string_typet::is_refined_string_type(f.type())); const refined_string_typet &ref_type=to_refined_string_type(f.type()); string_exprt res=fresh_string(ref_type); axioms.push_back(res.axiom_for_has_length(0)); @@ -132,6 +133,7 @@ string_exprt string_constraint_generatort::add_axioms_from_literal( else { // Java string constant + assert(false); // TODO: Check if used. On the contrary, discard else. assert(arg.id()==ID_symbol); const exprt &s=arg.op0(); diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 283de683bd6..d799091a573 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -92,15 +92,16 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( symbol_exprt contains=fresh_boolean("contains_substring"); // We add axioms: - // a1 : contains => |substring|>=offset>=from_index + // a1 : contains => |str|-|substring|>=offset>=from_index // a2 : !contains => offset=-1 - // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + // a3 : forall 0<=witness<|substring|. + // contains => str[witness+offset]=substring[witness] implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + substring.length(), offset)), binary_relation_exprt(offset, ID_ge, from_index))); axioms.push_back(a1); @@ -138,7 +139,8 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( implies_exprt a1( contains, and_exprt( - str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(substring.length(), offset)), binary_relation_exprt(offset, ID_le, from_index))); axioms.push_back(a1); @@ -173,7 +175,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); const exprt &c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); assert(f.type()==ref_type.get_index_type()); @@ -186,14 +188,15 @@ exprt string_constraint_generatort::add_axioms_for_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + assert(refined_string_typet::is_refined_string_type(c.type())); + string_exprt sub=get_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } } @@ -215,7 +218,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( exprt index1=from_integer(1, index_type); exprt minus1=from_integer(-1, index_type); - exprt from_index_plus_one=plus_exprt(from_index, index1); + exprt from_index_plus_one=plus_exprt_with_overflow_check(from_index, index1); and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, from_index_plus_one)); @@ -269,7 +272,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt c=args[1]; const refined_string_typet &ref_type=to_refined_string_type(str.type()); exprt from_index; @@ -282,14 +285,14 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( else assert(false); - if(c.type().id()==ID_unsignedbv) + if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_last_index_of( str, typecast_exprt(c, ref_type.get_char_type()), from_index); } else { - string_exprt sub=add_axioms_for_string_expr(c); + string_exprt sub=get_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } } diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 6a080a5dc6c..cd5f4376a93 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -48,8 +48,8 @@ Function: string_constraint_generatort::add_axioms_for_insert string_exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + string_exprt s2=get_string_expr(args(f, 3)[2]); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -70,7 +70,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int( args(f, 3)[2], MAX_INTEGER_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -93,7 +93,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_long( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH, ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -115,7 +115,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -136,7 +136,7 @@ Function: string_constraint_generatort::add_axioms_for_insert_char string_exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s1=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); @@ -158,8 +158,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_double string_exprt string_constraint_generatort::add_axioms_for_insert_double( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, true); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -179,8 +180,9 @@ Function: string_constraint_generatort::add_axioms_for_insert_float string_exprt string_constraint_generatort::add_axioms_for_insert_float( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + string_exprt s1=get_string_expr(args(f, 3)[0]); + const refined_string_typet &ref_type=to_refined_string_type(s1.type()); + string_exprt s2=add_axioms_from_float(args(f, 3)[2], ref_type, false); return add_axioms_for_insert(s1, s2, args(f, 3)[1]); } @@ -216,7 +218,7 @@ string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( offset=from_integer(0, count.type()); } - string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + string_exprt str=get_string_expr(f.arguments()[0]); const exprt &length=f.arguments()[2]; const exprt &data=f.arguments()[3]; string_exprt arr=add_axioms_from_char_array( diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 7fe510c2e35..aaca632ebc5 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -16,6 +16,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include #include +#include unsigned string_constraint_generatort::next_symbol_id=1; @@ -122,6 +123,42 @@ symbol_exprt string_constraint_generatort::fresh_boolean( /*******************************************************************\ +Function: string_constraint_generatort::plus_exprt_with_overflow_check + + Inputs: + op1 - First term of the sum + op2 - Second term of the sum + + Outputs: A plus expression representing the sum of the arguments + + Purpose: Create a plus expression while adding extra constraints to + axioms in order to prevent overflows. + +\*******************************************************************/ +plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( + const exprt &op1, const exprt &op2) +{ + plus_exprt sum(plus_exprt(op1, op2)); + + exprt zero=from_integer(0, op1.type()); + + binary_relation_exprt neg1(op1, ID_lt, zero); + binary_relation_exprt neg2(op2, ID_lt, zero); + binary_relation_exprt neg_sum(sum, ID_lt, zero); + + // We prevent overflows by adding the following constraint: + // If the signs of the two operands are the same, then the sign of the sum + // should also be the same. + implies_exprt no_overflow(equal_exprt(neg1, neg2), + equal_exprt(neg1, neg_sum)); + + axioms.push_back(no_overflow); + + return sum; +} + +/*******************************************************************\ + Function: string_constraint_generatort::fresh_string Inputs: a type for string @@ -144,59 +181,110 @@ string_exprt string_constraint_generatort::fresh_string( /*******************************************************************\ -Function: string_constraint_generatort::add_axioms_for_string_expr +Function: string_constraint_generatort::get_string_expr + + Inputs: an expression of refined string type + + Outputs: a string expression - Inputs: an expression of type string + Purpose: casts an expression to a string expression, or fetches the + actual string_exprt in the case of a symbol. + +\*******************************************************************/ + +string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) +{ + assert(refined_string_typet::is_refined_string_type(expr.type())); + + if(expr.id()==ID_symbol) + { + return find_or_add_string_of_symbol( + to_symbol_expr(expr), + to_refined_string_type(expr.type())); + } + else + { + return to_string_expr(expr); + } +} + +string_exprt string_constraint_generatort::convert_java_string_to_string_exprt( + const exprt &jls) +{ + assert(get_mode()==ID_java); + assert(jls.id()==ID_struct); + + exprt length(to_struct_expr(jls).op1()); + // TODO: Add assertion on the type. + // assert(length.type()==refined_string_typet::index_type()); + exprt java_content(to_struct_expr(jls).op2()); + if(java_content.id()==ID_address_of) + { + java_content=to_address_of_expr(java_content).object(); + } + else + { + java_content=dereference_exprt(java_content, java_content.type()); + } + + refined_string_typet type(java_int_type(), java_char_type()); + + return string_exprt(length, java_content, type); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_refined_string + + Inputs: an expression of refined string type Outputs: a string expression that is linked to the argument through axioms that are added to the list - Purpose: obtain a refined string expression corresponding to string - variable of string function call + Purpose: obtain a refined string expression corresponding to a expression + of type string \*******************************************************************/ -string_exprt string_constraint_generatort::add_axioms_for_string_expr( - const exprt &unrefined_string) + +string_exprt string_constraint_generatort::add_axioms_for_refined_string( + const exprt &string) { - string_exprt s; + assert(refined_string_typet::is_refined_string_type(string.type())); + refined_string_typet type=to_refined_string_type(string.type()); - if(unrefined_string.id()==ID_function_application) + // Function applications should have been removed before + assert(string.id()!=ID_function_application); + + if(string.id()==ID_symbol) { - exprt res=add_axioms_for_function_application( - to_function_application_expr(unrefined_string)); - s=to_string_expr(res); + const symbol_exprt &sym=to_symbol_expr(string); + string_exprt s=find_or_add_string_of_symbol(sym, type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_symbol) - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); - else if(unrefined_string.id()==ID_address_of) + else if(string.id()==ID_nondet_symbol) { - assert(unrefined_string.op0().id()==ID_symbol); - s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + string_exprt s=fresh_string(type); + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); + return s; } - else if(unrefined_string.id()==ID_if) - s=add_axioms_for_if(to_if_expr(unrefined_string)); - else if(unrefined_string.id()==ID_nondet_symbol || - unrefined_string.id()==ID_struct) + else if(string.id()==ID_if) { - // TODO: for now we ignore non deterministic symbols and struct + return add_axioms_for_if(to_if_expr(string)); } - else if(unrefined_string.id()==ID_typecast) + else if(string.id()==ID_struct) { - exprt arg=to_typecast_expr(unrefined_string).op(); - exprt res=add_axioms_for_string_expr(arg); - s=to_string_expr(res); + return to_string_expr(string); } else { - throw "add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + throw "add_axioms_for_refined_string:\n"+string.pretty()+ "\nwhich is not a function application, "+ - "a symbol or an if expression"; + "a symbol, a struct or an if expression"; } - - axioms.push_back( - s.axiom_for_is_longer_than(from_integer(0, s.length().type()))); - return s; } /*******************************************************************\ @@ -216,10 +304,10 @@ string_exprt string_constraint_generatort::add_axioms_for_if( { assert( refined_string_typet::is_refined_string_type(expr.true_case().type())); - string_exprt t=add_axioms_for_string_expr(expr.true_case()); + string_exprt t=get_string_expr(expr.true_case()); assert( refined_string_typet::is_refined_string_type(expr.false_case().type())); - string_exprt f=add_axioms_for_string_expr(expr.false_case()); + string_exprt f=get_string_expr(expr.false_case()); const refined_string_typet &ref_type=to_refined_string_type(t.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -246,7 +334,7 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol Outputs: a string expression - Purpose: if a symbol represent a string is present in the symbol_to_string + Purpose: if a symbol representing a string is present in the symbol_to_string table, returns the corresponding string, if the symbol is not yet present, creates a new string with the correct type depending on whether the mode is java or c, adds it to the table and returns it. @@ -254,12 +342,11 @@ Function: string_constraint_generatort::find_or_add_string_of_symbol \*******************************************************************/ string_exprt string_constraint_generatort::find_or_add_string_of_symbol( - const symbol_exprt &sym) + const symbol_exprt &sym, const refined_string_typet &ref_type) { irep_idt id=sym.get_identifier(); - const refined_string_typet &ref_type=to_refined_string_type(sym.type()); string_exprt str=fresh_string(ref_type); - auto entry=symbol_to_string.insert(std::make_pair(id, str)); + auto entry=unresolved_symbols.insert(std::make_pair(id, str)); return entry.first->second; } @@ -286,125 +373,182 @@ exprt string_constraint_generatort::add_axioms_for_function_application( const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): to_symbol_expr(name).get_identifier(); + std::string str_id(id.c_str()); + + size_t pos=str_id.find("func_length"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + // TODO: This part needs some improvement. + // Stripping the symbol name is not a very robust process. + new_expr.function() = symbol_exprt(str_id.substr(0,pos+4)); + assert(get_mode()==ID_java); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.length(); + } + else + return to_string_expr(res_it.first->second).length(); + } + + pos = str_id.find("func_data"); + if(pos!=std::string::npos) + { + function_application_exprt new_expr(expr); + new_expr.function() = symbol_exprt(str_id.substr(0,pos+4)); + new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); + + auto res_it=function_application_cache.insert(std::make_pair(new_expr, + nil_exprt())); + if(res_it.second) + { + string_exprt res=to_string_expr( + add_axioms_for_function_application(new_expr)); + res_it.first->second=res; + return res.content(); + } + else + return to_string_expr(res_it.first->second).content(); + } + // TODO: improve efficiency of this test by either ordering test by frequency // or using a map + auto res_it=function_application_cache.find(expr); + if(res_it!=function_application_cache.end() && res_it->second!=nil_exprt()) + return res_it->second; + + exprt res; + if(id==ID_cprover_char_literal_func) - return add_axioms_for_char_literal(expr); + res=add_axioms_for_char_literal(expr); else if(id==ID_cprover_string_length_func) - return add_axioms_for_length(expr); + res=add_axioms_for_length(expr); else if(id==ID_cprover_string_equal_func) - return add_axioms_for_equals(expr); + res=add_axioms_for_equals(expr); else if(id==ID_cprover_string_equals_ignore_case_func) - return add_axioms_for_equals_ignore_case(expr); + res=add_axioms_for_equals_ignore_case(expr); else if(id==ID_cprover_string_is_empty_func) - return add_axioms_for_is_empty(expr); + res=add_axioms_for_is_empty(expr); else if(id==ID_cprover_string_char_at_func) - return add_axioms_for_char_at(expr); + res=add_axioms_for_char_at(expr); else if(id==ID_cprover_string_is_prefix_func) - return add_axioms_for_is_prefix(expr); + res=add_axioms_for_is_prefix(expr); else if(id==ID_cprover_string_is_suffix_func) - return add_axioms_for_is_suffix(expr); + res=add_axioms_for_is_suffix(expr); else if(id==ID_cprover_string_startswith_func) - return add_axioms_for_is_prefix(expr, true); + res=add_axioms_for_is_prefix(expr, true); else if(id==ID_cprover_string_endswith_func) - return add_axioms_for_is_suffix(expr, true); + res=add_axioms_for_is_suffix(expr, true); else if(id==ID_cprover_string_contains_func) - return add_axioms_for_contains(expr); + res=add_axioms_for_contains(expr); else if(id==ID_cprover_string_hash_code_func) - return add_axioms_for_hash_code(expr); + res=add_axioms_for_hash_code(expr); else if(id==ID_cprover_string_index_of_func) - return add_axioms_for_index_of(expr); + res=add_axioms_for_index_of(expr); else if(id==ID_cprover_string_last_index_of_func) - return add_axioms_for_last_index_of(expr); + res=add_axioms_for_last_index_of(expr); else if(id==ID_cprover_string_parse_int_func) - return add_axioms_for_parse_int(expr); + res=add_axioms_for_parse_int(expr); else if(id==ID_cprover_string_to_char_array_func) - return add_axioms_for_to_char_array(expr); + res=add_axioms_for_to_char_array(expr); else if(id==ID_cprover_string_code_point_at_func) - return add_axioms_for_code_point_at(expr); + res=add_axioms_for_code_point_at(expr); else if(id==ID_cprover_string_code_point_before_func) - return add_axioms_for_code_point_before(expr); + res=add_axioms_for_code_point_before(expr); else if(id==ID_cprover_string_code_point_count_func) - return add_axioms_for_code_point_count(expr); + res=add_axioms_for_code_point_count(expr); else if(id==ID_cprover_string_offset_by_code_point_func) - return add_axioms_for_offset_by_code_point(expr); + res=add_axioms_for_offset_by_code_point(expr); else if(id==ID_cprover_string_compare_to_func) - return add_axioms_for_compare_to(expr); + res=add_axioms_for_compare_to(expr); else if(id==ID_cprover_string_literal_func) - return add_axioms_from_literal(expr); + res=add_axioms_from_literal(expr); else if(id==ID_cprover_string_concat_func) - return add_axioms_for_concat(expr); + res=add_axioms_for_concat(expr); else if(id==ID_cprover_string_concat_int_func) - return add_axioms_for_concat_int(expr); + res=add_axioms_for_concat_int(expr); else if(id==ID_cprover_string_concat_long_func) - return add_axioms_for_concat_long(expr); + res=add_axioms_for_concat_long(expr); else if(id==ID_cprover_string_concat_bool_func) - return add_axioms_for_concat_bool(expr); + res=add_axioms_for_concat_bool(expr); else if(id==ID_cprover_string_concat_char_func) - return add_axioms_for_concat_char(expr); + res=add_axioms_for_concat_char(expr); else if(id==ID_cprover_string_concat_double_func) - return add_axioms_for_concat_double(expr); + res=add_axioms_for_concat_double(expr); else if(id==ID_cprover_string_concat_float_func) - return add_axioms_for_concat_float(expr); + res=add_axioms_for_concat_float(expr); else if(id==ID_cprover_string_concat_code_point_func) - return add_axioms_for_concat_code_point(expr); + res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) - return add_axioms_for_insert(expr); + res=add_axioms_for_insert(expr); else if(id==ID_cprover_string_insert_int_func) - return add_axioms_for_insert_int(expr); + res=add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_long_func) - return add_axioms_for_insert_long(expr); + res=add_axioms_for_insert_long(expr); else if(id==ID_cprover_string_insert_bool_func) - return add_axioms_for_insert_bool(expr); + res=add_axioms_for_insert_bool(expr); else if(id==ID_cprover_string_insert_char_func) - return add_axioms_for_insert_char(expr); + res=add_axioms_for_insert_char(expr); else if(id==ID_cprover_string_insert_double_func) - return add_axioms_for_insert_double(expr); + res=add_axioms_for_insert_double(expr); else if(id==ID_cprover_string_insert_float_func) - return add_axioms_for_insert_float(expr); + res=add_axioms_for_insert_float(expr); +#if 0 else if(id==ID_cprover_string_insert_char_array_func) - return add_axioms_for_insert_char_array(expr); + res=add_axioms_for_insert_char_array(expr); +#endif else if(id==ID_cprover_string_substring_func) - return add_axioms_for_substring(expr); + res=add_axioms_for_substring(expr); else if(id==ID_cprover_string_trim_func) - return add_axioms_for_trim(expr); + res=add_axioms_for_trim(expr); else if(id==ID_cprover_string_to_lower_case_func) - return add_axioms_for_to_lower_case(expr); + res=add_axioms_for_to_lower_case(expr); else if(id==ID_cprover_string_to_upper_case_func) - return add_axioms_for_to_upper_case(expr); + res=add_axioms_for_to_upper_case(expr); else if(id==ID_cprover_string_char_set_func) - return add_axioms_for_char_set(expr); + res=add_axioms_for_char_set(expr); else if(id==ID_cprover_string_value_of_func) - return add_axioms_for_value_of(expr); + res=add_axioms_for_value_of(expr); else if(id==ID_cprover_string_empty_string_func) - return add_axioms_for_empty_string(expr); + res=add_axioms_for_empty_string(expr); else if(id==ID_cprover_string_copy_func) - return add_axioms_for_copy(expr); + res=add_axioms_for_copy(expr); else if(id==ID_cprover_string_of_int_func) - return add_axioms_from_int(expr); + res=add_axioms_from_int(expr); else if(id==ID_cprover_string_of_int_hex_func) - return add_axioms_from_int_hex(expr); + res=add_axioms_from_int_hex(expr); else if(id==ID_cprover_string_of_float_func) - return add_axioms_from_float(expr); + res=add_axioms_from_float(expr); else if(id==ID_cprover_string_of_double_func) - return add_axioms_from_double(expr); + res=add_axioms_from_double(expr); else if(id==ID_cprover_string_of_long_func) - return add_axioms_from_long(expr); + res=add_axioms_from_long(expr); else if(id==ID_cprover_string_of_bool_func) - return add_axioms_from_bool(expr); + res=add_axioms_from_bool(expr); else if(id==ID_cprover_string_of_char_func) - return add_axioms_from_char(expr); + res=add_axioms_from_char(expr); +#if 0 else if(id==ID_cprover_string_of_char_array_func) - return add_axioms_from_char_array(expr); + res=add_axioms_from_char_array(expr); +#endif else if(id==ID_cprover_string_set_length_func) - return add_axioms_for_set_length(expr); + res=add_axioms_for_set_length(expr); else if(id==ID_cprover_string_delete_func) - return add_axioms_for_delete(expr); + res=add_axioms_for_delete(expr); else if(id==ID_cprover_string_delete_char_at_func) - return add_axioms_for_delete_char_at(expr); + res=add_axioms_for_delete_char_at(expr); else if(id==ID_cprover_string_replace_func) - return add_axioms_for_replace(expr); + res=add_axioms_for_replace(expr); + else if(id==ID_cprover_string_intern_func) + res=add_axioms_for_intern(expr); else { std::string msg( @@ -412,6 +556,8 @@ exprt string_constraint_generatort::add_axioms_for_function_application( msg+=id2string(id); throw msg; } + function_application_cache[expr]=res; + return res; } /*******************************************************************\ @@ -430,7 +576,7 @@ Function: string_constraint_generatort::add_axioms_for_copy string_exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt s1=get_string_expr(args(f, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt res=fresh_string(ref_type); @@ -486,7 +632,7 @@ Function: string_constraint_generatort::add_axioms_for_length exprt string_constraint_generatort::add_axioms_for_length( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.length(); } @@ -511,6 +657,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( const exprt &offset, const exprt &count) { + assert(false); // deprecated, we should use add_axioms_for_substring instead const typet &char_type=to_array_type(data.type()).subtype(); const typet &index_type=length.type(); refined_string_typet ref_type(index_type, char_type); @@ -523,7 +670,7 @@ string_exprt string_constraint_generatort::add_axioms_from_char_array( symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array", index_type); exprt char_in_tab=data; assert(char_in_tab.id()==ID_index); - char_in_tab.op1()=plus_exprt(qvar, offset); + char_in_tab.op1()=plus_exprt_with_overflow_check(qvar, offset); string_constraintt a1(qvar, count, equal_exprt(str[qvar], char_in_tab)); axioms.push_back(a1); @@ -549,6 +696,7 @@ Function: string_constraint_generatort::add_axioms_from_char_array string_exprt string_constraint_generatort::add_axioms_from_char_array( const function_application_exprt &f) { + assert(false); // deprecated, we should use add_axioms_for_substring instead exprt offset; exprt count; if(f.arguments().size()==4) @@ -638,7 +786,7 @@ Function: string_constraint_generatort::add_axioms_for_char_at exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); @@ -660,26 +808,6 @@ Function: string_constraint_generatort::add_axioms_for_to_char_array exprt string_constraint_generatort::add_axioms_for_to_char_array( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); return str.content(); } - -/*******************************************************************\ - -Function: string_constraint_generatort::set_string_symbol_equal_to_expr - - Inputs: a symbol and a string - - Purpose: add a correspondence to make sure the symbol points to the - same string as the second argument - -\*******************************************************************/ - -void string_constraint_generatort::set_string_symbol_equal_to_expr( - const symbol_exprt &sym, const exprt &str) -{ - if(str.id()==ID_symbol) - assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); - else - assign_to_symbol(sym, add_axioms_for_string_expr(str)); -} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index 8b1c7bca9d4..6d0d8a0eb27 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -31,15 +31,15 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( // We add axioms: // a1 : isprefix => |str| >= |prefix|+offset - // a2 : forall 0<=qvar - // s0[witness+offset]=s2[witness] - // a3 : !isprefix => |str| < |prefix|+offset - // || (|str| >= |prefix|+offset &&0<=witness<|prefix| - // &&str[witness+ofsset]!=prefix[witness]) + // a2 : forall 0<=qvar<|prefix|. isprefix => s0[witness+offset]=s2[witness] + // a3 : !isprefix => + // |str|<|prefix|+offset || + // (0<=witness<|prefix| && str[witness+offset]!=prefix[witness]) implies_exprt a1( isprefix, - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); + str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( + prefix.length(), offset))); axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); @@ -47,7 +47,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( qvar, prefix.length(), isprefix, - equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + equal_exprt(str[plus_exprt_with_overflow_check(qvar, offset)], + prefix[qvar])); axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); @@ -55,14 +56,13 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( axiom_for_is_positive_index(witness), and_exprt( prefix.axiom_for_is_strictly_longer_than(witness), - notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); + notequal_exprt(str[plus_exprt_with_overflow_check(witness, offset)], + prefix[witness]))); or_exprt s0_notpref_s1( not_exprt( - str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))), - and_exprt( - witness_diff, str.axiom_for_is_longer_than( - plus_exprt(prefix.length(), offset)))); + plus_exprt_with_overflow_check(prefix.length(), offset))), + witness_diff); implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); axioms.push_back(a3); @@ -88,8 +88,8 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( { const function_application_exprt::argumentst &args=f.arguments(); assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); exprt offset; if(args.size()==2) offset=from_integer(0, s0.length().type()); @@ -121,7 +121,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( // a2 : s0 => is_empty symbol_exprt is_empty=fresh_boolean("is_empty"); - string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt s0=get_string_expr(args(f, 1)[0]); axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); return typecast_exprt(is_empty, f.type()); @@ -150,8 +150,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( symbol_exprt issuffix=fresh_boolean("issuffix"); typecast_exprt tc_issuffix(issuffix, f.type()); - string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); - string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + string_exprt s0=get_string_expr(args[swap_arguments?1:0]); + string_exprt s1=get_string_expr(args[swap_arguments?0:1]); const typet &index_type=s0.length().type(); // We add axioms: @@ -207,9 +207,10 @@ exprt string_constraint_generatort::add_axioms_for_contains( assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); symbol_exprt contains=fresh_boolean("contains"); typecast_exprt tc_contains(contains, f.type()); - string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); - const typet &index_type=s0.type(); + string_exprt s0=get_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[1]); + const refined_string_typet ref_type=to_refined_string_type(s0.type()); + const typet &index_type=ref_type.get_index_type(); // We add axioms: // a1 : contains => s0.length >= s1.length diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 83735e0d5b5..8f0623356ef 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -29,7 +29,7 @@ Function: string_constraint_generatort::add_axioms_for_set_length string_exprt string_constraint_generatort::add_axioms_for_set_length( const function_application_exprt &f) { - string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=get_string_expr(args(f, 2)[0]); exprt k=args(f, 2)[1]; const refined_string_typet &ref_type=to_refined_string_type(s1.type()); string_exprt res=fresh_string(ref_type); @@ -76,7 +76,7 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( { const function_application_exprt::argumentst &args=f.arguments(); assert(args.size()>=2); - string_exprt str=add_axioms_for_string_expr(args[0]); + string_exprt str=get_string_expr(args[0]); exprt i(args[1]); exprt j; if(args.size()==3) @@ -133,8 +133,10 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( // Warning: check what to do if the string is not long enough axioms.push_back(str.axiom_for_is_longer_than(end)); - string_constraintt a4( - idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)])); + string_constraintt a4(idx, + res.length(), + equal_exprt(res[idx], + str[plus_exprt_with_overflow_check(start, idx)])); axioms.push_back(a4); return res; } @@ -154,7 +156,7 @@ Function: string_constraint_generatort::add_axioms_for_trim string_exprt string_constraint_generatort::add_axioms_for_trim( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &index_type=ref_type.get_index_type(); string_exprt res=fresh_string(ref_type); @@ -173,7 +175,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( // a8 : forall n<|s1|, s[idx+n]=s1[n] // a9 : (s[m]>' ' &&s[m+|s1|-1]>' ') || m=|s| - exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); + exprt a1=str.axiom_for_is_longer_than( + plus_exprt_with_overflow_check(idx, res.length())); axioms.push_back(a1); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); @@ -195,7 +198,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a6); symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); - minus_exprt bound(str.length(), plus_exprt(idx, res.length())); + minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, + res.length())); binary_relation_exprt eqn2( str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, @@ -210,7 +214,8 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( axioms.push_back(a8); minus_exprt index_before( - plus_exprt(idx, res.length()), from_integer(1, index_type)); + plus_exprt_with_overflow_check(idx, res.length()), + from_integer(1, index_type)); binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); or_exprt a9( equal_exprt(idx, str.length()), @@ -236,7 +241,7 @@ Function: string_constraint_generatort::add_axioms_for_to_lower_case string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -289,7 +294,7 @@ Function: string_constraint_generatort::add_axioms_for_to_upper_case string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( const function_application_exprt &expr) { - string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt str=get_string_expr(args(expr, 1)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); @@ -345,7 +350,7 @@ Function: string_constraint_generatort::add_axioms_for_char_set string_exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); string_exprt res=fresh_string(ref_type); with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); @@ -378,7 +383,7 @@ Function: string_constraint_generatort::add_axioms_for_replace string_exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); const refined_string_typet &ref_type=to_refined_string_type(str.type()); const exprt &old_char=args(f, 3)[1]; const exprt &new_char=args(f, 3)[2]; @@ -421,10 +426,12 @@ Function: string_constraint_generatort::add_axioms_for_delete_char_at string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt str=get_string_expr(args(f, 2)[0]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( - str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); + str, + args(f, 2)[1], + plus_exprt_with_overflow_check(args(f, 2)[1], index_one)); } /*******************************************************************\ @@ -468,6 +475,6 @@ Function: string_constraint_generatort::add_axioms_for_delete string_exprt string_constraint_generatort::add_axioms_for_delete( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt str=get_string_expr(args(f, 3)[0]); return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index d1030e5e085..dbc86b0b05f 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -63,7 +63,8 @@ Function: string_constraint_generatort::add_axioms_from_float string_exprt string_constraint_generatort::add_axioms_from_float( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], false); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, false); } /*******************************************************************\ @@ -81,7 +82,8 @@ Function: string_constraint_generatort::add_axioms_from_double string_exprt string_constraint_generatort::add_axioms_from_double( const function_application_exprt &f) { - return add_axioms_from_float(args(f, 1)[0], true); + const refined_string_typet &ref_type=to_refined_string_type(f.type()); + return add_axioms_from_float(args(f, 1)[0], ref_type, true); } /*******************************************************************\ @@ -99,13 +101,12 @@ Function: string_constraint_generatort::add_axioms_from_float \*******************************************************************/ string_exprt string_constraint_generatort::add_axioms_from_float( - const exprt &f, bool double_precision) + const exprt &f, const refined_string_typet &ref_type, bool double_precision) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); + string_exprt res=fresh_string(ref_type); const typet &index_type=ref_type.get_index_type(); const typet &char_type=ref_type.get_char_type(); - string_exprt res=fresh_string(ref_type); - const exprt &index24=from_integer(24, ref_type.get_index_type()); + const exprt &index24=from_integer(24, index_type); axioms.push_back(res.axiom_for_is_shorter_than(index24)); string_exprt magnitude=fresh_string(ref_type); @@ -610,7 +611,7 @@ Function: string_constraint_generatort::add_axioms_for_parse_int exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { - string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt str=get_string_expr(args(f, 1)[0]); const typet &type=f.type(); symbol_exprt i=fresh_symbol("parsed_int", type); const refined_string_typet &ref_type=to_refined_string_type(str.type()); diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index db9a2c460cc..93b6e6f221c 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -11,12 +11,16 @@ Author: Alberto Griggio, alberto.griggio@gmail.com \*******************************************************************/ #include +#include #include #include #include +#include +#include #include #include #include +#include string_refinementt::string_refinementt( const namespacet &_ns, propt &_prop, unsigned refinement_bound): @@ -100,152 +104,217 @@ void string_refinementt::add_instantiations() /*******************************************************************\ -Function: string_refinementt::convert_rest - - Inputs: an expression +Function: string_refinementt::add_symbol_to_symbol_map() - Outputs: a literal + Inputs: a symbol and the expression to map it to - Purpose: if the expression is a function application, we convert it - using our own convert_function_application method + Purpose: keeps a map of symbols to expressions, such as none of the + mapped values exist as a key \*******************************************************************/ -literalt string_refinementt::convert_rest(const exprt &expr) +void string_refinementt::add_symbol_to_symbol_map +(const exprt &lhs, const exprt &rhs) { - if(expr.id()==ID_function_application) - { - // can occur in __CPROVER_assume - bvt bv=convert_function_application(to_function_application_expr(expr)); - assert(bv.size()==1); - return bv[0]; - } - else + assert(lhs.id()==ID_symbol); + + // We insert the mapped value of the rhs, if it exists. + auto it=symbol_resolve.find(rhs); + const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs; + + symbol_resolve[lhs]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(lhs); + + std::list symbols_to_update_with_new_rhs(reverse_symbol_resolve[rhs]); + for(exprt item:symbols_to_update_with_new_rhs) { - return supert::convert_rest(expr); + symbol_resolve[item]=new_rhs; + reverse_symbol_resolve[new_rhs].push_back(item); } } /*******************************************************************\ -Function: string_refinementt::convert_symbol +Function: string_refinementt::set_char_array_equality() - Inputs: an expression + Inputs: the rhs and lhs of an equality over character arrays - Outputs: a bitvector - - Purpose: if the expression as string type, look up for the string in the - list of string symbols that we maintain, and convert it; - otherwise use the method of the parent class + Purpose: add axioms if the rhs is a character array \*******************************************************************/ -bvt string_refinementt::convert_symbol(const exprt &expr) +void string_refinementt::set_char_array_equality( + const exprt &lhs, const exprt &rhs) { - const typet &type=expr.type(); - const irep_idt &identifier=expr.get(ID_identifier); - assert(!identifier.empty()); + assert(lhs.id()==ID_symbol); - if(refined_string_typet::is_refined_string_type(type)) + if(rhs.id()==ID_array && rhs.type().id()==ID_array) { - string_exprt str= - generator.find_or_add_string_of_symbol(to_symbol_expr(expr)); - bvt bv=convert_bv(str); - return bv; + const typet &index_type=to_array_type(rhs.type()).size().type(); + for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) + { + // Introduce axioms to map symbolic rhs to its char array. + index_exprt arraycell(rhs, from_integer(i,index_type)); + equal_exprt arrayeq(arraycell,rhs.operands()[i]); + generator.axioms.push_back(arrayeq); + } } - else - return supert::convert_symbol(expr); + // At least for Java (as it is currently pre-processed), we need not consider + // other cases, because all character arrays find themselves on the rhs of an + // equality. Note that this might not be the case for other languages. } /*******************************************************************\ -Function: string_refinementt::convert_function_application +Function: string_refinementt::substitute_function_applications() - Inputs: a function_application + Inputs: an expression containing function applications - Outputs: a bitvector + Outputs: an epression containing no function application - Purpose: generate an expression, add lemmas stating that this expression - corresponds to the result of the given function call, and convert - this expression + Purpose: remove functions applications and create the necessary + axioms \*******************************************************************/ -bvt string_refinementt::convert_function_application -(const function_application_exprt &expr) +exprt string_refinementt::substitute_function_applications(exprt expr) { - debug() << "string_refinementt::convert_function_application " - << from_expr(expr) << eom; - exprt f=generator.add_axioms_for_function_application(expr); - return convert_bv(f); + for(size_t i=0; i &pair : non_string_axioms) + { + replace_expr(symbol_resolve, pair.first); + supert::set_to(pair.first, pair.second); + } + + for(exprt &axiom : generator.axioms) + { + replace_expr(symbol_resolve, axiom); if(axiom.id()==ID_string_constraint) { string_constraintt c=to_string_constraint(axiom); @@ -282,6 +361,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() { add_lemma(axiom); } + } initial_index_set(universal_axioms); update_index_set(cur); @@ -367,7 +447,8 @@ bvt string_refinementt::convert_bool_bv(const exprt &boole, const exprt &orig) Function: string_refinementt::add_lemma - Inputs: a lemma + Inputs: a lemma and Boolean value stating whether the lemma should + be added to the index set. Purpose: add the given lemma to the solver @@ -393,151 +474,265 @@ void string_refinementt::add_lemma(const exprt &lemma, bool add_to_index_set) /*******************************************************************\ -Function: string_refinementt::string_of_array +Function: string_refinementt::get_array - Inputs: a constant array expression and a integer expression + Inputs: an expression representing an array and an expression + representing an integer - Outputs: a string + Outputs: an array expression or an array_of_exprt - Purpose: convert the content of a string to a more readable representation. - This should only be used for debbuging. + Purpose: get a model of an array and put it in a certain form. + If the size cannot be obtained or if it is too big, return an + empty array. \*******************************************************************/ -std::string string_refinementt::string_of_array( - const exprt &arr, const exprt &size) const +exprt string_refinementt::get_array(const exprt &arr, const exprt &size) { + exprt arr_val=get(arr); + exprt size_val=get(size); + typet char_type=arr.type().subtype(); + typet index_type=size.type(); + array_typet empty_ret_type(char_type, from_integer(0, index_type)); + array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); + if(size.id()!=ID_constant) - return "string of unknown size"; + { + debug() << "(sr::get_array) string of unknown size" << eom; + return empty_ret; + } + unsigned n; if(to_unsigned_integer(to_constant_expr(size), n)) - n=0; + { + debug() << "(sr::get_array) size is not a valid"; + return empty_ret; + } + + array_typet ret_type(char_type, from_integer(n, index_type)); + array_exprt ret(ret_type); if(n>MAX_CONCRETE_STRING_SIZE) - return "very long string"; + { + debug() << "(sr::get_array) long string (size=" << n << ")"; + return empty_ret; + } + if(n==0) - return "\"\""; + { + debug() << "(sr::get_array) empty string" << eom; + return empty_ret; + } - std::ostringstream buf; - buf << "\""; - exprt val=get(arr); + unsigned concrete_array[n]; - if(val.id()=="array-list") + if(arr_val.id()=="array-list") { - for(size_t i=0; i(c); + exprt value=arr_val.operands()[i*2+1]; + to_unsigned_integer(to_constant_expr(value), concrete_array[i]); } } } } + else if(arr_val.id()==ID_array) + { + for(size_t i=0; i=32) + result << (unsigned char) c; + else { - exprt index=val.operands()[i*2]; - assert(index.type()==index_type); - exprt value=val.operands()[i*2+1]; - assert(value.type()==char_type); - ret=with_exprt(ret, index, value); + result << "\\u" << std::hex << std::setw(4) << std::setfill('0') + << (unsigned int) c; } - return ret; - } - else - { - debug() << "unable to get array-list value of " - << from_expr(val) << eom; - return arr; } + + return result.str(); } /*******************************************************************\ -Function: string_refinementt::check_axioms +Function: string_refinementt::fill_model - Outputs: a Boolean + Outputs: a replace map - Purpose: return true if the current model satisfies all the axioms + Purpose: maps the variable created by the solver to constant expressions + given by the current model \*******************************************************************/ -bool string_refinementt::check_axioms() +replace_mapt string_refinementt::fill_model() { - debug() << "string_refinementt::check_axioms: ===============" - << "===========================================" << eom; - debug() << "string_refinementt::check_axioms: build the" - << " interpretation from the model of the prop_solver" << eom; replace_mapt fmodel; - for(auto it : generator.symbol_to_string) + for(auto it:symbol_resolve) { - string_exprt refined=it.second; - const exprt &econtent=refined.content(); - const exprt &elength=refined.length(); - - exprt len=get(elength); - exprt arr=get_array(econtent, len); - - fmodel[elength]=len; - fmodel[econtent]=arr; - debug() << it.first << "=" << from_expr(it.second) - << " of length " << from_expr(len) <<" := " << eom - << from_expr(get(econtent)) << eom - << string_of_array(econtent, len) << eom; + if(refined_string_typet::is_refined_string_type(it.second.type())) + { + string_exprt refined=to_string_expr(it.second); + replace_expr(symbol_resolve, refined); + const exprt &econtent=refined.content(); + const exprt &elength=refined.length(); + + exprt len=get(elength); + exprt arr=get_array(econtent, len); + + fmodel[elength]=len; + fmodel[econtent]=arr; + + debug() << from_expr(to_symbol_expr(it.first)) << "=" + << from_expr(refined); + if(arr.id()==ID_array) + debug() << " = \"" << string_of_array(to_array_expr(arr)) + << "\" (size:" << from_expr(len) << ")"<< eom; + else + debug() << " = " << from_expr(arr) << "" << eom; + } } for(auto it : generator.boolean_symbols) { - debug() << "" << it.get_identifier() << " := " - << from_expr(get(it)) << eom; - fmodel[it]=get(it); + debug() << "" << it.get_identifier() << " := " + << from_expr(get(it)) << eom; + fmodel[it]=get(it); } for(auto it : generator.index_symbols) { - debug() << "" << it.get_identifier() << " := " - << from_expr(get(it)) << eom; - fmodel[it]=get(it); + debug() << "" << it.get_identifier() << " := " + << from_expr(get(it)) << eom; + fmodel[it]=get(it); } + return fmodel; +} + +/*******************************************************************\ + +Function: string_refinementt::add_negation_of_constraint_to_solver + + Inputs: a string constraint and a solver for non string expressions + + Purpose: negates the constraint and add it to the solver. + the intended usage is to find an assignement of the universal + variable that would violate the axiom, + to avoid false positives the symbols other than the universal + variable should have been replaced by there valuation in the + current model + +\*******************************************************************/ + +void string_refinementt::add_negation_of_constraint_to_solver( + const string_constraintt &axiom, supert &solver) +{ + exprt lb=axiom.lower_bound(); + exprt ub=axiom.upper_bound(); + if(lb.id()==ID_constant && ub.id()==ID_constant) + { + mp_integer lb_int, ub_int; + to_integer(to_constant_expr(lb), lb_int); + to_integer(to_constant_expr(ub), ub_int); + if(ub_int<=lb_int) + { + debug() << "empty constraint with current model" << eom; + return; + } + } + + if(axiom.premise()==false_exprt()) + { + debug() << "(string_refinement::check_axioms) adding false" << eom; + solver << false_exprt(); + return; + } + + and_exprt premise(axiom.premise(), axiom.univ_within_bounds()); + and_exprt negaxiom(premise, not_exprt(axiom.body())); + + debug() << "(sr::check_axioms) negated axiom: " << from_expr(negaxiom) << eom; + solver << negaxiom; +} + +/*******************************************************************\ + +Function: string_refinementt::check_axioms + + Outputs: a Boolean + + Purpose: return true if the current model satisfies all the axioms + +\*******************************************************************/ + +bool string_refinementt::check_axioms() +{ + debug() << "string_refinementt::check_axioms: ===============" + << "===========================================" << eom; + debug() << "string_refinementt::check_axioms: build the" + << " interpretation from the model of the prop_solver" << eom; + replace_mapt fmodel=fill_model(); // Maps from indexes of violated universal axiom to a witness of violation std::map violated; @@ -547,21 +742,28 @@ bool string_refinementt::check_axioms() for(size_t i=0; i &m, bool negated) const + std::map &m, const typet &type, bool negated) const { exprt sum=nil_exprt(); mp_integer constants=0; typet index_type; if(m.empty()) - return nil_exprt(); + return from_integer(0, type); else index_type=m.begin()->first.type(); @@ -723,12 +925,20 @@ exprt string_refinementt::sum_over_map( default: if(second>1) { - for(int i=0; isecond; i--) + if(sum.is_nil()) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + for(int i=-1; i>second; i--) sum=minus_exprt(sum, t); } } @@ -755,7 +965,7 @@ Function: string_refinementt::simplify_sum exprt string_refinementt::simplify_sum(const exprt &f) const { std::map map=map_representation_of_sum(f); - return sum_over_map(map); + return sum_over_map(map, f.type()); } /*******************************************************************\ @@ -800,7 +1010,7 @@ exprt string_refinementt::compute_inverse_function( } elems.erase(it); - return sum_over_map(elems, neg); + return sum_over_map(elems, f.type(), neg); } @@ -830,7 +1040,7 @@ Function: find_qvar Outputs: a Boolean - Purpose: looks for the symbol and return true if it is found + Purpose: look for the symbol and return true if it is found \*******************************************************************/ @@ -1014,19 +1224,18 @@ exprt find_index(const exprt &expr, const exprt &str) catch (exprt i) { return i; } } - /*******************************************************************\ Function: string_refinementt::instantiate - Inputs: an universaly quantified formula `axiom`, an array of char + Inputs: a universally quantified formula `axiom`, an array of char variable `str`, and an index expression `val`. - Outputs: substitute `qvar` the universaly quantified variable of `axiom`, by + Outputs: substitute `qvar` the universally quantified variable of `axiom`, by an index `val`, in `axiom`, so that the index used for `str` equals `val`. For instance, if `axiom` corresponds to - $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` would return - an expression for $s[v]='a' && t[v-x]='b'$. + $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` + would return an expression for $s[v]='a' && t[v-x]='b'$. \*******************************************************************/ @@ -1051,6 +1260,17 @@ exprt string_refinementt::instantiate( return implies_exprt(bounds, instance); } +/*******************************************************************\ + +Function: string_refinementt::instantiate_not_contains + + Inputs: a quantified formula representing `not_contains`, and a + list to which to add the created lemmas to + + Purpose: instantiate a quantified formula representing `not_contains` + by substituting the quantifiers and generating axioms + +\*******************************************************************/ void string_refinementt::instantiate_not_contains( const string_not_contains_constraintt &axiom, std::list &new_lemmas) diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index be0cd332bc3..c8ee98f1385 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -14,6 +14,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H #include +#include #include #include @@ -36,7 +37,7 @@ class string_refinementt: public bv_refinementt // Should we use counter examples at each iteration? bool use_counter_example; - virtual std::string decision_procedure_text() const + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); } @@ -45,12 +46,9 @@ class string_refinementt: public bv_refinementt protected: typedef std::set expr_sett; + typedef std::list exprt_listt; - virtual bvt convert_symbol(const exprt &expr); - virtual bvt convert_function_application( - const function_application_exprt &expr); - - decision_proceduret::resultt dec_solve(); + decision_proceduret::resultt dec_solve() override; bvt convert_bool_bv(const exprt &boole, const exprt &orig); @@ -76,19 +74,31 @@ class string_refinementt: public bv_refinementt // Warning: this is indexed by array_expressions and not string expressions std::map current_index_set; std::map index_set; + replace_mapt symbol_resolve; + std::map reverse_symbol_resolve; + std::list> non_string_axioms; + + void add_equivalence(const irep_idt & lhs, const exprt & rhs); void display_index_set(); void add_lemma(const exprt &lemma, bool add_to_index_set=true); - bool boolbv_set_equality_to_true(const equal_exprt &expr); - - literalt convert_rest(const exprt &expr); + exprt substitute_function_applications(exprt expr); + typet substitute_java_string_types(typet type); + exprt substitute_java_strings(exprt expr); + void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); + bool is_char_array(const typet &type); + bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); + void set_to(const exprt &expr, bool value) override; void add_instantiations(); - + void add_negation_of_constraint_to_solver( + const string_constraintt &axiom, supert &solver); + replace_mapt fill_model(); bool check_axioms(); + void set_char_array_equality(const exprt &lhs, const exprt &rhs); void update_index_set(const exprt &formula); void update_index_set(const std::vector &cur); void initial_index_set(const string_constraintt &axiom); @@ -105,13 +115,14 @@ class string_refinementt: public bv_refinementt const exprt &qvar, const exprt &val, const exprt &f); std::map map_representation_of_sum(const exprt &f) const; - exprt sum_over_map(std::map &m, bool negated=false) const; + exprt sum_over_map( + std::map &m, const typet &type, bool negated=false) const; exprt simplify_sum(const exprt &f) const; exprt get_array(const exprt &arr, const exprt &size); - std::string string_of_array(const exprt &arr, const exprt &size) const; + std::string string_of_array(const array_exprt &arr); }; #endif From 86da7620569d649b60c3050f60fe1fffe8260b57 Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Fri, 24 Feb 2017 17:36:40 +0000 Subject: [PATCH 10/17] Add test java_intern --- regression/strings/java_intern/test.desc | 7 +++++++ regression/strings/java_intern/test_intern.class | Bin 0 -> 634 bytes regression/strings/java_intern/test_intern.java | 10 ++++++++++ 3 files changed, 17 insertions(+) create mode 100644 regression/strings/java_intern/test.desc create mode 100644 regression/strings/java_intern/test_intern.class create mode 100644 regression/strings/java_intern/test_intern.java diff --git a/regression/strings/java_intern/test.desc b/regression/strings/java_intern/test.desc new file mode 100644 index 00000000000..fedc6e813ea --- /dev/null +++ b/regression/strings/java_intern/test.desc @@ -0,0 +1,7 @@ +FUTURE +test_intern.class +--string-refine +^EXIT=0$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 8.* SUCCESS$ +-- diff --git a/regression/strings/java_intern/test_intern.class b/regression/strings/java_intern/test_intern.class new file mode 100644 index 0000000000000000000000000000000000000000..e8dd8e9d57bbb48f87035da435601826f3ff33f9 GIT binary patch literal 634 zcmY*WK~EDw6#m}swzF&(*%qh;0Tqe?RJd@H7zygd0*4Y4xtQs8LZ+0hnVtG$xEsz$ z0tqB~_eZI3inN&B%BS4Krg9m=$;wp@6EuT!eWn z2s|cC)@7b6V|6df-|Aepx~ff}Lqc&&XWG6d_$#X)3BgXUtq2nhovFS4NlTdnSIoeP z)EQyCX=Qr6E58iV0#69xR@(ii5jA^#ldAXHF%qkC`&nmJnQXlyzseXf#<`JfrA&Y#$O6`BCrZqY{;@NJS6Q)*H z8zaUo{EDX><#xKfAVRgRa&1(5=z_+3VoA@rQ zU4hPBAtv}N4wA&wWLSC(4*_HKiua-7HP$ZS?>^^a?>mB1BrjfmL+J*EPgf}ZbV}Yp zX_mFXJfDAXk)IM9N)Td+wabi5gZ^+32vmkIr$bB*8*dKG0Og9fM0g4joFVWp5RAIq S`E`l~c<~!I)f5LFrvCzCr*zK% literal 0 HcmV?d00001 diff --git a/regression/strings/java_intern/test_intern.java b/regression/strings/java_intern/test_intern.java new file mode 100644 index 00000000000..5d25ddcaa1c --- /dev/null +++ b/regression/strings/java_intern/test_intern.java @@ -0,0 +1,10 @@ +public class test_intern { + + public static void main(/*String[] argv*/) { + String s1 = "abc"; + String s3 = "abc"; + String x = s1.intern(); + String y = s3.intern(); + assert(x == y); + } +} From 9bea9611aab3e88eaad740d63dce8e3df74cbcf9 Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Fri, 24 Feb 2017 18:16:06 +0000 Subject: [PATCH 11/17] Adapt test.desc to new output format Make more robust. Changes only applied to java tests. --- regression/strings/java_case/test.desc | 8 ++++---- regression/strings/java_char_array/test.desc | 6 +++--- .../strings/java_char_array_init/test.desc | 10 +++++----- regression/strings/java_char_at/test.desc | 6 +++--- regression/strings/java_code_point/test.desc | 10 +++++----- regression/strings/java_compare/test.desc | 8 ++++---- regression/strings/java_concat/test.desc | 4 ++-- regression/strings/java_contains/test.desc | 4 ++-- regression/strings/java_delete/test.desc | 4 ++-- regression/strings/java_easychair/test.desc | 2 +- regression/strings/java_empty/test.desc | 4 ++-- regression/strings/java_equal/test.desc | 4 ++-- regression/strings/java_float/test.desc | 8 ++++---- regression/strings/java_index_of/test.desc | 20 +++++++++---------- regression/strings/java_int/test.desc | 14 ++++++------- regression/strings/java_prefix/test.desc | 8 ++++---- regression/strings/java_replace/test.desc | 4 ++-- regression/strings/java_set_length/test.desc | 6 +++--- .../strings/java_string_builder/test.desc | 6 +++--- .../java_string_builder_insert/test.desc | 4 ++-- .../java_string_builder_length/test.desc | 4 ++-- regression/strings/java_strlen/test.desc | 4 ++-- regression/strings/java_substring/test.desc | 8 ++++---- regression/strings/java_suffix/test.desc | 4 ++-- regression/strings/java_trim/test.desc | 4 ++-- 25 files changed, 82 insertions(+), 82 deletions(-) diff --git a/regression/strings/java_case/test.desc b/regression/strings/java_case/test.desc index 9f48288c694..860440cce58 100644 --- a/regression/strings/java_case/test.desc +++ b/regression/strings/java_case/test.desc @@ -3,8 +3,8 @@ test_case.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_case.java line 11: SUCCESS$ -^\[assertion.2\] assertion at file test_case.java line 12: SUCCESS$ -^\[assertion.3\] assertion at file test_case.java line 13: SUCCESS$ -^\[assertion.4\] assertion at file test_case.java line 14: FAILURE$ +^\[.*assertion.1\].* line 11.* SUCCESS$ +^\[.*assertion.2\].* line 12.* SUCCESS$ +^\[.*assertion.3\].* line 13.* SUCCESS$ +^\[.*assertion.4\].* line 14.* FAILURE$ -- diff --git a/regression/strings/java_char_array/test.desc b/regression/strings/java_char_array/test.desc index 8282b808b84..8ca3024a669 100644 --- a/regression/strings/java_char_array/test.desc +++ b/regression/strings/java_char_array/test.desc @@ -3,7 +3,7 @@ test_char_array.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_char_array.java line 11: SUCCESS$ -^\[assertion.2\] assertion at file test_char_array.java line 12: SUCCESS$ -^\[assertion.3\] assertion at file test_char_array.java line 13: FAILURE$ +^\[.*assertion.1\].* line 11.* SUCCESS$ +^\[.*assertion.2\].* line 12.* SUCCESS$ +^\[.*assertion.3\].* line 13.* FAILURE$ -- diff --git a/regression/strings/java_char_array_init/test.desc b/regression/strings/java_char_array_init/test.desc index fe5ffae7238..5e4cc85da91 100644 --- a/regression/strings/java_char_array_init/test.desc +++ b/regression/strings/java_char_array_init/test.desc @@ -3,9 +3,9 @@ test_init.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_init.java line 16: SUCCESS$ -^\[assertion.2\] assertion at file test_init.java line 17: SUCCESS$ -^\[assertion.3\] assertion at file test_init.java line 18: SUCCESS$ -^\[assertion.4\] assertion at file test_init.java line 20: SUCCESS$ -^\[assertion.5\] assertion at file test_init.java line 21: FAILURE$ +^\[.*assertion.1\].* line 16.* SUCCESS$ +^\[.*assertion.2\].* line 17.* SUCCESS$ +^\[.*assertion.3\].* line 18.* SUCCESS$ +^\[.*assertion.4\].* line 20.* SUCCESS$ +^\[.*assertion.5\].* line 21.* FAILURE$ -- diff --git a/regression/strings/java_char_at/test.desc b/regression/strings/java_char_at/test.desc index 6f206a0f22f..8434dac11ef 100644 --- a/regression/strings/java_char_at/test.desc +++ b/regression/strings/java_char_at/test.desc @@ -3,7 +3,7 @@ test_char_at.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_char_at.java line 11: SUCCESS$ -^\[assertion.2\] assertion at file test_char_at.java line 13: FAILURE$ -^\[assertion.3\] assertion at file test_char_at.java line 15: SUCCESS$ +^\[.*assertion.1\].* line 11.* SUCCESS$ +^\[.*assertion.2\].* line 13.* FAILURE$ +^\[.*assertion.3\].* line 15.* SUCCESS$ -- diff --git a/regression/strings/java_code_point/test.desc b/regression/strings/java_code_point/test.desc index 35ca0cd6f4b..21bdf458e7a 100644 --- a/regression/strings/java_code_point/test.desc +++ b/regression/strings/java_code_point/test.desc @@ -3,9 +3,9 @@ test_code_point.class --string-refine ^EXIT=0$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_code_point.java line 5: SUCCESS$ -^\[assertion.2\] assertion at file test_code_point.java line 6: SUCCESS$ -^\[assertion.3\] assertion at file test_code_point.java line 7: SUCCESS$ -^\[assertion.4\] assertion at file test_code_point.java line 8: SUCCESS$ -^\[assertion.5\] assertion at file test_code_point.java line 11: SUCCESS$ +^\[.*assertion.1\].* line 5.* SUCCESS$ +^\[.*assertion.2\].* line 6.* SUCCESS$ +^\[.*assertion.3\].* line 7.* SUCCESS$ +^\[.*assertion.4\].* line 8.* SUCCESS$ +^\[.*assertion.5\].* line 11.* SUCCESS$ -- diff --git a/regression/strings/java_compare/test.desc b/regression/strings/java_compare/test.desc index 517b208c3e4..026ab9d8aba 100644 --- a/regression/strings/java_compare/test.desc +++ b/regression/strings/java_compare/test.desc @@ -3,8 +3,8 @@ test_compare.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_compare.java line 6: SUCCESS$ -^\[assertion.2\] assertion at file test_compare.java line 8: FAILURE$ -^\[assertion.3\] assertion at file test_compare.java line 11: SUCCESS$ -^\[assertion.4\] assertion at file test_compare.java line 12: FAILURE$ +^\[.*assertion.1\].* line 6.* SUCCESS$ +^\[.*assertion.2\].* line 8.* FAILURE$ +^\[.*assertion.3\].* line 11.* SUCCESS$ +^\[.*assertion.4\].* line 12.* FAILURE$ -- diff --git a/regression/strings/java_concat/test.desc b/regression/strings/java_concat/test.desc index 8ef2898e0d7..722476ae67e 100644 --- a/regression/strings/java_concat/test.desc +++ b/regression/strings/java_concat/test.desc @@ -3,6 +3,6 @@ test_concat.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_concat.java line 9: SUCCESS$ -^\[assertion.2\] assertion at file test_concat.java line 10: FAILURE$ +^\[.*assertion.1\].* line 9.* SUCCESS$ +^\[.*assertion.2\].* line 10.* FAILURE$ -- diff --git a/regression/strings/java_contains/test.desc b/regression/strings/java_contains/test.desc index 4e7a3bd4f7d..1d891ccbf1f 100644 --- a/regression/strings/java_contains/test.desc +++ b/regression/strings/java_contains/test.desc @@ -3,6 +3,6 @@ test_contains.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_contains.java line 7: SUCCESS$ -^\[assertion.2\] assertion at file test_contains.java line 8: FAILURE$ +^\[.*assertion.1\].* line 7.* SUCCESS$ +^\[.*assertion.2\].* line 8.* FAILURE$ -- diff --git a/regression/strings/java_delete/test.desc b/regression/strings/java_delete/test.desc index c6c608c0955..02c2e16acc5 100644 --- a/regression/strings/java_delete/test.desc +++ b/regression/strings/java_delete/test.desc @@ -3,6 +3,6 @@ test_delete.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_delete.java line 11: SUCCESS$ -^\[assertion.2\] assertion at file test_delete.java line 12: FAILURE$ +^\[.*assertion.1\].* line 11.* SUCCESS$ +^\[.*assertion.2\].* line 12.* FAILURE$ -- diff --git a/regression/strings/java_easychair/test.desc b/regression/strings/java_easychair/test.desc index 8680af72c5a..eee11d4022d 100644 --- a/regression/strings/java_easychair/test.desc +++ b/regression/strings/java_easychair/test.desc @@ -3,5 +3,5 @@ easychair.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file easychair.java line 29: FAILURE$ +^\[.*assertion.1\].* line 29.* FAILURE$ -- diff --git a/regression/strings/java_empty/test.desc b/regression/strings/java_empty/test.desc index cab514b80b5..d0c255ae5ed 100644 --- a/regression/strings/java_empty/test.desc +++ b/regression/strings/java_empty/test.desc @@ -3,6 +3,6 @@ test_empty.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_empty.java line 4: SUCCESS$ -^\[assertion.2\] assertion at file test_empty.java line 5: FAILURE$ +^\[.*assertion.1\].* line 4.* SUCCESS$ +^\[.*assertion.2\].* line 5.* FAILURE$ -- diff --git a/regression/strings/java_equal/test.desc b/regression/strings/java_equal/test.desc index d66c30b26fe..a7c55ce83a2 100644 --- a/regression/strings/java_equal/test.desc +++ b/regression/strings/java_equal/test.desc @@ -3,6 +3,6 @@ test_equal.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_equal.java line 7: FAILURE$ -^\[assertion.2\] assertion at file test_equal.java line 8: SUCCESS$ +^\[.*assertion.1\].* line 7.* FAILURE$ +^\[.*assertion.2\].* line 8.* SUCCESS$ -- diff --git a/regression/strings/java_float/test.desc b/regression/strings/java_float/test.desc index 955f0358eab..8dd935d8ddf 100644 --- a/regression/strings/java_float/test.desc +++ b/regression/strings/java_float/test.desc @@ -3,8 +3,8 @@ test_float.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_float.java line 14: SUCCESS$ -^\[assertion.2\] assertion at file test_float.java line 15: SUCCESS$ -^\[assertion.3\] assertion at file test_float.java line 16: SUCCESS$ -^\[assertion.4\] assertion at file test_float.java line 17: FAILURE$ +^\[.*assertion.1\].* line 14.* SUCCESS$ +^\[.*assertion.2\].* line 15.* SUCCESS$ +^\[.*assertion.3\].* line 16.* SUCCESS$ +^\[.*assertion.4\].* line 17.* FAILURE$ -- diff --git a/regression/strings/java_index_of/test.desc b/regression/strings/java_index_of/test.desc index daa6c32493b..e4e2bbc1116 100644 --- a/regression/strings/java_index_of/test.desc +++ b/regression/strings/java_index_of/test.desc @@ -3,14 +3,14 @@ test_index_of.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_index_of.java line 13: SUCCESS$ -^\[assertion.2\] assertion at file test_index_of.java line 14: FAILURE$ -^\[assertion.3\] assertion at file test_index_of.java line 17: SUCCESS$ -^\[assertion.4\] assertion at file test_index_of.java line 18: FAILURE$ -^\[assertion.5\] assertion at file test_index_of.java line 21: SUCCESS$ -^\[assertion.6\] assertion at file test_index_of.java line 22: FAILURE$ -^\[assertion.7\] assertion at file test_index_of.java line 25: SUCCESS$ -^\[assertion.8\] assertion at file test_index_of.java line 26: FAILURE$ -^\[assertion.9\] assertion at file test_index_of.java line 28: SUCCESS$ -^\[assertion.10\] assertion at file test_index_of.java line 29: SUCCESS$ +^\[.*assertion.1\].* line 13.* SUCCESS$ +^\[.*assertion.2\].* line 14.* FAILURE$ +^\[.*assertion.3\].* line 17.* SUCCESS$ +^\[.*assertion.4\].* line 18.* FAILURE$ +^\[.*assertion.5\].* line 21.* SUCCESS$ +^\[.*assertion.6\].* line 22.* FAILURE$ +^\[.*assertion.7\].* line 25.* SUCCESS$ +^\[.*assertion.8\].* line 26.* FAILURE$ +^\[.*assertion.9\].* line 28.* SUCCESS$ +^\[.*assertion.10\].* line 29.* SUCCESS$ -- diff --git a/regression/strings/java_int/test.desc b/regression/strings/java_int/test.desc index ae60dd78af0..7b537828446 100644 --- a/regression/strings/java_int/test.desc +++ b/regression/strings/java_int/test.desc @@ -3,11 +3,11 @@ test_int.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_int.java line 8: SUCCESS$ -^\[assertion.2\] assertion at file test_int.java line 9: SUCCESS$ -^\[assertion.3\] assertion at file test_int.java line 12: SUCCESS$ -^\[assertion.4\] assertion at file test_int.java line 15: SUCCESS$ -^\[assertion.5\] assertion at file test_int.java line 18: SUCCESS$ -^\[assertion.6\] assertion at file test_int.java line 21: SUCCESS$ -^\[assertion.7\] assertion at file test_int.java line 23: FAILURE$ +^\[.*assertion.1\].* line 8.* SUCCESS$ +^\[.*assertion.2\].* line 9.* SUCCESS$ +^\[.*assertion.3\].* line 12.* SUCCESS$ +^\[.*assertion.4\].* line 15.* SUCCESS$ +^\[.*assertion.5\].* line 18.* SUCCESS$ +^\[.*assertion.6\].* line 21.* SUCCESS$ +^\[.*assertion.7\].* line 23.* FAILURE$ -- diff --git a/regression/strings/java_prefix/test.desc b/regression/strings/java_prefix/test.desc index 175f934ca1d..f326ef8a4aa 100644 --- a/regression/strings/java_prefix/test.desc +++ b/regression/strings/java_prefix/test.desc @@ -3,8 +3,8 @@ test_prefix.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_prefix.java line 14: SUCCESS$ -^\[assertion.2\] assertion at file test_prefix.java line 16: FAILURE$ -^\[assertion.3\] assertion at file test_prefix.java line 18: SUCCESS$ -^\[assertion.4\] assertion at file test_prefix.java line 20: FAILURE$ +^\[.*assertion.1\].* line 14.* SUCCESS$ +^\[.*assertion.2\].* line 16.* FAILURE$ +^\[.*assertion.3\].* line 18.* SUCCESS$ +^\[.*assertion.4\].* line 20.* FAILURE$ -- diff --git a/regression/strings/java_replace/test.desc b/regression/strings/java_replace/test.desc index 1e89ebe37b4..07eb4f9173f 100644 --- a/regression/strings/java_replace/test.desc +++ b/regression/strings/java_replace/test.desc @@ -3,6 +3,6 @@ test_replace.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_replace.java line 6: SUCCESS$ -^\[assertion.2\] assertion at file test_replace.java line 8: FAILURE$ +^\[.*assertion.1\].* line 6.* SUCCESS$ +^\[.*assertion.2\].* line 8.* FAILURE$ -- diff --git a/regression/strings/java_set_length/test.desc b/regression/strings/java_set_length/test.desc index 43f82a648fd..70dd8c9c073 100644 --- a/regression/strings/java_set_length/test.desc +++ b/regression/strings/java_set_length/test.desc @@ -3,7 +3,7 @@ test_set_length.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_set_length.java line 8: SUCCESS$ -^\[assertion.2\] assertion at file test_set_length.java line 9: SUCCESS$ -^\[assertion.3\] assertion at file test_set_length.java line 10: FAILURE$ +^\[.*assertion.1\].* line 8.* SUCCESS$ +^\[.*assertion.2\].* line 9.* SUCCESS$ +^\[.*assertion.3\].* line 10.* FAILURE$ -- diff --git a/regression/strings/java_string_builder/test.desc b/regression/strings/java_string_builder/test.desc index 9712205b104..92986366ed5 100644 --- a/regression/strings/java_string_builder/test.desc +++ b/regression/strings/java_string_builder/test.desc @@ -3,7 +3,7 @@ test_string_builder.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_string_builder.java line 11: SUCCESS$ -^\[assertion.2\] assertion at file test_string_builder.java line 12: SUCCESS$ -^\[assertion.3\] assertion at file test_string_builder.java line 13: FAILURE$ +^\[.*assertion.1\].* line 11.* SUCCESS$ +^\[.*assertion.2\].* line 12.* SUCCESS$ +^\[.*assertion.3\].* line 13.* FAILURE$ -- diff --git a/regression/strings/java_string_builder_insert/test.desc b/regression/strings/java_string_builder_insert/test.desc index 2655f846da1..397cb42a473 100644 --- a/regression/strings/java_string_builder_insert/test.desc +++ b/regression/strings/java_string_builder_insert/test.desc @@ -3,6 +3,6 @@ test_insert.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_insert.java line 17: SUCCESS$ -^\[assertion.2\] assertion at file test_insert.java line 18: FAILURE$ +^\[.*assertion.1\].* line 17.* SUCCESS$ +^\[.*assertion.2\].* line 18.* FAILURE$ -- diff --git a/regression/strings/java_string_builder_length/test.desc b/regression/strings/java_string_builder_length/test.desc index c4720992571..0c69ba1672a 100644 --- a/regression/strings/java_string_builder_length/test.desc +++ b/regression/strings/java_string_builder_length/test.desc @@ -3,6 +3,6 @@ test_sb_length.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -\[assertion.1\] assertion at file test_sb_length.java line 6: SUCCESS$ -\[assertion.2\] assertion at file test_sb_length.java line 8: FAILURE$ +\[.*assertion.1\].* line 6.* SUCCESS$ +\[.*assertion.2\].* line 8.* FAILURE$ -- diff --git a/regression/strings/java_strlen/test.desc b/regression/strings/java_strlen/test.desc index b98e6f76f0a..6552949fb1e 100644 --- a/regression/strings/java_strlen/test.desc +++ b/regression/strings/java_strlen/test.desc @@ -3,6 +3,6 @@ test_length.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_length.java line 10: SUCCESS$ -^\[assertion.2\] assertion at file test_length.java line 11: FAILURE$ +^\[.*assertion.1\].* line 10.* SUCCESS$ +^\[.*assertion.2\].* line 11.* FAILURE$ -- diff --git a/regression/strings/java_substring/test.desc b/regression/strings/java_substring/test.desc index bd54a8204fe..6439ab24b24 100644 --- a/regression/strings/java_substring/test.desc +++ b/regression/strings/java_substring/test.desc @@ -3,8 +3,8 @@ test_substring.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_substring.java line 12: SUCCESS$ -^\[assertion.2\] assertion at file test_substring.java line 13: FAILURE$ -^\[assertion.3\] assertion at file test_substring.java line 20: SUCCESS$ -^\[assertion.4\] assertion at file test_substring.java line 21: FAILURE$ +^\[.*assertion.1\].* line 12.* SUCCESS$ +^\[.*assertion.2\].* line 13.* FAILURE$ +^\[.*assertion.3\].* line 20.* SUCCESS$ +^\[.*assertion.4\].* line 21.* FAILURE$ -- diff --git a/regression/strings/java_suffix/test.desc b/regression/strings/java_suffix/test.desc index 2740e87d6e4..275eae1b98a 100644 --- a/regression/strings/java_suffix/test.desc +++ b/regression/strings/java_suffix/test.desc @@ -3,6 +3,6 @@ test_suffix.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_suffix.java line 12: SUCCESS$ -^\[assertion.2\] assertion at file test_suffix.java line 13: FAILURE$ +^\[.*assertion.1\].* line 12.* SUCCESS$ +^\[.*assertion.2\].* line 13.* FAILURE$ -- diff --git a/regression/strings/java_trim/test.desc b/regression/strings/java_trim/test.desc index 7c0f1a87978..fa7806d3db9 100644 --- a/regression/strings/java_trim/test.desc +++ b/regression/strings/java_trim/test.desc @@ -3,6 +3,6 @@ test_trim.class --string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[assertion.1\] assertion at file test_trim.java line 5: SUCCESS$ -^\[assertion.2\] assertion at file test_trim.java line 6: FAILURE$ +^\[.*assertion.1\].* line 5.* SUCCESS$ +^\[.*assertion.2\].* line 6.* FAILURE$ -- From 83c5c17e3aa49e63489ddbcb3327ce2012014b0b Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Tue, 28 Feb 2017 09:14:04 +0000 Subject: [PATCH 12/17] Add test for main class with no argument. In certain cases, omitting the argument of the main class makes cbmc crash. This test checks that this does not happen. --- regression/strings/java_noarg/test.desc | 9 +++++++++ regression/strings/java_noarg/test_noarg.class | Bin 0 -> 901 bytes regression/strings/java_noarg/test_noarg.java | 13 +++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 regression/strings/java_noarg/test.desc create mode 100644 regression/strings/java_noarg/test_noarg.class create mode 100644 regression/strings/java_noarg/test_noarg.java diff --git a/regression/strings/java_noarg/test.desc b/regression/strings/java_noarg/test.desc new file mode 100644 index 00000000000..b5181fef08a --- /dev/null +++ b/regression/strings/java_noarg/test.desc @@ -0,0 +1,9 @@ +FUTURE +test_noarg.class +--string-refine +^EXIT=0$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 9.* SUCCESS$ +^\[.*assertion.2\].* line 10.* FAILURE$ +^\[.*assertion.3\].* line 11.* SUCCESS$ +-- diff --git a/regression/strings/java_noarg/test_noarg.class b/regression/strings/java_noarg/test_noarg.class new file mode 100644 index 0000000000000000000000000000000000000000..4c1838321107a8c9bb3410cef8db087889262a70 GIT binary patch literal 901 zcmZWnTW=CU6#j`Sd*|WBdfrN8^YX_u%RF%9wBTB zvlT`Zw|*zl)<0N zzh?;4ZJjej4-AXH=^eMZ^G=ZI<5)8+hJ~i5bv`$=lQB%vNDYHr>zEf|l&0NtI{cL( zoT46gy^ofyIo<6e?W?9Dg(ZfVxR*`M>Xw_HV_01V^))w5d;Np$n0i*w$|~;QF2nLX z_j%7Sb?&IRhx;lu>4Ay_k_?#{ad%SdOUJRDN$}Dn_(MEmP-fC3JXTS`lS{(hwvTwn zV_3=^%mSaQt%^p4I-fLWmk5E}UcSx{cDYx-fP%Tby5Pv3J*h91%l{vTqVO+0&2;C| zoae6QX*|(c^14YgWJv4WH5{%_i=>HoJ^G*l9wJMt5U1r~v5aU7-TYvp>txgPTBVcG z87vIJP6ZLmWJ6<+sz7E+JtbE^{qYfet%QGwz~DyS5NQpe{+VDW^H{2oSQucjKcfmMzOj@o5&;CDAcz Date: Tue, 28 Feb 2017 09:24:34 +0000 Subject: [PATCH 13/17] Add test for parseint Was taken out of another test. --- regression/strings/java_parseint/test.desc | 8 ++++++++ .../strings/java_parseint/test_parseint.class | Bin 0 -> 727 bytes .../strings/java_parseint/test_parseint.java | 9 +++++++++ 3 files changed, 17 insertions(+) create mode 100644 regression/strings/java_parseint/test.desc create mode 100644 regression/strings/java_parseint/test_parseint.class create mode 100644 regression/strings/java_parseint/test_parseint.java diff --git a/regression/strings/java_parseint/test.desc b/regression/strings/java_parseint/test.desc new file mode 100644 index 00000000000..85553ab1b70 --- /dev/null +++ b/regression/strings/java_parseint/test.desc @@ -0,0 +1,8 @@ +FUTURE +test_parseint.class +--string-refine +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.1\].* line 6.* SUCCESS$ +^\[.*assertion.2\].* line 7.* FAILURE$ +-- diff --git a/regression/strings/java_parseint/test_parseint.class b/regression/strings/java_parseint/test_parseint.class new file mode 100644 index 0000000000000000000000000000000000000000..54ad515b8d8be4f74eaf5b8125122631f3ec0fd2 GIT binary patch literal 727 zcmZ`%U279T6g_u$lih5#=F7%dTeY@o+KNRfC|HY96$+_7M185W>NHhy`3Mpnyw0 zE{E`KaxugeEcsX_)Rsk(NNv`6kYrDFMX zeAtotxh>|zp-?fQ-hA0Q5O2i#K*as^w$Uo?-?NP~Z6ms`cf}~JXJ&+8vpe_$9=3;L z-IY(2g{m5vm{%jA6RBcz)51kKgA$?qH#EVyb=xMd`nVS1I+_t8%n%w`<-?Q79&0_+ z%;XfjkCh0kSUUy&taBi{hA`Xw_b`klRu!X4bnNNen%~jOcx1}R) zb=lrMhO<|7C-7QpUy!^15&0uj=F8Dr6p!KWO%Qya#@=Nyi|(Z8b>!Ih5V9@tTT8r5 zcM#w#M|stL+Krl}8b|6=q8lkApi(V-MtB6@dk@e3gnR~jLfXRQ;6xAXXy?*b8@~Yz CMUr3u literal 0 HcmV?d00001 diff --git a/regression/strings/java_parseint/test_parseint.java b/regression/strings/java_parseint/test_parseint.java new file mode 100644 index 00000000000..7c4a248329c --- /dev/null +++ b/regression/strings/java_parseint/test_parseint.java @@ -0,0 +1,9 @@ +public class test_parseint { + + public static void main(String[] argv) { + String twelve = new String("12"); + int parsed = Integer.parseInt(twelve); + assert(parsed == 12); + assert(parsed != 12); + } +} From 4e3f0dc279620308e972c973185535c7a9a32ce2 Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Tue, 28 Feb 2017 16:55:02 +0000 Subject: [PATCH 14/17] Correct array index in get_array Correct a bad array access that caused a segmentation fault on the java_delete test. --- src/solvers/refinement/string_refinement.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 93b6e6f221c..cc0bbecf31d 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -538,6 +538,7 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) { exprt value=arr_val.operands()[i*2+1]; to_unsigned_integer(to_constant_expr(value), concrete_array[i]); + to_unsigned_integer(to_constant_expr(value), concrete_array[idx]); } } } From 28de886fa68f246eff5ccd6c27848e4afcf3adcf Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Tue, 28 Feb 2017 17:01:47 +0000 Subject: [PATCH 15/17] Linting corrections on /goto-programs/ --- .../string_refine_preprocess.cpp | 110 +++++++----------- 1 file changed, 42 insertions(+), 68 deletions(-) diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp index e8449c9e55a..64eed71a724 100644 --- a/src/goto-programs/string_refine_preprocess.cpp +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -326,8 +326,7 @@ void string_refine_preprocesst::get_data_and_length_type_of_string( { assert(is_java_string_type(expr.type()) || is_java_string_builder_type(expr.type()) || - is_java_char_sequence_type(expr.type()) - ); + is_java_char_sequence_type(expr.type())); typet object_type=ns.follow(expr.type()); assert(object_type.id()==ID_struct); const struct_typet &struct_type=to_struct_type(object_type); @@ -361,56 +360,42 @@ exprt string_refine_preprocesst::make_cprover_string_assign( { if(implements_java_char_sequence(rhs.type())) { - #if 0 - auto pair=java_to_cprover_strings.insert( - std::make_pair(rhs, nil_exprt())); + // We do the following assignments: + // 1 cprover_string_length= *(rhs->length) + // 2 cprover_string_array = *(rhs->data) + // 3 cprover_string = { cprover_string_length; cprover_string_array } + + dereference_exprt deref(rhs, rhs.type().subtype()); + typet data_type, length_type; + get_data_and_length_type_of_string(deref, data_type, length_type); + std::list assignments; - if(pair.second) - { -#endif - // We do the following assignments: - // 1 cprover_string_length= *(rhs->length) - // 2 cprover_string_array = *(rhs->data) - // 3 cprover_string = { cprover_string_length; cprover_string_array } - - dereference_exprt deref(rhs, rhs.type().subtype()); - typet data_type, length_type; - get_data_and_length_type_of_string(deref, data_type, length_type); - std::list assignments; - - // 1) cprover_string_length= *(rhs->length) - symbol_exprt length_lhs=new_symbol( - "cprover_string_length", length_type); - - member_exprt deref_length(deref, "length", length_type); - assignments.emplace_back(length_lhs, deref_length); - - // 2) cprover_string_array = *(rhs->data) - symbol_exprt array_lhs=new_symbol( - "cprover_string_array", data_type.subtype()); - member_exprt data(deref, "data", data_type); - dereference_exprt deref_data(data, data_type.subtype()); - assignments.emplace_back(array_lhs, deref_data); - - // 3) cprover_string = { cprover_string_length; cprover_string_array } - // This assignment is useful for finding witnessing strings for counter - // examples - refined_string_typet ref_type(length_type, java_char_type()); - string_exprt new_rhs(length_lhs, array_lhs, ref_type); - - symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); - assignments.emplace_back(lhs, new_rhs); - - insert_assignments(goto_program, i_it, assignments); - i_it=goto_program.insert_after(i_it); - return new_rhs; - - #if 0 - //pair.first->second=lhs; - pair.first->second=new_rhs; - } - return pair.first->second; -#endif + // 1) cprover_string_length= *(rhs->length) + symbol_exprt length_lhs=new_symbol( + "cprover_string_length", length_type); + + member_exprt deref_length(deref, "length", length_type); + assignments.emplace_back(length_lhs, deref_length); + + // 2) cprover_string_array = *(rhs->data) + symbol_exprt array_lhs=new_symbol( + "cprover_string_array", data_type.subtype()); + member_exprt data(deref, "data", data_type); + dereference_exprt deref_data(data, data_type.subtype()); + assignments.emplace_back(array_lhs, deref_data); + + // 3) cprover_string = { cprover_string_length; cprover_string_array } + // This assignment is useful for finding witnessing strings for counter + // examples + refined_string_typet ref_type(length_type, java_char_type()); + string_exprt new_rhs(length_lhs, array_lhs, ref_type); + + symbol_exprt lhs=new_symbol("cprover_string", new_rhs.type()); + assignments.emplace_back(lhs, new_rhs); + + insert_assignments(goto_program, i_it, assignments); + i_it=goto_program.insert_after(i_it); + return new_rhs; } else if(rhs.id()==ID_typecast && implements_java_char_sequence(rhs.op0().type())) @@ -560,10 +545,8 @@ void string_refine_preprocesst::make_string_assign( // Adding a string expr in the map refined_string_typet ref_type(length_type, data_type.subtype().subtype()); string_exprt str(tmp_length, tmp_array, ref_type); - symbol_exprt cprover_string_sym=new_tmp_symbol("tmp_cprover_string", ref_type); -#if 0 - java_to_cprover_strings[lhs]=cprover_string_sym; -#endif + symbol_exprt cprover_string_sym=new_tmp_symbol("tmp_cprover_string", + ref_type); std::list assigns; assigns.emplace_back(lhs, malloc_expr); @@ -838,7 +821,7 @@ void string_refine_preprocesst::make_string_function_side_effect( const std::string &signature) { const code_function_callt &function_call=to_code_function_call(i_it->code); - code_assignt assign(function_call.lhs(),function_call.arguments()[0]); + code_assignt assign(function_call.lhs(), function_call.arguments()[0]); make_string_function( goto_program, i_it, function_name, signature, true, false); if(assign.lhs().is_not_nil()) @@ -944,7 +927,7 @@ exprt string_refine_preprocesst::make_cprover_char_array_assign( const source_locationt &location) { typet type=ns.follow(rhs.type()); - assert(type.id()==ID_struct && + assert(type.id()==ID_struct && to_struct_type(type).get_tag()=="java::array[char]"); // We do the following assignments: @@ -1029,7 +1012,8 @@ void string_refine_preprocesst::make_char_array_function( if(function_name==ID_cprover_string_copy_func) { assert(is_java_char_array_pointer_type(args[start_index].type())); - dereference_exprt char_array(args[start_index], args[start_index].type().subtype()); + dereference_exprt char_array(args[start_index], + args[start_index].type().subtype()); exprt string=make_cprover_char_array_assign( goto_program, i_it, char_array, location); @@ -1259,15 +1243,6 @@ void string_refine_preprocesst::replace_string_calls( if(uncasted.id()==ID_typecast) uncasted=to_typecast_expr(uncasted).op0(); - #if 0 - if(implements_java_char_sequence(uncasted.type())) - { - auto it=java_to_cprover_strings.find(uncasted); - if(it!=java_to_cprover_strings.end()) - java_to_cprover_strings.insert(std::make_pair(assignment.lhs(), it->second)); - } -#endif - if(new_rhs.id()==ID_function_application) { function_application_exprt f=to_function_application_expr(new_rhs); @@ -1554,7 +1529,6 @@ void string_refine_preprocesst::initialize_string_function_table() signatures["java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" "Ljava/lang/StringBuilder;"]="SISS"; signatures["java::java.lang.String.intern:()Ljava/lang/String;"]="SV"; - } /*******************************************************************\ From 6ecf65fc530c632a3bfdea671a334da7706fabbb Mon Sep 17 00:00:00 2001 From: Joel Allred Date: Tue, 28 Feb 2017 17:03:16 +0000 Subject: [PATCH 16/17] Linting corrections to /solvers/refinement/ --- .../refinement/string_constraint_generator.h | 3 +-- .../string_constraint_generator_comparison.cpp | 3 ++- .../string_constraint_generator_main.cpp | 4 ++-- src/solvers/refinement/string_refinement.cpp | 17 +++++++---------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 3f0db68738f..503b0705d46 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -96,7 +96,7 @@ class string_constraint_generatort const std::size_t MAX_FLOAT_LENGTH=15; const std::size_t MAX_DOUBLE_LENGTH=30; - std::map function_application_cache; + std::map function_application_cache; static irep_idt extract_java_string(const symbol_exprt &s); @@ -305,7 +305,6 @@ class string_constraint_generatort exprt is_low_surrogate(const exprt &chr) const; exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - }; #endif diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 7dab98567da..a51e2abe3cd 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -289,7 +289,8 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( axioms.push_back(a3); symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); - string_constraintt a4(i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); + string_constraintt a4( + i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); axioms.push_back(a4); return res; diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index aaca632ebc5..8ba7f11eb1b 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -381,7 +381,7 @@ exprt string_constraint_generatort::add_axioms_for_function_application( function_application_exprt new_expr(expr); // TODO: This part needs some improvement. // Stripping the symbol name is not a very robust process. - new_expr.function() = symbol_exprt(str_id.substr(0,pos+4)); + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); assert(get_mode()==ID_java); new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); @@ -402,7 +402,7 @@ exprt string_constraint_generatort::add_axioms_for_function_application( if(pos!=std::string::npos) { function_application_exprt new_expr(expr); - new_expr.function() = symbol_exprt(str_id.substr(0,pos+4)); + new_expr.function() = symbol_exprt(str_id.substr(0, pos+4)); new_expr.type() = refined_string_typet(java_int_type(), java_char_type()); auto res_it=function_application_cache.insert(std::make_pair(new_expr, diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index cc0bbecf31d..00ce0acf85b 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -16,7 +16,6 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include -#include #include #include #include @@ -126,7 +125,7 @@ void string_refinementt::add_symbol_to_symbol_map reverse_symbol_resolve[new_rhs].push_back(lhs); std::list symbols_to_update_with_new_rhs(reverse_symbol_resolve[rhs]); - for(exprt item:symbols_to_update_with_new_rhs) + for(exprt item : symbols_to_update_with_new_rhs) { symbol_resolve[item]=new_rhs; reverse_symbol_resolve[new_rhs].push_back(item); @@ -154,8 +153,8 @@ void string_refinementt::set_char_array_equality( for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) { // Introduce axioms to map symbolic rhs to its char array. - index_exprt arraycell(rhs, from_integer(i,index_type)); - equal_exprt arrayeq(arraycell,rhs.operands()[i]); + index_exprt arraycell(rhs, from_integer(i, index_type)); + equal_exprt arrayeq(arraycell, rhs.operands()[i]); generator.axioms.push_back(arrayeq); } } @@ -503,7 +502,7 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) } unsigned n; - if(to_unsigned_integer(to_constant_expr(size), n)) + if(to_unsigned_integer(to_constant_expr(size_val), n)) { debug() << "(sr::get_array) size is not a valid"; return empty_ret; @@ -524,7 +523,7 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) return empty_ret; } - unsigned concrete_array[n]; + std::vector concrete_array(n); if(arr_val.id()=="array-list") { @@ -537,7 +536,6 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) if(idx Date: Wed, 1 Mar 2017 12:03:27 +0000 Subject: [PATCH 17/17] Adding option to concretize result and get method in string solver The option to concretize makes sure that the model that we get in the end is correct. Overiding the get method is necessary to get the actual valuation of string gotten by the solver. This two additions makes the trace generation for program with strings more likely to be actual traces of the program. --- src/solvers/refinement/string_refinement.cpp | 176 ++++++++++++++++--- src/solvers/refinement/string_refinement.h | 20 ++- 2 files changed, 165 insertions(+), 31 deletions(-) diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 00ce0acf85b..2188ad3b833 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -21,10 +21,27 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include +/*******************************************************************\ + +Constructor: string_refinementt + + Inputs: a namespace, a decision procedure, a bound on the number + of refinements and a boolean flag `concretize_result` + + Purpose: refinement_bound is a bound on the number of refinement allowed. + if `concretize_result` is set to true, at the end of the decision + procedure, the solver try to find a concrete value for each + character + +\*******************************************************************/ + string_refinementt::string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound): + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound): supert(_ns, _prop), use_counter_example(false), + do_concretizing(false), initial_loop_bound(refinement_bound) { } @@ -194,7 +211,7 @@ exprt string_refinementt::substitute_function_applications(exprt expr) return expr; } -bool string_refinementt::is_char_array(const typet &type) +bool string_refinementt::is_char_array(const typet &type) const { if(type.id()==ID_symbol) return is_char_array(ns.follow(type)); @@ -220,7 +237,12 @@ bool string_refinementt::add_axioms_for_string_assigns(const exprt &lhs, if(is_char_array(rhs.type())) { set_char_array_equality(lhs, rhs); - add_symbol_to_symbol_map(lhs, rhs); + if(rhs.id() != ID_nondet_symbol) + add_symbol_to_symbol_map(lhs, rhs); + else + add_symbol_to_symbol_map( + lhs, generator.fresh_symbol("nondet_array", lhs.type())); + return false; } if(refined_string_typet::is_refined_string_type(rhs.type())) @@ -235,6 +257,46 @@ bool string_refinementt::add_axioms_for_string_assigns(const exprt &lhs, /*******************************************************************\ +Function: string_refinementt::concretize_results + + Purpose: For each string whose length has been solved, add constants + to the index set to force the solver to pick concrete values + for each character + +\*******************************************************************/ + +void string_refinementt::concretize_results() +{ + for(const auto& it : symbol_resolve) + { + if(refined_string_typet::is_refined_string_type(it.second.type())) + { + string_exprt str=to_string_expr(it.second); + exprt length=current_model[str.length()]; + mp_integer found_length; + if(!to_integer(length, found_length)) + { + assert(found_length.is_long() && found_length >= 0); + size_t concretize_limit=found_length.to_long(); + concretize_limit=concretize_limit>MAX_CONCRETE_STRING_SIZE? + MAX_CONCRETE_STRING_SIZE:concretize_limit; + exprt content_expr=str.content(); + replace_expr(current_model, content_expr); + for(size_t i=0; i violated; @@ -746,10 +848,10 @@ bool string_refinementt::check_axioms() exprt prem=axiom.premise(); exprt body=axiom.body(); - replace_expr(fmodel, bound_inf); - replace_expr(fmodel, bound_sup); - replace_expr(fmodel, prem); - replace_expr(fmodel, body); + replace_expr(current_model, bound_inf); + replace_expr(current_model, bound_sup); + replace_expr(current_model, prem); + replace_expr(current_model, body); string_constraintt axiom_in_model( univ_var, bound_inf, bound_sup, prem, body); @@ -1320,3 +1422,23 @@ void string_refinementt::instantiate_not_contains( new_lemmas.push_back(witness_bounds); } } + +/*******************************************************************\ + +Function: string_refinementt::get + + Inputs: an expression + + Outputs: an expression + + Purpose: evaluation of the expression in the current model + +\*******************************************************************/ + +exprt string_refinementt::get(const exprt &expr) const +{ + exprt ecopy(expr); + replace_expr(symbol_resolve, ecopy); + replace_expr(current_model, ecopy); + return supert::get(ecopy); +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index c8ee98f1385..eee5f37b1de 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -28,15 +28,18 @@ Author: Alberto Griggio, alberto.griggio@gmail.com class string_refinementt: public bv_refinementt { public: - // refinement_bound is a bound on the number of refinement allowed string_refinementt( - const namespacet &_ns, propt &_prop, unsigned refinement_bound); + const namespacet &_ns, + propt &_prop, + unsigned refinement_bound); void set_mode(); // Should we use counter examples at each iteration? bool use_counter_example; + bool do_concretizing; + virtual std::string decision_procedure_text() const override { return "string refinement loop with "+prop.solver_text(); @@ -44,6 +47,8 @@ class string_refinementt: public bv_refinementt static exprt is_positive(const exprt &x); + exprt get(const exprt &expr) const override; + protected: typedef std::set expr_sett; typedef std::list exprt_listt; @@ -78,6 +83,10 @@ class string_refinementt: public bv_refinementt std::map reverse_symbol_resolve; std::list> non_string_axioms; + // Valuation in the current model of the symbols that have been created + // by the solver + replace_mapt current_model; + void add_equivalence(const irep_idt & lhs, const exprt & rhs); void display_index_set(); @@ -88,14 +97,14 @@ class string_refinementt: public bv_refinementt typet substitute_java_string_types(typet type); exprt substitute_java_strings(exprt expr); void add_symbol_to_symbol_map(const exprt &lhs, const exprt &rhs); - bool is_char_array(const typet &type); + bool is_char_array(const typet &type) const; bool add_axioms_for_string_assigns(const exprt &lhs, const exprt &rhs); void set_to(const exprt &expr, bool value) override; void add_instantiations(); void add_negation_of_constraint_to_solver( const string_constraintt &axiom, supert &solver); - replace_mapt fill_model(); + void fill_model(); bool check_axioms(); void set_char_array_equality(const exprt &lhs, const exprt &rhs); @@ -120,7 +129,10 @@ class string_refinementt: public bv_refinementt exprt simplify_sum(const exprt &f) const; + void concretize_results(); + exprt get_array(const exprt &arr, const exprt &size); + exprt get_array(const exprt &arr); std::string string_of_array(const array_exprt &arr); };