diff --git a/jbmc/src/janalyzer/janalyzer_parse_options.cpp b/jbmc/src/janalyzer/janalyzer_parse_options.cpp index 75f535cc141..de2c69a031d 100644 --- a/jbmc/src/janalyzer/janalyzer_parse_options.cpp +++ b/jbmc/src/janalyzer/janalyzer_parse_options.cpp @@ -634,7 +634,7 @@ bool janalyzer_parse_optionst::process_goto_program(const optionst &options) goto_partial_inline(goto_model, ui_message_handler); // remove returns, gcc vectors, complex - remove_returns(goto_model); + remove_returns(goto_model, log.get_message_handler()); remove_vector(goto_model); remove_complex(goto_model); diff --git a/jbmc/src/jbmc/jbmc_parse_options.cpp b/jbmc/src/jbmc/jbmc_parse_options.cpp index 1cd907e239a..e4732ed306e 100644 --- a/jbmc/src/jbmc/jbmc_parse_options.cpp +++ b/jbmc/src/jbmc/jbmc_parse_options.cpp @@ -764,7 +764,7 @@ void jbmc_parse_optionst::process_goto_function( !model.can_produce_function(id); }; - remove_returns(function, function_is_stub); + remove_returns(function, log.get_message_handler(), function_is_stub); replace_java_nondet(function); diff --git a/jbmc/src/jdiff/jdiff_parse_options.cpp b/jbmc/src/jdiff/jdiff_parse_options.cpp index 627d324152d..e716ad7fa68 100644 --- a/jbmc/src/jdiff/jdiff_parse_options.cpp +++ b/jbmc/src/jdiff/jdiff_parse_options.cpp @@ -293,7 +293,7 @@ bool jdiff_parse_optionst::process_goto_program( instrument_preconditions(goto_model); // remove returns, gcc vectors, complex - remove_returns(goto_model); + remove_returns(goto_model, log.get_message_handler()); remove_vector(goto_model); remove_complex(goto_model); rewrite_union(goto_model); diff --git a/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp b/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp index 872066ded25..920b915afb8 100644 --- a/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp +++ b/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp @@ -154,7 +154,9 @@ void load_and_test_method( // Then test both situations. THEN("Replace nondet should work after remove returns has been called.") { - remove_returns(model_function, [](const irep_idt &) { return false; }); + remove_returns(model_function, null_message_handler, [](const irep_idt &) { + return false; + }); replace_java_nondet(model_function); @@ -165,7 +167,9 @@ void load_and_test_method( { replace_java_nondet(model_function); - remove_returns(model_function, [](const irep_idt &) { return false; }); + remove_returns(model_function, null_message_handler, [](const irep_idt &) { + return false; + }); validate_nondet_method_removed(goto_function.body.instructions); } @@ -176,7 +180,9 @@ void load_and_test_method( "Replace and convert nondet should work after remove returns has been " "called.") { - remove_returns(model_function, [](const irep_idt &) { return false; }); + remove_returns(model_function, null_message_handler, [](const irep_idt &) { + return false; + }); replace_java_nondet(model_function); @@ -193,7 +199,9 @@ void load_and_test_method( convert_nondet(model_function, null_message_handler, params, ID_java); - remove_returns(model_function, [](const irep_idt &) { return false; }); + remove_returns(model_function, null_message_handler, [](const irep_idt &) { + return false; + }); validate_nondets_converted(goto_function.body.instructions); } diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index c3ba124b029..730e985f433 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -810,7 +810,7 @@ bool cbmc_parse_optionst::process_goto_program( instrument_preconditions(goto_model); // remove returns, gcc vectors, complex - remove_returns(goto_model); + remove_returns(goto_model, log.get_message_handler()); remove_vector(goto_model); remove_complex(goto_model); rewrite_union(goto_model); diff --git a/src/goto-analyzer/goto_analyzer_parse_options.cpp b/src/goto-analyzer/goto_analyzer_parse_options.cpp index 962cbd39dbb..7ce4e3832c0 100644 --- a/src/goto-analyzer/goto_analyzer_parse_options.cpp +++ b/src/goto-analyzer/goto_analyzer_parse_options.cpp @@ -686,7 +686,7 @@ bool goto_analyzer_parse_optionst::process_goto_program( goto_partial_inline(goto_model, ui_message_handler); // remove returns, gcc vectors, complex - remove_returns(goto_model); + remove_returns(goto_model, log.get_message_handler()); remove_vector(goto_model); remove_complex(goto_model); diff --git a/src/goto-analyzer/static_simplifier.cpp b/src/goto-analyzer/static_simplifier.cpp index 2f6a86658f7..04b3cd2fa0a 100644 --- a/src/goto-analyzer/static_simplifier.cpp +++ b/src/goto-analyzer/static_simplifier.cpp @@ -186,7 +186,7 @@ bool static_simplifier( } // restore return types before writing the binary - restore_returns(goto_model); + restore_returns(goto_model, m.get_message_handler()); goto_model.goto_functions.update(); m.status() << "Writing goto binary" << messaget::eom; diff --git a/src/goto-diff/goto_diff_parse_options.cpp b/src/goto-diff/goto_diff_parse_options.cpp index 2753ec1d1ba..aa383a4e96c 100644 --- a/src/goto-diff/goto_diff_parse_options.cpp +++ b/src/goto-diff/goto_diff_parse_options.cpp @@ -335,7 +335,7 @@ bool goto_diff_parse_optionst::process_goto_program( instrument_preconditions(goto_model); // remove returns, gcc vectors, complex - remove_returns(goto_model); + remove_returns(goto_model, log.get_message_handler()); remove_vector(goto_model); remove_complex(goto_model); rewrite_union(goto_model); diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index c21c8935dc6..ca692cc650f 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -649,7 +649,7 @@ int goto_instrument_parse_optionst::doit() // restore RETURN instructions in case remove_returns had been // applied - restore_returns(goto_model); + restore_returns(goto_model, log.get_message_handler()); if(cmdline.args.size()==2) { @@ -899,7 +899,7 @@ void goto_instrument_parse_optionst::do_remove_returns() remove_returns_done=true; log.status() << "Removing returns" << messaget::eom; - remove_returns(goto_model); + remove_returns(goto_model, log.get_message_handler()); } void goto_instrument_parse_optionst::get_goto_program() diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index 870590d8ffc..afccbd6fc8d 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -3,6 +3,7 @@ SRC = adjust_float_expressions.cpp \ class_hierarchy.cpp \ class_identifier.cpp \ compute_called_functions.cpp \ + collect_function_pointer_targets.cpp \ destructor.cpp \ destructor_tree.cpp \ elf_reader.cpp \ diff --git a/src/goto-programs/collect_function_pointer_targets.cpp b/src/goto-programs/collect_function_pointer_targets.cpp new file mode 100644 index 00000000000..b595661d6b4 --- /dev/null +++ b/src/goto-programs/collect_function_pointer_targets.cpp @@ -0,0 +1,301 @@ +/*******************************************************************\ + +Module: Program Transformation + +Author: diffblue + +\*******************************************************************/ + +/// \file +/// Program Transformation + +#include "collect_function_pointer_targets.h" + +#include + +collect_function_pointer_targetst::collect_function_pointer_targetst( + message_handlert &message_handler, + const symbol_tablet &symbol_table, + bool only_resolve_const_fps) + : log(message_handler), + ns(symbol_table), + symbol_table(symbol_table), + only_resolve_const_fps(only_resolve_const_fps), + initialised(false) +{ +} + +void collect_function_pointer_targetst::initialise_taken_addresses( + const goto_functionst &goto_functions) +{ + for(const auto &s : symbol_table.symbols) + compute_address_taken_functions(s.second.value, address_taken); + + compute_address_taken_functions(goto_functions, address_taken); +} + +void collect_function_pointer_targetst::initialise_type_map( + const goto_functionst &goto_functions) +{ + for(const auto &fmap_pair : goto_functions.function_map) + type_map.emplace(fmap_pair.first, fmap_pair.second.type); +} + +possible_fp_targets_mapt collect_function_pointer_targetst:: +operator()(const goto_functionst &goto_functions) +{ + if(!initialised) + { + initialise_taken_addresses(goto_functions); + initialise_type_map(goto_functions); + initialised = true; + } + + possible_fp_targets_mapt target_map; + for(const auto &function_pair : goto_functions.function_map) + { + const auto &instructions = function_pair.second.body.instructions; + for(auto target = instructions.begin(); target != instructions.end(); + target++) + { + if( + target->is_function_call() && + target->get_function_call().function().id() == ID_dereference) + { + const auto &function = target->get_function_call().function(); + irep_idt callee_id = get_callee_id(function); + CHECK_RETURN(!callee_id.empty()); + if(target_map.count(callee_id) == 0) + { + target_map.emplace( + callee_id, get_function_pointer_targets(goto_functions, target)); + } + } + } + } + return target_map; +} + +fp_state_targetst +collect_function_pointer_targetst::get_function_pointer_targets( + const goto_functionst &goto_functions, + goto_programt::const_targett &call_site) +{ + PRECONDITION(initialised); + + fp_state_targetst stateful_targets; + auto &fp_state = stateful_targets.first; + auto &functions = stateful_targets.second; + + for(const auto &function_pair : goto_functions.function_map) + { + const auto &function_body = function_pair.second.body; + const auto &candidates = + get_function_pointer_targets(function_body, call_site); + + functions.insert(candidates.second.begin(), candidates.second.end()); + fp_state.merge(candidates.first); + } + return stateful_targets; +} + +fp_state_targetst +collect_function_pointer_targetst::get_function_pointer_targets( + const goto_programt &goto_program, + goto_programt::const_targett &call_site) +{ + PRECONDITION(call_site->is_function_call()); + + const code_function_callt &code = call_site->get_function_call(); + const auto &function = to_dereference_expr(code.function()); + const auto &refined_call_type = refine_call_type(function.type(), code); + + auto stateful_targets = try_remove_const_fp(goto_program, function.pointer()); + auto &fp_state = stateful_targets.first; + auto &functions = stateful_targets.second; + + fp_state.precise_const_removal = + !fp_state.code_removes_const && functions.size() == 1; + + if( + !fp_state.precise_const_removal && !fp_state.remove_const_found_functions && + !only_resolve_const_fps) + { + // get all type-compatible functions + // whose address is ever taken + for(const auto &type_pair : type_map) + { + const auto &candidate_function_name = type_pair.first; + const auto &candidate_function_type = type_pair.second; + + // only accept as candidate functions such that: + // 1. their address was taken + // 2. their type is compatible with the call-site-function type + // 3. they're not pthread mutex clean-up + if( + address_taken.find(candidate_function_name) != address_taken.end() && + is_type_compatible( + code.lhs().is_not_nil(), + refined_call_type, + candidate_function_type, + ns) && + candidate_function_name != "pthread_mutex_cleanup") + { + functions.insert( + symbol_exprt{candidate_function_name, candidate_function_type}); + } + } + } + return stateful_targets; +} + +fp_state_targetst collect_function_pointer_targetst::try_remove_const_fp( + const goto_programt &goto_program, + const exprt &pointer) +{ + fp_state_targetst stateful_targets; + auto &fp_state = stateful_targets.first; + auto &functions = stateful_targets.second; + + does_remove_constt const_removal_check(goto_program, ns); + fp_state.code_removes_const = const_removal_check().first; + + if(fp_state.code_removes_const) + { + fp_state.remove_const_found_functions = false; + } + else + { + remove_const_function_pointerst fpr( + log.get_message_handler(), ns, symbol_table); + fp_state.remove_const_found_functions = fpr(pointer, functions); + CHECK_RETURN(fp_state.remove_const_found_functions || functions.empty()); + } + return stateful_targets; +} + +code_typet collect_function_pointer_targetst::refine_call_type( + const typet &type, + const code_function_callt &code) +{ + PRECONDITION(can_cast_type(type)); + code_typet call_type = to_code_type(type); + + if(call_type.has_ellipsis() && call_type.parameters().empty()) + { + call_type.remove_ellipsis(); + for(const auto &argument : code.arguments()) + call_type.parameters().push_back(code_typet::parametert{argument.type()}); + } + return call_type; +} + +irep_idt +collect_function_pointer_targetst::get_callee_id(const exprt &called_function) +{ + irep_idt callee_id; + bool contains_code = false; + auto type_contains_code = [&contains_code](const typet &t) { + if(t.id() == ID_code) + contains_code = true; + }; + + called_function.visit_post( + [&callee_id, &type_contains_code, &contains_code](const exprt &e) { + if(e.id() == ID_symbol) + { + e.type().visit(type_contains_code); + if(contains_code) + { + callee_id = to_symbol_expr(e).get_identifier(); + return; + } + } + if(e.id() == ID_dereference) + { + const auto &pointer = to_dereference_expr(e).pointer(); + if(pointer.id() == ID_symbol) + callee_id = to_symbol_expr(pointer).get_identifier(); + if(pointer.id() == ID_member) + { + pointer.type().visit(type_contains_code); + if(contains_code) + callee_id = to_member_expr(pointer).get_component_name(); + } + } + }); + return callee_id; +} + +bool collect_function_pointer_targetst::arg_is_type_compatible( + const typet &call_type, + const typet &function_type, + const namespacet &name_space) +{ + if(call_type == function_type) + return true; + + // any integer-vs-enum-vs-pointer is ok + if( + call_type.id() == ID_signedbv || call_type.id() == ID_unsigned || + call_type.id() == ID_bool || call_type.id() == ID_c_bool || + call_type.id() == ID_c_enum_tag || call_type.id() == ID_c_enum || + call_type.id() == ID_pointer) + { + return function_type.id() == ID_signedbv || + function_type.id() == ID_unsigned || function_type.id() == ID_bool || + function_type.id() == ID_c_bool || + function_type.id() == ID_pointer || + function_type.id() == ID_c_enum || + function_type.id() == ID_c_enum_tag; + } + + return pointer_offset_bits(call_type, name_space) == + pointer_offset_bits(function_type, name_space); +} + +bool collect_function_pointer_targetst::is_type_compatible( + bool return_value_used, + const code_typet &call_type, + const code_typet &function_type, + const namespacet &name_space) +{ + // we are willing to ignore anything that's returned + // if we call with 'void' + if( + return_value_used && call_type.return_type() != empty_typet() && + !arg_is_type_compatible( + call_type.return_type(), function_type.return_type(), name_space)) + return false; + + // let's look at the parameters + const auto &call_parameters = call_type.parameters(); + const auto &function_parameters = function_type.parameters(); + + if(function_type.has_ellipsis() && function_parameters.empty()) + return true; + if(call_type.has_ellipsis() && call_parameters.empty()) + return true; + + // we are quite strict here, could be much more generous + if(call_parameters.size() != function_parameters.size()) + return false; + + for(std::size_t i = 0; i < call_parameters.size(); i++) + if(!arg_is_type_compatible( + call_parameters[i].type(), function_parameters[i].type(), name_space)) + return false; + return true; +} + +possible_fp_targets_mapt get_function_pointer_targets( + message_handlert &message_handler, + const goto_functionst &goto_functions, + const symbol_tablet &symbol_table, + bool only_resolve_const_fps) +{ + collect_function_pointer_targetst collector( + message_handler, symbol_table, only_resolve_const_fps); + + return collector(goto_functions); +} diff --git a/src/goto-programs/collect_function_pointer_targets.h b/src/goto-programs/collect_function_pointer_targets.h new file mode 100644 index 00000000000..4a8aabdfab0 --- /dev/null +++ b/src/goto-programs/collect_function_pointer_targets.h @@ -0,0 +1,168 @@ +/*******************************************************************\ + +Module: Collect Indirect Function Call Targets + +Author: diffblue + +Date: May 2019 + +\*******************************************************************/ + +/// \file +/// Collect Indirect Function Call Targets + +#ifndef CPROVER_GOTO_PROGRAMS_COLLECT_FUNCTION_POINTER_TARGETS_H +#define CPROVER_GOTO_PROGRAMS_COLLECT_FUNCTION_POINTER_TARGETS_H + +#include +#include + +#include +#include + +#include "compute_called_functions.h" +#include "remove_const_function_pointers.h" + +class goto_modelt; + +using possible_fp_targetst = remove_const_function_pointerst::functionst; + +/// Function pointer removal state: +/// Keeping track of the results of preceding analyses: does-remove-const and +/// remove-const-function. Since we maintain state throughout analysing multiple +/// goto-programs, it is necessary to be able to merge state of two subsequent +/// analyses. +struct fp_statet +{ + bool remove_const_found_functions; + bool code_removes_const; + bool precise_const_removal; + + fp_statet() + : remove_const_found_functions(true), + code_removes_const(false), + precise_const_removal(true) + { + } + + void merge(const fp_statet &new_state) + { + if(!new_state.remove_const_found_functions) + remove_const_found_functions = false; + if(new_state.code_removes_const) + code_removes_const = true; + if(!new_state.precise_const_removal) + precise_const_removal = false; + } +}; + +using fp_state_targetst = std::pair; +using possible_fp_targets_mapt = std::map; + +/// Go through the whole model and find all potential function the pointer at +/// all call sites +/// \param message_handler: a message handler for reporting +/// \param goto_model: model to search for potential functions +/// \return a map from ids to sets of function candidates +possible_fp_targets_mapt get_function_pointer_targets( + message_handlert &message_handler, + const goto_functionst &goto_functions, + const symbol_tablet &symbol_table, + bool only_remove_const_fps = false); + +/// Houses the facilities for collecting function pointer targets via the +/// call-operator. +class collect_function_pointer_targetst +{ +public: + collect_function_pointer_targetst( + message_handlert &message_handler, + const symbol_tablet &symbol_table, + bool only_resolve_const_fps); + + /// Interface for running the function pointer targets collection + /// \param goto_functions: the function to search for potential targets + /// \return the map id -> stateful_target + possible_fp_targets_mapt operator()(const goto_functionst &goto_functions); + + /// Extract function name from \p called_functions + /// \param called_function: the function call expression + /// \return function identifier + static irep_idt get_callee_id(const exprt &called_function); + + /// Compare the type of arguments of two functions + /// \param call_type: first function type + /// \param function_type: second function type + /// \param name_space: the namespace to be used to analyse symbols + /// \return true if argument types are compatible + static bool arg_is_type_compatible( + const typet &call_type, + const typet &function_type, + const namespacet &name_space); + + /// Compare the types of two functions + /// \param return_value_used: flag indicating the return value usage + /// \param call_type: first function type + /// \param function_type: second function type + /// \param name_space: the namespace to be used to analyse symbols + /// \return true if types are compatible + static bool is_type_compatible( + bool return_value_used, + const code_typet &call_type, + const code_typet &function_type, + const namespacet &name_space); + +protected: + messaget log; + const namespacet ns; + const symbol_tablet &symbol_table; + bool only_resolve_const_fps; + bool initialised; + + std::unordered_set address_taken; + + using type_mapt = std::map; + type_mapt type_map; + + /// Initialise the set of take addresses + /// \param goto_functions: goto functions to search through + void initialise_taken_addresses(const goto_functionst &goto_functions); + + /// Initialise the type map: function_id -> type + /// \param goto_functions: goto functions to search through + void initialise_type_map(const goto_functionst &goto_functions); + + /// Go through the whole model and find all potential function the pointer at + /// \p call site may point to + /// \param goto_functions: goto functions to search for potential candidates + /// \param call_site: the call site of the function pointer under analysis + /// \return the set of the potential functions + fp_state_targetst get_function_pointer_targets( + const goto_functionst &goto_functions, + goto_programt::const_targett &call_site); + + /// Go through a single function body and find all potential function the + /// pointer at \p call site may point to + /// \param goto_program: function body to search for potential functions + /// \param call_site: the call site of the function pointer under analysis + /// \return the set of the potential functions + fp_state_targetst get_function_pointer_targets( + const goto_programt &goto_program, + goto_programt::const_targett &call_site); + + /// Try to remove the const function pointers + /// \param goto_program: the function body to run the const_removal_check on + /// \param functions: the list of functions the const removal found + /// \param pointer: the pointer to be resolved + fp_state_targetst + try_remove_const_fp(const goto_programt &goto_program, const exprt &pointer); + + /// Refine the \p type in case the forward declaration was incomplete + /// \param type: the type to be refined + /// \param code: the function call code to get the arguments from + /// \return the refined call type + static code_typet + refine_call_type(const typet &type, const code_function_callt &code); +}; + +#endif // CPROVER_GOTO_PROGRAMS_COLLECT_FUNCTION_POINTER_TARGETS_H diff --git a/src/goto-programs/remove_function_pointers.cpp b/src/goto-programs/remove_function_pointers.cpp index 3c579f386b1..668a55bbc47 100644 --- a/src/goto-programs/remove_function_pointers.cpp +++ b/src/goto-programs/remove_function_pointers.cpp @@ -25,6 +25,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "remove_skip.h" +#include "collect_function_pointer_targets.h" #include "compute_called_functions.h" #include "remove_const_function_pointers.h" @@ -38,27 +39,21 @@ class remove_function_pointerst bool only_resolve_const_fps, const goto_functionst &goto_functions); - void operator()(goto_functionst &goto_functions); - + /// Call the function pointer removal via an operator + /// \param goto_functions: functions to modify + /// \param target_map: candidate functions + void operator()( + goto_functionst &goto_functions, + const possible_fp_targets_mapt &target_map); + + /// Call the function pointer removal within the \p goto_program + /// \param goto_program: program to modify + /// \param function_id: identifier of the function pointer to be removed + /// \param target_map: candidate functions bool remove_function_pointers( - goto_programt &goto_program, - const irep_idt &function_id); - - // a set of function symbols - using functionst = remove_const_function_pointerst::functionst; - - /// Replace a call to a dynamic function at location - /// target in the given goto-program by a case-split - /// over a given set of functions - /// \param goto_program: The goto program that contains target - /// \param function_id: Name of function containing the target - /// \param target: location with function call with function pointer - /// \param functions: The set of functions to consider - void remove_function_pointer( goto_programt &goto_program, const irep_idt &function_id, - goto_programt::targett target, - const functionst &functions); + const possible_fp_targets_mapt &target_map); protected: messaget log; @@ -74,36 +69,67 @@ class remove_function_pointerst // --remove-const-function-pointers instead of --remove-function-pointers bool only_resolve_const_fps; + /// Replace a call to a dynamic function at location + /// \p target in the given goto-program by a case-split + /// over a given set of functions + /// \param goto_program: The goto program that contains target + /// \param function_id: Name of function containing the target + /// \param target: location with function call with function pointer + /// \param functions: the set of functions to consider + void remove_function_pointer( + goto_programt &goto_program, + const irep_idt &function_id, + goto_programt::targett target, + const possible_fp_targetst &functions); + /// Replace a call to a dynamic function at location /// target in the given goto-program by determining /// functions that have a compatible signature /// \param goto_program: The goto program that contains target /// \param function_id: Name of function containing the target /// \param target: location with function call with function pointer + /// \param stateful_targets: the set of functions to consider void remove_function_pointer( goto_programt &goto_program, const irep_idt &function_id, - goto_programt::targett target); - - std::unordered_set address_taken; - - typedef std::map type_mapt; - type_mapt type_map; - - bool is_type_compatible( - bool return_value_used, - const code_typet &call_type, - const code_typet &function_type); - - bool arg_is_type_compatible( - const typet &call_type, - const typet &function_type); + goto_programt::targett target, + const fp_state_targetst &stateful_targets); void fix_argument_types(code_function_callt &function_call); void fix_return_type( const irep_idt &in_function_id, code_function_callt &function_call, goto_programt &dest); + + /// From *fp() build the following sequence of instructions: + /// + /// if (fp=&f1) then goto loc1 + /// if (fp=&f2) then goto loc2 + /// .. + /// if (fp=&fn) then goto locN + /// loc1: f1(); goto N+1; + /// loc2: f2(); goto N+1; + /// .. + /// locN: fn(); goto N+1; + /// locN+1: + /// + /// \param functions: set of function candidates + /// \param code: the original function call + /// \param function_id: name of the function + /// \param target: iterator to the target call-site + /// \return the GOTO program for the new code + goto_programt build_new_code( + const possible_fp_targetst &functions, + const code_function_callt &code, + const irep_idt &function_id, + goto_programt::targett target); + + /// Log the statistics collected during this analysis + /// \param target: iterator to the target call-site + /// \param functions: the set of function candidates + void remove_function_pointer_log( + goto_programt::targett target, + const possible_fp_targetst &functions) const; }; remove_function_pointerst::remove_function_pointerst( @@ -118,90 +144,6 @@ remove_function_pointerst::remove_function_pointerst( add_safety_assertion(_add_safety_assertion), only_resolve_const_fps(only_resolve_const_fps) { - for(const auto &s : symbol_table.symbols) - compute_address_taken_functions(s.second.value, address_taken); - - compute_address_taken_functions(goto_functions, address_taken); - - // build type map - forall_goto_functions(f_it, goto_functions) - type_map.emplace(f_it->first, f_it->second.type); -} - -bool remove_function_pointerst::arg_is_type_compatible( - const typet &call_type, - const typet &function_type) -{ - if(call_type == function_type) - return true; - - // any integer-vs-enum-vs-pointer is ok - if( - call_type.id() == ID_signedbv || call_type.id() == ID_unsigned || - call_type.id() == ID_bool || call_type.id() == ID_c_bool || - call_type.id() == ID_c_enum_tag || call_type.id() == ID_c_enum || - call_type.id() == ID_pointer) - { - return function_type.id() == ID_signedbv || - function_type.id() == ID_unsigned || function_type.id() == ID_bool || - function_type.id() == ID_c_bool || - function_type.id() == ID_pointer || - function_type.id() == ID_c_enum || - function_type.id() == ID_c_enum_tag; - } - - return pointer_offset_bits(call_type, ns) == - pointer_offset_bits(function_type, ns); -} - -bool remove_function_pointerst::is_type_compatible( - bool return_value_used, - const code_typet &call_type, - const code_typet &function_type) -{ - // we are willing to ignore anything that's returned - // if we call with 'void' - if(!return_value_used) - { - } - else if(call_type.return_type() == empty_typet()) - { - // ok - } - else - { - if(!arg_is_type_compatible(call_type.return_type(), - function_type.return_type())) - return false; - } - - // let's look at the parameters - const code_typet::parameterst &call_parameters=call_type.parameters(); - const code_typet::parameterst &function_parameters=function_type.parameters(); - - if(function_type.has_ellipsis() && - function_parameters.empty()) - { - // always ok - } - else if(call_type.has_ellipsis() && - call_parameters.empty()) - { - // always ok - } - else - { - // we are quite strict here, could be much more generous - if(call_parameters.size()!=function_parameters.size()) - return false; - - for(std::size_t i=0; iget_function_call(); - - const auto &function = to_dereference_expr(code.function()); - - // this better have the right type - code_typet call_type=to_code_type(function.type()); - - // refine the type in case the forward declaration was incomplete - if(call_type.has_ellipsis() && - call_type.parameters().empty()) - { - call_type.remove_ellipsis(); - forall_expr(it, code.arguments()) - call_type.parameters().push_back( - code_typet::parametert(it->type())); - } - - bool found_functions; - - const exprt &pointer = function.pointer(); - remove_const_function_pointerst::functionst functions; - does_remove_constt const_removal_check(goto_program, ns); - const auto does_remove_const = const_removal_check(); - if(does_remove_const.first) + const auto &fp_state = stateful_targets.first; + const auto &functions = stateful_targets.second; + if(fp_state.precise_const_removal) { - log.warning().source_location = does_remove_const.second; - log.warning() << "cast from const to non-const pointer found, " - << "only worst case function pointer removal will be done." - << messaget::eom; - found_functions=false; + auto call = target->get_function_call(); + call.function() = *functions.cbegin(); + target->set_function_call(call); } - else + else if(fp_state.remove_const_found_functions || !only_resolve_const_fps) { - remove_const_function_pointerst fpr( - log.get_message_handler(), ns, symbol_table); - - found_functions=fpr(pointer, functions); - - // if found_functions is false, functions should be empty - // however, it is possible for found_functions to be true and functions - // to be empty (this happens if the pointer can only resolve to the null - // pointer) - CHECK_RETURN(found_functions || functions.empty()); - - if(functions.size()==1) - { - auto call = target->get_function_call(); - call.function() = *functions.cbegin(); - target->set_function_call(call); - return; - } + // If this mode is enabled, we only remove function pointers + // that we can resolve either to an exact function, or an exact subset + // (e.g. a variable index in a constant array). + // Since we haven't found functions, we would now resort to + // replacing the function pointer with any function with a valid signature + // Since we don't want to do that, we abort. + remove_function_pointer(goto_program, function_id, target, functions); } - - if(!found_functions) - { - if(only_resolve_const_fps) - { - // If this mode is enabled, we only remove function pointers - // that we can resolve either to an exact function, or an exact subset - // (e.g. a variable index in a constant array). - // Since we haven't found functions, we would now resort to - // replacing the function pointer with any function with a valid signature - // Since we don't want to do that, we abort. - return; - } - - bool return_value_used=code.lhs().is_not_nil(); - - // get all type-compatible functions - // whose address is ever taken - for(const auto &t : type_map) - { - // address taken? - if(address_taken.find(t.first)==address_taken.end()) - continue; - - // type-compatible? - if(!is_type_compatible(return_value_used, call_type, t.second)) - continue; - - if(t.first=="pthread_mutex_cleanup") - continue; - - symbol_exprt expr(t.first, t.second); - functions.insert(expr); - } - } - - remove_function_pointer(goto_program, function_id, target, functions); } void remove_function_pointerst::remove_function_pointer( goto_programt &goto_program, const irep_idt &function_id, goto_programt::targett target, - const functionst &functions) + const possible_fp_targetst &functions) { const code_function_callt &code = target->get_function_call(); - const exprt &function = code.function(); - const exprt &pointer = to_dereference_expr(function).pointer(); - - // the final target is a skip - goto_programt final_skip; - - goto_programt::targett t_final = final_skip.add(goto_programt::make_skip()); - // build the calls and gotos - - goto_programt new_code_calls; - goto_programt new_code_gotos; - - for(const auto &fun : functions) - { - // call function - auto new_call = code; - new_call.function() = fun; - - // the signature of the function might not match precisely - fix_argument_types(new_call); - - goto_programt tmp; - fix_return_type(function_id, new_call, tmp); - - auto call = new_code_calls.add(goto_programt::make_function_call(new_call)); - new_code_calls.destructive_append(tmp); - - // goto final - new_code_calls.add(goto_programt::make_goto(t_final, true_exprt())); - - // goto to call - const address_of_exprt address_of(fun, pointer_type(fun.type())); - - const auto casted_address = - typecast_exprt::conditional_cast(address_of, pointer.type()); - - new_code_gotos.add( - goto_programt::make_goto(call, equal_exprt(pointer, casted_address))); - } - - // fall-through - if(add_safety_assertion) - { - goto_programt::targett t = - new_code_gotos.add(goto_programt::make_assertion(false_exprt())); - t->source_location.set_property_class("pointer dereference"); - t->source_location.set_comment("invalid function pointer"); - } - new_code_gotos.add(goto_programt::make_assumption(false_exprt())); - - goto_programt new_code; - - // patch them all together - new_code.destructive_append(new_code_gotos); - new_code.destructive_append(new_code_calls); - new_code.destructive_append(final_skip); - - // set locations - Forall_goto_program_instructions(it, new_code) - { - irep_idt property_class=it->source_location.get_property_class(); - irep_idt comment=it->source_location.get_comment(); - it->source_location=target->source_location; - if(!property_class.empty()) - it->source_location.set_property_class(property_class); - if(!comment.empty()) - it->source_location.set_comment(comment); - } + auto new_code = build_new_code(functions, code, function_id, target); goto_programt::targett next_target=target; next_target++; @@ -450,32 +255,13 @@ void remove_function_pointerst::remove_function_pointer( target->type=OTHER; // report statistics - log.statistics().source_location = target->source_location; - log.statistics() << "replacing function pointer by " << functions.size() - << " possible targets" << messaget::eom; - - // list the names of functions when verbosity is at debug level - log.conditional_output( - log.debug(), [&functions](messaget::mstreamt &mstream) { - mstream << "targets: "; - - bool first = true; - for(const auto &function : functions) - { - if(!first) - mstream << ", "; - - mstream << function.get_identifier(); - first = false; - } - - mstream << messaget::eom; - }); + remove_function_pointer_log(target, functions); } bool remove_function_pointerst::remove_function_pointers( goto_programt &goto_program, - const irep_idt &function_id) + const irep_idt &function_id, + const possible_fp_targets_mapt &target_map) { bool did_something=false; @@ -483,11 +269,16 @@ bool remove_function_pointerst::remove_function_pointers( if(target->is_function_call()) { const code_function_callt &code = target->get_function_call(); + const auto &callee = code.function(); if(code.function().id()==ID_dereference) { - remove_function_pointer(goto_program, function_id, target); - did_something=true; + auto callee_id = + collect_function_pointer_targetst::get_callee_id(callee); + CHECK_RETURN(target_map.count(callee_id) > 0); + remove_function_pointer( + goto_program, function_id, target, target_map.at(callee_id)); + did_something = true; } } @@ -497,7 +288,9 @@ bool remove_function_pointerst::remove_function_pointers( return did_something; } -void remove_function_pointerst::operator()(goto_functionst &functions) +void remove_function_pointerst::operator()( + goto_functionst &functions, + const possible_fp_targets_mapt &target_map) { bool did_something=false; @@ -508,7 +301,7 @@ void remove_function_pointerst::operator()(goto_functionst &functions) { goto_programt &goto_program=f_it->second.body; - if(remove_function_pointers(goto_program, f_it->first)) + if(remove_function_pointers(goto_program, f_it->first, target_map)) did_something=true; } @@ -533,7 +326,11 @@ bool remove_function_pointers( only_remove_const_fps, goto_functions); - return rfp.remove_function_pointers(goto_program, function_id); + return rfp.remove_function_pointers( + goto_program, + function_id, + get_function_pointer_targets( + _message_handler, goto_functions, symbol_table, only_remove_const_fps)); } void remove_function_pointers( @@ -551,7 +348,10 @@ void remove_function_pointers( only_remove_const_fps, goto_functions); - rfp(goto_functions); + rfp( + goto_functions, + get_function_pointer_targets( + _message_handler, goto_functions, symbol_table, only_remove_const_fps)); } void remove_function_pointers(message_handlert &_message_handler, @@ -566,3 +366,117 @@ void remove_function_pointers(message_handlert &_message_handler, add_safety_assertion, only_remove_const_fps); } + +void remove_function_pointers( + message_handlert &_message_handler, + goto_modelt &goto_model, + const possible_fp_targets_mapt &target_map, + bool add_safety_assertion, + bool only_remove_const_fps) +{ + remove_function_pointerst rfp( + _message_handler, + goto_model.symbol_table, + add_safety_assertion, + only_remove_const_fps, + goto_model.goto_functions); + + rfp(goto_model.goto_functions, target_map); +} + +goto_programt remove_function_pointerst::build_new_code( + const possible_fp_targetst &functions, + const code_function_callt &code, + const irep_idt &function_id, + goto_programt::targett target) +{ + // the final target is a skip + goto_programt final_skip; + const exprt &pointer = to_dereference_expr(code.function()).pointer(); + goto_programt::targett t_final = final_skip.add(goto_programt::make_skip()); + + goto_programt new_code_calls; + goto_programt new_code_gotos; + for(const auto &function : functions) + { + // call function + auto new_call = code; + + new_call.function() = function; + + // the signature of the function might not match precisely + fix_argument_types(new_call); + + goto_programt tmp; + fix_return_type(function_id, new_call, tmp); + + auto call = new_code_calls.add(goto_programt::make_function_call(new_call)); + new_code_calls.destructive_append(tmp); + + // goto final + new_code_calls.add(goto_programt::make_goto(t_final, true_exprt{})); + + // goto to call + const auto casted_address = typecast_exprt::conditional_cast( + address_of_exprt{function, pointer_type(function.type())}, + pointer.type()); + + const auto goto_it = new_code_gotos.add( + goto_programt::make_goto(call, equal_exprt{pointer, casted_address})); + + // set locations for the above goto + irep_idt property_class = goto_it->source_location.get_property_class(); + irep_idt comment = goto_it->source_location.get_comment(); + goto_it->source_location = target->source_location; + if(!property_class.empty()) + goto_it->source_location.set_property_class(property_class); + if(!comment.empty()) + goto_it->source_location.set_comment(comment); + } + + // fall-through + if(add_safety_assertion) + { + goto_programt::targett t = + new_code_gotos.add(goto_programt::make_assertion(false_exprt{})); + t->source_location.set_property_class("pointer dereference"); + t->source_location.set_comment("invalid function pointer"); + } + new_code_gotos.add(goto_programt::make_assumption(false_exprt{})); + + goto_programt new_code; + + // patch them all together + new_code.destructive_append(new_code_gotos); + new_code.destructive_append(new_code_calls); + new_code.destructive_append(final_skip); + + return new_code; +} + +void remove_function_pointerst::remove_function_pointer_log( + goto_programt::targett target, + const possible_fp_targetst &functions) const +{ + log.statistics().source_location = target->source_location; + log.statistics() << "replacing function pointer by " << functions.size() + << " possible targets" << messaget::eom; + + // list the names of functions when verbosity is at debug level + log.conditional_output( + log.debug(), [&functions](messaget::mstreamt &mstream) { + mstream << "targets: "; + + bool first = true; + for(const auto &function : functions) + { + if(!first) + mstream << ", "; + + mstream << function.get_identifier(); + first = false; + } + + mstream << messaget::eom; + }); +} diff --git a/src/goto-programs/remove_function_pointers.h b/src/goto-programs/remove_function_pointers.h index bff989e1cea..0e6760d809c 100644 --- a/src/goto-programs/remove_function_pointers.h +++ b/src/goto-programs/remove_function_pointers.h @@ -16,6 +16,11 @@ Date: June 2003 #include +#include + +#include "collect_function_pointer_targets.h" +#include "remove_const_function_pointers.h" + class goto_functionst; class goto_programt; class goto_modelt; @@ -30,6 +35,20 @@ void remove_function_pointers( bool add_safety_assertion, bool only_remove_const_fps=false); +/// Replace all calls to a dynamic function by a case-split over a given set of +/// candidate functions +/// \param _message_handler: a message handler for reporting +/// \param goto_model: model to search for potential functions +/// \param target_map: candidate functions +/// \param add_safety_assertion: check that at least one function matches +/// \param only_remove_const_fps: restrict the pointer remove to const +void remove_function_pointers( + message_handlert &_message_handler, + goto_modelt &goto_model, + const possible_fp_targets_mapt &target_map, + bool add_safety_assertion, + bool only_remove_const_fps = false); + void remove_function_pointers( message_handlert &_message_handler, symbol_tablet &symbol_table, diff --git a/src/goto-programs/remove_returns.cpp b/src/goto-programs/remove_returns.cpp index 57d515d4865..7a67ff8b83c 100644 --- a/src/goto-programs/remove_returns.cpp +++ b/src/goto-programs/remove_returns.cpp @@ -13,11 +13,16 @@ Date: September 2009 #include "remove_returns.h" +#include +#include +#include #include #include #include "goto_model.h" +#include "collect_function_pointer_targets.h" +#include "remove_function_pointers.h" #include "remove_skip.h" #define RETURN_VALUE_SUFFIX "#return_value" @@ -25,8 +30,10 @@ Date: September 2009 class remove_returnst { public: - explicit remove_returnst(symbol_table_baset &_symbol_table): - symbol_table(_symbol_table) + explicit remove_returnst( + symbol_table_baset &_symbol_table, + message_handlert &m) + : log(m), symbol_table(_symbol_table) { } @@ -40,8 +47,20 @@ class remove_returnst void restore( goto_functionst &goto_functions); + /// Build the internal map to know the potential function pointer targets + /// \param goto_model: The model to search for the targets + void build_fp_targets(goto_modelt &goto_model) + { + possible_fp_targets_map = get_function_pointer_targets( + log.get_message_handler(), + goto_model.goto_functions, + goto_model.symbol_table); + } + protected: + messaget log; symbol_table_baset &symbol_table; + possible_fp_targets_mapt possible_fp_targets_map; void replace_returns( const irep_idt &function_id, @@ -59,6 +78,29 @@ class remove_returnst optionalt get_or_create_return_value_symbol(const irep_idt &function_id); + + /// Remove return of a complete function call + /// \param target: the function call instruction iterator + /// \param goto_program: the GOTO program to be updated + void do_function_call_complete( + goto_programt::targett target, + goto_programt &goto_program); + + /// Remove return of a complete function call of a function pointer + /// \param target: the function call instruction iterator + /// \param possible_fp_targets: functions the pointer may point to + /// \param goto_program: the GOTO program to be updated + void do_function_call_complete( + goto_programt::targett target, + const possible_fp_targetst &possible_fp_targets, + goto_programt &goto_program); + + /// Remove return of a stub function call + /// \param target: the function call instruction iterator + /// \param goto_program: the GOTO program to be updated + void do_function_call_stub( + goto_programt::targett target, + goto_programt &goto_program); }; optionalt @@ -149,72 +191,59 @@ bool remove_returnst::do_function_calls( Forall_goto_program_instructions(i_it, goto_program) { - if(i_it->is_function_call()) - { - code_function_callt function_call = i_it->get_function_call(); + if(!i_it->is_function_call()) + continue; - INVARIANT( - function_call.function().id() == ID_symbol, - "indirect function calls should have been removed prior to running " - "remove-returns"); + code_function_callt function_call = i_it->get_function_call(); - const irep_idt function_id = - to_symbol_expr(function_call.function()).get_identifier(); + INVARIANT( + function_call.function().id() == ID_symbol || + function_call.function().id() == ID_dereference, + "only direct function calls (via a symbol) or indirect function calls " + "(via a dereference) are supported"); - // Do we return anything? - if( - to_code_type(function_call.function().type()).return_type() != - empty_typet() && - function_call.lhs().is_not_nil()) - { - // replace "lhs=f(...)" by - // "f(...); lhs=f#return_value; DEAD f#return_value;" - - exprt rhs; - - bool is_stub = function_is_stub(function_id); - optionalt return_value; - - if(!is_stub) - { - return_value = get_or_create_return_value_symbol(function_id); - CHECK_RETURN(return_value.has_value()); - - // The return type in the definition of the function may differ - // from the return type in the declaration. We therefore do a - // cast. - rhs = typecast_exprt::conditional_cast( - *return_value, function_call.lhs().type()); - } - else - { - rhs = side_effect_expr_nondett( - function_call.lhs().type(), i_it->source_location); - } - - goto_programt::targett t_a = goto_program.insert_after( - i_it, - goto_programt::make_assignment( - code_assignt(function_call.lhs(), rhs), i_it->source_location)); - - // fry the previous assignment - function_call.lhs().make_nil(); - - if(!is_stub) - { - goto_program.insert_after( - t_a, - goto_programt::make_dead(*return_value, i_it->source_location)); - } - - // update the call - i_it->set_function_call(function_call); - - requires_update = true; - } + bool is_function_pointer = function_call.function().id() == ID_dereference; + + const auto &function_symbol = + is_function_pointer + ? to_dereference_expr(function_call.function()).pointer() + : function_call.function(); + const auto &function_id = to_symbol_expr(function_symbol).get_identifier(); + + // Do we return anything? + if( + to_code_type(function_call.function().type()).return_type() == + empty_typet() || + !function_call.lhs().is_not_nil()) + continue; + + if(is_function_pointer) + { + // f may point to {f1,f2,..}=possible_fp_targets_map[function_id] + // lhs=*f(..) => *f(..); lhs = (f == f1 ? f1#return_value : f == f2 ? ..); + do_function_call_complete( + i_it, possible_fp_targets_map[function_id].second, goto_program); + } + else if(function_is_stub(function_id)) + { + // stub: + // lhs=f(..) => f(..); lhs=nondet; + do_function_call_stub(i_it, goto_program); + } + else + { + // lhs=f(..) => f(..); lhs=f#return_value; + do_function_call_complete(i_it, goto_program); } - } + // fry the previous assignment + function_call.lhs().make_nil(); + + // update the call + i_it->set_function_call(function_call); + + requires_update = true; + } return requires_update; } @@ -257,35 +286,27 @@ void remove_returnst::operator()( /// removes returns void remove_returns( symbol_table_baset &symbol_table, + message_handlert &m, goto_functionst &goto_functions) { - remove_returnst rr(symbol_table); + remove_returnst rr(symbol_table, m); rr(goto_functions); } -/// Removes returns from a single function. Only usable with Java programs at -/// the moment; to use it with other languages, they must annotate their stub -/// functions with ID_C_incomplete as currently done in -/// java_bytecode_convert_method.cpp. -/// -/// This will generate \#return_value variables, if not already present, for -/// both the function being altered *and* any callees. -/// \param goto_model_function: function to transform -/// \param function_is_stub: function that will be used to test whether a given -/// callee has been or will be given a body. It should return true if so, or -/// false if the function will remain a bodyless stub. void remove_returns( goto_model_functiont &goto_model_function, + message_handlert &m, function_is_stubt function_is_stub) { - remove_returnst rr(goto_model_function.get_symbol_table()); + remove_returnst rr(goto_model_function.get_symbol_table(), m); rr(goto_model_function, function_is_stub); } /// removes returns -void remove_returns(goto_modelt &goto_model) +void remove_returns(goto_modelt &goto_model, message_handlert &m) { - remove_returnst rr(goto_model.symbol_table); + remove_returnst rr(goto_model.symbol_table, m); + rr.build_fp_targets(goto_model); rr(goto_model.goto_functions); } @@ -402,9 +423,9 @@ void remove_returnst::restore(goto_functionst &goto_functions) } /// restores return statements -void restore_returns(goto_modelt &goto_model) +void restore_returns(goto_modelt &goto_model, message_handlert &m) { - remove_returnst rr(goto_model.symbol_table); + remove_returnst rr(goto_model.symbol_table, m); rr.restore(goto_model.goto_functions); } @@ -430,3 +451,141 @@ bool is_return_value_symbol(const symbol_exprt &symbol_expr) { return is_return_value_identifier(symbol_expr.get_identifier()); } + +void remove_returnst::do_function_call_complete( + goto_programt::targett target, + goto_programt &goto_program) +{ + const auto &function_call = target->get_function_call(); + const auto &function_id = + to_symbol_expr(function_call.function()).get_identifier(); + + // The return type in the definition of the function may differ + // from the return type in the declaration. We therefore do a + // cast. + optionalt return_value = + get_or_create_return_value_symbol(function_id); + CHECK_RETURN(return_value.has_value()); + + // replace "lhs=f(...)" by + // "f(...); lhs=f#return_value; DEAD f#return_value;" + goto_programt::targett t_a = goto_program.insert_after( + target, + goto_programt::make_assignment( + code_assignt{function_call.lhs(), + typecast_exprt::conditional_cast( + *return_value, function_call.lhs().type())}, + target->source_location)); + + goto_program.insert_after( + t_a, goto_programt::make_dead(*return_value, target->source_location)); +} + +void remove_returnst::do_function_call_complete( + goto_programt::targett target, + const possible_fp_targetst &possible_fp_targets, + goto_programt &goto_program) +{ + PRECONDITION(target->is_function_call()); + const auto &function_call = target->get_function_call(); + PRECONDITION(function_call.function().id() == ID_dereference); + const auto &function_symbol = + to_dereference_expr(function_call.function()).pointer(); + PRECONDITION(function_symbol.id() == ID_symbol); + const auto &function_id = to_symbol_expr(function_symbol).get_identifier(); + if(possible_fp_targets.empty()) + throw incorrect_goto_program_exceptiont{ + "the function pointer " + id2string(function_id) + + " does not have any potential targets", + target->source_location}; + + cond_exprt rhs(exprt::operandst{}, function_call.lhs().type()); + std::vector return_values; + + const auto &first_target_id = possible_fp_targets.begin()->get_identifier(); + const auto first_target_symbol = symbol_table.lookup(first_target_id); + CHECK_RETURN(first_target_symbol); + + auto &pointer_lhs_symbol = get_fresh_aux_symbol( + pointer_type(function_call.lhs().type()), + "", // no name prefix + "p_lhs", + target->source_location, + first_target_symbol->mode, + symbol_table); + symbol_exprt pointer_lhs_expr = pointer_lhs_symbol.symbol_expr(); + + exprt::operandst disjunction_operands; + + for(const auto &target_expr : possible_fp_targets) + { + irep_idt target_id = target_expr.get_identifier(); + const symbolt *target_symbol = symbol_table.lookup(target_id); + CHECK_RETURN(target_symbol); + + // The return type in the definition of the function may differ + // from the return type in the declaration. We therefore do a + // cast. + optionalt return_value = + get_or_create_return_value_symbol(target_id); + CHECK_RETURN(return_value.has_value()); + return_values.push_back(*return_value); + + const auto f_equals_f_i = equal_exprt{ + function_call.function(), address_of_exprt{target_symbol->symbol_expr()}}; + disjunction_operands.push_back(f_equals_f_i); + rhs.add_case(f_equals_f_i, address_of_exprt{*return_value}); + } + + // replace "lhs=*f(...)" by + // + // assert(f==f1 || f==f2 || ..); // so that the user knows + // assume(f==f1 || f==f2 || ..); // so that we abort this execution + // T* p_lhs = lhs = (f == f1 ? &f1#return_value : f == f2 + // ? &f2#return_value .. ); + // f*(...); + // lhs = *p_lhs; + // DEAD f1#return_value; + // DEAD f2#return_value; + // .. + // DEAD p_lhs + goto_program.insert_before( + target, + goto_programt::make_assertion( + disjunction(disjunction_operands), target->source_location)); + goto_program.insert_before( + target, + goto_programt::make_assumption( + disjunction(disjunction_operands), target->source_location)); + goto_program.insert_before( + target, + goto_programt::make_assignment( + code_assignt{pointer_lhs_expr, rhs}, target->source_location)); + goto_programt::targett t_a = goto_program.insert_after( + target, + goto_programt::make_assignment( + code_assignt{function_call.lhs(), dereference_exprt{pointer_lhs_expr}}, + target->source_location)); + + for(const auto &return_value : return_values) + t_a = goto_program.insert_after( + t_a, goto_programt::make_dead(return_value, target->source_location)); + goto_program.insert_after( + t_a, goto_programt::make_dead(pointer_lhs_expr, target->source_location)); +} + +void remove_returnst::do_function_call_stub( + goto_programt::targett target, + goto_programt &goto_program) +{ + const auto &function_call = target->get_function_call(); + + // replace "lhs=f(...)" by "f(...); lhs=nondet;" + goto_program.insert_after( + target, + goto_programt::make_assignment( + code_assignt{function_call.lhs(), + side_effect_expr_nondett{function_call.lhs().type(), + target->source_location}}, + target->source_location)); +} diff --git a/src/goto-programs/remove_returns.h b/src/goto-programs/remove_returns.h index 5d990f82e60..60c6553dd5e 100644 --- a/src/goto-programs/remove_returns.h +++ b/src/goto-programs/remove_returns.h @@ -73,6 +73,7 @@ Date: September 2009 #include +#include #include class goto_functionst; @@ -82,18 +83,36 @@ class namespacet; class symbol_table_baset; class symbol_exprt; -void remove_returns(symbol_table_baset &, goto_functionst &); +void remove_returns( + symbol_table_baset &symbol_table, + message_handlert &m, + goto_functionst &); typedef std::function function_is_stubt; -void remove_returns(goto_model_functiont &, function_is_stubt); - -void remove_returns(goto_modelt &); +/// Removes returns from a single function. Only usable with Java programs at +/// the moment; to use it with other languages, they must annotate their stub +/// functions with ID_C_incomplete as currently done in +/// java_bytecode_convert_method.cpp. +/// +/// This will generate \#return_value variables, if not already present, for +/// both the function being altered *and* any callees. +/// \param m: message to initialise remove_function_pointers with +/// \param goto_model_function: function to transform +/// \param function_is_stub: function that will be used to test whether a given +/// callee has been or will be given a body. It should return true if so, or +/// false if the function will remain a bodyless stub. +void remove_returns( + goto_model_functiont &goto_model_function, + message_handlert &m, + function_is_stubt function_is_stub); + +void remove_returns(goto_modelt &goto_model, message_handlert &m); // reverse the above operations void restore_returns(symbol_table_baset &, goto_functionst &); -void restore_returns(goto_modelt &); +void restore_returns(goto_modelt &goto_model, message_handlert &m); /// produces the identifier that is used to store the return /// value of the function with the given identifier diff --git a/src/util/type.h b/src/util/type.h index 7f38fc5a335..2104929dcee 100644 --- a/src/util/type.h +++ b/src/util/type.h @@ -139,6 +139,16 @@ class typet:public irept validate_type(type, ns, vm); } + + void visit(std::function f) const + { + f(*this); + for(const auto &sub_irep : get_sub()) + { + const typet &subtype = static_cast(sub_irep); + subtype.visit(f); + } + } }; /// Type with a single subtype. diff --git a/unit/Makefile b/unit/Makefile index 46f8ad567aa..702e59fa14f 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -14,6 +14,9 @@ SRC += analyses/ai/ai.cpp \ analyses/does_remove_const/does_expr_lose_const.cpp \ analyses/does_remove_const/does_type_preserve_const_correctness.cpp \ analyses/does_remove_const/is_type_at_least_as_const_as.cpp \ + analyses/remove_function_pointers.cpp \ + analyses/remove_function_pointers_refine.cpp \ + analyses/remove_returns.cpp \ big-int/big-int.cpp \ compound_block_locations.cpp \ goto-instrument/cover/cover_only.cpp \ diff --git a/unit/analyses/remove_function_pointers.cpp b/unit/analyses/remove_function_pointers.cpp new file mode 100644 index 00000000000..36b5684536e --- /dev/null +++ b/unit/analyses/remove_function_pointers.cpp @@ -0,0 +1,129 @@ +/*******************************************************************\ + Module: Unit tests for module remove_function_pointers + + Author: Diffblue Ltd. + + Date: Apr 2019 + +\*******************************************************************/ + +/// \file +/// Unit tests for module remove_function_pointers + +#include +#include + +#include +#include +#include + +#include +#include +#include + +SCENARIO( + "List potential targets works reliable", + "[core][goto-programs][list_potential_targets]") +{ + goto_modelt goto_model; + // create one instance of the function type, they are all + // going to have the same signature + typet func_type = code_typet{{}, empty_typet{}}; + // create an empty code block, to act as the functions' body + code_blockt func_body = code_blockt{}; + + // void f(){}; + symbolt f; + f.name = "f"; + f.mode = ID_C; + f.type = func_type; // void, takes no params + f.value = func_body; + goto_model.symbol_table.add(f); + + // void g(){}; + symbolt g; + g.name = "g"; + g.mode = ID_C; + g.type = func_type; // void, takes no params + g.value = func_body; + goto_model.symbol_table.add(g); + + // void h(){}; + symbolt h; + h.name = "h"; + h.mode = ID_C; + h.type = func_type; // void, takes no params + h.value = func_body; + goto_model.symbol_table.add(h); + + // void l(){}; + symbolt l; + l.name = "l"; + l.mode = ID_C; + l.type = func_type; // void, takes no params + l.value = func_body; + goto_model.symbol_table.add(l); + + // create array with function pointer elements + array_typet fp_array_type( + pointer_typet{func_type, 64}, from_integer(4, signed_int_type())); + array_exprt fp_array{{}, fp_array_type}; + + fp_array.copy_to_operands(address_of_exprt{f.symbol_expr()}); + fp_array.copy_to_operands(address_of_exprt{g.symbol_expr()}); + fp_array.copy_to_operands(address_of_exprt{h.symbol_expr()}); + fp_array.copy_to_operands(address_of_exprt{l.symbol_expr()}); + + // f = fparray {f1, f2, f3, f4}; + symbolt fpa; + fpa.name = "f"; + fpa.mode = ID_C; + fpa.type = fp_array_type; + fpa.value = fp_array; + + goto_model.symbol_table.add(fpa); + + // pointer to fn call + symbolt fn_ptr; + fn_ptr.name = "fn_ptr"; + fn_ptr.mode = ID_C; + fn_ptr.type = pointer_typet{func_type, 64}; + goto_model.symbol_table.add(fn_ptr); + + // symbol for indexing the array + symbolt z; + z.name = "z"; + z.mode = ID_C; + z.type = signed_int_type(); + goto_model.symbol_table.add(z); + + // create function with pointer function call instruction + + // void entry(){z; array; fn_ptr = array[z]; *fn_ptr()}; + symbolt entry; + entry.name = "entry"; + entry.mode = ID_C; + entry.type = func_type; // void, takes no params + + code_blockt entry_body{ + {// fn_ptr = array[z]; + code_assignt{ + fn_ptr.symbol_expr(), + index_exprt{fp_array, z.symbol_expr(), pointer_type(func_type)}}, + // *fn_ptr(); + code_function_callt{dereference_exprt{fn_ptr.symbol_expr()}}}}; + entry.value = entry_body; + goto_model.symbol_table.add(entry); + + WHEN("list_potential_targets is run") + { + goto_convert(goto_model, null_message_handler); + auto target_map = get_function_pointer_targets( + null_message_handler, goto_model.goto_functions, goto_model.symbol_table); + THEN("there should be 4 targets recognised") + { + REQUIRE(target_map.size() == 1); + REQUIRE(target_map.begin()->second.second.size() == 4); + } + } +} diff --git a/unit/analyses/remove_function_pointers_refine.cpp b/unit/analyses/remove_function_pointers_refine.cpp new file mode 100644 index 00000000000..13e071df111 --- /dev/null +++ b/unit/analyses/remove_function_pointers_refine.cpp @@ -0,0 +1,157 @@ +/*******************************************************************\ + Module: Unit tests for module remove_function_pointers.refine_call_type + + Author: Diffblue Ltd. + + Date: May 2019 + +\*******************************************************************/ + +/// \file +/// Unit tests for module remove_function_pointers + +#include +#include + +#include +#include +#include + +#include +#include +#include + +SCENARIO( + "List potential targets works for ellipsis", + "[core][goto-programs][list_potential_targets]") +{ + goto_modelt goto_model; + // create one instance of the function type, they are all + // going to have the same signature + + symbolt int_param; + int_param.name = "int_param"; + int_param.mode = ID_C; + int_param.type = signedbv_typet{32}; + goto_model.symbol_table.add(int_param); + + code_typet::parameterst parameters; + parameters.emplace_back(signedbv_typet{32}); + parameters.back().set_identifier("int_param"); + auto func_empty_type = code_typet{{}, empty_typet{}}; + auto func_ellipsis_type = code_typet{{}, empty_typet{}}; + func_ellipsis_type.make_ellipsis(); + auto func_one_arg_type = code_typet{parameters, empty_typet{}}; + // create an empty code block, to act as the functions' body + code_blockt func_body = code_blockt{}; + + irep_idt f_name{"f"}; + + // void f(...){}; + symbolt f; + f.name = f_name; + f.mode = ID_C; + f.type = func_ellipsis_type; // takes anything + f.value = func_body; + goto_model.symbol_table.add(f); + + // void g(int){}; + symbolt g; + g.name = "g"; + g.mode = ID_C; + g.type = func_one_arg_type; // takes one int + g.value = func_body; + goto_model.symbol_table.add(g); + + // create array with function pointer elements + array_typet fp_array_type( + pointer_typet{func_ellipsis_type, 64}, from_integer(2, index_type())); + array_exprt fp_array{{}, fp_array_type}; + + fp_array.copy_to_operands(address_of_exprt{f.symbol_expr()}); + fp_array.copy_to_operands(address_of_exprt{g.symbol_expr()}); + + // fpa = fparray {f1, f2}; + symbolt fpa; + fpa.name = "fpa"; + fpa.mode = ID_C; + fpa.type = fp_array_type; + fpa.value = fp_array; + + goto_model.symbol_table.add(fpa); + + irep_idt fn_ptr_name{"fn_ptr"}; + + // pointer to fn call + symbolt fn_ptr; + fn_ptr.name = fn_ptr_name; + fn_ptr.mode = ID_C; + fn_ptr.type = pointer_typet{func_ellipsis_type, 64}; + goto_model.symbol_table.add(fn_ptr); + + // symbol for indexing the array + symbolt z; + z.name = "z"; + z.mode = ID_C; + z.type = index_type(); + goto_model.symbol_table.add(z); + + // create function with pointer function call instruction + + // void entry(){z; array; fn_ptr = array[z]; *fn_ptr(z)}; + symbolt entry; + entry.name = "entry"; + entry.mode = ID_C; + entry.type = func_empty_type; + + code_function_callt::argumentst arguments; + arguments.emplace_back(z.symbol_expr()); + + code_blockt entry_body{ + {// fn_ptr = array[z]; + code_assignt{fn_ptr.symbol_expr(), + index_exprt{fp_array, + z.symbol_expr(), + pointer_type(func_ellipsis_type)}}, + // *fn_ptr(); + code_function_callt{dereference_exprt{fn_ptr.symbol_expr()}, arguments}}}; + entry.value = entry_body; + goto_model.symbol_table.add(entry); + + WHEN("list_potential_targets is run") + { + goto_convert(goto_model, null_message_handler); + + // goto convert removes ellipsis so we need to re-insert them + auto &f_symbol = goto_model.symbol_table.get_writeable_ref(f_name); + to_code_type(f_symbol.type).make_ellipsis(); + auto &fn_ptr_symbol = + goto_model.symbol_table.get_writeable_ref(fn_ptr_name); + to_code_type(fn_ptr_symbol.type.subtype()).make_ellipsis(); + + auto &goto_functions = goto_model.goto_functions; + for(auto &goto_function_pair : goto_functions.function_map) + { + auto &goto_function = goto_function_pair.second; + auto &goto_program = goto_function.body; + for(auto &instruction : goto_program.instructions) + { + if(instruction.is_function_call()) + { + code_function_callt function_call = instruction.get_function_call(); + auto &function = to_dereference_expr(function_call.function()); + to_code_type(function.type()).make_ellipsis(); + instruction.set_function_call(function_call); + } + } + } + + auto target_map = get_function_pointer_targets( + null_message_handler, goto_model.goto_functions, goto_model.symbol_table); + THEN("there should be 4 targets recognised") + { + REQUIRE(target_map.size() == 1); + REQUIRE(target_map.begin()->second.second.size() == 2); + } + } +} diff --git a/unit/analyses/remove_returns.cpp b/unit/analyses/remove_returns.cpp new file mode 100644 index 00000000000..8c4c221c414 --- /dev/null +++ b/unit/analyses/remove_returns.cpp @@ -0,0 +1,93 @@ +/*******************************************************************\ + Module: Unit tests for module remove_returns + + Author: Diffblue Ltd. + + Date: Apr 2019 + +\*******************************************************************/ + +/// \file +/// Unit tests for module remove_returns +/// This is testing the top level interfaces for now. + +#include +#include + +#include +#include +#include + +#include +#include +#include + +// scenario for testing remove returns + +SCENARIO( + "Remove returns is working reliably", + "[core][goto-programs][remove_returns]") +{ + goto_modelt goto_model; + + // int f() { return 5; } + symbolt f; + f.name = "f"; + f.mode = ID_C; + f.type = code_typet{{}, signed_int_type()}; + f.value = code_blockt{{code_returnt(from_integer(5, signed_int_type()))}}; + goto_model.symbol_table.add(f); + + goto_convert(goto_model, null_message_handler); + + WHEN("remove_returns is invoked") + { + remove_returns( + goto_model.symbol_table, null_message_handler, goto_model.goto_functions); + THEN("function f should not contain any return instructions") + { + const auto &goto_functions = goto_model.goto_functions; + for(const auto &function_pair : goto_functions.function_map) + { + if(function_pair.first == "f") + { + const auto &instructions = function_pair.second.body.instructions; + for(auto target = instructions.begin(); target != instructions.end(); + target++) + { + // make sure there are no return instructions. + REQUIRE(!target->is_return()); + } + } + } + } + } + + WHEN("restore_returns is invoked") + { + restore_returns(goto_model, null_message_handler); + THEN("function f should have a skip instruction") + { + int number_of_returns = 0; + const auto &goto_functions = goto_model.goto_functions; + for(const auto &function_pair : goto_functions.function_map) + { + if(function_pair.first == "f") + { + const auto &instructions = function_pair.second.body.instructions; + for(auto target = instructions.begin(); target != instructions.end(); + target++) + { + if(target->is_return()) + number_of_returns++; + } + } + } + + // ensure that there were only 1 return instructions in f + REQUIRE(number_of_returns == 1); + } + } +} + +// scenario for testing restore returns