From a7595c18693c5d3e74f9add1b262b407f30cb735 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 11 Jan 2018 16:00:23 +0000 Subject: [PATCH 1/2] Remove returns: support running per-function This converts functions from conventional to via-global-variable return style on a function-by-function basis. It results in some complication, as the globals are now seen by the language's final pass, meaning it must ignore them for initialisation purposes, and we must cope with functions not having their "true" type while other functions are still being loaded. One test in cbmc-java failed because it was written in C, which we don't adapt to support incremental loading yet, so it is moved to the cbmc test directory, where it can still achieve its goal of checking whether uncaught_exceptions_analysist can tolerate CPROVER_assert and similar builtins, since cbmc also runs the remove_exceptions pass. --- .../uncaught_exceptions_analysis1}/main.c | 0 .../uncaught_exceptions_analysis1}/test.desc | 0 src/goto-programs/lazy_goto_functions_map.h | 10 + src/goto-programs/lazy_goto_model.cpp | 9 +- src/goto-programs/lazy_goto_model.h | 27 +- src/goto-programs/remove_returns.cpp | 232 ++++++++++++------ src/goto-programs/remove_returns.h | 4 + src/java_bytecode/java_entry_point.cpp | 6 +- src/util/irep_ids.def | 1 + src/util/language_file.h | 5 + unit/testing-utils/load_java_class.cpp | 6 +- 11 files changed, 207 insertions(+), 93 deletions(-) rename regression/{cbmc-java/exceptions25 => cbmc/uncaught_exceptions_analysis1}/main.c (100%) rename regression/{cbmc-java/exceptions25 => cbmc/uncaught_exceptions_analysis1}/test.desc (100%) diff --git a/regression/cbmc-java/exceptions25/main.c b/regression/cbmc/uncaught_exceptions_analysis1/main.c similarity index 100% rename from regression/cbmc-java/exceptions25/main.c rename to regression/cbmc/uncaught_exceptions_analysis1/main.c diff --git a/regression/cbmc-java/exceptions25/test.desc b/regression/cbmc/uncaught_exceptions_analysis1/test.desc similarity index 100% rename from regression/cbmc-java/exceptions25/test.desc rename to regression/cbmc/uncaught_exceptions_analysis1/test.desc diff --git a/src/goto-programs/lazy_goto_functions_map.h b/src/goto-programs/lazy_goto_functions_map.h index 5df717b5088..5dbcf9a22ea 100644 --- a/src/goto-programs/lazy_goto_functions_map.h +++ b/src/goto-programs/lazy_goto_functions_map.h @@ -99,6 +99,16 @@ class lazy_goto_functions_mapt final return ensure_function_loaded_internal(name).second; } + /// Determines if this lazy GOTO functions map can produce a body for the + /// given function + /// \param name: function ID to query + /// \return true if we can produce a function body, or false if we would leave + /// it a bodyless stub. + bool can_produce_function(const key_type &name) const + { + return language_files.can_convert_lazy_method(name); + } + void unload(const key_type &name) const { goto_functions.erase(name); } void ensure_function_loaded(const key_type &name) const diff --git a/src/goto-programs/lazy_goto_model.cpp b/src/goto-programs/lazy_goto_model.cpp index 045db0875e4..cbd61295820 100644 --- a/src/goto-programs/lazy_goto_model.cpp +++ b/src/goto-programs/lazy_goto_model.cpp @@ -31,7 +31,7 @@ lazy_goto_modelt::lazy_goto_modelt( [this] (goto_functionst::goto_functiont &function) -> void { goto_model_functiont model_function(*goto_model, function); - this->post_process_function(model_function); + this->post_process_function(model_function, *this); }, message_handler), post_process_function(std::move(post_process_function)), @@ -51,7 +51,7 @@ lazy_goto_modelt::lazy_goto_modelt(lazy_goto_modelt &&other) [this] (goto_functionst::goto_functiont &function) -> void { goto_model_functiont model_function(*goto_model, function); - this->post_process_function(model_function); + this->post_process_function(model_function, *this); }, other.message_handler), language_files(std::move(other.language_files)), @@ -239,3 +239,8 @@ bool lazy_goto_modelt::finalize() return post_process_functions(*goto_model); } + +bool lazy_goto_modelt::can_produce_function(const irep_idt &id) const +{ + return goto_functions.can_produce_function(id); +} diff --git a/src/goto-programs/lazy_goto_model.h b/src/goto-programs/lazy_goto_model.h index 80a8f0e8a6f..c9f6f612833 100644 --- a/src/goto-programs/lazy_goto_model.h +++ b/src/goto-programs/lazy_goto_model.h @@ -15,11 +15,24 @@ class cmdlinet; class optionst; +/// Interface for a provider of function definitions to report whether or not it +/// can provide a definition (function body) for a given function ID. +struct can_produce_functiont +{ + /// Determines if this function provider can produce a body for the given + /// function + /// \param id: function ID to query + /// \return true if we can produce a function body, or false if we would leave + /// it a bodyless stub. + virtual bool can_produce_function(const irep_idt &id) const = 0; +}; + /// Model that holds partially loaded map of functions -class lazy_goto_modelt +class lazy_goto_modelt : public can_produce_functiont { public: - typedef std::function + typedef std::function< + void(goto_model_functiont &function, const can_produce_functiont &)> post_process_functiont; typedef std::function post_process_functionst; @@ -51,12 +64,10 @@ class lazy_goto_modelt message_handlert &message_handler) { return lazy_goto_modelt( - [&handler] (goto_model_functiont &function) - { - handler.process_goto_function(function); + [&handler] (goto_model_functiont &fun, const can_produce_functiont &cpf) { // NOLINT(*) + handler.process_goto_function(fun, cpf); }, - [&handler, &options] (goto_modelt &goto_model) -> bool - { + [&handler, &options] (goto_modelt &goto_model) -> bool { // NOLINT(*) return handler.process_goto_functions(goto_model, options); }, message_handler); @@ -88,6 +99,8 @@ class lazy_goto_modelt return std::move(model.goto_model); } + virtual bool can_produce_function(const irep_idt &id) const; + private: std::unique_ptr goto_model; diff --git a/src/goto-programs/remove_returns.cpp b/src/goto-programs/remove_returns.cpp index 2ecc8c75779..e5235071190 100644 --- a/src/goto-programs/remove_returns.cpp +++ b/src/goto-programs/remove_returns.cpp @@ -27,6 +27,10 @@ class remove_returnst void operator()( goto_functionst &goto_functions); + void operator()( + goto_model_functiont &model_function, + function_is_stubt function_is_stub); + void restore( goto_functionst &goto_functions); @@ -34,10 +38,11 @@ class remove_returnst symbol_tablet &symbol_table; void replace_returns( - goto_functionst::function_mapt::iterator f_it); + const irep_idt &function_id, + goto_functionst::goto_functiont &function); void do_function_calls( - goto_functionst &goto_functions, + function_is_stubt function_is_stub, goto_programt &goto_program); bool restore_returns( @@ -46,74 +51,90 @@ class remove_returnst void undo_function_calls( goto_functionst &goto_functions, goto_programt &goto_program); + + symbol_exprt get_or_create_return_value_symbol(const irep_idt &function_id); }; +symbol_exprt +remove_returnst::get_or_create_return_value_symbol(const irep_idt &function_id) +{ + const irep_idt symbol_name = id2string(function_id) + RETURN_VALUE_SUFFIX; + const symbolt *existing_symbol = symbol_table.lookup(symbol_name); + if(existing_symbol != nullptr) + return existing_symbol->symbol_expr(); + + const symbolt &function_symbol = symbol_table.lookup_ref(function_id); + const typet &return_type = to_code_type(function_symbol.type).return_type(); + + if(return_type == empty_typet()) + return symbol_exprt(); + + auxiliary_symbolt new_symbol; + new_symbol.is_static_lifetime = true; + new_symbol.module = function_symbol.module; + new_symbol.base_name = + id2string(function_symbol.base_name) + RETURN_VALUE_SUFFIX; + new_symbol.name = symbol_name; + new_symbol.mode = function_symbol.mode; + // If we're creating this for the first time, the target function cannot have + // been remove_return'd yet, so this will still be the "true" return type: + new_symbol.type = return_type; + // Return-value symbols will always be written before they are read, so there + // is no need for __CPROVER_initialize to do anything: + new_symbol.type.set(ID_C_no_initialization_required, true); + + symbol_table.add(new_symbol); + return new_symbol.symbol_expr(); +} + /// turns 'return x' into an assignment to fkt#return_value +/// \param function_id: name of the function to transform +/// \param function: function to transform void remove_returnst::replace_returns( - goto_functionst::function_mapt::iterator f_it) + const irep_idt &function_id, + goto_functionst::goto_functiont &function) { - typet return_type=f_it->second.type.return_type(); - - const irep_idt function_id=f_it->first; + typet return_type = function.type.return_type(); // returns something but void? - bool has_return_value=return_type!=empty_typet(); + if(return_type == empty_typet()) + return; - if(has_return_value) - { - // look up the function symbol - symbolt &function_symbol=*symbol_table.get_writeable(function_id); - - // make the return type 'void' - f_it->second.type.return_type()=empty_typet(); - function_symbol.type=f_it->second.type; - - // add return_value symbol to symbol_table - auxiliary_symbolt new_symbol; - new_symbol.is_static_lifetime=true; - new_symbol.module=function_symbol.module; - new_symbol.base_name= - id2string(function_symbol.base_name)+RETURN_VALUE_SUFFIX; - new_symbol.name=id2string(function_symbol.name)+RETURN_VALUE_SUFFIX; - new_symbol.mode=function_symbol.mode; - new_symbol.type=return_type; - - symbol_table.add(new_symbol); - } + // add return_value symbol to symbol_table, if not already created: + symbol_exprt return_symbol = get_or_create_return_value_symbol(function_id); - goto_programt &goto_program=f_it->second.body; + // look up the function symbol + symbolt &function_symbol = *symbol_table.get_writeable(function_id); - if(goto_program.empty()) - return; + // make the return type 'void' + function.type.return_type() = empty_typet(); + function_symbol.type = function.type; - if(has_return_value) + goto_programt &goto_program = function.body; + + Forall_goto_program_instructions(i_it, goto_program) { - Forall_goto_program_instructions(i_it, goto_program) + if(i_it->is_return()) { - if(i_it->is_return()) - { - INVARIANT( - i_it->code.operands().size()==1, - "return instructions should have one operand"); - - // replace "return x;" by "fkt#return_value=x;" - symbol_exprt lhs_expr; - lhs_expr.set_identifier(id2string(function_id)+RETURN_VALUE_SUFFIX); - lhs_expr.type()=return_type; + INVARIANT( + i_it->code.operands().size() == 1, + "return instructions should have one operand"); - code_assignt assignment(lhs_expr, i_it->code.op0()); + // replace "return x;" by "fkt#return_value=x;" + code_assignt assignment(return_symbol, i_it->code.op0()); - // now turn the `return' into `assignment' - i_it->type=ASSIGN; - i_it->code=assignment; - } + // now turn the `return' into `assignment' + i_it->make_assignment(assignment); } } } /// turns x=f(...) into f(...); lhs=f#return_value; +/// \param function_is_stub: function (irep_idt -> bool) that determines whether +/// a given function ID is a stub +/// \param goto_program: program to transform void remove_returnst::do_function_calls( - goto_functionst &goto_functions, + function_is_stubt function_is_stub, goto_programt &goto_program) { Forall_goto_program_instructions(i_it, goto_program) @@ -122,29 +143,40 @@ void remove_returnst::do_function_calls( { code_function_callt &function_call=to_code_function_call(i_it->code); - code_typet old_type=to_code_type(function_call.function().type()); + INVARIANT( + function_call.function().id() == ID_symbol, + "indirect function calls should have been removed prior to running " + "remove-returns"); + + const irep_idt function_id = + to_symbol_expr(function_call.function()).get_identifier(); + + symbol_exprt return_value; + typet old_return_type; + bool is_stub = function_is_stub(function_id); + + if(is_stub) + { + old_return_type = + to_code_type(function_call.function().type()).return_type(); + } + else + { + // The callee may or may not already have been transformed by this pass, + // so its symbol-table entry may already have void return type. + // To simplify matters, create its return-value global now (if not + // already done), and use that to determine its true return type. + return_value = get_or_create_return_value_symbol(function_id); + if(return_value == symbol_exprt()) // really void-typed? + continue; + old_return_type = return_value.type(); + } // Do we return anything? - if(old_type.return_type()!=empty_typet()) + if(old_return_type != empty_typet()) { // replace "lhs=f(...)" by // "f(...); lhs=f#return_value; DEAD f#return_value;" - INVARIANT( - function_call.function().id()==ID_symbol, - "indirect function calls should have been removed prior to running " - "remove-returns"); - - const irep_idt function_id= - to_symbol_expr(function_call.function()).get_identifier(); - - // see if we have a body - goto_functionst::function_mapt::const_iterator - f_it=goto_functions.function_map.find(function_id); - - if(f_it==goto_functions.function_map.end()) - throw - "failed to find function `"+id2string(function_id)+ - "' in function map"; // fix the type to_code_type(function_call.function().type()).return_type()= @@ -154,18 +186,10 @@ void remove_returnst::do_function_calls( { exprt rhs; - if(f_it->second.body_available()) - { - symbol_exprt return_value; - return_value.type()=function_call.lhs().type(); - return_value.set_identifier( - id2string(function_id)+RETURN_VALUE_SUFFIX); + if(!is_stub) rhs=return_value; - } else - { rhs=side_effect_expr_nondett(function_call.lhs().type()); - } goto_programt::targett t_a=goto_program.insert_after(i_it); t_a->make_assignment(); @@ -176,7 +200,7 @@ void remove_returnst::do_function_calls( // fry the previous assignment function_call.lhs().make_nil(); - if(f_it->second.body_available()) + if(!is_stub) { goto_programt::targett t_d=goto_program.insert_after(t_a); t_d->make_dead(); @@ -194,11 +218,37 @@ void remove_returnst::operator()(goto_functionst &goto_functions) { Forall_goto_functions(it, goto_functions) { - replace_returns(it); - do_function_calls(goto_functions, it->second.body); + // NOLINTNEXTLINE + auto function_is_stub = [&goto_functions](const irep_idt &function_id) { + auto findit = goto_functions.function_map.find(function_id); + INVARIANT( + findit != goto_functions.function_map.end(), + "called function should have some entry in the function map"); + return !findit->second.body_available(); + }; + + replace_returns(it->first, it->second); + do_function_calls(function_is_stub, it->second.body); } } +void remove_returnst::operator()( + goto_model_functiont &model_function, + function_is_stubt function_is_stub) +{ + goto_functionst::goto_functiont &goto_function = + model_function.get_goto_function(); + + // If this is a stub it doesn't have a corresponding #return_value, + // not any return instructions to alter: + if(goto_function.body.empty()) + return; + + replace_returns( + goto_programt::get_function_id(goto_function.body), goto_function); + do_function_calls(function_is_stub, goto_function.body); +} + /// removes returns void remove_returns( symbol_tablet &symbol_table, @@ -208,6 +258,25 @@ void remove_returns( rr(goto_functions); } +/// Removes returns from a single function. Only usable with Java programs at +/// the moment; to use it with other languages, they must annotate their stub +/// functions with ID_C_incomplete as currently done in +/// java_bytecode_convert_method.cpp. +/// +/// This will generate \#return_value variables, if not already present, for +/// both the function being altered *and* any callees. +/// \param goto_model_function: function to transform +/// \param function_is_stub: function that will be used to test whether a given +/// callee has been or will be given a body. It should return true if so, or +/// false if the function will remain a bodyless stub. +void remove_returns( + goto_model_functiont &goto_model_function, + function_is_stubt function_is_stub) +{ + remove_returnst rr(goto_model_function.get_symbol_table()); + rr(goto_model_function, function_is_stub); +} + /// removes returns void remove_returns(goto_modelt &goto_model) { @@ -215,6 +284,11 @@ void remove_returns(goto_modelt &goto_model) rr(goto_model.goto_functions); } +/// Get code type of a function that has had remove_returns run upon it +/// \param symbol_table: global symbol table +/// \param function_id: function to get the type of +/// \return the function's type with its `return_type()` restored to its +/// original value if a \#return_value variable exists, or nil otherwise code_typet original_return_type( const symbol_tablet &symbol_table, const irep_idt &function_id) diff --git a/src/goto-programs/remove_returns.h b/src/goto-programs/remove_returns.h index 04b113cff18..50bbf22e246 100644 --- a/src/goto-programs/remove_returns.h +++ b/src/goto-programs/remove_returns.h @@ -24,6 +24,10 @@ Date: September 2009 void remove_returns(symbol_tablet &, goto_functionst &); +typedef std::function function_is_stubt; + +void remove_returns(goto_model_functiont &, function_is_stubt); + void remove_returns(goto_modelt &); // reverse the above operations diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index eab1a397286..9b204f5745e 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -73,7 +73,11 @@ static bool should_init_symbol(const symbolt &sym) sym.is_state_var && sym.is_static_lifetime && sym.mode==ID_java) - return true; + { + // Consider some sort of annotation indicating a global variable that + // doesn't require initialisation? + return !sym.type.get_bool(ID_C_no_initialization_required); + } return is_java_string_literal_id(sym.name); } diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index e2e3c30885e..0b134e536c6 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -837,6 +837,7 @@ IREP_ID_TWO(implicit_generic_types, #implicit_generic_types) IREP_ID_TWO(type_variables, #type_variables) IREP_ID_ONE(havoc_object) IREP_ID_TWO(overflow_shl, overflow-shl) +IREP_ID_TWO(C_no_initialization_required, #no_initialization_required) #undef IREP_ID_ONE #undef IREP_ID_TWO diff --git a/src/util/language_file.h b/src/util/language_file.h index 6c32c68496a..ecdce4a1b91 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -127,6 +127,11 @@ class language_filest:public messaget it->second->convert_lazy_method(id, symbol_table); } + bool can_convert_lazy_method(const irep_idt &id) const + { + return lazy_method_map.count(id) != 0; + } + void clear() { file_map.clear(); diff --git a/unit/testing-utils/load_java_class.cpp b/unit/testing-utils/load_java_class.cpp index ddf072baa35..0f112b0d2e4 100644 --- a/unit/testing-utils/load_java_class.cpp +++ b/unit/testing-utils/load_java_class.cpp @@ -62,10 +62,8 @@ symbol_tablet load_java_class( // Construct a lazy_goto_modelt null_message_handlert message_handler; lazy_goto_modelt lazy_goto_model( - [] (goto_model_functiont &function) - { }, - [] (goto_modelt &goto_model) - { return false; }, + [] (goto_model_functiont &function, const can_produce_functiont &cpf) { }, // NOLINT (*) + [] (goto_modelt &goto_model) { return false; }, // NOLINT (*) message_handler); // Configure the path loading From 96569c3916e14a6fc1588453c3bfb803f20bbab1 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 12 Jan 2018 14:13:13 +0000 Subject: [PATCH 2/2] JBMC: remove return values on a per-function basis --- src/jbmc/jbmc_parse_options.cpp | 14 ++++++++++---- src/jbmc/jbmc_parse_options.h | 5 ++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/jbmc/jbmc_parse_options.cpp b/src/jbmc/jbmc_parse_options.cpp index 41177bffcfb..d86c5dc9258 100644 --- a/src/jbmc/jbmc_parse_options.cpp +++ b/src/jbmc/jbmc_parse_options.cpp @@ -642,7 +642,8 @@ int jbmc_parse_optionst::get_goto_program( } void jbmc_parse_optionst::process_goto_function( - goto_model_functiont &function) + goto_model_functiont &function, + const can_produce_functiont &available_functions) { symbol_tablet &symbol_table = function.get_symbol_table(); goto_functionst::goto_functiont &goto_function = function.get_goto_function(); @@ -655,6 +656,14 @@ void jbmc_parse_optionst::process_goto_function( remove_instanceof(goto_function, symbol_table); // Java virtual functions -> explicit dispatch tables: remove_virtual_functions(function); + + auto function_is_stub = + [&symbol_table, &available_functions](const irep_idt &id) { // NOLINT(*) + return symbol_table.lookup_ref(id).value.is_nil() && + !available_functions.can_produce_function(id); + }; + + remove_returns(function, function_is_stub); } catch(const char *e) @@ -692,9 +701,6 @@ bool jbmc_parse_optionst::process_goto_functions( // instrument library preconditions instrument_preconditions(goto_model); - // remove returns, gcc vectors, complex - remove_returns(goto_model); - // Similar removal of java nondet statements: // TODO Should really get this from java_bytecode_language somehow, but we // don't have an instance of that here. diff --git a/src/jbmc/jbmc_parse_options.h b/src/jbmc/jbmc_parse_options.h index cbb922f938e..c98b4ca6f49 100644 --- a/src/jbmc/jbmc_parse_options.h +++ b/src/jbmc/jbmc_parse_options.h @@ -19,6 +19,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include @@ -83,7 +84,9 @@ class jbmc_parse_optionst: const char **argv, const std::string &extra_options); - void process_goto_function(goto_model_functiont &function); + void process_goto_function( + goto_model_functiont &function, + const can_produce_functiont &); bool process_goto_functions(goto_modelt &goto_model, const optionst &options); protected: