From d70afb77237ea98b4a3935e6d2dc5b21e1e85cf9 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 24 Oct 2016 12:27:56 +0100 Subject: [PATCH 01/18] Skip already-loaded Java classes --- src/java_bytecode/java_bytecode_convert_class.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 8d42611bfe0..b62c5746765 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -75,6 +75,13 @@ Function: java_bytecode_convert_classt::convert void java_bytecode_convert_classt::convert(const classt &c) { + std::string qualified_classname="java::"+id2string(c.name); + if(symbol_table.has_symbol(qualified_classname)) + { + debug() << "Skip class " << c.name << " (already loaded)" << eom; + return; + } + class_typet class_type; class_type.set_tag(c.name); @@ -107,7 +114,7 @@ void java_bytecode_convert_classt::convert(const classt &c) symbolt new_symbol; new_symbol.base_name=c.name; new_symbol.pretty_name=c.name; - new_symbol.name="java::"+id2string(c.name); + new_symbol.name=qualified_classname; class_type.set(ID_name, new_symbol.name); new_symbol.type=class_type; new_symbol.mode=ID_java; From 11ff2b283b57567d097a75844b5dc23040883303 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 1 Nov 2016 13:48:01 +0000 Subject: [PATCH 02/18] Convert Java methods only when they have a caller This means that library methods, for example, will often not be converted. For the time being virtual methods calls are handled simply, assuming that any class we know of that provides an override might be called. --- .../java_bytecode_convert_class.cpp | 35 ++++++--- .../java_bytecode_convert_class.h | 6 +- .../java_bytecode_convert_method.cpp | 57 +++++++++++++-- .../java_bytecode_convert_method.h | 12 ++- .../java_bytecode_convert_method_class.h | 12 ++- src/java_bytecode/java_bytecode_language.cpp | 73 ++++++++++++++++++- src/java_bytecode/java_entry_point.cpp | 8 +- src/java_bytecode/java_entry_point.h | 3 +- 8 files changed, 179 insertions(+), 27 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index b62c5746765..2036bcd9bce 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -27,11 +27,13 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, bool _disable_runtime_checks, - size_t _max_array_length): + size_t _max_array_length, + lazy_methodst& _lm): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), - max_array_length(_max_array_length) + max_array_length(_max_array_length), + lazy_methods(_lm) { } @@ -52,6 +54,7 @@ class java_bytecode_convert_classt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; + lazy_methodst &lazy_methods; // conversion void convert(const classt &c); @@ -81,7 +84,7 @@ void java_bytecode_convert_classt::convert(const classt &c) debug() << "Skip class " << c.name << " (already loaded)" << eom; return; } - + class_typet class_type; class_type.set_tag(c.name); @@ -135,13 +138,19 @@ void java_bytecode_convert_classt::convert(const classt &c) // now do methods for(const auto &method : c.methods) - java_bytecode_convert_method( - *class_symbol, - method, - symbol_table, - get_message_handler(), - disable_runtime_checks, - max_array_length); + { + const irep_idt method_identifier= + id2string(qualified_classname)+ + "."+id2string(method.name)+ + ":"+method.signature; + java_bytecode_convert_method_lazy( + *class_symbol,method_identifier,method,symbol_table); + lazy_methods[method_identifier]= + std::make_pair(class_symbol,&method); + } + //java_bytecode_convert_method( + // *class_symbol, method, symbol_table, get_message_handler(), + // disable_runtime_checks, max_array_length); // is this a root class? if(c.extends.empty()) @@ -329,13 +338,15 @@ bool java_bytecode_convert_class( symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length) + size_t max_array_length, + lazy_methodst &lazy_methods) { java_bytecode_convert_classt java_bytecode_convert_class( symbol_table, message_handler, disable_runtime_checks, - max_array_length); + max_array_length, + lazy_methods); try { diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index ffaaf6e34da..8c1dccc02f5 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -14,11 +14,15 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parse_tree.h" +typedef std::map > + lazy_methodst; + bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length); + size_t max_array_length, + lazy_methodst &); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index f9cb121091b..cd1cfc7729a 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -21,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include "java_bytecode_convert_method.h" @@ -177,6 +178,35 @@ const exprt java_bytecode_convert_methodt::variable( } } +void java_bytecode_convert_method_lazy( + const symbolt& class_symbol, + const irep_idt method_identifier, + const java_bytecode_parse_treet::methodt &m, + symbol_tablet& symbol_table) +{ + symbolt method_symbol; + typet member_type=java_type_from_string(m.signature); + + method_symbol.name=method_identifier; + method_symbol.base_name=m.base_name; + method_symbol.mode=ID_java; + method_symbol.location=m.source_location; + method_symbol.location.set_function(method_identifier); + + if(method_symbol.base_name=="") + { + method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ + id2string(class_symbol.base_name)+"()"; + member_type.set(ID_constructor,true); + } + else + method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ + id2string(m.base_name)+"()"; + + method_symbol.type=member_type; + symbol_table.add(method_symbol); +} + /*******************************************************************\ Function: java_bytecode_convert_methodt::convert @@ -343,10 +373,10 @@ void java_bytecode_convert_methodt::convert( if((!m.is_abstract) && (!m.is_native)) method_symbol.value=convert_instructions(m, code_type); - // do we have the method symbol already? + // Replace the existing stub symbol with the real deal: const auto s_it=symbol_table.symbols.find(method.get_name()); - if(s_it!=symbol_table.symbols.end()) - symbol_table.symbols.erase(s_it); // erase, we stubbed it + assert(s_it!=symbol_table.symbols.end()); + symbol_table.symbols.erase(s_it); symbol_table.add(method_symbol); } @@ -1109,12 +1139,25 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_table.add(symbol); } + needed_methods.push_back(arg0.get(ID_identifier)); + if(is_virtual) { // dynamic binding assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; + const auto& call_class=arg0.get(ID_C_class); + assert(call_class!=irep_idt()); + const auto& call_basename=arg0.get(ID_component_name); + assert(call_basename!=irep_idt()); + auto child_classes=class_hierarchy.get_children_trans(call_class); + for(const auto& child_class : child_classes) + { + auto methodid=id2string(child_class)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + needed_methods.push_back(methodid); + } } else { @@ -2140,13 +2183,17 @@ void java_bytecode_convert_method( symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length) + size_t max_array_length, + std::vector& needed_methods, + const class_hierarchyt& ch) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, message_handler, disable_runtime_checks, - max_array_length); + max_array_length, + needed_methods, + ch); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 8945af95bd1..327bfd09932 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -14,12 +14,22 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parse_tree.h" +class class_hierarchyt; + void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, symbol_tablet &symbol_table, message_handlert &message_handler, bool disable_runtime_checks, - size_t max_array_length); + size_t max_array_length, + std::vector& needed_methods, + const class_hierarchyt&); + +void java_bytecode_convert_method_lazy( + const symbolt &class_symbol, + const irep_idt method_identifier, + const java_bytecode_parse_treet::methodt &, + symbol_tablet &symbol_table); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_H diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 43f51269aab..4a42eb9f1d3 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -15,12 +15,14 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include "java_bytecode_parse_tree.h" +#include "java_bytecode_convert_class.h" #include #include class symbol_tablet; class symbolt; +class class_hierarchyt; class java_bytecode_convert_methodt:public messaget { @@ -29,11 +31,15 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &_symbol_table, message_handlert &_message_handler, bool _disable_runtime_checks, - size_t _max_array_length): + size_t _max_array_length, + std::vector& _needed_methods, + const class_hierarchyt& _ch): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), - max_array_length(_max_array_length) + max_array_length(_max_array_length), + needed_methods(_needed_methods), + class_hierarchy(_ch) { } @@ -52,6 +58,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; + std::vector& needed_methods; + const class_hierarchyt& class_hierarchy; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 1d9d9f57b41..fb0e19ef487 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -14,8 +14,11 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + #include "java_bytecode_language.h" #include "java_bytecode_convert_class.h" +#include "java_bytecode_convert_method.h" #include "java_bytecode_internal_additions.h" #include "java_bytecode_typecheck.h" #include "java_entry_point.h" @@ -180,10 +183,15 @@ Function: java_bytecode_languaget::typecheck \*******************************************************************/ + + bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) { + std::map > + lazy_methods; + // first convert all for(java_class_loadert::class_mapt::const_iterator c_it=java_class_loader.class_map.begin(); @@ -200,10 +208,73 @@ bool java_bytecode_languaget::typecheck( symbol_table, get_message_handler(), disable_runtime_checks, - max_user_array_length)) + max_user_array_length, + lazy_methods)) return true; } + // Now incrementally elaborate methods that are reachable from this entry point: + + // Convert-method will need this to find virtual function targets. + class_hierarchyt ch; + ch(symbol_table); + + std::vector worklist1; + std::vector worklist2; + + auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); + if(main_function.stop_convert) + { + // Failed, mark all functions in the given main class reachable. + const auto& methods=java_class_loader.class_map.at(main_class).parsed_class.methods; + for(const auto& method : methods) + { + const irep_idt methodid="java::"+id2string(main_class)+"."+ + id2string(method.name)+":"+ + id2string(method.signature); + worklist2.push_back(methodid); + } + } + else + worklist2.push_back(main_function.main_function.name); + + std::set already_populated; + while(worklist2.size()!=0) + { + std::swap(worklist1,worklist2); + for(const auto& mname : worklist1) + { + if(!already_populated.insert(mname).second) + continue; + auto findit=lazy_methods.find(mname); + if(findit==lazy_methods.end()) + { + debug() << "Skip " << mname << eom; + continue; + } + debug() << "Lazy methods: elaborate " << mname << eom; + const auto& parsed_method=findit->second; + java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, + symbol_table,get_message_handler(), + disable_runtime_checks,max_user_array_length,worklist2,ch); + } + worklist1.clear(); + } + + // Remove symbols for methods that were declared but never used: + symbol_tablet keep_symbols; + + for(const auto& sym : symbol_table.symbols) + { + if(lazy_methods.count(sym.first) && !already_populated.count(sym.first)) + continue; + keep_symbols.add(sym.second); + } + + debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods" << eom; + + symbol_table.swap(keep_symbols); + // now typecheck all if(java_bytecode_typecheck( symbol_table, get_message_handler())) diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index d4d40beff3b..3a687ad57f6 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -326,11 +326,11 @@ void java_record_outputs( } } - main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, - message_handlert &message_handler) + message_handlert &message_handler, + bool allow_no_body) { symbolt symbol; main_function_resultt res; @@ -414,7 +414,7 @@ main_function_resultt get_main_symbol( } // check if it has a body - if(symbol.value.is_nil()) + if(symbol.value.is_nil() && !allow_no_body) { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; @@ -479,7 +479,7 @@ main_function_resultt get_main_symbol( symbol=symbol_table.symbols.find(*matches.begin())->second; // check if it has a body - if(symbol.value.is_nil()) + if(symbol.value.is_nil() && !allow_no_body) { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index 7fa562de89a..82071aa26b1 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -28,6 +28,7 @@ typedef struct main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, - message_handlert &); + message_handlert &, + bool allow_no_body = false); #endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H From fcd03f94ab8002127a48be4492b0308dec065472 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 1 Nov 2016 14:19:53 +0000 Subject: [PATCH 03/18] Remove dead global variables When lazy method conversion is in use, they might end up unused. --- src/java_bytecode/java_bytecode_language.cpp | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index fb0e19ef487..fc5ee421343 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -171,6 +171,19 @@ bool java_bytecode_languaget::parse( return false; } +static void gather_needed_globals(const exprt& e, const symbol_tablet& symbol_table, symbol_tablet& needed) +{ + if(e.id()==ID_symbol) + { + const auto& sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); + if(sym.is_static_lifetime) + needed.add(sym); + } + else + forall_operands(opit,e) + gather_needed_globals(*opit,symbol_table,needed); +} + /*******************************************************************\ Function: java_bytecode_languaget::typecheck @@ -183,8 +196,6 @@ Function: java_bytecode_languaget::typecheck \*******************************************************************/ - - bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) @@ -266,12 +277,16 @@ bool java_bytecode_languaget::typecheck( for(const auto& sym : symbol_table.symbols) { + if(sym.second.is_static_lifetime) + continue; if(lazy_methods.count(sym.first) && !already_populated.count(sym.first)) continue; + if(sym.second.type.id()==ID_code) + gather_needed_globals(sym.second.value,symbol_table,keep_symbols); keep_symbols.add(sym.second); } - debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods" << eom; + debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; symbol_table.swap(keep_symbols); From 7f752bb55b258a4d4cfea6d2034071dc8e7bd5b2 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 2 Nov 2016 17:02:41 +0000 Subject: [PATCH 04/18] Restrict virtual method targets to known-created types Previously even with lazy method conversion enabled, if we had a caller to A.f and C.f overrides A.f, we would generate a possible call to C.f even if a C is never instantiated in any reachable method. This commit changes that, such that we need *both* a callsite and evidence of a C being constructed to generate the callgraph edge. --- .../java_bytecode_convert_method.cpp | 24 ++- .../java_bytecode_convert_method.h | 1 + .../java_bytecode_convert_method_class.h | 3 + src/java_bytecode/java_bytecode_language.cpp | 156 +++++++++++++++--- 4 files changed, 148 insertions(+), 36 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index cd1cfc7729a..0d5198ad4b8 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1063,7 +1063,10 @@ codet java_bytecode_convert_methodt::convert_instructions( { if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) + { + needed_classes.insert(classname); code_type.set(ID_constructor, true); + } else code_type.set(ID_java_super_method_call, true); } @@ -1139,30 +1142,19 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_table.add(symbol); } - needed_methods.push_back(arg0.get(ID_identifier)); - if(is_virtual) { // dynamic binding assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; - const auto& call_class=arg0.get(ID_C_class); - assert(call_class!=irep_idt()); - const auto& call_basename=arg0.get(ID_component_name); - assert(call_basename!=irep_idt()); - auto child_classes=class_hierarchy.get_children_trans(call_class); - for(const auto& child_class : child_classes) - { - auto methodid=id2string(child_class)+"."+id2string(call_basename); - if(symbol_table.has_symbol(methodid)) - needed_methods.push_back(methodid); - } + // Populate needed methods later, once we know what object types can exist. } else { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); + needed_methods.push_back(arg0.get(ID_identifier)); } call.function().add_source_location()=loc; @@ -1704,6 +1696,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); + if(arg0.type().id()==ID_symbol) + needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1721,6 +1715,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); + if(arg0.type().id()==ID_symbol) + needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2185,6 +2181,7 @@ void java_bytecode_convert_method( bool disable_runtime_checks, size_t max_array_length, std::vector& needed_methods, + std::set& needed_classes, const class_hierarchyt& ch) { java_bytecode_convert_methodt java_bytecode_convert_method( @@ -2193,6 +2190,7 @@ void java_bytecode_convert_method( disable_runtime_checks, max_array_length, needed_methods, + needed_classes, ch); java_bytecode_convert_method(class_symbol, method); diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 327bfd09932..b6302161a43 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -24,6 +24,7 @@ void java_bytecode_convert_method( bool disable_runtime_checks, size_t max_array_length, std::vector& needed_methods, + std::set& needed_classes, const class_hierarchyt&); void java_bytecode_convert_method_lazy( diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 4a42eb9f1d3..b49d22934b1 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -33,12 +33,14 @@ class java_bytecode_convert_methodt:public messaget bool _disable_runtime_checks, size_t _max_array_length, std::vector& _needed_methods, + std::set& _needed_classes, const class_hierarchyt& _ch): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), needed_methods(_needed_methods), + needed_classes(_needed_classes), class_hierarchy(_ch) { } @@ -59,6 +61,7 @@ class java_bytecode_convert_methodt:public messaget const bool disable_runtime_checks; const size_t max_array_length; std::vector& needed_methods; + std::set& needed_classes; const class_hierarchyt& class_hierarchy; irep_idt method_id; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index fc5ee421343..057859cef04 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -171,6 +171,48 @@ bool java_bytecode_languaget::parse( return false; } +static void get_virtual_method_targets( + const code_function_callt& c, + const std::set& needed_classes, + std::vector& needed_methods, + const symbol_tablet& symbol_table, + const class_hierarchyt& class_hierarchy) +{ + + const auto& called_function=c.function(); + assert(called_function.id()==ID_virtual_function); + + const auto& call_class=called_function.get(ID_C_class); + assert(call_class!=irep_idt()); + const auto& call_basename=called_function.get(ID_component_name); + assert(call_basename!=irep_idt()); + + auto child_classes=class_hierarchy.get_children_trans(call_class); + child_classes.push_back(call_class); + for(const auto& child_class : child_classes) + { + // Program-wide, is this class ever instantiated? + if(!needed_classes.count(child_class)) + continue; + auto methodid=id2string(child_class)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + needed_methods.push_back(methodid); + } +} + +static void gather_virtual_callsites(const exprt& e, std::vector& result) +{ + if(e.id()!=ID_code) + return; + const codet& c=to_code(e); + if(c.get_statement()==ID_function_call && + to_code_function_call(c).function().id()==ID_virtual_function) + result.push_back(&to_code_function_call(c)); + else + forall_operands(it,e) + gather_virtual_callsites(*it,result); +} + static void gather_needed_globals(const exprt& e, const symbol_tablet& symbol_table, symbol_tablet& needed) { if(e.id()==ID_symbol) @@ -184,6 +226,48 @@ static void gather_needed_globals(const exprt& e, const symbol_tablet& symbol_ta gather_needed_globals(*opit,symbol_table,needed); } +static void gather_field_types( + const typet& class_type, + const namespacet& ns, + std::set& needed_classes) +{ + const auto& underlying_type=to_struct_type(ns.follow(class_type)); + for(const auto& field : underlying_type.components()) + { + if(field.type().id()==ID_struct || field.type().id()==ID_symbol) + gather_field_types(field.type(),ns,needed_classes); + else if(field.type().id()==ID_pointer) + { + // Skip array primitive pointers, for example: + if(field.type().subtype().id()!=ID_symbol) + continue; + const auto& field_classid=to_symbol_type(field.type().subtype()).get_identifier(); + if(needed_classes.insert(field_classid).second) + gather_field_types(field.type().subtype(),ns,needed_classes); + } + } +} + +static void initialise_needed_classes( + const std::vector& entry_points, + const namespacet& ns, + std::set& needed_classes) +{ + for(const auto& mname : entry_points) + { + const auto& symbol=ns.lookup(mname); + const auto& mtype=to_code_type(symbol.type); + for(const auto& param : mtype.parameters()) + { + if(param.type().id()==ID_pointer) + { + needed_classes.insert(to_symbol_type(param.type().subtype()).get_identifier()); + gather_field_types(param.type().subtype(),ns,needed_classes); + } + } + } +} + /*******************************************************************\ Function: java_bytecode_languaget::typecheck @@ -230,8 +314,8 @@ bool java_bytecode_languaget::typecheck( class_hierarchyt ch; ch(symbol_table); - std::vector worklist1; - std::vector worklist2; + std::vector method_worklist1; + std::vector method_worklist2; auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); if(main_function.stop_convert) @@ -243,34 +327,60 @@ bool java_bytecode_languaget::typecheck( const irep_idt methodid="java::"+id2string(main_class)+"."+ id2string(method.name)+":"+ id2string(method.signature); - worklist2.push_back(methodid); + method_worklist2.push_back(methodid); } } else - worklist2.push_back(main_function.main_function.name); + method_worklist2.push_back(main_function.main_function.name); - std::set already_populated; - while(worklist2.size()!=0) - { - std::swap(worklist1,worklist2); - for(const auto& mname : worklist1) + std::set needed_classes; + initialise_needed_classes(method_worklist2,namespacet(symbol_table),needed_classes); + + std::set methods_already_populated; + std::vector virtual_callsites; + + bool any_new_methods; + do { + + any_new_methods=false; + while(method_worklist2.size()!=0) { - if(!already_populated.insert(mname).second) - continue; - auto findit=lazy_methods.find(mname); - if(findit==lazy_methods.end()) + std::swap(method_worklist1,method_worklist2); + for(const auto& mname : method_worklist1) { - debug() << "Skip " << mname << eom; - continue; + if(!methods_already_populated.insert(mname).second) + continue; + auto findit=lazy_methods.find(mname); + if(findit==lazy_methods.end()) + { + debug() << "Skip " << mname << eom; + continue; + } + debug() << "Lazy methods: elaborate " << mname << eom; + const auto& parsed_method=findit->second; + java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, + symbol_table,get_message_handler(), + disable_runtime_checks,max_user_array_length, + method_worklist2,needed_classes,ch); + gather_virtual_callsites(symbol_table.lookup(mname).value,virtual_callsites); + any_new_methods=true; } - debug() << "Lazy methods: elaborate " << mname << eom; - const auto& parsed_method=findit->second; - java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, - symbol_table,get_message_handler(), - disable_runtime_checks,max_user_array_length,worklist2,ch); + method_worklist1.clear(); } - worklist1.clear(); - } + + // Given the object types we now know may be created, populate more + // possible virtual function call targets: + + debug() << "Lazy methods: add virtual method targets (" << virtual_callsites.size() << + " callsites)" << eom; + + for(const auto& callsite : virtual_callsites) + { + get_virtual_method_targets(*callsite,needed_classes,method_worklist2, + symbol_table,ch); + } + + } while(any_new_methods); // Remove symbols for methods that were declared but never used: symbol_tablet keep_symbols; @@ -279,7 +389,7 @@ bool java_bytecode_languaget::typecheck( { if(sym.second.is_static_lifetime) continue; - if(lazy_methods.count(sym.first) && !already_populated.count(sym.first)) + if(lazy_methods.count(sym.first) && !methods_already_populated.count(sym.first)) continue; if(sym.second.type.id()==ID_code) gather_needed_globals(sym.second.value,symbol_table,keep_symbols); From e1cb0c4d6f54eafa818565d64c03a76c6bc06eb7 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 21 Nov 2016 15:01:38 +0000 Subject: [PATCH 05/18] Mark all classes in root jar reachable if it doesn't declare a main function --- src/java_bytecode/java_bytecode_language.cpp | 24 ++++++++++++++------ src/java_bytecode/java_bytecode_language.h | 1 + src/java_bytecode/java_class_loader.h | 1 - 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 057859cef04..dc6c350ba7f 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -150,6 +150,8 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading it all" << eom; java_class_loader.load_entire_jar(path); + for(const auto& kv : java_class_loader.jar_map.at(path).entries) + main_jar_classes.push_back(kv.first); } else java_class_loader.add_jar_file(path); @@ -320,14 +322,22 @@ bool java_bytecode_languaget::typecheck( auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); if(main_function.stop_convert) { - // Failed, mark all functions in the given main class reachable. - const auto& methods=java_class_loader.class_map.at(main_class).parsed_class.methods; - for(const auto& method : methods) + // Failed, mark all functions in the given main class(es) reachable. + std::vector reachable_classes; + if(!main_class.empty()) + reachable_classes.push_back(main_class); + else + reachable_classes=main_jar_classes; + for(const auto& classname : reachable_classes) { - const irep_idt methodid="java::"+id2string(main_class)+"."+ - id2string(method.name)+":"+ - id2string(method.signature); - method_worklist2.push_back(methodid); + const auto& methods=java_class_loader.class_map.at(classname).parsed_class.methods; + for(const auto& method : methods) + { + const irep_idt methodid="java::"+id2string(classname)+"."+ + id2string(method.name)+":"+ + id2string(method.signature); + method_worklist2.push_back(methodid); + } } } else diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 5937f6f4fd4..36b28682dca 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -72,6 +72,7 @@ class java_bytecode_languaget:public languaget protected: irep_idt main_class; + std::vector main_jar_classes; java_class_loadert java_class_loader; bool assume_inputs_non_null; // assume inputs variables to be non-null diff --git a/src/java_bytecode/java_class_loader.h b/src/java_bytecode/java_class_loader.h index d3fcdd62810..b96097c504d 100644 --- a/src/java_bytecode/java_class_loader.h +++ b/src/java_bytecode/java_class_loader.h @@ -37,7 +37,6 @@ class java_class_loadert:public messaget jar_poolt jar_pool; -protected: class jar_map_entryt { public: From c10849c39dc8419c63c0fad501d7ba49e51e0ff5 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 23 Nov 2016 11:02:34 +0000 Subject: [PATCH 06/18] Add this-parameter before determining needed classes If the given entry point has a this-parameter of type A, that constitutes evidence that virtual callsites might target a class of type A. --- .../java_bytecode_convert_method.cpp | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 0d5198ad4b8..3cc78a0a053 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -203,6 +203,18 @@ void java_bytecode_convert_method_lazy( method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ id2string(m.base_name)+"()"; + // do we need to add 'this' as a parameter? + if(!m.is_static) + { + code_typet &code_type=to_code_type(member_type); + code_typet::parameterst ¶meters=code_type.parameters(); + code_typet::parametert this_p; + const reference_typet object_ref_type(symbol_typet(class_symbol.name)); + this_p.type()=object_ref_type; + this_p.set_this(); + parameters.insert(parameters.begin(), this_p); + } + method_symbol.type=member_type; symbol_table.add(method_symbol); } @@ -223,29 +235,17 @@ void java_bytecode_convert_methodt::convert( const symbolt &class_symbol, const methodt &m) { - typet member_type=java_type_from_string(m.signature); - - assert(member_type.id()==ID_code); - const irep_idt method_identifier= id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; method_id=method_identifier; + const auto& old_sym=symbol_table.lookup(method_identifier); + + typet member_type=old_sym.type; code_typet &code_type=to_code_type(member_type); method_return_type=code_type.return_type(); code_typet::parameterst ¶meters=code_type.parameters(); - // do we need to add 'this' as a parameter? - if(!m.is_static) - { - code_typet::parametert this_p; - const reference_typet object_ref_type( - symbol_typet(class_symbol.name)); - this_p.type()=object_ref_type; - this_p.set_this(); - parameters.insert(parameters.begin(), this_p); - } - variables.clear(); // find parameter names in the local variable table: From 8d96409a92a21375d7f465b38226fabbfd6bed7c Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 30 Nov 2016 15:38:08 +0000 Subject: [PATCH 07/18] Lazy method conversion: load callee parent classes Previously lazy virtual callsite resolution would assume that in order to call B.f, it is necessary to instantiate a B and call A.f, where A is some parent of B. This commit amends that to allow creation of a C, where C is a subtype of B and does not override f itself, is enough for B.f to be callable. --- src/java_bytecode/java_bytecode_language.cpp | 84 +++++++++++++++++--- 1 file changed, 72 insertions(+), 12 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index dc6c350ba7f..8e5ce448b57 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -173,14 +173,29 @@ bool java_bytecode_languaget::parse( return false; } +static irep_idt get_virtual_method_target( + const std::set& needed_classes, + const irep_idt& call_basename, + const irep_idt& classname, + const symbol_tablet& symbol_table) +{ + // Program-wide, is this class ever instantiated? + if(!needed_classes.count(classname)) + return irep_idt(); + auto methodid=id2string(classname)+"."+id2string(call_basename); + if(symbol_table.has_symbol(methodid)) + return methodid; + else + return irep_idt(); +} + static void get_virtual_method_targets( const code_function_callt& c, const std::set& needed_classes, std::vector& needed_methods, - const symbol_tablet& symbol_table, + symbol_tablet& symbol_table, const class_hierarchyt& class_hierarchy) { - const auto& called_function=c.function(); assert(called_function.id()==ID_virtual_function); @@ -189,17 +204,56 @@ static void get_virtual_method_targets( const auto& call_basename=called_function.get(ID_component_name); assert(call_basename!=irep_idt()); + auto old_size=needed_methods.size(); + auto child_classes=class_hierarchy.get_children_trans(call_class); - child_classes.push_back(call_class); for(const auto& child_class : child_classes) { - // Program-wide, is this class ever instantiated? - if(!needed_classes.count(child_class)) - continue; - auto methodid=id2string(child_class)+"."+id2string(call_basename); - if(symbol_table.has_symbol(methodid)) - needed_methods.push_back(methodid); + auto child_method=get_virtual_method_target(needed_classes,call_basename, + child_class,symbol_table); + if(child_method!=irep_idt()) + needed_methods.push_back(child_method); + } + + irep_idt parent_class_id=call_class; + while(1) + { + auto parent_method=get_virtual_method_target(needed_classes,call_basename, + parent_class_id,symbol_table); + if(parent_method!=irep_idt()) + { + needed_methods.push_back(parent_method); + break; + } + else + { + auto findit=class_hierarchy.class_map.find(parent_class_id); + if(findit==class_hierarchy.class_map.end()) + break; + else + { + const auto& entry=findit->second; + if(entry.parents.size()==0) + break; + else + parent_class_id=entry.parents[0]; + } + } } + + if(needed_methods.size()==old_size) + { + // Didn't find any candidate callee. Generate a stub. + std::string stubname=id2string(call_class)+"."+id2string(call_basename); + symbolt symbol; + symbol.name=stubname; + symbol.base_name=call_basename; + symbol.type=c.function().type(); + symbol.value.make_nil(); + symbol.mode=ID_java; + symbol_table.add(symbol); + } + } static void gather_virtual_callsites(const exprt& e, std::vector& result) @@ -253,6 +307,7 @@ static void gather_field_types( static void initialise_needed_classes( const std::vector& entry_points, const namespacet& ns, + const class_hierarchyt& ch, std::set& needed_classes) { for(const auto& mname : entry_points) @@ -263,8 +318,12 @@ static void initialise_needed_classes( { if(param.type().id()==ID_pointer) { - needed_classes.insert(to_symbol_type(param.type().subtype()).get_identifier()); - gather_field_types(param.type().subtype(),ns,needed_classes); + const auto& param_classid=to_symbol_type(param.type().subtype()).get_identifier(); + std::vector class_and_parents=ch.get_parents_trans(param_classid); + class_and_parents.push_back(param_classid); + for(const auto& classid : class_and_parents) + needed_classes.insert(classid); + gather_field_types(param.type().subtype(),ns,needed_classes); } } } @@ -344,7 +403,7 @@ bool java_bytecode_languaget::typecheck( method_worklist2.push_back(main_function.main_function.name); std::set needed_classes; - initialise_needed_classes(method_worklist2,namespacet(symbol_table),needed_classes); + initialise_needed_classes(method_worklist2,namespacet(symbol_table),ch,needed_classes); std::set methods_already_populated; std::vector virtual_callsites; @@ -386,6 +445,7 @@ bool java_bytecode_languaget::typecheck( for(const auto& callsite : virtual_callsites) { + // This will also create a stub if a virtual callsite has no targets. get_virtual_method_targets(*callsite,needed_classes,method_worklist2, symbol_table,ch); } From 2024c34c1c697e1ec203c5bf60ead40b3eb41e83 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 17 Jan 2017 11:55:35 +0000 Subject: [PATCH 08/18] Always assume String, Class and Object are callable --- src/java_bytecode/java_bytecode_language.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 8e5ce448b57..1655347f627 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -327,6 +327,13 @@ static void initialise_needed_classes( } } } + + // Also add classes whose instances are magically + // created by the JVM and so won't be spotted by + // looking for constructors and calls as usual: + needed_classes.insert("java::java.lang.String"); + needed_classes.insert("java::java.lang.Class"); + needed_classes.insert("java::java.lang.Object"); } /*******************************************************************\ From 6700a5646b75aa1913921e27aba74906300dcf23 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 17 Jan 2017 12:01:49 +0000 Subject: [PATCH 09/18] Don't try to call an abstract function --- src/goto-programs/remove_virtual_functions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/goto-programs/remove_virtual_functions.cpp b/src/goto-programs/remove_virtual_functions.cpp index da9cce5039f..b24621fe668 100644 --- a/src/goto-programs/remove_virtual_functions.cpp +++ b/src/goto-programs/remove_virtual_functions.cpp @@ -339,7 +339,8 @@ void remove_virtual_functionst::get_functions( component_name, functions); - functions.push_back(root_function); + if(root_function.symbol_expr!=symbol_exprt()) + functions.push_back(root_function); } /*******************************************************************\ From 79ff6675ec5d79a4e19332e454da785994d5f977 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 25 Jan 2017 15:53:40 +0000 Subject: [PATCH 10/18] Make lazy method loading optional --- .../java_bytecode_convert_class.cpp | 42 +++++++--- .../java_bytecode_convert_class.h | 7 +- .../java_bytecode_convert_method.cpp | 23 +++--- .../java_bytecode_convert_method.h | 25 +++++- .../java_bytecode_convert_method_class.h | 14 ++-- src/java_bytecode/java_bytecode_language.cpp | 76 ++++++++++++++----- src/java_bytecode/java_bytecode_language.h | 17 +++++ src/util/language.h | 9 +++ src/util/language_file.cpp | 16 ++++ src/util/language_file.h | 23 ++++++ 10 files changed, 196 insertions(+), 56 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 2036bcd9bce..eb642baa846 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_root_class.h" #include "java_types.h" #include "java_bytecode_convert_method.h" +#include "java_bytecode_language.h" #include #include @@ -28,12 +29,14 @@ class java_bytecode_convert_classt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - lazy_methodst& _lm): + lazy_methodst& _lm, + lazy_methods_modet _lazy_methods_mode): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), - lazy_methods(_lm) + lazy_methods(_lm), + lazy_methods_mode(_lazy_methods_mode) { } @@ -55,6 +58,7 @@ class java_bytecode_convert_classt:public messaget const bool disable_runtime_checks; const size_t max_array_length; lazy_methodst &lazy_methods; + lazy_methods_modet lazy_methods_mode; // conversion void convert(const classt &c); @@ -143,14 +147,30 @@ void java_bytecode_convert_classt::convert(const classt &c) id2string(qualified_classname)+ "."+id2string(method.name)+ ":"+method.signature; + // Always run the lazy pre-stage, as it symbol-table + // registers the function. java_bytecode_convert_method_lazy( - *class_symbol,method_identifier,method,symbol_table); - lazy_methods[method_identifier]= - std::make_pair(class_symbol,&method); + *class_symbol, + method_identifier, + method, + symbol_table); + if(lazy_methods_mode==LAZY_METHODS_MODE_EAGER) + { + // Upgrade to a fully-realized symbol now: + java_bytecode_convert_method( + *class_symbol, + method, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_array_length); + } + else + { + // Wait for our caller to decide what needs elaborating. + lazy_methods[method_identifier]=std::make_pair(class_symbol, &method); + } } - //java_bytecode_convert_method( - // *class_symbol, method, symbol_table, get_message_handler(), - // disable_runtime_checks, max_array_length); // is this a root class? if(c.extends.empty()) @@ -339,14 +359,16 @@ bool java_bytecode_convert_class( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - lazy_methodst &lazy_methods) + lazy_methodst &lazy_methods, + lazy_methods_modet lazy_methods_mode) { java_bytecode_convert_classt java_bytecode_convert_class( symbol_table, message_handler, disable_runtime_checks, max_array_length, - lazy_methods); + lazy_methods, + lazy_methods_mode); try { diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 8c1dccc02f5..5cc1526a125 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -13,9 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_parse_tree.h" - -typedef std::map > - lazy_methodst; +#include "java_bytecode_language.h" bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, @@ -23,6 +21,7 @@ bool java_bytecode_convert_class( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - lazy_methodst &); + lazy_methodst &, + lazy_methods_modet); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 3cc78a0a053..0151833007e 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -21,7 +21,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -#include #include #include "java_bytecode_convert_method.h" @@ -1064,7 +1063,8 @@ codet java_bytecode_convert_methodt::convert_instructions( if(as_string(arg0.get(ID_identifier)) .find("")!=std::string::npos) { - needed_classes.insert(classname); + if(needed_classes) + needed_classes->insert(classname); code_type.set(ID_constructor, true); } else @@ -1154,7 +1154,8 @@ codet java_bytecode_convert_methodt::convert_instructions( { // static binding call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); - needed_methods.push_back(arg0.get(ID_identifier)); + if(needed_methods) + needed_methods->push_back(arg0.get(ID_identifier)); } call.function().add_source_location()=loc; @@ -1696,8 +1697,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); - if(arg0.type().id()==ID_symbol) - needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); + if(needed_classes && arg0.type().id()==ID_symbol) + needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1715,8 +1716,8 @@ codet java_bytecode_convert_methodt::convert_instructions( symbol_exprt symbol_expr(arg0.type()); const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); - if(arg0.type().id()==ID_symbol) - needed_classes.insert(to_symbol_type(arg0.type()).get_identifier()); + if(needed_classes && arg0.type().id()==ID_symbol) + needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2180,9 +2181,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector& needed_methods, - std::set& needed_classes, - const class_hierarchyt& ch) + std::vector *needed_methods, + std::set *needed_classes) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, @@ -2190,8 +2190,7 @@ void java_bytecode_convert_method( disable_runtime_checks, max_array_length, needed_methods, - needed_classes, - ch); + needed_classes); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index b6302161a43..145c7a5cc91 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -23,9 +23,28 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector& needed_methods, - std::set& needed_classes, - const class_hierarchyt&); + std::vector *needed_methods, + std::set *needed_classes); + +// Must provide both the optional parameters or neither. +inline void java_bytecode_convert_method( + const symbolt &class_symbol, + const java_bytecode_parse_treet::methodt &method, + symbol_tablet &symbol_table, + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length) +{ + java_bytecode_convert_method( + class_symbol, + method, + symbol_table, + message_handler, + disable_runtime_checks, + max_array_length, + nullptr, + nullptr); +} void java_bytecode_convert_method_lazy( const symbolt &class_symbol, diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index b49d22934b1..8b11c831266 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -22,7 +22,6 @@ Author: Daniel Kroening, kroening@kroening.com class symbol_tablet; class symbolt; -class class_hierarchyt; class java_bytecode_convert_methodt:public messaget { @@ -32,16 +31,14 @@ class java_bytecode_convert_methodt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - std::vector& _needed_methods, - std::set& _needed_classes, - const class_hierarchyt& _ch): + std::vector *_needed_methods, + std::set *_needed_classes): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), needed_methods(_needed_methods), - needed_classes(_needed_classes), - class_hierarchy(_ch) + needed_classes(_needed_classes) { } @@ -60,9 +57,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; - std::vector& needed_methods; - std::set& needed_classes; - const class_hierarchyt& class_hierarchy; + std::vector *needed_methods; + std::set *needed_classes; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 1655347f627..42c6d0bb9ef 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -48,6 +48,12 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) std::stoi(cmd.get_value("java-max-input-array-length")); if(cmd.isset("java-max-vla-length")) max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length")); + if(cmd.isset("lazy-methods-context-sensitive")) + lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_SENSITIVE; + else if(cmd.isset("lazy-methods")) + lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_INSENSITIVE; + else + lazy_methods_mode=LAZY_METHODS_MODE_EAGER; } /*******************************************************************\ @@ -352,9 +358,6 @@ bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) { - std::map > - lazy_methods; - // first convert all for(java_class_loadert::class_mapt::const_iterator c_it=java_class_loader.class_map.begin(); @@ -372,13 +375,30 @@ bool java_bytecode_languaget::typecheck( get_message_handler(), disable_runtime_checks, max_user_array_length, - lazy_methods)) + lazy_methods, + lazy_methods_mode)) return true; } - // Now incrementally elaborate methods that are reachable from this entry point: + // Now incrementally elaborate methods that are reachable from this entry point. + if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) + { + if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) + return true; + } + + // now typecheck all + if(java_bytecode_typecheck( + symbol_table, get_message_handler())) + return true; + + return false; +} - // Convert-method will need this to find virtual function targets. +bool java_bytecode_languaget::do_ci_lazy_method_conversion( + symbol_tablet &symbol_table, + lazy_methodst &lazy_methods) +{ class_hierarchyt ch; ch(symbol_table); @@ -432,12 +452,17 @@ bool java_bytecode_languaget::typecheck( debug() << "Skip " << mname << eom; continue; } - debug() << "Lazy methods: elaborate " << mname << eom; + debug() << "CI lazy methods: elaborate " << mname << eom; const auto& parsed_method=findit->second; - java_bytecode_convert_method(*parsed_method.first,*parsed_method.second, - symbol_table,get_message_handler(), - disable_runtime_checks,max_user_array_length, - method_worklist2,needed_classes,ch); + java_bytecode_convert_method( + *parsed_method.first, + *parsed_method.second, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_user_array_length, + &method_worklist2, + &needed_classes); gather_virtual_callsites(symbol_table.lookup(mname).value,virtual_callsites); any_new_methods=true; } @@ -447,7 +472,7 @@ bool java_bytecode_languaget::typecheck( // Given the object types we now know may be created, populate more // possible virtual function call targets: - debug() << "Lazy methods: add virtual method targets (" << virtual_callsites.size() << + debug() << "CI lazy methods: add virtual method targets (" << virtual_callsites.size() << " callsites)" << eom; for(const auto& callsite : virtual_callsites) @@ -473,18 +498,33 @@ bool java_bytecode_languaget::typecheck( keep_symbols.add(sym.second); } - debug() << "Lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; + debug() << "CI lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; symbol_table.swap(keep_symbols); - // now typecheck all - if(java_bytecode_typecheck( - symbol_table, get_message_handler())) - return true; - return false; } +void java_bytecode_languaget::lazy_methods_provided(std::set &methods) const +{ + for(const auto &kv : lazy_methods) + methods.insert(kv.first); +} + +void java_bytecode_languaget::convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) +{ + const auto &lazy_method_entry=lazy_methods.at(id); + java_bytecode_convert_method( + *lazy_method_entry.first, + *lazy_method_entry.second, + symtab, + get_message_handler(), + disable_runtime_checks, + max_user_array_length); +} + /*******************************************************************\ Function: java_bytecode_languaget::final diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 36b28682dca..750b92aa367 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -16,6 +16,16 @@ Author: Daniel Kroening, kroening@kroening.com #define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5 +enum lazy_methods_modet +{ + LAZY_METHODS_MODE_EAGER, + LAZY_METHODS_MODE_CONTEXT_INSENSITIVE, + LAZY_METHODS_MODE_CONTEXT_SENSITIVE +}; + +typedef std::map > + lazy_methodst; + class java_bytecode_languaget:public languaget { public: @@ -69,8 +79,13 @@ class java_bytecode_languaget:public languaget std::set extensions() const override; void modules_provided(std::set &modules) override; + virtual void lazy_methods_provided(std::set &) const override; + virtual void convert_lazy_method( + const irep_idt &id, symbol_tablet &) override; protected: + bool do_ci_lazy_method_conversion(symbol_tablet &, lazy_methodst &); + irep_idt main_class; std::vector main_jar_classes; java_class_loadert java_class_loader; @@ -83,6 +98,8 @@ class java_bytecode_languaget:public languaget // - array size for newarray size_t max_nondet_array_length; // maximal length for non-det array creation size_t max_user_array_length; // max size for user code created arrays + lazy_methodst lazy_methods; + lazy_methods_modet lazy_methods_mode; }; languaget *new_java_bytecode_language(); diff --git a/src/util/language.h b/src/util/language.h index edd83471cd5..b19615496ec 100644 --- a/src/util/language.h +++ b/src/util/language.h @@ -49,6 +49,15 @@ class languaget:public messaget virtual void modules_provided(std::set &modules) { } + // add lazy functions provided to set + + virtual void lazy_methods_provided(std::set &methods) const + { } + + // populate a lazy method + virtual void convert_lazy_method(const irep_idt &id, symbol_tablet &) + { } + // final adjustments, e.g., initialization and call to main() virtual bool final( diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index f7a49991d63..29775ad8849 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -64,6 +64,13 @@ void language_filet::get_modules() language->modules_provided(modules); } +void language_filet::convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) +{ + language->convert_lazy_method(id, symtab); +} + /*******************************************************************\ Function: language_filest::show_parse @@ -189,8 +196,17 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) it!=filemap.end(); it++) { if(it->second.modules.empty()) + { if(it->second.language->typecheck(symbol_table, "")) return true; + // register lazy methods. + // TODO: learn about modules and generalise this + // to module-providing languages if required. + std::set lazy_method_ids; + it->second.language->lazy_methods_provided(lazy_method_ids); + for(const auto &id : lazy_method_ids) + lazymethodmap[id]=&it->second; + } } // typecheck modules diff --git a/src/util/language_file.h b/src/util/language_file.h index 533e8d5240e..b538c24e84f 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -42,6 +42,10 @@ class language_filet void get_modules(); + void convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab); + language_filet(const language_filet &rhs); language_filet():language(NULL) @@ -57,9 +61,15 @@ class language_filest:public messaget typedef std::map filemapt; filemapt filemap; + // Contains pointers into filemapt! typedef std::map modulemapt; modulemapt modulemap; + // Contains pointers into filemapt! + // This is safe-ish as long as this is std::map. + typedef std::map lazymethodmapt; + lazymethodmapt lazymethodmap; + void clear_files() { filemap.clear(); @@ -75,10 +85,23 @@ class language_filest:public messaget bool interfaces(symbol_tablet &symbol_table); + bool has_lazy_method(const irep_idt &id) + { + return lazymethodmap.count(id); + } + + void convert_lazy_method( + const irep_idt &id, + symbol_tablet &symtab) + { + return lazymethodmap.at(id)->convert_lazy_method(id, symtab); + } + void clear() { filemap.clear(); modulemap.clear(); + lazymethodmap.clear(); } protected: From 5be71063cf850e3dff16165abc87448a70746450 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 6 Feb 2017 11:18:04 +0000 Subject: [PATCH 11/18] Add CBMC lazy-methods parameter TODO: move all Java frontend parameters into something defined on java_bytecode_languaget and have all driver programs gather the frontends' defined parameters. --- src/cbmc/cbmc_parse_options.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index 87e9beb45d2..adc6e2ee517 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -56,6 +56,7 @@ class optionst; "(graphml-witness):" \ "(java-max-vla-length):(java-unwind-enum-static)" \ "(localize-faults)(localize-faults-method):" \ + "(lazy-methods)" \ "(fixedbv)(floatbv)(all-claims)(all-properties)" // legacy, and will eventually disappear // NOLINT(whitespace/line_length) class cbmc_parse_optionst: From d932cfde321011b506a0f2acbb31ffc181b30a16 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 9 Feb 2017 16:44:27 +0000 Subject: [PATCH 12/18] Document lazy methods --- .../java_bytecode_convert_method.cpp | 19 ++++ src/java_bytecode/java_bytecode_language.cpp | 103 ++++++++++++++++++ 2 files changed, 122 insertions(+) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 0151833007e..7842f38fa5e 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -177,6 +177,25 @@ const exprt java_bytecode_convert_methodt::variable( } } +/*******************************************************************\ + +Function: java_bytecode_convert_method_lazy + + Inputs: `class_symbol`: class this method belongs to + `method_identifier`: fully qualified method name, including + type signature (e.g. "x.y.z.f:(I)") + `m`: parsed method object to convert + `symbol_table`: global symbol table (will be modified) + + Outputs: + + Purpose: This creates a method symbol in the symtab, but doesn't + actually perform method conversion just yet. The caller + should call java_bytecode_convert_method later to give the + symbol/method a body. + +\*******************************************************************/ + void java_bytecode_convert_method_lazy( const symbolt& class_symbol, const irep_idt method_identifier, diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 42c6d0bb9ef..310a5ef69dd 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -179,6 +179,27 @@ bool java_bytecode_languaget::parse( return false; } +/*******************************************************************\ + +Function: get_virtual_method_target + + Inputs: `needed_classes`: set of classes that can be instantiated. + Any potential callee not in this set will be ignored. + `call_basename`: unqualified function name with type + signature (e.g. "f:(I)") + `classname`: class name that may define or override a + function named `call_basename`. + `symbol_table`: global symtab + + Outputs: Returns the fully qualified name of `classname`'s definition + of `call_basename` if found and `classname` is present in + `needed_classes`, or irep_idt() otherwise. + + Purpose: Find a virtual callee, if one is defined and the callee type + is known to exist. + +\*******************************************************************/ + static irep_idt get_virtual_method_target( const std::set& needed_classes, const irep_idt& call_basename, @@ -195,6 +216,27 @@ static irep_idt get_virtual_method_target( return irep_idt(); } +/*******************************************************************\ + +Function: get_virtual_method_target + + Inputs: `c`: function call whose potential target functions should + be determined. + `needed_classes`: set of classes that can be instantiated. + Any potential callee not in this set will be ignored. + `symbol_table`: global symtab + `class_hierarchy`: global class hierarchy + + Outputs: Populates `needed_methods` with all possible `c` callees, + taking `needed_classes` into account (virtual function + overrides defined on classes that are not 'needed' are + ignored) + + Purpose: Find possible callees, excluding types that are not known + to be instantiated. + +\*******************************************************************/ + static void get_virtual_method_targets( const code_function_callt& c, const std::set& needed_classes, @@ -262,6 +304,19 @@ static void get_virtual_method_targets( } +/*******************************************************************\ + +Function: gather_virtual_callsites + + Inputs: `e`: expression tree to search + + Outputs: Populates `result` with pointers to each function call + within e that calls a virtual function. + + Purpose: See output + +\*******************************************************************/ + static void gather_virtual_callsites(const exprt& e, std::vector& result) { if(e.id()!=ID_code) @@ -275,6 +330,20 @@ static void gather_virtual_callsites(const exprt& e, std::vector& entry_points, const namespacet& ns, From d19448a9302a8fb3a4126484f46f58aa9683315f Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 9 Feb 2017 16:55:15 +0000 Subject: [PATCH 13/18] Style fixes --- .../java_bytecode_convert_method.cpp | 5 +- src/java_bytecode/java_bytecode_language.cpp | 98 ++++++++++++------- src/java_bytecode/java_bytecode_language.h | 6 +- 3 files changed, 73 insertions(+), 36 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 7842f38fa5e..6e12a8ab537 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -215,7 +215,7 @@ void java_bytecode_convert_method_lazy( { method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ id2string(class_symbol.base_name)+"()"; - member_type.set(ID_constructor,true); + member_type.set(ID_constructor, true); } else method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ @@ -1167,7 +1167,8 @@ codet java_bytecode_convert_methodt::convert_instructions( assert(use_this); assert(!call.arguments().empty()); call.function()=arg0; - // Populate needed methods later, once we know what object types can exist. + // Populate needed methods later, + // once we know what object types can exist. } else { diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 310a5ef69dd..c9f1b1771d2 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -257,8 +257,12 @@ static void get_virtual_method_targets( auto child_classes=class_hierarchy.get_children_trans(call_class); for(const auto& child_class : child_classes) { - auto child_method=get_virtual_method_target(needed_classes,call_basename, - child_class,symbol_table); + auto child_method= + get_virtual_method_target( + needed_classes, + call_basename, + child_class, + symbol_table); if(child_method!=irep_idt()) needed_methods.push_back(child_method); } @@ -266,8 +270,12 @@ static void get_virtual_method_targets( irep_idt parent_class_id=call_class; while(1) { - auto parent_method=get_virtual_method_target(needed_classes,call_basename, - parent_class_id,symbol_table); + auto parent_method= + get_virtual_method_target( + needed_classes, + call_basename, + parent_class_id, + symbol_table); if(parent_method!=irep_idt()) { needed_methods.push_back(parent_method); @@ -301,7 +309,6 @@ static void get_virtual_method_targets( symbol.mode=ID_java; symbol_table.add(symbol); } - } /*******************************************************************\ @@ -317,7 +324,9 @@ Function: gather_virtual_callsites \*******************************************************************/ -static void gather_virtual_callsites(const exprt& e, std::vector& result) +static void gather_virtual_callsites( + const exprt& e, + std::vector& result) { if(e.id()!=ID_code) return; @@ -326,8 +335,8 @@ static void gather_virtual_callsites(const exprt& e, std::vector class_and_parents=ch.get_parents_trans(param_classid); + const auto& param_classid= + to_symbol_type(param.type().subtype()).get_identifier(); + std::vector class_and_parents= + ch.get_parents_trans(param_classid); class_and_parents.push_back(param_classid); for(const auto& classid : class_and_parents) needed_classes.insert(classid); - gather_field_types(param.type().subtype(),ns,needed_classes); + gather_field_types(param.type().subtype(), ns, needed_classes); } } } @@ -483,7 +498,8 @@ bool java_bytecode_languaget::typecheck( return true; } - // Now incrementally elaborate methods that are reachable from this entry point. + // Now incrementally elaborate methods + // that are reachable from this entry point. if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) { if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) @@ -508,7 +524,8 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( std::vector method_worklist1; std::vector method_worklist2; - auto main_function=get_main_symbol(symbol_table,main_class,get_message_handler(),true); + auto main_function= + get_main_symbol(symbol_table, main_class, get_message_handler(), true); if(main_function.stop_convert) { // Failed, mark all functions in the given main class(es) reachable. @@ -519,7 +536,8 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( reachable_classes=main_jar_classes; for(const auto& classname : reachable_classes) { - const auto& methods=java_class_loader.class_map.at(classname).parsed_class.methods; + const auto& methods= + java_class_loader.class_map.at(classname).parsed_class.methods; for(const auto& method : methods) { const irep_idt methodid="java::"+id2string(classname)+"."+ @@ -533,18 +551,22 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( method_worklist2.push_back(main_function.main_function.name); std::set needed_classes; - initialise_needed_classes(method_worklist2,namespacet(symbol_table),ch,needed_classes); + initialise_needed_classes( + method_worklist2, + namespacet(symbol_table), + ch, + needed_classes); std::set methods_already_populated; std::vector virtual_callsites; bool any_new_methods; - do { - + do + { any_new_methods=false; while(method_worklist2.size()!=0) { - std::swap(method_worklist1,method_worklist2); + std::swap(method_worklist1, method_worklist2); for(const auto& mname : method_worklist1) { if(!methods_already_populated.insert(mname).second) @@ -566,7 +588,9 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( max_user_array_length, &method_worklist2, &needed_classes); - gather_virtual_callsites(symbol_table.lookup(mname).value,virtual_callsites); + gather_virtual_callsites( + symbol_table.lookup(mname).value, + virtual_callsites); any_new_methods=true; } method_worklist1.clear(); @@ -575,14 +599,16 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( // Given the object types we now know may be created, populate more // possible virtual function call targets: - debug() << "CI lazy methods: add virtual method targets (" << virtual_callsites.size() << - " callsites)" << eom; + debug() << "CI lazy methods: add virtual method targets (" + << virtual_callsites.size() + << " callsites)" + << eom; for(const auto& callsite : virtual_callsites) { // This will also create a stub if a virtual callsite has no targets. - get_virtual_method_targets(*callsite,needed_classes,method_worklist2, - symbol_table,ch); + get_virtual_method_targets(*callsite, needed_classes, method_worklist2, + symbol_table, ch); } } while(any_new_methods); @@ -594,14 +620,20 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( { if(sym.second.is_static_lifetime) continue; - if(lazy_methods.count(sym.first) && !methods_already_populated.count(sym.first)) + if(lazy_methods.count(sym.first) && + !methods_already_populated.count(sym.first)) + { continue; + } if(sym.second.type.id()==ID_code) - gather_needed_globals(sym.second.value,symbol_table,keep_symbols); + gather_needed_globals(sym.second.value, symbol_table, keep_symbols); keep_symbols.add(sym.second); } - debug() << "CI lazy methods: removed " << symbol_table.symbols.size() - keep_symbols.symbols.size() << " unreachable methods and globals" << eom; + debug() << "CI lazy methods: removed " + << symbol_table.symbols.size() - keep_symbols.symbols.size() + << " unreachable methods and globals" + << eom; symbol_table.swap(keep_symbols); diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 750b92aa367..afdd275ddf6 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -23,7 +23,11 @@ enum lazy_methods_modet LAZY_METHODS_MODE_CONTEXT_SENSITIVE }; -typedef std::map > +typedef std::pair< + const symbolt*, + const java_bytecode_parse_treet::methodt*> + lazy_method_valuet; +typedef std::map lazy_methodst; class java_bytecode_languaget:public languaget From 7c669b277e4e097e862917198859d383467d696a Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Thu, 9 Feb 2017 17:08:15 +0000 Subject: [PATCH 14/18] Add lazy loading tests --- regression/cbmc-java/lazyloading1/A.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading1/B.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading1/test.class | Bin 0 -> 292 bytes regression/cbmc-java/lazyloading1/test.desc | 8 +++++ regression/cbmc-java/lazyloading1/test.java | 21 +++++++++++++ regression/cbmc-java/lazyloading2/A.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading2/B.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading2/test.class | Bin 0 -> 310 bytes regression/cbmc-java/lazyloading2/test.desc | 8 +++++ regression/cbmc-java/lazyloading2/test.java | 23 ++++++++++++++ regression/cbmc-java/lazyloading3/A.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading3/B.class | Bin 0 -> 222 bytes regression/cbmc-java/lazyloading3/C.class | Bin 0 -> 197 bytes regression/cbmc-java/lazyloading3/D.class | Bin 0 -> 197 bytes regression/cbmc-java/lazyloading3/test.class | Bin 0 -> 296 bytes regression/cbmc-java/lazyloading3/test.desc | 8 +++++ regression/cbmc-java/lazyloading3/test.java | 30 +++++++++++++++++++ 17 files changed, 98 insertions(+) create mode 100644 regression/cbmc-java/lazyloading1/A.class create mode 100644 regression/cbmc-java/lazyloading1/B.class create mode 100644 regression/cbmc-java/lazyloading1/test.class create mode 100644 regression/cbmc-java/lazyloading1/test.desc create mode 100644 regression/cbmc-java/lazyloading1/test.java create mode 100644 regression/cbmc-java/lazyloading2/A.class create mode 100644 regression/cbmc-java/lazyloading2/B.class create mode 100644 regression/cbmc-java/lazyloading2/test.class create mode 100644 regression/cbmc-java/lazyloading2/test.desc create mode 100644 regression/cbmc-java/lazyloading2/test.java create mode 100644 regression/cbmc-java/lazyloading3/A.class create mode 100644 regression/cbmc-java/lazyloading3/B.class create mode 100644 regression/cbmc-java/lazyloading3/C.class create mode 100644 regression/cbmc-java/lazyloading3/D.class create mode 100644 regression/cbmc-java/lazyloading3/test.class create mode 100644 regression/cbmc-java/lazyloading3/test.desc create mode 100644 regression/cbmc-java/lazyloading3/test.java diff --git a/regression/cbmc-java/lazyloading1/A.class b/regression/cbmc-java/lazyloading1/A.class new file mode 100644 index 0000000000000000000000000000000000000000..d6a62ca1ce465a6ae3fc35f9fa702cfa834ca298 GIT binary patch literal 222 zcmZXNy9&ZU5Jm6gVa!WyEd)!oFpZ@MB3OwQihUAS*$@+m8~@8nu3 zOQ{W4iHjO%@ih)$!DeDk=;%Aj!QtNc Vnm$}Ke`uF|tv@Ve*yV=-?*m^uqBqm`X_Sbr8+Ekm=tM6r!7<>R9O5A6#ILZBY z?k_p7_u~m*hK>glEeF1XHlZ<3i&QNL=4gB*SgZ0R3EfRv$ZdU&WOWcx&JHC6Z(r6` zEZ3A-v~jNyqM}hnO`)!EBM>9Vjr$$4#Ipfe)OD%LdK@30-UQ)r^KJ#@ zu}ke_w@&PKW|9?^aBW;F(65`_c5X+`YsJidW-m*#H!+IP3b!%tpc(dtpyI98%A_By zX|l9)O44U7&_s^E0|Hha5fM-*5CiaCXkv-e1G}*2pAV2P5Pq|~C2#H1Xc2xA%}16OcPq|~C2#H1Xc2xB@U16OcPq|~C2#H1Xc2xA%}16OcK literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/lazyloading3/B.class b/regression/cbmc-java/lazyloading3/B.class new file mode 100644 index 0000000000000000000000000000000000000000..9a4ab54d369d7175755e23dfabef5f15440bf383 GIT binary patch literal 222 zcmX^0Z`VEs1_l!bJ}w4k25xo+9(D#^Mg}&U%)HDJJ4Oa(4b3n{1{UZ1lvG9rexJ;| zRKL>Pq|~C2#H1Xc2xB@U16Occ5zb#DL54!6!+VDk(Sy*z4g7E1P33$hZ57`kdu7p zkmU1yy#XxXyU^j;=-KEKND%aBvm{unv`E#OaWXv-jIcaQf*+-Y+|}1uR!0%%?6`-r zt`fOT8*r4osd*;u!h;2y?}dax!$B^JOR$eKnJA2*@l$}f44D6k7ue}Ajkq_NIeUU0 PZS1gGtvO_MnKX2N4w)Zy literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/lazyloading3/D.class b/regression/cbmc-java/lazyloading3/D.class new file mode 100644 index 0000000000000000000000000000000000000000..7e16bd6527d6835776fcc14eb5132a4d94fcf1b2 GIT binary patch literal 197 zcmX^0Z`VEs1_l!bel7-P1|D_>UUmjPMh3FqHk-`6%n~~wS3@(5k%7fI zKP8osf!`-HFV(L!Hz~C!Brz!mD8dz-Us{x$>Xr%OaF(PNm*{0BmL>8quraU$&2wR7 z5CHM@a}x8?_5G8wQj<#<6d0I*mNGCf0x=5%E0hJ)02JW>@??QDNRCx&I|JiJurw!- QWCIII07)(&kBNaB01$#7eEKI~GBKD~_yhb= z#y2)@GV|WfelxS*Kc8O!?r>>P#z_@5)D2G2&}eG36ry{d`{F^N)a@-4!esmC6l$r@ z-DJPX+-`2OH7nu9`kZ(-O@;!R*>=BMy0MqXYT;gr+m(H_F(PPK|Bm2*(m!oDlp{D)`eOP3$G;8oD9r!> literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/lazyloading3/test.desc b/regression/cbmc-java/lazyloading3/test.desc new file mode 100644 index 00000000000..c0011248128 --- /dev/null +++ b/regression/cbmc-java/lazyloading3/test.desc @@ -0,0 +1,8 @@ +CORE +test.class +--lazy-methods --verbosity 10 --function test.main +^EXIT=0$ +^SIGNAL=0$ +elaborate java::A\.f:\(\)V +-- +elaborate java::B\.g:\(\)V diff --git a/regression/cbmc-java/lazyloading3/test.java b/regression/cbmc-java/lazyloading3/test.java new file mode 100644 index 00000000000..6d3129d1261 --- /dev/null +++ b/regression/cbmc-java/lazyloading3/test.java @@ -0,0 +1,30 @@ +// This test checks that because `main` has a parameter of type C, which has a field of type A, +// A::f is considered reachable, but B::g is not. + +public class test +{ + public static void main(C c) + { + c.a.f(); + } +} + +class A +{ + public void f() {} +} + +class B +{ + public void g() {} +} + +class C +{ + A a; +} + +class D +{ + B b; +} From b909a547b0f3f30239f0c2debcae202b5566f3d7 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Mon, 13 Feb 2017 11:08:22 +0000 Subject: [PATCH 15/18] Improve code style No functional changes intended --- src/goto-cc/compile.cpp | 6 +- src/goto-programs/get_goto_model.cpp | 4 +- .../java_bytecode_convert_class.cpp | 4 +- .../java_bytecode_convert_method.cpp | 16 +-- .../java_bytecode_convert_method.h | 2 +- src/java_bytecode/java_bytecode_language.cpp | 98 +++++++++---------- src/java_bytecode/java_entry_point.h | 2 +- src/langapi/language_ui.cpp | 4 +- src/util/language_file.cpp | 46 ++++----- src/util/language_file.h | 33 ++++--- 10 files changed, 110 insertions(+), 105 deletions(-) diff --git a/src/goto-cc/compile.cpp b/src/goto-cc/compile.cpp index 7ab9c5eae3d..3027bc5bf0b 100644 --- a/src/goto-cc/compile.cpp +++ b/src/goto-cc/compile.cpp @@ -540,8 +540,8 @@ bool compilet::parse(const std::string &file_name) language_filet language_file; - std::pair - res=language_files.filemap.insert( + std::pair + res=language_files.file_map.insert( std::pair(file_name, language_file)); language_filet &lf=res.first->second; @@ -732,7 +732,7 @@ bool compilet::parse_source(const std::string &file_name) return true; // so we remove it from the list afterwards - language_files.filemap.erase(file_name); + language_files.file_map.erase(file_name); return false; } diff --git a/src/goto-programs/get_goto_model.cpp b/src/goto-programs/get_goto_model.cpp index 5afef435596..02aae876030 100644 --- a/src/goto-programs/get_goto_model.cpp +++ b/src/goto-programs/get_goto_model.cpp @@ -75,8 +75,8 @@ bool get_goto_modelt::operator()(const std::vector &files) return true; } - std::pair - result=language_files.filemap.insert( + std::pair + result=language_files.file_map.insert( std::pair(filename, language_filet())); language_filet &lf=result.first->second; diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index eb642baa846..440b0ce32cf 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -29,13 +29,13 @@ class java_bytecode_convert_classt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - lazy_methodst& _lm, + lazy_methodst& _lazy_methods, lazy_methods_modet _lazy_methods_mode): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), max_array_length(_max_array_length), - lazy_methods(_lm), + lazy_methods(_lazy_methods), lazy_methods_mode(_lazy_methods_mode) { } diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 6e12a8ab537..498f880c965 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -197,10 +197,10 @@ Function: java_bytecode_convert_method_lazy \*******************************************************************/ void java_bytecode_convert_method_lazy( - const symbolt& class_symbol, - const irep_idt method_identifier, + const symbolt &class_symbol, + const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, - symbol_tablet& symbol_table) + symbol_tablet &symbol_table) { symbolt method_symbol; typet member_type=java_type_from_string(m.signature); @@ -213,12 +213,14 @@ void java_bytecode_convert_method_lazy( if(method_symbol.base_name=="") { - method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ - id2string(class_symbol.base_name)+"()"; + method_symbol.pretty_name= + id2string(class_symbol.pretty_name)+"."+ + id2string(class_symbol.base_name)+"()"; member_type.set(ID_constructor, true); } else - method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ + method_symbol.pretty_name= + id2string(class_symbol.pretty_name)+"."+ id2string(m.base_name)+"()"; // do we need to add 'this' as a parameter? @@ -257,7 +259,7 @@ void java_bytecode_convert_methodt::convert( id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; method_id=method_identifier; - const auto& old_sym=symbol_table.lookup(method_identifier); + const auto &old_sym=symbol_table.lookup(method_identifier); typet member_type=old_sym.type; code_typet &code_type=to_code_type(member_type); diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 145c7a5cc91..68b0dd4e0a8 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -48,7 +48,7 @@ inline void java_bytecode_convert_method( void java_bytecode_convert_method_lazy( const symbolt &class_symbol, - const irep_idt method_identifier, + const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &, symbol_tablet &symbol_table); diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index c9f1b1771d2..4512ec97709 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -156,7 +156,7 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading it all" << eom; java_class_loader.load_entire_jar(path); - for(const auto& kv : java_class_loader.jar_map.at(path).entries) + for(const auto &kv : java_class_loader.jar_map.at(path).entries) main_jar_classes.push_back(kv.first); } else @@ -201,10 +201,10 @@ Function: get_virtual_method_target \*******************************************************************/ static irep_idt get_virtual_method_target( - const std::set& needed_classes, - const irep_idt& call_basename, - const irep_idt& classname, - const symbol_tablet& symbol_table) + const std::set &needed_classes, + const irep_idt &call_basename, + const irep_idt &classname, + const symbol_tablet &symbol_table) { // Program-wide, is this class ever instantiated? if(!needed_classes.count(classname)) @@ -238,24 +238,24 @@ Function: get_virtual_method_target \*******************************************************************/ static void get_virtual_method_targets( - const code_function_callt& c, - const std::set& needed_classes, - std::vector& needed_methods, - symbol_tablet& symbol_table, - const class_hierarchyt& class_hierarchy) + const code_function_callt &c, + const std::set &needed_classes, + std::vector &needed_methods, + symbol_tablet &symbol_table, + const class_hierarchyt &class_hierarchy) { - const auto& called_function=c.function(); + const auto &called_function=c.function(); assert(called_function.id()==ID_virtual_function); - const auto& call_class=called_function.get(ID_C_class); + const auto &call_class=called_function.get(ID_C_class); assert(call_class!=irep_idt()); - const auto& call_basename=called_function.get(ID_component_name); + const auto &call_basename=called_function.get(ID_component_name); assert(call_basename!=irep_idt()); auto old_size=needed_methods.size(); auto child_classes=class_hierarchy.get_children_trans(call_class); - for(const auto& child_class : child_classes) + for(const auto &child_class : child_classes) { auto child_method= get_virtual_method_target( @@ -288,8 +288,8 @@ static void get_virtual_method_targets( break; else { - const auto& entry=findit->second; - if(entry.parents.size()==0) + const auto &entry=findit->second; + if(entry.parents.empty()) break; else parent_class_id=entry.parents[0]; @@ -325,12 +325,12 @@ Function: gather_virtual_callsites \*******************************************************************/ static void gather_virtual_callsites( - const exprt& e, - std::vector& result) + const exprt &e, + std::vector &result) { if(e.id()!=ID_code) return; - const codet& c=to_code(e); + const codet &c=to_code(e); if(c.get_statement()==ID_function_call && to_code_function_call(c).function().id()==ID_virtual_function) result.push_back(&to_code_function_call(c)); @@ -354,13 +354,13 @@ Function: gather_needed_globals \*******************************************************************/ static void gather_needed_globals( - const exprt& e, - const symbol_tablet& symbol_table, - symbol_tablet& needed) + const exprt &e, + const symbol_tablet &symbol_table, + symbol_tablet &needed) { if(e.id()==ID_symbol) { - const auto& sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); + const auto &sym=symbol_table.lookup(to_symbol_expr(e).get_identifier()); if(sym.is_static_lifetime) needed.add(sym); } @@ -387,12 +387,12 @@ Function: gather_field_types \*******************************************************************/ static void gather_field_types( - const typet& class_type, - const namespacet& ns, - std::set& needed_classes) + const typet &class_type, + const namespacet &ns, + std::set &needed_classes) { - const auto& underlying_type=to_struct_type(ns.follow(class_type)); - for(const auto& field : underlying_type.components()) + const auto &underlying_type=to_struct_type(ns.follow(class_type)); + for(const auto &field : underlying_type.components()) { if(field.type().id()==ID_struct || field.type().id()==ID_symbol) gather_field_types(field.type(), ns, needed_classes); @@ -401,7 +401,7 @@ static void gather_field_types( // Skip array primitive pointers, for example: if(field.type().subtype().id()!=ID_symbol) continue; - const auto& field_classid= + const auto &field_classid= to_symbol_type(field.type().subtype()).get_identifier(); if(needed_classes.insert(field_classid).second) gather_field_types(field.type().subtype(), ns, needed_classes); @@ -411,7 +411,7 @@ static void gather_field_types( /*******************************************************************\ -Function: initialise_needed_classes +Function: initialize_needed_classes Inputs: `entry_points`: list of fully-qualified function names that we should assume are reachable @@ -426,26 +426,26 @@ Function: initialise_needed_classes \*******************************************************************/ -static void initialise_needed_classes( - const std::vector& entry_points, - const namespacet& ns, - const class_hierarchyt& ch, - std::set& needed_classes) +static void initialize_needed_classes( + const std::vector &entry_points, + const namespacet &ns, + const class_hierarchyt &ch, + std::set &needed_classes) { - for(const auto& mname : entry_points) + for(const auto &mname : entry_points) { - const auto& symbol=ns.lookup(mname); - const auto& mtype=to_code_type(symbol.type); - for(const auto& param : mtype.parameters()) + const auto &symbol=ns.lookup(mname); + const auto &mtype=to_code_type(symbol.type); + for(const auto ¶m : mtype.parameters()) { if(param.type().id()==ID_pointer) { - const auto& param_classid= + const auto ¶m_classid= to_symbol_type(param.type().subtype()).get_identifier(); std::vector class_and_parents= ch.get_parents_trans(param_classid); class_and_parents.push_back(param_classid); - for(const auto& classid : class_and_parents) + for(const auto &classid : class_and_parents) needed_classes.insert(classid); gather_field_types(param.type().subtype(), ns, needed_classes); } @@ -534,11 +534,11 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( reachable_classes.push_back(main_class); else reachable_classes=main_jar_classes; - for(const auto& classname : reachable_classes) + for(const auto &classname : reachable_classes) { - const auto& methods= + const auto &methods= java_class_loader.class_map.at(classname).parsed_class.methods; - for(const auto& method : methods) + for(const auto &method : methods) { const irep_idt methodid="java::"+id2string(classname)+"."+ id2string(method.name)+":"+ @@ -551,7 +551,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( method_worklist2.push_back(main_function.main_function.name); std::set needed_classes; - initialise_needed_classes( + initialize_needed_classes( method_worklist2, namespacet(symbol_table), ch, @@ -567,7 +567,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( while(method_worklist2.size()!=0) { std::swap(method_worklist1, method_worklist2); - for(const auto& mname : method_worklist1) + for(const auto &mname : method_worklist1) { if(!methods_already_populated.insert(mname).second) continue; @@ -578,7 +578,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( continue; } debug() << "CI lazy methods: elaborate " << mname << eom; - const auto& parsed_method=findit->second; + const auto &parsed_method=findit->second; java_bytecode_convert_method( *parsed_method.first, *parsed_method.second, @@ -604,7 +604,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( << " callsites)" << eom; - for(const auto& callsite : virtual_callsites) + for(const auto &callsite : virtual_callsites) { // This will also create a stub if a virtual callsite has no targets. get_virtual_method_targets(*callsite, needed_classes, method_worklist2, @@ -616,7 +616,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( // Remove symbols for methods that were declared but never used: symbol_tablet keep_symbols; - for(const auto& sym : symbol_table.symbols) + for(const auto &sym : symbol_table.symbols) { if(sym.second.is_static_lifetime) continue; diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index 82071aa26b1..e6575734d80 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -29,6 +29,6 @@ main_function_resultt get_main_symbol( symbol_tablet &symbol_table, const irep_idt &main_class, message_handlert &, - bool allow_no_body = false); + bool allow_no_body=false); #endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H diff --git a/src/langapi/language_ui.cpp b/src/langapi/language_ui.cpp index 3775c3c6125..45198cbffd2 100644 --- a/src/langapi/language_ui.cpp +++ b/src/langapi/language_ui.cpp @@ -104,8 +104,8 @@ bool language_uit::parse(const std::string &filename) return true; } - std::pair - result=language_files.filemap.insert( + std::pair + result=language_files.file_map.insert( std::pair(filename, language_filet())); language_filet &lf=result.first->second; diff --git a/src/util/language_file.cpp b/src/util/language_file.cpp index 29775ad8849..efd7b8d53b6 100644 --- a/src/util/language_file.cpp +++ b/src/util/language_file.cpp @@ -66,9 +66,9 @@ void language_filet::get_modules() void language_filet::convert_lazy_method( const irep_idt &id, - symbol_tablet &symtab) + symbol_tablet &symbol_table) { - language->convert_lazy_method(id, symtab); + language->convert_lazy_method(id, symbol_table); } /*******************************************************************\ @@ -85,8 +85,8 @@ Function: language_filest::show_parse void language_filest::show_parse(std::ostream &out) { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) it->second.language->show_parse(out); } @@ -104,8 +104,8 @@ Function: language_filest::parse bool language_filest::parse() { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { // open file @@ -151,8 +151,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) { // typecheck interfaces - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.language->interfaces(symbol_table)) return true; @@ -162,8 +162,8 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) unsigned collision_counter=0; - for(filemapt::iterator fm_it=filemap.begin(); - fm_it!=filemap.end(); fm_it++) + for(file_mapt::iterator fm_it=file_map.begin(); + fm_it!=file_map.end(); fm_it++) { const language_filet::modulest &modules= fm_it->second.modules; @@ -176,7 +176,7 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) // these may collide, and then get renamed std::string module_name=*mo_it; - while(modulemap.find(module_name)!=modulemap.end()) + while(module_map.find(module_name)!=module_map.end()) { module_name=*mo_it+"#"+std::to_string(collision_counter); collision_counter++; @@ -185,15 +185,15 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) language_modulet module; module.file=&fm_it->second; module.name=module_name; - modulemap.insert( + module_map.insert( std::pair(module.name, module)); } } // typecheck files - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.modules.empty()) { @@ -205,14 +205,14 @@ bool language_filest::typecheck(symbol_tablet &symbol_table) std::set lazy_method_ids; it->second.language->lazy_methods_provided(lazy_method_ids); for(const auto &id : lazy_method_ids) - lazymethodmap[id]=&it->second; + lazy_method_map[id]=&it->second; } } // typecheck modules - for(modulemapt::iterator it=modulemap.begin(); - it!=modulemap.end(); it++) + for(module_mapt::iterator it=module_map.begin(); + it!=module_map.end(); it++) { if(typecheck_module(symbol_table, it->second)) return true; @@ -238,8 +238,8 @@ bool language_filest::final( { std::set languages; - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(languages.insert(it->second.language->id()).second) if(it->second.language->final(symbol_table)) @@ -264,8 +264,8 @@ Function: language_filest::interfaces bool language_filest::interfaces( symbol_tablet &symbol_table) { - for(filemapt::iterator it=filemap.begin(); - it!=filemap.end(); it++) + for(file_mapt::iterator it=file_map.begin(); + it!=file_map.end(); it++) { if(it->second.language->interfaces(symbol_table)) return true; @@ -292,9 +292,9 @@ bool language_filest::typecheck_module( { // check module map - modulemapt::iterator it=modulemap.find(module); + module_mapt::iterator it=module_map.find(module); - if(it==modulemap.end()) + if(it==module_map.end()) { error() << "found no file that provides module " << module << eom; return true; diff --git a/src/util/language_file.h b/src/util/language_file.h index b538c24e84f..d3184d48697 100644 --- a/src/util/language_file.h +++ b/src/util/language_file.h @@ -44,7 +44,7 @@ class language_filet void convert_lazy_method( const irep_idt &id, - symbol_tablet &symtab); + symbol_tablet &symbol_table); language_filet(const language_filet &rhs); @@ -58,21 +58,21 @@ class language_filet class language_filest:public messaget { public: - typedef std::map filemapt; - filemapt filemap; + typedef std::map file_mapt; + file_mapt file_map; - // Contains pointers into filemapt! - typedef std::map modulemapt; - modulemapt modulemap; + // Contains pointers into file_mapt! + typedef std::map module_mapt; + module_mapt module_map; // Contains pointers into filemapt! // This is safe-ish as long as this is std::map. - typedef std::map lazymethodmapt; - lazymethodmapt lazymethodmap; + typedef std::map lazy_method_mapt; + lazy_method_mapt lazy_method_map; void clear_files() { - filemap.clear(); + file_map.clear(); } bool parse(); @@ -87,21 +87,24 @@ class language_filest:public messaget bool has_lazy_method(const irep_idt &id) { - return lazymethodmap.count(id); + return lazy_method_map.count(id); } + // The method must have been added to the symbol table and registered + // in lazy_method_map (currently always in language_filest::typecheck) + // for this to be legal. void convert_lazy_method( const irep_idt &id, - symbol_tablet &symtab) + symbol_tablet &symbol_table) { - return lazymethodmap.at(id)->convert_lazy_method(id, symtab); + return lazy_method_map.at(id)->convert_lazy_method(id, symbol_table); } void clear() { - filemap.clear(); - modulemap.clear(); - lazymethodmap.clear(); + file_map.clear(); + module_map.clear(); + lazy_method_map.clear(); } protected: From 145c2d4be896444f5805cefedc64a029ab40d855 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 15 Feb 2017 11:34:20 +0000 Subject: [PATCH 16/18] Use safe pointers for optional arguments This basic safe-pointer class doesn't do any lifetime management; rather it just enforces explicit null-checks and throws if they don't go as expected. --- .../java_bytecode_convert_method.cpp | 12 ++-- .../java_bytecode_convert_method.h | 9 +-- .../java_bytecode_convert_method_class.h | 9 +-- src/java_bytecode/java_bytecode_language.cpp | 4 +- src/util/safe_pointer.h | 64 +++++++++++++++++++ 5 files changed, 84 insertions(+), 14 deletions(-) create mode 100644 src/util/safe_pointer.h diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 498f880c965..099ab695481 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1720,7 +1720,10 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); if(needed_classes && arg0.type().id()==ID_symbol) - needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); + { + needed_classes->insert( + to_symbol_type(arg0.type()).get_identifier()); + } results[0]=java_bytecode_promotion(symbol_expr); // set $assertionDisabled to false @@ -1739,7 +1742,8 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); if(needed_classes && arg0.type().id()==ID_symbol) - needed_classes->insert(to_symbol_type(arg0.type()).get_identifier()); + needed_classes->insert( + to_symbol_type(arg0.type()).get_identifier()); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -2203,8 +2207,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector *needed_methods, - std::set *needed_classes) + safe_pointer > needed_methods, + safe_pointer > needed_classes) { java_bytecode_convert_methodt java_bytecode_convert_method( symbol_table, diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index 68b0dd4e0a8..e81881f44e1 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -11,6 +11,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_bytecode_parse_tree.h" @@ -23,8 +24,8 @@ void java_bytecode_convert_method( message_handlert &message_handler, bool disable_runtime_checks, size_t max_array_length, - std::vector *needed_methods, - std::set *needed_classes); + safe_pointer > needed_methods, + safe_pointer > needed_classes); // Must provide both the optional parameters or neither. inline void java_bytecode_convert_method( @@ -42,8 +43,8 @@ inline void java_bytecode_convert_method( message_handler, disable_runtime_checks, max_array_length, - nullptr, - nullptr); + safe_pointer >::create_null(), + safe_pointer >::create_null()); } void java_bytecode_convert_method_lazy( diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 8b11c831266..0d5476d12f7 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -13,6 +13,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include "java_bytecode_parse_tree.h" #include "java_bytecode_convert_class.h" @@ -31,8 +32,8 @@ class java_bytecode_convert_methodt:public messaget message_handlert &_message_handler, bool _disable_runtime_checks, size_t _max_array_length, - std::vector *_needed_methods, - std::set *_needed_classes): + safe_pointer > _needed_methods, + safe_pointer > _needed_classes): messaget(_message_handler), symbol_table(_symbol_table), disable_runtime_checks(_disable_runtime_checks), @@ -57,8 +58,8 @@ class java_bytecode_convert_methodt:public messaget symbol_tablet &symbol_table; const bool disable_runtime_checks; const size_t max_array_length; - std::vector *needed_methods; - std::set *needed_classes; + safe_pointer > needed_methods; + safe_pointer > needed_classes; irep_idt method_id; irep_idt current_method; diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 4512ec97709..03b5a51a4a2 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -586,8 +586,8 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( get_message_handler(), disable_runtime_checks, max_user_array_length, - &method_worklist2, - &needed_classes); + safe_pointer >::create_non_null(&method_worklist2), + safe_pointer >::create_non_null(&needed_classes)); gather_virtual_callsites( symbol_table.lookup(mname).value, virtual_callsites); diff --git a/src/util/safe_pointer.h b/src/util/safe_pointer.h new file mode 100644 index 00000000000..47e52ac13fe --- /dev/null +++ b/src/util/safe_pointer.h @@ -0,0 +1,64 @@ +/*******************************************************************\ + +Module: Simple checked pointers + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_SAFE_POINTER_H +#define CPROVER_UTIL_SAFE_POINTER_H + +template class safe_pointer +{ + public: + operator bool() const + { + return !!ptr; + } + + T *get() const + { + assert(ptr && "dereferenced a null safe pointer"); + return ptr; + } + + T &operator*() const + { + return *get(); + } + + T *operator->() const + { + return get(); + } + + static safe_pointer create_null() + { + return safe_pointer(); + } + + static safe_pointer create_non_null( + T *target) + { + assert(target && "initialized safe pointer with null"); + return safe_pointer(target); + } + + static safe_pointer create_maybe_null( + T *target) + { + return safe_pointer(target); + } + + protected: + T *ptr; + + explicit safe_pointer(T *target) : ptr(target) + {} + + safe_pointer() : ptr(nullptr) + {} +}; + +#endif From 8e38c0e9970b6fbe6d8fc70f53e3f92cc6ca671e Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 15 Feb 2017 12:58:30 +0000 Subject: [PATCH 17/18] Add lazy conversion documentation Also fix some trivial linting errors. No functional changes. --- .../java_bytecode_convert_method.cpp | 2 + src/java_bytecode/java_bytecode_language.cpp | 86 +++++++++++++++++-- src/java_bytecode/java_bytecode_language.h | 4 +- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 099ab695481..5e85451398a 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -1742,8 +1742,10 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); if(needed_classes && arg0.type().id()==ID_symbol) + { needed_classes->insert( to_symbol_type(arg0.type()).get_identifier()); + } c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 03b5a51a4a2..e9d5a87186b 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -502,6 +502,7 @@ bool java_bytecode_languaget::typecheck( // that are reachable from this entry point. if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE) { + // ci: context-insensitive. if(do_ci_lazy_method_conversion(symbol_table, lazy_methods)) return true; } @@ -514,6 +515,32 @@ bool java_bytecode_languaget::typecheck( return false; } +/*******************************************************************\ + +Function: java_bytecode_languaget::do_ci_lazy_method_conversion + + Inputs: `symbol_table`: global symbol table + `lazy_methods`: map from method names to relevant symbol + and parsed-method objects. + + Outputs: Elaborates lazily-converted methods that may be reachable + starting from the main entry point (usually provided with + the --function command-line option) (side-effect on the + symbol_table). Returns false on success. + + Purpose: Uses a simple context-insensitive ('ci') analysis to + determine which methods may be reachable from the main + entry point. In brief, static methods are reachable if we + find a callsite in another reachable site, while virtual + methods are reachable if we find a virtual callsite + targeting a compatible type *and* a constructor callsite + indicating an object of that type may be instantiated (or + evidence that an object of that type exists before the + main function is entered, such as being passed as a + parameter). + +\*******************************************************************/ + bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_tablet &symbol_table, lazy_methodst &lazy_methods) @@ -558,7 +585,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( needed_classes); std::set methods_already_populated; - std::vector virtual_callsites; + std::vector virtual_callsites; bool any_new_methods; do @@ -586,8 +613,10 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( get_message_handler(), disable_runtime_checks, max_user_array_length, - safe_pointer >::create_non_null(&method_worklist2), - safe_pointer >::create_non_null(&needed_classes)); + safe_pointer >::create_non_null( + &method_worklist2), + safe_pointer >::create_non_null( + &needed_classes)); gather_virtual_callsites( symbol_table.lookup(mname).value, virtual_callsites); @@ -607,11 +636,15 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( for(const auto &callsite : virtual_callsites) { // This will also create a stub if a virtual callsite has no targets. - get_virtual_method_targets(*callsite, needed_classes, method_worklist2, - symbol_table, ch); + get_virtual_method_targets( + *callsite, + needed_classes, + method_worklist2, + symbol_table, + ch); } - - } while(any_new_methods); + } + while(any_new_methods); // Remove symbols for methods that were declared but never used: symbol_tablet keep_symbols; @@ -640,12 +673,49 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( return false; } -void java_bytecode_languaget::lazy_methods_provided(std::set &methods) const +/*******************************************************************\ + +Function: java_bytecode_languaget::lazy_methods_provided + + Inputs: None + + Outputs: Populates `methods` with the complete list of lazy methods + that are available to convert (those which are valid + parameters for `convert_lazy_method`) + + Purpose: Provide feedback to `language_filest` so that when asked + for a lazy method, it can delegate to this instance of + java_bytecode_languaget. + +\*******************************************************************/ + +void java_bytecode_languaget::lazy_methods_provided( + std::set &methods) const { for(const auto &kv : lazy_methods) methods.insert(kv.first); } +/*******************************************************************\ + +Function: java_bytecode_languaget::convert_lazy_method + + Inputs: `id`: method ID to convert + `symtab`: global symbol table + + Outputs: Amends the symbol table entry for function `id`, which + should be a lazy method provided by this instance of + `java_bytecode_languaget`. It should initially have a nil + value. After this method completes, it will have a value + representing the method body, identical to that produced + using eager method conversion. + + Purpose: Promote a lazy-converted method (one whose type is known + but whose body hasn't been converted) into a fully- + elaborated one. + +\*******************************************************************/ + void java_bytecode_languaget::convert_lazy_method( const irep_idt &id, symbol_tablet &symtab) diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index afdd275ddf6..ea177e0226a 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -24,8 +24,8 @@ enum lazy_methods_modet }; typedef std::pair< - const symbolt*, - const java_bytecode_parse_treet::methodt*> + const symbolt *, + const java_bytecode_parse_treet::methodt *> lazy_method_valuet; typedef std::map lazy_methodst; From 73e0479edb181b9a6a4ff7bb8e4113eff37844c6 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Wed, 15 Feb 2017 17:28:55 +0000 Subject: [PATCH 18/18] Improve failed test printer --- regression/failed-tests-printer.pl | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/regression/failed-tests-printer.pl b/regression/failed-tests-printer.pl index 832ba2d2c64..40767185d5c 100755 --- a/regression/failed-tests-printer.pl +++ b/regression/failed-tests-printer.pl @@ -4,23 +4,25 @@ open LOG,") { chomp; if (/^Test '(.+)'/) { $current_test = $1; - $ignore = 0; - } elsif (1 == $ignore) { - next; + $printed_this_test = 0; } elsif (/\[FAILED\]\s*$/) { - $ignore = 1; - print "Failed test: $current_test\n"; - my $outf = `sed -n '2p' $current_test/test.desc`; - $outf =~ s/\..*$/.out/; - system("cat $current_test/$outf"); - print "\n\n"; + if(0 == $printed_this_test) { + $printed_this_test = 1; + print "\n\n"; + print "Failed test: $current_test\n"; + my $outf = `sed -n '2p' $current_test/test.desc`; + $outf =~ s/\..*$/.out/; + system("cat $current_test/$outf"); + print "\n\nFailed test.desc lines:\n"; + } + print "$_\n"; } }