diff --git a/regression/cbmc-java/NullPointer1/test.desc b/regression/cbmc-java/NullPointer1/test.desc index bf4dc51e8da..0394ce544f3 100644 --- a/regression/cbmc-java/NullPointer1/test.desc +++ b/regression/cbmc-java/NullPointer1/test.desc @@ -3,7 +3,7 @@ NullPointer1.class --pointer-check --stop-on-fail ^EXIT=10$ ^SIGNAL=0$ -^ file NullPointer1.java line 16$ +^ file NullPointer1.java line 16 function java::NullPointer1.main:(\[Ljava/lang/String;)V$ ^VERIFICATION FAILED$ -- ^warning: ignoring diff --git a/regression/cbmc-java/NullPointer2/test.desc b/regression/cbmc-java/NullPointer2/test.desc index 8bf602bc2af..f89103d5f52 100644 --- a/regression/cbmc-java/NullPointer2/test.desc +++ b/regression/cbmc-java/NullPointer2/test.desc @@ -3,7 +3,7 @@ NullPointer2.class --pointer-check --stop-on-fail ^EXIT=10$ ^SIGNAL=0$ -^ file NullPointer2.java line 9$ +^ file NullPointer2.java line 9 function java::NullPointer2.main:(\[Ljava/lang/String;)V$ ^VERIFICATION FAILED$ -- ^warning: ignoring diff --git a/regression/cbmc-java/NullPointer3/test.desc b/regression/cbmc-java/NullPointer3/test.desc index d422f43b99d..f4956e75540 100644 --- a/regression/cbmc-java/NullPointer3/test.desc +++ b/regression/cbmc-java/NullPointer3/test.desc @@ -3,7 +3,7 @@ NullPointer3.class --pointer-check --stop-on-fail ^EXIT=10$ ^SIGNAL=0$ -^ file NullPointer3.java line 5$ +^ file NullPointer3.java line 5 function java::NullPointer3.main:(\[Ljava/lang/String;)V bytecode_index 1$ ^VERIFICATION FAILED$ -- ^warning: ignoring diff --git a/regression/cbmc-java/NullPointer4/test.desc b/regression/cbmc-java/NullPointer4/test.desc index 788cb61fdf1..653f7cf47bd 100644 --- a/regression/cbmc-java/NullPointer4/test.desc +++ b/regression/cbmc-java/NullPointer4/test.desc @@ -3,7 +3,7 @@ NullPointer4.class --pointer-check --stop-on-fail ^EXIT=10$ ^SIGNAL=0$ -^ file NullPointer4.java line 6$ +^ file NullPointer4.java line 6 function java::NullPointer4.main:(\[Ljava/lang/String;)V$ ^VERIFICATION FAILED$ -- ^warning: ignoring diff --git a/src/cegis/refactor/constraint/constraint_factory.cpp b/src/cegis/refactor/constraint/constraint_factory.cpp index abe6d33e4fa..42a2c324f71 100644 --- a/src/cegis/refactor/constraint/constraint_factory.cpp +++ b/src/cegis/refactor/constraint/constraint_factory.cpp @@ -41,7 +41,7 @@ void create_or_redirect_entry(symbol_tablet &st, goto_functionst &gf) if (fmap.end() == it) { config.main=CONSTRAINT_CALLER; - assert(!java_entry_point(st, ID_empty, msg)); + assert(!java_entry_point(st, ID_empty, msg, false, 0)); goto_convert(CPROVER_INIT, st, gf, msg); goto_convert(goto_functionst::entry_point(), st, gf, msg); } else diff --git a/src/java_bytecode/Makefile b/src/java_bytecode/Makefile index f13c327ef77..e27bc3e038d 100644 --- a/src/java_bytecode/Makefile +++ b/src/java_bytecode/Makefile @@ -5,7 +5,8 @@ SRC = java_bytecode_language.cpp java_bytecode_parse_tree.cpp \ java_bytecode_typecheck_type.cpp java_bytecode_internal_additions.cpp \ java_root_class.cpp java_bytecode_parser.cpp bytecode_info.cpp \ java_class_loader.cpp jar_file.cpp java_object_factory.cpp \ - java_bytecode_convert_method.cpp java_local_variable_table.cpp + java_bytecode_convert_method.cpp java_local_variable_table.cpp \ + java_pointer_casts.cpp java_utils.cpp INCLUDES= -I .. diff --git a/src/java_bytecode/expr2java.cpp b/src/java_bytecode/expr2java.cpp index 8cf4cdd1c59..7fb050dddad 100644 --- a/src/java_bytecode/expr2java.cpp +++ b/src/java_bytecode/expr2java.cpp @@ -15,45 +15,12 @@ Author: Daniel Kroening, kroening@cs.cmu.edu #include #include +#include #include #include "java_types.h" #include "expr2java.h" -class expr2javat:public expr2ct -{ -public: - expr2javat(const namespacet &_ns):expr2ct(_ns) { } - - std::string convert(const exprt &src) override - { - return expr2ct::convert(src); - } - - std::string convert(const typet &src) override - { - return expr2ct::convert(src); - } - -protected: - std::string convert(const exprt &src, unsigned &precedence) override; - std::string convert_java_this(const exprt &src, unsigned precedence); - std::string convert_java_instanceof(const exprt &src, unsigned precedence); - std::string convert_java_new(const exprt &src, unsigned precedence); - std::string convert_code_java_delete(const exprt &src, unsigned precedence); - std::string convert_struct(const exprt &src, unsigned &precedence) override; - std::string convert_code(const codet &src, unsigned indent) override; - std::string convert_constant(const constant_exprt &src, unsigned &precedence) override; - std::string convert_code_function_call(const code_function_callt &src, unsigned indent); - - std::string convert_rec( - const typet &src, - const c_qualifierst &qualifiers, - const std::string &declarator) override; - - typedef std::unordered_set id_sett; -}; - /*******************************************************************\ Function: expr2javat::convert_code_function_call @@ -83,7 +50,6 @@ std::string expr2javat::convert_code_function_call( unsigned p; std::string lhs_str=convert(src.lhs(), p); - // TODO: ggf. Klammern je nach p dest+=lhs_str; dest+='='; } @@ -124,8 +90,10 @@ std::string expr2javat::convert_code_function_call( unsigned p; std::string arg_str=convert(*it, p); - if(first) first=false; else dest+=", "; - // TODO: ggf. Klammern je nach p + if(first) + first=false; + else + dest+=", "; dest+=arg_str; } } @@ -244,15 +212,16 @@ std::string expr2javat::convert_constant( else if(src.type()==java_char_type()) { std::string dest; - dest.reserve(10); + dest.reserve(char_representation_length); mp_integer int_value; - to_integer(src, int_value); + if(!to_integer(src, int_value)) + assert(false); - dest+='\''; + dest+="(char)'"; if(int_value>=' ' && int_value<127) - dest+=(char)(int_value.to_long()); + dest+=static_cast(int_value.to_long()); else { std::string hex=integer2string(int_value, 16); @@ -265,6 +234,26 @@ std::string expr2javat::convert_constant( dest+='\''; return dest; } + else if(src.type()==java_byte_type()) + { + // No byte-literals in Java, so just cast: + mp_integer int_value; + if(!to_integer(src, int_value)) + assert(false); + std::string dest="(byte)"; + dest+=integer2string(int_value); + return dest; + } + else if(src.type()==java_short_type()) + { + // No short-literals in Java, so just cast: + mp_integer int_value; + if(!to_integer(src, int_value)) + assert(false); + std::string dest="(short)"; + dest+=integer2string(int_value); + return dest; + } else if(src.type()==java_long_type()) { // long integer literals must have 'L' at the end @@ -319,7 +308,7 @@ std::string expr2javat::convert_rec( else if(src==java_double_type()) return q+"double"+d; else if(src==java_boolean_type()) - return q+"bool"+d; + return q+"boolean"+d; else if(src==java_byte_type()) return q+"byte"+d; else if(src.id()==ID_code) @@ -348,7 +337,8 @@ std::string expr2javat::convert_rec( if(code_type.has_ellipsis()) { - if(!parameters.empty()) dest+=", "; + if(!parameters.empty()) + dest+=", "; dest+="..."; } @@ -491,9 +481,10 @@ std::string expr2javat::convert( const exprt &src, unsigned &precedence) { + const typet &type=ns.follow(src.type()); if(src.id()=="java-this") return convert_java_this(src, precedence=15); - if(src.id()=="java_instanceof") + if(src.id()==ID_java_instanceof) return convert_java_instanceof(src, precedence=15); else if(src.id()==ID_side_effect && (src.get(ID_statement)==ID_java_new || @@ -509,9 +500,22 @@ std::string expr2javat::convert( else if(src.id()=="pod_constructor") return "pod_constructor"; else if(src.id()==ID_virtual_function) - return convert_function(src, "VIRTUAL_FUNCTION", precedence=16); + { + return "VIRTUAL_FUNCTION(" + + id2string(src.get(ID_C_class)) + + "." + + id2string(src.get(ID_component_name)) + + ")"; + } else if(src.id()==ID_java_string_literal) - return '"'+id2string(src.get(ID_value))+'"'; // Todo: add escaping as needed + return '"'+MetaString(src.get_string(ID_value))+'"'; + else if(src.id()==ID_constant && (type.id()==ID_bool || type.id()==ID_c_bool)) + { + if(src.is_true()) + return "true"; + else + return "false"; + } else return expr2ct::convert(src, precedence); } @@ -536,7 +540,7 @@ std::string expr2javat::convert_code( if(statement==ID_java_new || statement==ID_java_new_array) - return convert_java_new(src,indent); + return convert_java_new(src, indent); if(statement==ID_function_call) return convert_code_function_call(to_code_function_call(src), indent); diff --git a/src/java_bytecode/expr2java.h b/src/java_bytecode/expr2java.h index f69563a3697..d7889dc73a6 100644 --- a/src/java_bytecode/expr2java.h +++ b/src/java_bytecode/expr2java.h @@ -10,10 +10,51 @@ Author: Daniel Kroening, kroening@cs.cmu.edu #define CPROVER_JAVA_BYTECODE_EXPR2JAVA_H #include - -class exprt; -class namespacet; -class typet; +#include + +class expr2javat:public expr2ct +{ +public: + explicit expr2javat(const namespacet &_ns):expr2ct(_ns) { } + + virtual std::string convert(const exprt &src) + { + return expr2ct::convert(src); + } + + virtual std::string convert(const typet &src) + { + return expr2ct::convert(src); + } + +protected: + virtual std::string convert(const exprt &src, unsigned &precedence); + virtual std::string convert_java_this(const exprt &src, unsigned precedence); + virtual std::string convert_java_instanceof( + const exprt &src, + unsigned precedence); + virtual std::string convert_java_new(const exprt &src, unsigned precedence); + virtual std::string convert_code_java_delete( + const exprt &src, + unsigned precedence); + virtual std::string convert_struct(const exprt &src, unsigned &precedence); + virtual std::string convert_code(const codet &src, unsigned indent); + virtual std::string convert_constant( + const constant_exprt &src, + unsigned &precedence); + virtual std::string convert_code_function_call( + const code_function_callt &src, + unsigned indent); + + virtual std::string convert_rec( + const typet &src, + const c_qualifierst &qualifiers, + const std::string &declarator); + + // length of string representation of Java Char + // representation is '\u0000' + const std::size_t char_representation_length=8; +}; std::string expr2java(const exprt &expr, const namespacet &ns); std::string type2java(const typet &type, const namespacet &ns); diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 2db872c3f45..113a8b9422d 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -6,8 +6,6 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ -#define DEBUG - #ifdef DEBUG #include #endif @@ -20,15 +18,18 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -namespace { class java_bytecode_convert_classt:public messaget { public: java_bytecode_convert_classt( symbol_tablet &_symbol_table, - message_handlert &_message_handler): + message_handlert &_message_handler, + bool _disable_runtime_checks, + size_t _max_array_length): messaget(_message_handler), - symbol_table(_symbol_table) + symbol_table(_symbol_table), + disable_runtime_checks(_disable_runtime_checks), + max_array_length(_max_array_length) { } @@ -47,6 +48,8 @@ class java_bytecode_convert_classt:public messaget protected: symbol_tablet &symbol_table; + const bool disable_runtime_checks; + const size_t max_array_length; // conversion void convert(const classt &c); @@ -55,7 +58,6 @@ class java_bytecode_convert_classt:public messaget void generate_class_stub(const irep_idt &class_name); void add_array_types(); }; -} /*******************************************************************\ @@ -121,7 +123,12 @@ void java_bytecode_convert_classt::convert(const classt &c) // now do methods for(const auto &method : c.methods) java_bytecode_convert_method( - *class_symbol, method, symbol_table, get_message_handler()); + *class_symbol, + method, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_array_length); // is this a root class? if(c.extends.empty()) @@ -140,7 +147,8 @@ Function: java_bytecode_convert_classt::generate_class_stub \*******************************************************************/ -void java_bytecode_convert_classt::generate_class_stub(const irep_idt &class_name) +void java_bytecode_convert_classt::generate_class_stub( + const irep_idt &class_name) { class_typet class_type; @@ -163,7 +171,8 @@ void java_bytecode_convert_classt::generate_class_stub(const irep_idt &class_nam if(symbol_table.move(new_symbol, class_symbol)) { - warning() << "stub class symbol "+id2string(new_symbol.name)+" already exists"; + warning() << "stub class symbol " << new_symbol.name + << " already exists" << eom; } else { @@ -202,16 +211,19 @@ void java_bytecode_convert_classt::convert( new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name); new_symbol.base_name=f.name; new_symbol.type=field_type; - new_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+id2string(f.name); + new_symbol.pretty_name=id2string(class_symbol.pretty_name)+ + "."+id2string(f.name); new_symbol.mode=ID_java; new_symbol.is_type=false; new_symbol.value=gen_zero(field_type); + // Do we have the static field symbol already? + const auto s_it=symbol_table.symbols.find(new_symbol.name); + if(s_it!=symbol_table.symbols.end()) + symbol_table.symbols.erase(s_it); // erase, we stubbed it + if(symbol_table.add(new_symbol)) - { - error() << "failed to add static field symbol" << eom; - throw 0; - } + assert(false && "failed to add static field symbol"); } else { @@ -296,10 +308,15 @@ Function: java_bytecode_convert_class bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, - message_handlert &message_handler) + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length) { java_bytecode_convert_classt java_bytecode_convert_class( - symbol_table, message_handler); + symbol_table, + message_handler, + disable_runtime_checks, + max_array_length); try { diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index cae00f571d0..ffaaf6e34da 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -17,6 +17,8 @@ Author: Daniel Kroening, kroening@kroening.com bool java_bytecode_convert_class( const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, - message_handlert &message_handler); + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 854d50ac9ba..a2ca21ef80d 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -70,43 +70,80 @@ static size_t get_variable_slots(const code_typet::parametert ¶m) return count_slots(0, param); } -static bool is_constructor(const class_typet::methodt &method) +static irep_idt strip_java_namespace_prefix(const irep_idt to_strip) +{ + const auto to_strip_str=id2string(to_strip); + assert(has_prefix(to_strip_str, "java::")); + return to_strip_str.substr(6, std::string::npos); +} + +// name contains or +bool java_bytecode_convert_methodt::is_constructor( + const class_typet::methodt &method) { const std::string &name(id2string(method.get_name())); const std::string::size_type &npos(std::string::npos); return npos!=name.find("") || npos!=name.find(""); } -static void cast_if_necessary(binary_relation_exprt &condition) +exprt::operandst java_bytecode_convert_methodt::pop(std::size_t n) +{ + if(stack.size()::max(); variables[param_index][0].is_parameter=true; @@ -336,8 +324,8 @@ void java_bytecode_convert_methodt::convert( method_symbol.name=method.get_name(); method_symbol.base_name=method.get_base_name(); method_symbol.mode=ID_java; - method_symbol.name=method.get_name(); - method_symbol.base_name=method.get_base_name(); + method_symbol.location=m.source_location; + method_symbol.location.set_function(method_identifier); if(method.get_base_name()=="") method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ @@ -347,6 +335,8 @@ void java_bytecode_convert_methodt::convert( id2string(method.get_base_name())+"()"; method_symbol.type=member_type; + if(is_constructor(method)) + method_symbol.type.set(ID_constructor, true); current_method=method_symbol.name; method_has_this=code_type.has_this(); @@ -378,7 +368,8 @@ const bytecode_infot &java_bytecode_convert_methodt::get_bytecode_info( const irep_idt &statement) { for(const bytecode_infot *p=bytecode_info; p->mnemonic!=0; p++) - if(statement==p->mnemonic) return *p; + if(statement==p->mnemonic) + return *p; error() << "failed to find bytecode mnemonic `" << statement << '\'' << eom; @@ -387,22 +378,20 @@ const bytecode_infot &java_bytecode_convert_methodt::get_bytecode_info( static irep_idt get_if_cmp_operator(const irep_idt &stmt) { - if(stmt==patternt("if_?cmplt")) return ID_lt; - if(stmt==patternt("if_?cmple")) return ID_le; - if(stmt==patternt("if_?cmpgt")) return ID_gt; - if(stmt==patternt("if_?cmpge")) return ID_ge; - if(stmt==patternt("if_?cmpeq")) return ID_equal; - if(stmt==patternt("if_?cmpne")) return ID_notequal; - - throw "Unhandled java comparison instruction"; -} - -static constant_exprt as_number(const mp_integer value, const typet &type) -{ - const std::size_t java_int_width(type.get_unsigned_int(ID_width)); - const std::string significant_bits(integer2string(value, 2)); - std::string binary_width(java_int_width-significant_bits.length(), '0'); - return constant_exprt(binary_width+=significant_bits, type); + if(stmt==patternt("if_?cmplt")) + return ID_lt; + if(stmt==patternt("if_?cmple")) + return ID_le; + if(stmt==patternt("if_?cmpgt")) + return ID_gt; + if(stmt==patternt("if_?cmpge")) + return ID_ge; + if(stmt==patternt("if_?cmpeq")) + return ID_equal; + if(stmt==patternt("if_?cmpne")) + return ID_notequal; + + throw "unhandled java comparison instruction"; } static member_exprt to_member(const exprt &pointer, const exprt &fieldref) @@ -415,7 +404,38 @@ static member_exprt to_member(const exprt &pointer, const exprt &fieldref) const dereference_exprt obj_deref(pointer2, class_type); return member_exprt( - obj_deref, fieldref.get(ID_component_name), fieldref.type()); + obj_deref, + fieldref.get(ID_component_name), + fieldref.type()); +} + +codet java_bytecode_convert_methodt::get_array_bounds_check( + const exprt &arraystruct, + const exprt &idx, + const source_locationt &original_sloc) +{ + constant_exprt intzero=from_integer(0, java_int_type()); + binary_relation_exprt gezero(idx, ID_ge, intzero); + const member_exprt length_field(arraystruct, "length", java_int_type()); + binary_relation_exprt ltlength(idx, ID_lt, length_field); + code_blockt bounds_checks; + + bounds_checks.add(code_assertt(gezero)); + bounds_checks.operands().back().add_source_location()=original_sloc; + bounds_checks.operands().back().add_source_location() + .set_comment("Array index < 0"); + bounds_checks.operands().back().add_source_location() + .set_property_class("array-index-out-of-bounds-low"); + bounds_checks.add(code_assertt(ltlength)); + + bounds_checks.operands().back().add_source_location()=original_sloc; + bounds_checks.operands().back().add_source_location() + .set_comment("Array index >= length"); + bounds_checks.operands().back().add_source_location() + .set_property_class("array-index-out-of-bounds-high"); + + // TODO make this throw ArrayIndexOutOfBoundsException instead of asserting. + return bounds_checks; } /*******************************************************************\ @@ -715,8 +735,7 @@ codet java_bytecode_convert_methodt::convert_instructions( const methodt &method, const code_typet &method_type) { - - const instructionst& instructions=method.instructions; + const instructionst &instructions=method.instructions; // Run a worklist algorithm, assuming that the bytecode has not // been tampered with. See "Leroy, X. (2003). Java bytecode @@ -736,10 +755,9 @@ codet java_bytecode_convert_methodt::convert_instructions( i_it!=instructions.end(); i_it++) { + converted_instructiont ins=converted_instructiont(i_it, code_skipt()); std::pair a_entry= - address_map.insert(std::make_pair( - i_it->address, - converted_instructiont(i_it, code_skipt()))); + address_map.insert(std::make_pair(i_it->address, ins)); assert(a_entry.second); // addresses are strictly increasing, hence we must have inserted // a new maximal key @@ -777,8 +795,10 @@ codet java_bytecode_convert_methodt::convert_instructions( if(i_it->statement=="jsr" || i_it->statement=="jsr_w") { - instructionst::const_iterator next=i_it; - assert(++next!=instructions.end() && "jsr without valid return address?"); + instructionst::const_iterator next=i_it+1; + assert( + next!=instructions.end() && + "jsr without valid return address?"); targets.insert(next->address); jsr_ret_targets.push_back(next->address); } @@ -806,12 +826,11 @@ codet java_bytecode_convert_methodt::convert_instructions( } } - // Draw edges from every `ret` to every `jsr` successor. - // Could do better with flow analysis to distinguish multiple subroutines within - // the same function. + // Draw edges from every `ret` to every `jsr` successor. Could do better with + // flow analysis to distinguish multiple subroutines within the same function. for(const auto retinst : ret_instructions) { - auto& a_entry=address_map.at(retinst->address); + auto &a_entry=address_map.at(retinst->address); a_entry.successors.insert( a_entry.successors.end(), jsr_ret_targets.begin(), @@ -831,9 +850,11 @@ codet java_bytecode_convert_methodt::convert_instructions( // Now that the control flow graph is built, set up our local variables // (these require the graph to determine live ranges) - setup_local_variables(method,address_map); + setup_local_variables(method, address_map); std::set working_set; + bool assertion_failure=false; + if(!instructions.empty()) working_set.insert(instructions.front().address); @@ -844,19 +865,22 @@ codet java_bytecode_convert_methodt::convert_instructions( assert(a_it!=address_map.end()); working_set.erase(cur); - if(a_it->second.done) continue; - working_set.insert(a_it->second.successors.begin(), - a_it->second.successors.end()); + if(a_it->second.done) + continue; + working_set + .insert(a_it->second.successors.begin(), a_it->second.successors.end()); instructionst::const_iterator i_it=a_it->second.source; stack.swap(a_it->second.stack); a_it->second.stack.clear(); codet &c=a_it->second.code; - assert(stack.empty() || - a_it->second.predecessors.size()<=1 || - has_prefix(stack.front().get_string(ID_C_base_name), - "$stack")); + assert( + stack.empty() || + a_it->second.predecessors.size()<=1 || + has_prefix( + stack.front().get_string(ID_C_base_name), + "$stack")); irep_idt statement=i_it->statement; exprt arg0=i_it->args.size()>=1?i_it->args[0]:nil_exprt(); @@ -887,19 +911,39 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement=="athrow") { assert(op.size()==1 && results.size()==1); - side_effect_expr_throwt throw_expr; - throw_expr.add_source_location()=i_it->source_location; - throw_expr.copy_to_operands(op[0]); - c=code_expressiont(throw_expr); - results[0]=op[0]; + if(!assertion_failure) + { + side_effect_expr_throwt throw_expr; + throw_expr.add_source_location()=i_it->source_location; + throw_expr.copy_to_operands(op[0]); + c=code_expressiont(throw_expr); + results[0]=op[0]; + } + else + { + // TODO: this can be removed once we properly handle throw + // if this athrow is generated by an assertion, then skip it + c=code_skipt(); + assertion_failure=false; + } } else if(statement=="checkcast") { - // checkcast throws an exception in case a cast of object - // on stack to given type fails. - // The stack isn't modified. - assert(op.size()==1 && results.size()==1); - results[0]=op[0]; + if(!disable_runtime_checks) + { + // checkcast throws an exception in case a cast of object + // on stack to given type fails. + // The stack isn't modified. + // TODO: convert assertions to exceptions. + assert(op.size()==1 && results.size()==1); + binary_predicate_exprt check(op[0], ID_java_instanceof, arg0); + c=code_assertt(check); + c.add_source_location().set_comment("Dynamic cast check"); + c.add_source_location().set_property_class("bad-dynamic-cast"); + results[0]=op[0]; + } + else + c=code_skipt(); } else if(statement=="invokedynamic") { @@ -927,6 +971,13 @@ codet java_bytecode_convert_methodt::convert_instructions( statement=="invokevirtual" || statement=="invokestatic") { + // Remember that this is triggered by an assertion + if(statement=="invokespecial" && + id2string(arg0.get(ID_identifier)) + .find("AssertionError")!=std::string::npos) + { + assertion_failure=true; + } const bool use_this(statement!="invokestatic"); const bool is_virtual( statement=="invokevirtual" || statement=="invokeinterface"); @@ -938,8 +989,19 @@ codet java_bytecode_convert_methodt::convert_instructions( { if(parameters.empty() || !parameters[0].get_this()) { - const empty_typet empty; - pointer_typet object_ref_type(empty); + irep_idt classname=arg0.get(ID_C_class); + typet thistype=symbol_typet(classname); + // Note invokespecial is used for super-method calls as well as + // constructors. + if(statement=="invokespecial") + { + if(as_string(arg0.get(ID_identifier)) + .find("")!=std::string::npos) + code_type.set(ID_constructor, true); + else + code_type.set(ID_java_super_method_call, true); + } + pointer_typet object_ref_type(thistype); code_typet::parametert this_p(object_ref_type); this_p.set_this(); this_p.set_base_name("this"); @@ -948,7 +1010,10 @@ codet java_bytecode_convert_methodt::convert_instructions( } code_function_callt call; - call.add_source_location()=i_it->source_location; + source_locationt loc=i_it->source_location; + loc.set_function(method_id); + + call.add_source_location()=loc; call.arguments()=pop(parameters.size()); // double-check a bit @@ -995,6 +1060,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // does the function symbol exist? irep_idt id=arg0.get(ID_identifier); + if(symbol_table.symbols.find(id)==symbol_table.symbols.end()) { // no, create stub @@ -1020,7 +1086,7 @@ codet java_bytecode_convert_methodt::convert_instructions( call.function()=symbol_exprt(arg0.get(ID_identifier), arg0.type()); } - call.function().add_source_location()=i_it->source_location; + call.function().add_source_location()=loc; c=call; } else if(statement=="return") @@ -1034,7 +1100,8 @@ codet java_bytecode_convert_methodt::convert_instructions( // conversion. assert(op.size()==1 && results.empty()); exprt r=op[0]; - if(r.type()!=method_return_type) r=typecast_exprt(r, method_return_type); + if(r.type()!=method_return_type) + r=typecast_exprt(r, method_return_type); c=code_returnt(r); } else if(statement==patternt("?astore")) @@ -1049,20 +1116,34 @@ codet java_bytecode_convert_methodt::convert_instructions( const dereference_exprt deref(pointer, pointer.type().subtype()); const member_exprt data_ptr( - deref, "data", pointer_typet(java_type_from_char(type_char))); + deref, + "data", + pointer_typet(java_type_from_char(type_char))); plus_exprt data_plus_offset(data_ptr, op[1], data_ptr.type()); typet element_type=data_ptr.type().subtype(); const dereference_exprt element(data_plus_offset, element_type); - c=code_assignt(element, op[2]); + c=code_blockt(); + if(!disable_runtime_checks) + { + codet bounds_check= + get_array_bounds_check(deref, op[1], i_it->source_location); + bounds_check.add_source_location()=i_it->source_location; + c.move_to_operands(bounds_check); + } + code_assignt array_put(element, op[2]); + array_put.add_source_location()=i_it->source_location; + c.move_to_operands(array_put); + c.add_source_location()=i_it->source_location; } else if(statement==patternt("?store")) { // store value into some local variable assert(op.size()==1 && results.empty()); - exprt var=variable(arg0, statement[0], i_it->address, NO_CAST); + exprt var= + variable(arg0, statement[0], i_it->address, NO_CAST); exprt toassign=op[0]; if('a'==statement[0] && toassign.type()!=var.type()) @@ -1082,18 +1163,26 @@ codet java_bytecode_convert_methodt::convert_instructions( const dereference_exprt deref(pointer, pointer.type().subtype()); const member_exprt data_ptr( - deref, "data", pointer_typet(java_type_from_char(type_char))); + deref, + "data", + pointer_typet(java_type_from_char(type_char))); plus_exprt data_plus_offset(data_ptr, op[1], data_ptr.type()); typet element_type=data_ptr.type().subtype(); dereference_exprt element(data_plus_offset, element_type); + if(!disable_runtime_checks) + { + c=get_array_bounds_check(deref, op[1], i_it->source_location); + c.add_source_location()=i_it->source_location; + } results[0]=java_bytecode_promotion(element); } else if(statement==patternt("?load")) { // load a value from a local variable - results[0]=variable(arg0, statement[0], i_it->address, CAST_AS_NEEDED); + results[0]= + variable(arg0, statement[0], i_it->address, CAST_AS_NEEDED); } else if(statement=="ldc" || statement=="ldc_w" || statement=="ldc2" || statement=="ldc2_w") @@ -1150,7 +1239,7 @@ codet java_bytecode_convert_methodt::convert_instructions( irep_idt number=to_constant_expr(arg0).get_value(); code_gotot code_goto(label(number)); c=code_goto; - results[0]=as_number( + results[0]=from_integer( std::next(i_it)->address, pointer_typet(void_typet(), 64)); } @@ -1160,7 +1249,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // and write something like: // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ... assert(op.empty() && results.empty()); - code_blockt branches; + c=code_blockt(); auto retvar=variable(arg0, 'a', i_it->address, NO_CAST); assert(!jsr_ret_targets.empty()); for(size_t idx=0, idxlim=jsr_ret_targets.size(); idx!=idxlim; ++idx) @@ -1169,21 +1258,20 @@ codet java_bytecode_convert_methodt::convert_instructions( code_gotot g(label(number)); g.add_source_location()=i_it->source_location; if(idx==idxlim-1) - branches.move_to_operands(g); + c.move_to_operands(g); else { code_ifthenelset branch; - auto address_ptr=as_number( + auto address_ptr=from_integer( jsr_ret_targets[idx], pointer_typet(void_typet(), 64)); branch.cond()=equal_exprt(retvar, address_ptr); branch.cond().add_source_location()=i_it->source_location; branch.then_case()=g; branch.add_source_location()=i_it->source_location; - branches.move_to_operands(branch); + c.move_to_operands(branch); } } - c=std::move(branches); } else if(statement=="iconst_m1") { @@ -1201,8 +1289,7 @@ codet java_bytecode_convert_methodt::convert_instructions( if(is_double || is_float) { const ieee_float_spect spec( - is_float ? - ieee_float_spect::single_precision() : + is_float?ieee_float_spect::single_precision(): ieee_float_spect::double_precision()); ieee_floatt value(spec); @@ -1216,9 +1303,9 @@ codet java_bytecode_convert_methodt::convert_instructions( } else { - const unsigned int value(arg0.get_unsigned_int(ID_value)); + const unsigned value(arg0.get_unsigned_int(ID_value)); const typet type=java_type_from_char(statement[0]); - results[0]=as_number(value, type); + results[0]=from_integer(value, type); } } else if(statement==patternt("?ipush")) @@ -1236,8 +1323,14 @@ codet java_bytecode_convert_methodt::convert_instructions( binary_relation_exprt condition(op[0], cmp_op, op[1]); - cast_if_necessary(condition); + exprt &lhs(condition.lhs()); + exprt &rhs(condition.rhs()); + const typet &lhs_type(lhs.type()); + if(lhs_type!=rhs.type()) + rhs=typecast_exprt(rhs, lhs_type); + code_branch.cond()=condition; + code_branch.cond().add_source_location()=i_it->source_location; code_branch.then_case()=code_gotot(label(number)); code_branch.then_case().add_source_location()=i_it->source_location; code_branch.add_source_location()=i_it->source_location; @@ -1262,9 +1355,12 @@ codet java_bytecode_convert_methodt::convert_instructions( code_branch.cond()= binary_relation_exprt(op[0], id, gen_zero(op[0].type())); code_branch.cond().add_source_location()=i_it->source_location; + code_branch.cond().add_source_location().set_function(method_id); code_branch.then_case()=code_gotot(label(number)); code_branch.then_case().add_source_location()=i_it->source_location; + code_branch.then_case().add_source_location().set_function(method_id); code_branch.add_source_location()=i_it->source_location; + code_branch.add_source_location().set_function(method_id); c=code_branch; } @@ -1299,7 +1395,8 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement=="iinc") { code_assignt code_assign; - code_assign.lhs()=variable(arg0, 'i', i_it->address, NO_CAST); + code_assign.lhs()= + variable(arg0, 'i', i_it->address, NO_CAST); code_assign.rhs()=plus_exprt( variable(arg0, 'i', i_it->address, CAST_AS_NEEDED), typecast_exprt(arg1, java_int_type())); @@ -1341,7 +1438,7 @@ codet java_bytecode_convert_methodt::convert_instructions( const typecast_exprt lhs(op[0], target); const typecast_exprt rhs(op[1], target); - results[0]=lshr_exprt(lhs, rhs); + results[0]=typecast_exprt(lshr_exprt(lhs, rhs), op[0].type()); } else if(statement==patternt("?add")) { @@ -1386,11 +1483,19 @@ codet java_bytecode_convert_methodt::convert_instructions( // 1 if op[0] is greater than op[1] const typet t=java_int_type(); + exprt one=from_integer(1, t); + exprt minus_one=from_integer(-1, t); + + if_exprt greater=if_exprt( + binary_relation_exprt(op[0], ID_gt, op[1]), + one, + minus_one); results[0]= - if_exprt(binary_relation_exprt(op[0], ID_equal, op[1]), gen_zero(t), - if_exprt(binary_relation_exprt(op[0], ID_gt, op[1]), from_integer(1, t), - from_integer(-1, t))); + if_exprt( + binary_relation_exprt(op[0], ID_equal, op[1]), + from_integer(0, t), + greater); } else if(statement==patternt("?cmp?")) { @@ -1409,19 +1514,21 @@ codet java_bytecode_convert_methodt::convert_instructions( // (value1 == NaN || value2 == NaN) ? // nan_value : value1 == value2 ? 0 : value1 < value2 -1 ? 1 : 0; + exprt nan_op0=ieee_float_equal_exprt(nan_expr, op[0]); + exprt nan_op1=ieee_float_equal_exprt(nan_expr, op[1]); + exprt one=from_integer(1, result_type); + exprt minus_one=from_integer(-1, result_type); results[0]= if_exprt( - or_exprt( - ieee_float_equal_exprt(nan_expr, op[0]), - ieee_float_equal_exprt(nan_expr, op[1])), + or_exprt(nan_op0, nan_op1), nan_result, if_exprt( ieee_float_equal_exprt(op[0], op[1]), gen_zero(result_type), if_exprt( binary_relation_exprt(op[0], ID_lt, op[1]), - from_integer(-1, result_type), - from_integer(1, result_type)))); + minus_one, + one))); } else if(statement==patternt("?cmpl")) { @@ -1512,9 +1619,13 @@ codet java_bytecode_convert_methodt::convert_instructions( { assert(op.empty() && results.size()==1); symbol_exprt symbol_expr(arg0.type()); - symbol_expr.set_identifier( - arg0.get_string(ID_class)+"."+arg0.get_string(ID_component_name)); + const auto &field_name=arg0.get_string(ID_component_name); + symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); results[0]=java_bytecode_promotion(symbol_expr); + + // set $assertionDisabled to false + if(field_name.find("$assertionsDisabled")!=std::string::npos) + c=code_assignt(symbol_expr, false_exprt()); } else if(statement=="putfield") { @@ -1525,8 +1636,8 @@ codet java_bytecode_convert_methodt::convert_instructions( { assert(op.size()==1 && results.empty()); symbol_exprt symbol_expr(arg0.type()); - symbol_expr.set_identifier( - arg0.get_string(ID_class)+"."+arg0.get_string(ID_component_name)); + const auto &field_name=arg0.get_string(ID_component_name); + symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); c=code_assignt(symbol_expr, op[0]); } else if(statement==patternt("?2?")) // i2c etc. @@ -1590,22 +1701,41 @@ codet java_bytecode_convert_methodt::convert_instructions( if(!i_it->source_location.get_line().empty()) java_new_array.add_source_location()=i_it->source_location; + c=code_blockt(); + if(!disable_runtime_checks) + { + // TODO make this throw NegativeArrayIndexException instead. + constant_exprt intzero=from_integer(0, java_int_type()); + binary_relation_exprt gezero(op[0], ID_ge, intzero); + code_assertt check(gezero); + check.add_source_location().set_comment("Array size < 0"); + check.add_source_location() + .set_property_class("array-create-negative-size"); + c.move_to_operands(check); + } + if(max_array_length!=0) + { + constant_exprt size_limit= + from_integer(max_array_length, java_int_type()); + binary_relation_exprt le_max_size(op[0], ID_le, size_limit); + code_assumet assume_le_max_size(le_max_size); + c.move_to_operands(assume_le_max_size); + } const exprt tmp=tmp_variable("newarray", ref_type); - c=code_assignt(tmp, java_new_array); + c.copy_to_operands(code_assignt(tmp, java_new_array)); results[0]=tmp; } else if(statement=="multianewarray") { - // The first argument is the type, the second argument is the dimension. - // The size of each dimension is on the stack. + // The first argument is the type, the second argument is the number of + // dimensions. The size of each dimension is on the stack. irep_idt number=to_constant_expr(arg1).get_value(); std::size_t dimension=safe_string2size_t(id2string(number)); op=pop(dimension); assert(results.size()==1); - // arg0.type() - const pointer_typet ref_type=java_array_type('a'); + const pointer_typet ref_type(arg0.type()); side_effect_exprt java_new_array(ID_java_new_array, ref_type); java_new_array.operands()=op; @@ -1613,6 +1743,27 @@ codet java_bytecode_convert_methodt::convert_instructions( if(!i_it->source_location.get_line().empty()) java_new_array.add_source_location()=i_it->source_location; + code_blockt checkandcreate; + if(!disable_runtime_checks) + { + // TODO make this throw NegativeArrayIndexException instead. + constant_exprt intzero=from_integer(0, java_int_type()); + binary_relation_exprt gezero(op[0], ID_ge, intzero); + code_assertt check(gezero); + check.add_source_location().set_comment("Array size < 0"); + check.add_source_location() + .set_property_class("array-create-negative-size"); + checkandcreate.move_to_operands(check); + + if(max_array_length!=0) + { + constant_exprt size_limit= + from_integer(max_array_length, java_int_type()); + binary_relation_exprt le_max_size(op[0], ID_le, size_limit); + code_assumet assume_le_max_size(le_max_size); + checkandcreate.move_to_operands(assume_le_max_size); + } + } const exprt tmp=tmp_variable("newarray", ref_type); c=code_assignt(tmp, java_new_array); results[0]=tmp; @@ -1693,7 +1844,7 @@ codet java_bytecode_convert_methodt::convert_instructions( assert(op.size()==1 && results.size()==1); results[0]= - binary_predicate_exprt(op[0], "java_instanceof", arg0); + binary_predicate_exprt(op[0], ID_java_instanceof, arg0); } else if(statement=="monitorenter") { @@ -1766,8 +1917,7 @@ codet java_bytecode_convert_methodt::convert_instructions( stackt::const_iterator os_it=a_it2->second.stack.begin(); for(auto &expr : stack) { - assert(has_prefix(os_it->get_string(ID_C_base_name), - "$stack")); + assert(has_prefix(os_it->get_string(ID_C_base_name), "$stack")); symbol_exprt lhs=to_symbol_expr(*os_it); code_assignt a(lhs, expr); more_code.copy_to_operands(a); @@ -1785,7 +1935,7 @@ codet java_bytecode_convert_methodt::convert_instructions( else { c.make_block(); - auto& last_statement=to_code_block(c).find_last_statement(); + auto &last_statement=to_code_block(c).find_last_statement(); if(last_statement.get_statement()==ID_goto) { // Insert stack twiddling before branch: @@ -1937,10 +2087,15 @@ void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_tablet &symbol_table, - message_handlert &message_handler) + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length) { java_bytecode_convert_methodt java_bytecode_convert_method( - symbol_table, message_handler); + symbol_table, + message_handler, + disable_runtime_checks, + max_array_length); java_bytecode_convert_method(class_symbol, method); } diff --git a/src/java_bytecode/java_bytecode_convert_method.h b/src/java_bytecode/java_bytecode_convert_method.h index ff652b3fe91..8945af95bd1 100644 --- a/src/java_bytecode/java_bytecode_convert_method.h +++ b/src/java_bytecode/java_bytecode_convert_method.h @@ -18,6 +18,8 @@ void java_bytecode_convert_method( const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &, symbol_tablet &symbol_table, - message_handlert &message_handler); + message_handlert &message_handler, + bool disable_runtime_checks, + size_t max_array_length); #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_H diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 003492355d1..b5ee7b28f55 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -27,7 +27,15 @@ class java_bytecode_convert_methodt:public messaget public: java_bytecode_convert_methodt( symbol_tablet &_symbol_table, - message_handlert &_message_handler); + message_handlert &_message_handler, + bool _disable_runtime_checks, + size_t _max_array_length): + messaget(_message_handler), + symbol_table(_symbol_table), + disable_runtime_checks(_disable_runtime_checks), + max_array_length(_max_array_length) + { + } typedef java_bytecode_parse_treet::methodt methodt; typedef java_bytecode_parse_treet::instructiont instructiont; @@ -41,9 +49,11 @@ class java_bytecode_convert_methodt:public messaget } protected: - irep_idt method_id; symbol_tablet &symbol_table; + const bool disable_runtime_checks; + const size_t max_array_length; + irep_idt method_id; irep_idt current_method; typet method_return_type; @@ -72,7 +82,7 @@ class java_bytecode_convert_methodt:public messaget size_t length; bool is_parameter; std::vector holes; - variablet() : symbol_expr(), is_parameter(false) {} + variablet() : symbol_expr(), start_pc(0), length(0), is_parameter(false) {} }; typedef std::vector variablest; @@ -80,8 +90,21 @@ class java_bytecode_convert_methodt:public messaget std::set used_local_names; bool method_has_this; + typedef enum instruction_sizet + { + INST_INDEX=2, + INST_INDEX_CONST=3 + } instruction_sizet; + + codet get_array_bounds_check( + const exprt &arraystruct, + const exprt &idx, + const source_locationt &original_sloc); + // return corresponding reference of variable - const variablet &find_variable_for_slot(size_t address, variablest &var_list); + const variablet &find_variable_for_slot( + size_t address, + variablest &var_list); // JVM local variables enum variable_cast_argumentt @@ -112,6 +135,8 @@ class java_bytecode_convert_methodt:public messaget void push(const exprt::operandst &o); + bool is_constructor(const class_typet::methodt &method); + struct converted_instructiont { converted_instructiont( @@ -129,23 +154,23 @@ class java_bytecode_convert_methodt:public messaget public: typedef std::map address_mapt; - typedef std::pair method_with_amapt; + typedef std::pair method_with_amapt; typedef cfg_dominators_templatet java_cfg_dominatorst; protected: void find_initialisers( - local_variable_table_with_holest& vars, - const address_mapt& amap, - const java_cfg_dominatorst& doms); + local_variable_table_with_holest &vars, + const address_mapt &amap, + const java_cfg_dominatorst &doms); void find_initialisers_for_slot( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, - const address_mapt& amap, - const java_cfg_dominatorst& doms); + const address_mapt &amap, + const java_cfg_dominatorst &doms); - void setup_local_variables(const methodt& m, const address_mapt& amap); + void setup_local_variables(const methodt &m, const address_mapt &amap); struct block_tree_nodet { @@ -180,7 +205,6 @@ class java_bytecode_convert_methodt:public messaget // conversion void convert(const symbolt &class_symbol, const methodt &); - void convert(const instructiont &); codet convert_instructions( const methodt &, diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index a015f928115..1d9d9f57b41 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -6,9 +6,13 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ +#include + #include #include #include +#include +#include #include "java_bytecode_language.h" #include "java_bytecode_convert_class.h" @@ -22,6 +26,29 @@ Author: Daniel Kroening, kroening@kroening.com /*******************************************************************\ +Function: java_bytecode_languaget::get_language_options + + Inputs: Command-line options + + Outputs: None + + Purpose: Consume options that are java bytecode specific. + +\*******************************************************************/ + +void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) +{ + disable_runtime_checks=cmd.isset("disable-runtime-check"); + assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null"); + if(cmd.isset("java-max-input-array-length")) + max_nondet_array_length= + std::stoi(cmd.get_value("java-max-input-array-length")); + if(cmd.isset("java-max-vla-length")) + max_user_array_length=std::stoi(cmd.get_value("java-max-vla-length")); +} + +/*******************************************************************\ + Function: java_bytecode_languaget::extensions Inputs: @@ -169,7 +196,11 @@ bool java_bytecode_languaget::typecheck( debug() << "Converting class " << c_it->first << eom; if(java_bytecode_convert_class( - c_it->second, symbol_table, get_message_handler())) + c_it->second, + symbol_table, + get_message_handler(), + disable_runtime_checks, + max_user_array_length)) return true; } @@ -200,10 +231,21 @@ bool java_bytecode_languaget::final(symbol_tablet &symbol_table) */ java_internal_additions(symbol_table); - if(java_entry_point(symbol_table, main_class, get_message_handler())) - return true; - return false; + main_function_resultt res= + get_main_symbol(symbol_table, main_class, get_message_handler()); + if(res.stop_convert) + return res.error_found; + + symbolt entry=res.main_function; + + return( + java_entry_point( + symbol_table, + main_class, + get_message_handler(), + assume_inputs_non_null, + max_nondet_array_length)); } /*******************************************************************\ diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index a6b5dd6d56b..be43a606a8f 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -10,13 +10,18 @@ Author: Daniel Kroening, kroening@kroening.com #define CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_LANGUAGE_H #include +#include #include "java_class_loader.h" +#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5 + class java_bytecode_languaget:public languaget { public: - bool preprocess( + virtual void get_language_options(const cmdlinet &); + + virtual bool preprocess( std::istream &instream, const std::string &path, std::ostream &outstream) override; @@ -34,8 +39,11 @@ class java_bytecode_languaget:public languaget void show_parse(std::ostream &out) override; - ~java_bytecode_languaget() override; - java_bytecode_languaget() { } + virtual ~java_bytecode_languaget(); + java_bytecode_languaget(): + max_nondet_array_length(MAX_NONDET_ARRAY_LENGTH_DEFAULT), + max_user_array_length(0) + {} bool from_expr( const exprt &expr, @@ -65,6 +73,15 @@ class java_bytecode_languaget:public languaget protected: irep_idt main_class; java_class_loadert java_class_loader; + bool assume_inputs_non_null; // assume inputs variables to be non-null + + bool disable_runtime_checks; // disable run-time checks for java, i.e., + // ASSERTS for + // - checkcast / instanceof + // - array bounds check + // - array size for newarray + size_t max_nondet_array_length; // maximal length for non-det array creation + size_t max_user_array_length; // max size for user code created arrays }; languaget *new_java_bytecode_language(); diff --git a/src/java_bytecode/java_bytecode_parse_tree.h b/src/java_bytecode/java_bytecode_parse_tree.h index 87b55b27b4a..398e3959d1a 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.h +++ b/src/java_bytecode/java_bytecode_parse_tree.h @@ -72,6 +72,7 @@ class java_bytecode_parse_treet public: irep_idt base_name; bool is_native, is_abstract, is_synchronized; + source_locationt source_location; typedef std::vector instructionst; instructionst instructions; @@ -84,6 +85,7 @@ class java_bytecode_parse_treet struct exceptiont { + public: std::size_t start_pc; std::size_t end_pc; std::size_t handler_pc; diff --git a/src/java_bytecode/java_bytecode_parser.cpp b/src/java_bytecode/java_bytecode_parser.cpp index 8a6483abef1..7138ab3951b 100644 --- a/src/java_bytecode/java_bytecode_parser.cpp +++ b/src/java_bytecode/java_bytecode_parser.cpp @@ -6,8 +6,10 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ +#include #include #include +#include #include #include @@ -136,10 +138,10 @@ class java_bytecode_parsert:public parsert } } - u8 read_bytes(unsigned bytes) + u8 read_bytes(size_t bytes) { u8 result=0; - for(unsigned i=0; itag=read_u1(); @@ -518,7 +521,8 @@ void java_bytecode_parsert::rconstant_pool() it++) { // the first entry isn't used - if(it==constant_pool.begin()) continue; + if(it==constant_pool.begin()) + continue; switch(it->tag) { @@ -635,13 +639,12 @@ void java_bytecode_parsert::rconstant_pool() { it->expr.id("invokedynamic"); const pool_entryt &nameandtype_entry=pool_entry(it->ref2); - //const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1); typet type=type_entry(nameandtype_entry.ref2); it->expr.type()=type; } break; - default:; + default:{}; } } } @@ -663,7 +666,8 @@ void java_bytecode_parsert::rinterfaces(classt &parsed_class) u2 interfaces_count=read_u2(); for(std::size_t i=0; isource_location.get_line(); else if(!line_number.empty()) it->source_location.set_line(line_number); + it->source_location + .set_function( + "java::"+id2string(parse_tree.parsed_class.name)+"."+ + id2string(method.name)+":"+method.signature); } + // line number of method + if(!method.instructions.empty()) + method.source_location.set_line( + method.instructions.begin()->source_location.get_line()); } else if(attribute_name=="RuntimeInvisibleAnnotations" || attribute_name=="RuntimeVisibleAnnotations") @@ -1063,7 +1088,8 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) if(attribute_name=="LineNumberTable") { // address -> instructiont - typedef std::map instruction_mapt; + typedef std::map instruction_mapt; instruction_mapt instruction_map; for(methodt::instructionst::iterator @@ -1105,7 +1131,8 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) method.local_variable_table[i].index=index; method.local_variable_table[i].name=pool_entry(name_index).s; - method.local_variable_table[i].signature=id2string(pool_entry(descriptor_index).s); + method.local_variable_table[i].signature= + id2string(pool_entry(descriptor_index).s); method.local_variable_table[i].start_pc=start_pc; method.local_variable_table[i].length=length; } @@ -1119,96 +1146,106 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) for(size_t i=0; isource_location.set_file(sourcefile_name); for(instructionst::iterator i_it=m_it->instructions.begin(); i_it!=m_it->instructions.end(); i_it++) diff --git a/src/java_bytecode/java_bytecode_typecheck.h b/src/java_bytecode/java_bytecode_typecheck.h index 56d43c15f02..e22a9db545b 100644 --- a/src/java_bytecode/java_bytecode_typecheck.h +++ b/src/java_bytecode/java_bytecode_typecheck.h @@ -10,6 +10,7 @@ Author: Daniel Kroening, kroening@kroening.com #define CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_TYPECHECK_H #include +#include #include #include @@ -63,6 +64,8 @@ class java_bytecode_typecheckt:public typecheckt virtual std::string to_string(const typet &type); std::set already_typechecked; + std::map string_literal_to_symbol_name; + std::map escaped_string_literal_count; }; #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_TYPECHECK_H diff --git a/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/src/java_bytecode/java_bytecode_typecheck_expr.cpp index 48b5f8cc462..941871d5cd3 100644 --- a/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -10,6 +10,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_typecheck.h" +#include "java_pointer_casts.h" /*******************************************************************\ @@ -28,6 +29,9 @@ void java_bytecode_typecheckt::typecheck_expr(exprt &expr) if(expr.id()==ID_code) return typecheck_code(to_code(expr)); + if(expr.id()==ID_typecast && expr.type().id()==ID_pointer) + expr=make_clean_pointer_cast(expr, expr.type(), ns); + // do operands recursively Forall_operands(it, expr) typecheck_expr(*it); @@ -79,13 +83,21 @@ Function: java_bytecode_typecheckt::typecheck_expr_java_new_array \*******************************************************************/ -void java_bytecode_typecheckt::typecheck_expr_java_new_array(side_effect_exprt &expr) +void java_bytecode_typecheckt::typecheck_expr_java_new_array( + side_effect_exprt &expr) { assert(expr.operands().size()>=1); // one per dimension typet &type=expr.type(); typecheck_type(type); } +static void escape_non_alnum(std::string &toescape) +{ + for(auto &ch : toescape) + if(!isalnum(ch)) + ch='_'; +} + /*******************************************************************\ Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal @@ -101,37 +113,46 @@ Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) { const irep_idt value=expr.get(ID_value); - - // we create a symbol for these - const irep_idt identifier="java::java.lang.String.Literal."+ - id2string(value); - - symbol_tablet::symbolst::const_iterator s_it= - symbol_table.symbols.find(identifier); - const symbol_typet string_type("java::java.lang.String"); - if(s_it==symbol_table.symbols.end()) + auto findit=string_literal_to_symbol_name.find(value); + if(findit!=string_literal_to_symbol_name.end()) { - // no, create the symbol - symbolt new_symbol; - new_symbol.name=identifier; - new_symbol.type=string_type; - new_symbol.base_name="Literal"; - new_symbol.pretty_name=value; - new_symbol.mode=ID_java; - new_symbol.is_type=false; - new_symbol.is_lvalue=true; + expr=symbol_exprt(findit->second, pointer_typet(string_type)); + return; + } - if(symbol_table.add(new_symbol)) - { - error() << "failed to add string literal symbol to symbol table" << eom; - throw 0; - } + // Create a new symbol: + std::ostringstream identifier_str; + std::string escaped=id2string(value); + escape_non_alnum(escaped); + identifier_str << "java::java.lang.String.Literal." << escaped; + // Avoid naming clashes by virtue of escaping: + size_t unique_num=escaped_string_literal_count[identifier_str.str()]; + unique_num++; + if(unique_num!=1) + identifier_str << unique_num; + + irep_idt identifier_id=identifier_str.str(); + string_literal_to_symbol_name.insert(std::make_pair(value, identifier_id)); + + symbolt new_symbol; + new_symbol.name=identifier_id; + new_symbol.type=pointer_typet(string_type); + new_symbol.base_name="Literal"; + new_symbol.pretty_name=value; + new_symbol.mode=ID_java; + new_symbol.is_type=false; + new_symbol.is_lvalue=true; + new_symbol.is_static_lifetime=true; // These are basically const global data. + + if(symbol_table.add(new_symbol)) + { + error() << "failed to add string literal symbol to symbol table" << eom; + throw 0; } - expr=address_of_exprt( - symbol_exprt(identifier, string_type)); + expr=new_symbol.symbol_expr(); } /*******************************************************************\ @@ -208,26 +229,40 @@ Function: java_bytecode_typecheckt::typecheck_expr_symbol void java_bytecode_typecheckt::typecheck_expr_member(member_exprt &expr) { - // The member might be in a parent class, which we resolve here. + // The member might be in a parent class or an opaque class, which we resolve + // here. const irep_idt component_name=expr.get_component_name(); while(1) { - if(ns.follow(expr.struct_op().type()).id()!=ID_struct) + typet base_type(ns.follow(expr.struct_op().type())); + + if(base_type.id()!=ID_struct) break; // give up - const struct_typet &struct_type= - to_struct_type(ns.follow(expr.struct_op().type())); + struct_typet &struct_type= + to_struct_type(base_type); if(struct_type.has_component(component_name)) return; // done // look at parent - const struct_typet::componentst &components= + struct_typet::componentst &components= struct_type.components(); + if(struct_type.get_bool(ID_incomplete_class)) + { + // member doesn't exist. In this case struct_type should be an opaque + // stub, and we'll add the member to it. + components + .push_back(struct_typet::componentt(component_name, expr.type())); + components.back().set_base_name(component_name); + components.back().set_pretty_name(component_name); + return; + } + if(components.empty()) - break; // give up + break; const struct_typet::componentt &c=components.front(); diff --git a/src/java_bytecode/java_bytecode_vtable.cpp b/src/java_bytecode/java_bytecode_vtable.cpp index 5be6e9a751b..be0cf2a15b8 100644 --- a/src/java_bytecode/java_bytecode_vtable.cpp +++ b/src/java_bytecode/java_bytecode_vtable.cpp @@ -16,12 +16,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include -namespace { -bool is_virtual(const class_typet::methodt &method) { - return method.get_bool(ID_is_virtual) - && !method.get_bool(ID_constructor); -} - const char ID_virtual_name[] = "virtual_name"; class is_virtual_name_equalt { @@ -154,6 +148,12 @@ class java_bytecode_vtable_factoryt } } + bool is_virtual(const class_typet::methodt &method) + { + return method.get_bool(ID_is_virtual) + && !method.get_bool(ID_constructor); + } + void create_base_vtable_entries( struct_exprt &vtable_value, const class_typet &class_type, @@ -210,7 +210,6 @@ class java_bytecode_vtable_factoryt } }; -} /******************************************************************* diff --git a/src/java_bytecode/java_class_loader.cpp b/src/java_bytecode/java_class_loader.cpp index 3d0f8338caf..541faf6828d 100644 --- a/src/java_bytecode/java_class_loader.cpp +++ b/src/java_bytecode/java_class_loader.cpp @@ -35,6 +35,11 @@ java_bytecode_parse_treet &java_class_loadert::operator()( { std::stack queue; + // Always require java.lang.Object, as it is the base of + // internal classes such as array types. + queue.push("java.lang.Object"); + // java.lang.String + queue.push("java.lang.String"); queue.push(class_name); while(!queue.empty()) @@ -204,7 +209,8 @@ Function: java_class_loadert::read_jar_file void java_class_loadert::read_jar_file(const irep_idt &file) { // done already? - if(jar_map.find(file)!=jar_map.end()) return; + if(jar_map.find(file)!=jar_map.end()) + return; #ifndef HAVE_LIBZIP error() << "no support for reading JAR files configured" << eom; @@ -272,7 +278,8 @@ std::string java_class_loadert::file_to_class_name(const std::string &file) // slash to dot for(std::string::iterator it=result.begin(); it!=result.end(); it++) - if(*it=='/') *it='.'; + if(*it=='/') + *it='.'; return result; } @@ -296,11 +303,13 @@ std::string java_class_loadert::class_name_to_file(const irep_idt &class_name) // dots (package name separators) to slash, depending on OS for(std::string::iterator it=result.begin(); it!=result.end(); it++) if(*it=='.') + { #ifdef _WIN32 *it='\\'; #else *it='/'; #endif + } // add .class suffix result+=".class"; diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index ee0c0c1c2ae..e2c71735fec 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -10,6 +10,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -20,7 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include -#include +#include #include #include @@ -28,6 +29,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_entry_point.h" #include "java_object_factory.h" +#include "java_types.h" #define INITIALIZE CPROVER_PREFIX "initialize" @@ -70,6 +72,18 @@ static void create_initialize(symbol_tablet &symbol_table) } +static bool should_init_symbol(const symbolt &sym) +{ + if(sym.type.id()!=ID_code && + sym.is_lvalue && + sym.is_state_var && + sym.is_static_lifetime && + sym.mode==ID_java) + return true; + + return has_prefix(id2string(sym.name), "java::java.lang.String.Literal"); +} + /*******************************************************************\ Function: java_static_lifetime_init @@ -85,27 +99,58 @@ Function: java_static_lifetime_init bool java_static_lifetime_init( symbol_tablet &symbol_table, const source_locationt &source_location, - message_handlert &message_handler) + message_handlert &message_handler, + bool assume_init_pointers_not_null, + unsigned max_nondet_array_length) { symbolt &initialize_symbol=symbol_table.lookup(INITIALIZE); code_blockt &code_block=to_code_block(to_code(initialize_symbol.value)); - // we need to zero out all static variables + // We need to zero out all static variables, or nondet-initialize if they're + // external. Iterate over a copy of the symtab, as its iterators are + // invalidated by object_factory: - for(symbol_tablet::symbolst::const_iterator - it=symbol_table.symbols.begin(); - it!=symbol_table.symbols.end(); - it++) + std::list symbol_names; + for(const auto &entry : symbol_table.symbols) + symbol_names.push_back(entry.first); + + for(const auto &symname : symbol_names) { - if(it->second.type.id()!=ID_code && - it->second.is_lvalue && - it->second.is_state_var && - it->second.is_static_lifetime && - it->second.value.is_not_nil() && - it->second.mode==ID_java) + const symbolt &sym=symbol_table.lookup(symname); + if(should_init_symbol(sym)) { - code_assignt assignment(it->second.symbol_expr(), it->second.value); - code_block.add(assignment); + if(sym.value.is_nil() && sym.type!=empty_typet()) + { + bool allow_null=!assume_init_pointers_not_null; + if(allow_null) + { + std::string namestr=id2string(sym.symbol_expr().get_identifier()); + const std::string suffix="@class_model"; + // Static '.class' fields are always non-null. + if(has_suffix(namestr, suffix)) + allow_null=false; + if(allow_null && has_prefix( + namestr, + "java::java.lang.String.Literal")) + allow_null=false; + } + auto newsym=object_factory( + sym.type, + code_block, + allow_null, + symbol_table, + max_nondet_array_length, + source_location, + message_handler); + code_assignt assignment(sym.symbol_expr(), newsym); + code_block.add(assignment); + } + else if(sym.value.is_not_nil()) + { + code_assignt assignment(sym.symbol_expr(), sym.value); + assignment.add_source_location()=source_location; + code_block.add(assignment); + } } } @@ -123,6 +168,7 @@ bool java_static_lifetime_init( code_function_callt function_call; function_call.lhs()=nil_exprt(); function_call.function()=it->second.symbol_expr(); + function_call.add_source_location()=source_location; code_block.add(function_call); } } @@ -130,7 +176,6 @@ bool java_static_lifetime_init( return false; } - /*******************************************************************\ Function: java_build_arguments @@ -146,7 +191,10 @@ Function: java_build_arguments exprt::operandst java_build_arguments( const symbolt &function, code_blockt &init_code, - symbol_tablet &symbol_table) + symbol_tablet &symbol_table, + bool assume_init_pointers_not_null, + unsigned max_nondet_array_length, + message_handlert &message_handler) { const code_typet::parameterst ¶meters= to_code_type(function.type).parameters(); @@ -158,13 +206,34 @@ exprt::operandst java_build_arguments( param_numbersecond; @@ -341,7 +406,10 @@ bool java_entry_point( { message.error() << "main symbol `" << config.main << "' not a function" << messaget::eom; - return true; + res.main_function=symbol; + res.error_found=true; + res.stop_convert=true; + return res; } // check if it has a body @@ -349,7 +417,10 @@ bool java_entry_point( { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; - return true; + res.main_function=symbol; + res.error_found=true; + res.stop_convert=true; + return res; } } else @@ -359,7 +430,12 @@ bool java_entry_point( // are we given a main class? if(main_class.empty()) - return false; // silently ignore + { + res.main_function=symbol; + res.error_found=false; + res.stop_convert=true; + return res; // silently ignore + } std::string entry_method= id2string(main_class)+".main"; @@ -382,14 +458,20 @@ bool java_entry_point( if(matches.empty()) { // Not found, silently ignore - return false; + res.main_function=symbol; + res.error_found=false; + res.stop_convert=true; + return res; } if(matches.size()>=2) { message.error() << "main method in `" << main_class << "' is ambiguous" << messaget::eom; - return true; // give up with error, no main + res.main_function=symbolt(); + res.error_found=true; + res.stop_convert=true; + return res; // give up with error, no main } // function symbol @@ -400,16 +482,64 @@ bool java_entry_point( { message.error() << "main method `" << main_class << "' has no body" << messaget::eom; - return true; // give up with error + res.main_function=symbol; + res.error_found=true; + res.stop_convert=true; + return res; // give up with error } } + res.main_function=symbol; + res.error_found=false; + res.stop_convert=false; + return res; // give up with error +} + +/*******************************************************************\ + +Function: java_entry_point + + Inputs: symbol_table + main class + message_handler + allow pointers in initialization code to be null + + Outputs: true if error occurred on entry point search + + Purpose: find entry point and create initialization code for function + +\*******************************************************************/ + +bool java_entry_point( + symbol_tablet &symbol_table, + const irep_idt &main_class, + message_handlert &message_handler, + bool assume_init_pointers_not_null, + size_t max_nondet_array_length) +{ + // check if the entry point is already there + if(symbol_table.symbols.find(goto_functionst::entry_point())!= + symbol_table.symbols.end()) + return false; // silently ignore + + messaget message(message_handler); + main_function_resultt res= + get_main_symbol(symbol_table, main_class, message_handler); + if(res.stop_convert) + return res.stop_convert; + symbolt symbol=res.main_function; + assert(!symbol.value.is_nil()); assert(symbol.type.id()==ID_code); create_initialize(symbol_table); - if(java_static_lifetime_init(symbol_table, symbol.location, message_handler)) + if(java_static_lifetime_init( + symbol_table, + symbol.location, + message_handler, + assume_init_pointers_not_null, + max_nondet_array_length)) return true; code_blockt init_code; @@ -437,8 +567,15 @@ bool java_entry_point( // build call to the main method code_function_callt call_main; - call_main.add_source_location()=symbol.location; + + source_locationt loc=symbol.location; + loc.set_function(symbol.name); + source_locationt &dloc=loc; + + call_main.add_source_location()=dloc; call_main.function()=symbol.symbol_expr(); + call_main.function().add_source_location()=dloc; + if(to_code_type(symbol.type).return_type()!=empty_typet()) { auxiliary_symbolt return_symbol; @@ -453,7 +590,13 @@ bool java_entry_point( } exprt::operandst main_arguments= - java_build_arguments(symbol, init_code, symbol_table); + java_build_arguments( + symbol, + init_code, + symbol_table, + assume_init_pointers_not_null, + max_nondet_array_length, + message_handler); call_main.arguments()=main_arguments; init_code.move_to_operands(call_main); diff --git a/src/java_bytecode/java_entry_point.h b/src/java_bytecode/java_entry_point.h index 5cbee65febb..7fa562de89a 100644 --- a/src/java_bytecode/java_entry_point.h +++ b/src/java_bytecode/java_entry_point.h @@ -14,6 +14,20 @@ Author: Daniel Kroening, kroening@kroening.com bool java_entry_point( class symbol_tablet &symbol_table, const irep_idt &main_class, - class message_handlert &message_handler); + class message_handlert &message_handler, + bool assume_init_pointers_not_null, + size_t max_nondet_array_length); + +typedef struct +{ + symbolt main_function; + bool error_found; + bool stop_convert; +} main_function_resultt; + +main_function_resultt get_main_symbol( + symbol_tablet &symbol_table, + const irep_idt &main_class, + message_handlert &); #endif // CPROVER_JAVA_BYTECODE_JAVA_ENTRY_POINT_H diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index dd9e8eee4d1..3cfb5030136 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -109,8 +109,8 @@ static bool lt_index( return a.var.indexvar.start_pcvar.start_pc; } @@ -118,8 +118,8 @@ static bool lt_startpc( // The predecessor map, and a top-sorting comparator: typedef std::map< - local_variable_with_holest*, - std::set > + local_variable_with_holest *, + std::set > predecessor_mapt; struct is_predecessor_oft @@ -129,8 +129,8 @@ struct is_predecessor_oft explicit is_predecessor_oft(const predecessor_mapt &_order) : order(_order) {} bool operator()( - local_variable_with_holest* a, - local_variable_with_holest* b) const + local_variable_with_holest *a, + local_variable_with_holest *b) const { auto findit=order.find(a); if(findit==order.end()) @@ -156,9 +156,9 @@ Function: gather_transitive_predecessors \*******************************************************************/ static void gather_transitive_predecessors( - local_variable_with_holest* start, + local_variable_with_holest *start, const predecessor_mapt &predecessor_map, - std::set& result) + std::set &result) { if(!result.insert(start).second) return; @@ -251,7 +251,7 @@ Function: populate_variable_address_map static void populate_variable_address_map( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, - std::vector& live_variable_at_address) + std::vector &live_variable_at_address) { for(auto it=firstvar, itend=varlimit; it!=itend; ++it) { @@ -298,7 +298,7 @@ Function: populate_predecessor_map static void populate_predecessor_map( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, - const std::vector& live_variable_at_address, + const std::vector &live_variable_at_address, const address_mapt &amap, predecessor_mapt &predecessor_map, message_handlert &msg_handler) @@ -480,10 +480,10 @@ Function: populate_live_range_holes static void populate_live_range_holes( local_variable_with_holest &merge_into, - const std::set& merge_vars, + const std::set &merge_vars, unsigned expanded_live_range_start) { - std::vector sorted_by_startpc( + std::vector sorted_by_startpc( merge_vars.begin(), merge_vars.end()); std::sort(sorted_by_startpc.begin(), sorted_by_startpc.end(), lt_startpc); @@ -518,7 +518,7 @@ Function: merge_variable_table_entries static void merge_variable_table_entries( local_variable_with_holest &merge_into, - const std::set& merge_vars, + const std::set &merge_vars, const java_cfg_dominatorst &dominator_analysis, std::ostream &debug_out) { @@ -591,7 +591,7 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( // Build a simple map from instruction PC to the variable // live in this slot at that PC, and a map from each variable // to variables that flow into it: - std::vector live_variable_at_address; + std::vector live_variable_at_address; populate_variable_address_map(firstvar, varlimit, live_variable_at_address); // Now find variables that flow together by @@ -612,14 +612,14 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( // Take the transitive closure of the predecessor map: for(auto &kv : predecessor_map) { - std::set closed_preds; + std::set closed_preds; gather_transitive_predecessors(kv.first, predecessor_map, closed_preds); kv.second=std::move(closed_preds); } // Top-sort so that we get the bottom variables first: is_predecessor_oft comp(predecessor_map); - std::vector topsorted_vars; + std::vector topsorted_vars; for(auto it=firstvar, itend=varlimit; it!=itend; ++it) topsorted_vars.push_back(&*it); @@ -729,7 +729,7 @@ Function: cleanup_var_table \*******************************************************************/ static void cleanup_var_table( - std::vector& vars_with_holes) + std::vector &vars_with_holes) { size_t toremove=0; for(size_t i=0; i<(vars_with_holes.size()-toremove); ++i) @@ -795,7 +795,7 @@ void java_bytecode_convert_methodt::setup_local_variables( // to calculate which variable to use, one uses the address of the instruction // that uses the variable, the size of the instruction and the start_pc / // length values in the local variable table - for(const auto & v : vars_with_holes) + for(const auto &v : vars_with_holes) { if(v.var.start_pc==0) // Parameter? continue; diff --git a/src/java_bytecode/java_object_factory.cpp b/src/java_bytecode/java_object_factory.cpp index 6eeba5e23eb..f8f3d98748f 100644 --- a/src/java_bytecode/java_object_factory.cpp +++ b/src/java_bytecode/java_object_factory.cpp @@ -7,14 +7,23 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ #include +#include +#include #include #include #include +#include #include +#include +#include #include +#include + #include "java_object_factory.h" +#include "java_types.h" +#include "java_utils.h" /*******************************************************************\ @@ -28,20 +37,151 @@ Function: gen_nondet_init \*******************************************************************/ -namespace { -void gen_nondet_init( +class java_object_factoryt:public messaget +{ + code_blockt &init_code; + std::set recursion_set; + bool assume_non_null; + size_t max_nondet_array_length; + symbol_tablet &symbol_table; + message_handlert &message_handler; + namespacet ns; + +public: + java_object_factoryt( + code_blockt &_init_code, + bool _assume_non_null, + size_t _max_nondet_array_length, + symbol_tablet &_symbol_table, + message_handlert &_message_handler): + init_code(_init_code), + assume_non_null(_assume_non_null), + max_nondet_array_length(_max_nondet_array_length), + symbol_table(_symbol_table), + message_handler(_message_handler), + ns(_symbol_table) + {} + + exprt allocate_object( + const exprt &, + const typet &, + const source_locationt &, + bool create_dynamic_objects); + + void gen_nondet_array_init(const exprt &expr, const source_locationt &); + + void gen_nondet_init( + const exprt &expr, + bool is_sub, + irep_idt class_identifier, + const source_locationt &loc, + bool skip_classid, + bool create_dynamic_objects, + bool override=false, + const typet &override_type=empty_typet()); +}; + + +/*******************************************************************\ + +Function: gen_nondet_array_init + + Inputs: the target expression, desired object type, source location + and Boolean whether to create a dynamic object + + Outputs: the allocated object + + Purpose: Returns false if we can't figure out the size of allocate_type. + allocate_type may differ from target_expr, e.g. for target_expr + having type int* and allocate_type being an int[10]. + +\*******************************************************************/ + +exprt java_object_factoryt::allocate_object( + const exprt &target_expr, + const typet &allocate_type, + const source_locationt &loc, + bool create_dynamic_objects) +{ + const typet &allocate_type_resolved=ns.follow(allocate_type); + const typet &target_type=ns.follow(target_expr.type().subtype()); + bool cast_needed=allocate_type_resolved!=target_type; + if(!create_dynamic_objects) + { + symbolt &aux_symbol=new_tmp_symbol(symbol_table); + aux_symbol.type=allocate_type; + aux_symbol.is_static_lifetime=true; + + exprt object=aux_symbol.symbol_expr(); + exprt aoe=address_of_exprt(object); + if(cast_needed) + aoe=typecast_exprt(aoe, target_expr.type()); + code_assignt code(target_expr, aoe); + code.add_source_location()=loc; + init_code.copy_to_operands(code); + return aoe; + } + else + { + // build size expression + exprt object_size=size_of_expr(allocate_type, namespacet(symbol_table)); + + if(allocate_type.id()!=ID_empty) + { + assert(!object_size.is_nil() && "size of Java objects should be known"); + // malloc expression + exprt malloc_expr=side_effect_exprt(ID_malloc); + malloc_expr.copy_to_operands(object_size); + typet result_type=pointer_typet(allocate_type); + malloc_expr.type()=result_type; + // Create a symbol for the malloc expression so we can initialize + // without having to do it potentially through a double-deref, which + // breaks the to-SSA phase. + symbolt &malloc_sym=new_tmp_symbol(symbol_table, "malloc_site"); + malloc_sym.type=pointer_typet(allocate_type); + code_assignt assign=code_assignt(malloc_sym.symbol_expr(), malloc_expr); + code_assignt &malloc_assign=assign; + malloc_assign.add_source_location()=loc; + init_code.copy_to_operands(malloc_assign); + malloc_expr=malloc_sym.symbol_expr(); + if(cast_needed) + malloc_expr=typecast_exprt(malloc_expr, target_expr.type()); + code_assignt code(target_expr, malloc_expr); + code.add_source_location()=loc; + init_code.copy_to_operands(code); + return malloc_sym.symbol_expr(); + } + else + { + // make null + null_pointer_exprt null_pointer_expr(to_pointer_type(target_expr.type())); + code_assignt code(target_expr, null_pointer_expr); + code.add_source_location()=loc; + init_code.copy_to_operands(code); + return exprt(); + } + } +} + +// Override type says to ignore the LHS' real type, and init it with the given +// type regardless. Used at the moment for reference arrays, which are +// implemented as void* arrays but should be init'd as their true type with +// appropriate casts. +void java_object_factoryt::gen_nondet_init( const exprt &expr, - code_blockt &init_code, - const namespacet &ns, - std::set &recursion_set, bool is_sub, - irep_idt class_identifier) + irep_idt class_identifier, + const source_locationt &loc, + bool skip_classid, + bool create_dynamic_objects, + bool override, + const typet &override_type) { - const typet &type=ns.follow(expr.type()); + const typet &type= + override ? ns.follow(override_type) : ns.follow(expr.type()); if(type.id()==ID_pointer) { - #if 0 // dereferenced type const pointer_typet &pointer_type=to_pointer_type(type); const typet &subtype=ns.follow(pointer_type.subtype()); @@ -56,38 +196,76 @@ void gen_nondet_init( // make null null_pointer_exprt null_pointer_expr(pointer_type); code_assignt code(expr, null_pointer_expr); + code.add_source_location()=loc; init_code.copy_to_operands(code); return; } } - // build size expression - exprt object_size=size_of_expr(subtype, ns); + code_labelt set_null_label; + code_labelt init_done_label; - if(subtype.id()!=ID_empty && !object_size.is_nil()) - { - // malloc expression - side_effect_exprt malloc_expr(ID_malloc); - malloc_expr.copy_to_operands(object_size); - malloc_expr.type()=pointer_type; + static size_t synthetic_constructor_count=0; - code_assignt code(expr, malloc_expr); - init_code.copy_to_operands(code); - - // dereference expression - dereference_exprt deref_expr(expr, subtype); - - gen_nondet_init(deref_expr, init_code, ns, recursion_set, false, ""); + if(!assume_non_null) + { + auto returns_null_sym= + new_tmp_symbol(symbol_table, "opaque_returns_null"); + returns_null_sym.type=c_bool_typet(1); + auto returns_null=returns_null_sym.symbol_expr(); + auto assign_returns_null= + code_assignt(returns_null, get_nondet_bool(returns_null_sym.type)); + assign_returns_null.add_source_location()=loc; + init_code.move_to_operands(assign_returns_null); + + auto set_null_inst= + code_assignt(expr, null_pointer_exprt(pointer_type)); + set_null_inst.add_source_location()=loc; + + std::string fresh_label= + "post_synthetic_malloc_"+std::to_string(++synthetic_constructor_count); + set_null_label=code_labelt(fresh_label, set_null_inst); + + init_done_label=code_labelt(fresh_label+"_init_done", code_skipt()); + + code_ifthenelset null_check; + exprt null_return= + zero_initializer(returns_null_sym.type, loc, ns, message_handler); + null_check.cond()= + notequal_exprt(returns_null, null_return); + null_check.then_case()=code_gotot(fresh_label); + init_code.move_to_operands(null_check); } + + if(java_is_array_type(subtype)) + gen_nondet_array_init(expr, loc); else { - // make null - null_pointer_exprt null_pointer_expr(pointer_type); - code_assignt code(expr, null_pointer_expr); - init_code.copy_to_operands(code); + exprt allocated= + allocate_object(expr, subtype, loc, create_dynamic_objects); + { + exprt init_expr; + if(allocated.id()==ID_address_of) + init_expr=allocated.op0(); + else + init_expr=dereference_exprt(allocated, allocated.type().subtype()); + gen_nondet_init( + init_expr, + false, + "", + loc, + false, + create_dynamic_objects); + } + } + + if(!assume_non_null) + { + init_code.copy_to_operands(code_gotot(init_done_label.get_label())); + init_code.move_to_operands(set_null_label); + init_code.move_to_operands(init_done_label); } - #endif } else if(type.id()==ID_struct) { @@ -98,6 +276,9 @@ void gen_nondet_init( const componentst &components=struct_type.components(); + if(!is_sub) + class_identifier=struct_tag; + recursion_set.insert(struct_tag); assert(!recursion_set.empty()); @@ -110,30 +291,40 @@ void gen_nondet_init( if(name=="@class_identifier") { + if(skip_classid) + continue; + irep_idt qualified_clsid="java::"+as_string(class_identifier); constant_exprt ci(qualified_clsid, string_typet()); - code_assignt code(me, ci); + code.add_source_location()=loc; init_code.copy_to_operands(code); } else if(name=="@lock") { code_assignt code(me, gen_zero(me.type())); + code.add_source_location()=loc; init_code.copy_to_operands(code); } else { assert(!name.empty()); - bool _is_sub = name[0]=='@'; + bool _is_sub=name[0]=='@'; +#if 0 irep_idt _class_identifier= _is_sub?(class_identifier.empty()?struct_tag:class_identifier):""; +#endif gen_nondet_init( - me, init_code, ns, recursion_set, _is_sub, _class_identifier); + me, + _is_sub, + class_identifier, + loc, + false, + create_dynamic_objects); } } - recursion_set.erase(struct_tag); } else @@ -141,9 +332,134 @@ void gen_nondet_init( side_effect_expr_nondett se=side_effect_expr_nondett(type); code_assignt code(expr, se); + code.add_source_location()=loc; init_code.copy_to_operands(code); } } + +/*******************************************************************\ + +Function: gen_nondet_array_init + + Inputs: + + Outputs: + + Purpose: create code to initialize a Java array with size + `max_nondet_array_length`, loop over elements and initialize + them + +\*******************************************************************/ + +void java_object_factoryt::gen_nondet_array_init( + const exprt &expr, + const source_locationt &loc) +{ + assert(expr.type().id()==ID_pointer); + const typet &type=ns.follow(expr.type().subtype()); + const struct_typet &struct_type=to_struct_type(type); + assert(expr.type().subtype().id()==ID_symbol); + const typet &element_type= + static_cast(expr.type().subtype().find(ID_C_element_type)); + + auto max_length_expr=from_integer(max_nondet_array_length, java_int_type()); + + typet allocate_type; + symbolt &length_sym=new_tmp_symbol(symbol_table, "nondet_array_length"); + length_sym.type=java_int_type(); + const auto &length_sym_expr=length_sym.symbol_expr(); + + // Initialize array with some undetermined length: + gen_nondet_init(length_sym_expr, false, irep_idt(), loc, false, false); + + // Insert assumptions to bound its length: + binary_relation_exprt + assume1(length_sym_expr, ID_ge, from_integer(0, java_int_type())); + binary_relation_exprt + assume2(length_sym_expr, ID_le, max_length_expr); + code_assumet assume_inst1(assume1); + code_assumet assume_inst2(assume2); + init_code.move_to_operands(assume_inst1); + init_code.move_to_operands(assume_inst2); + + side_effect_exprt java_new_array(ID_java_new_array, expr.type()); + java_new_array.copy_to_operands(length_sym_expr); + java_new_array.type().subtype().set(ID_C_element_type, element_type); + codet assign=code_assignt(expr, java_new_array); + assign.add_source_location()=loc; + init_code.copy_to_operands(assign); + + exprt init_array_expr= + member_exprt( + dereference_exprt(expr, expr.type().subtype()), + "data", + struct_type.components()[2].type()); + if(init_array_expr.type()!=pointer_typet(element_type)) + init_array_expr= + typecast_exprt(init_array_expr, pointer_typet(element_type)); + + // Interpose a new symbol, as the goto-symex stage can't handle array indexing + // via a cast. + symbolt &array_init_symbol=new_tmp_symbol(symbol_table, "array_data_init"); + array_init_symbol.type=init_array_expr.type(); + const auto &array_init_symexpr=array_init_symbol.symbol_expr(); + codet data_assign=code_assignt(array_init_symexpr, init_array_expr); + data_assign.add_source_location()=loc; + init_code.copy_to_operands(data_assign); + + // Emit init loop for(array_init_iter=0; array_init_iter!=array.length; + // ++array_init_iter) init(array[array_init_iter]); + symbolt &counter=new_tmp_symbol(symbol_table, "array_init_iter"); + counter.type=length_sym_expr.type(); + exprt counter_expr=counter.symbol_expr(); + + exprt java_zero=from_integer(0, java_int_type()); + init_code.copy_to_operands(code_assignt(counter_expr, java_zero)); + + std::string head_name=as_string(counter.base_name)+"_header"; + code_labelt init_head_label(head_name, code_skipt()); + code_gotot goto_head(head_name); + + init_code.move_to_operands(init_head_label); + + std::string done_name=as_string(counter.base_name)+"_done"; + code_labelt init_done_label(done_name, code_skipt()); + code_gotot goto_done(done_name); + + code_ifthenelset done_test; + done_test.cond()=equal_exprt(counter_expr, length_sym_expr); + done_test.then_case()=goto_done; + + init_code.move_to_operands(done_test); + + // Add a redundant if(counter == max_length) break that is easier for the + // unwinder to understand. + code_ifthenelset max_test; + max_test.cond()=equal_exprt(counter_expr, max_length_expr); + max_test.then_case()=goto_done; + + init_code.move_to_operands(max_test); + + exprt arraycellref=dereference_exprt( + plus_exprt(array_init_symexpr, counter_expr, array_init_symexpr.type()), + array_init_symexpr.type().subtype()); + + gen_nondet_init( + arraycellref, + false, + irep_idt(), + loc, + false, + true, + true, + element_type); + + exprt java_one=from_integer(1, java_int_type()); + code_assignt incr(counter_expr, plus_exprt(counter_expr, java_one)); + + init_code.move_to_operands(incr); + init_code.move_to_operands(goto_head); + init_code.move_to_operands(init_done_label); } /*******************************************************************\ @@ -158,15 +474,30 @@ Function: gen_nondet_init \*******************************************************************/ -namespace { void gen_nondet_init( const exprt &expr, code_blockt &init_code, - const namespacet &ns) + symbol_tablet &symbol_table, + const source_locationt &loc, + bool skip_classid, + bool create_dyn_objs, + bool assume_non_null, + message_handlert &message_handler, + size_t max_nondet_array_length) { - std::set recursion_set; - gen_nondet_init(expr, init_code, ns, recursion_set, false, ""); -} + java_object_factoryt state( + init_code, + assume_non_null, + max_nondet_array_length, + symbol_table, + message_handler); + state.gen_nondet_init( + expr, + false, + "", + loc, + skip_classid, + create_dyn_objs); } /*******************************************************************\ @@ -181,10 +512,9 @@ Function: new_tmp_symbol \*******************************************************************/ -namespace { -symbolt &new_tmp_symbol(symbol_tablet &symbol_table) +symbolt &new_tmp_symbol(symbol_tablet &symbol_table, const std::string &prefix) { - static int temporary_counter=0; + static size_t temporary_counter=0; // TODO encapsulate as class variable auxiliary_symbolt new_symbol; symbolt *symbol_ptr; @@ -194,10 +524,29 @@ symbolt &new_tmp_symbol(symbol_tablet &symbol_table) new_symbol.name="tmp_object_factory$"+std::to_string(++temporary_counter); new_symbol.base_name=new_symbol.name; new_symbol.mode=ID_java; - } while(symbol_table.move(new_symbol, symbol_ptr)); + } + while(symbol_table.move(new_symbol, symbol_ptr)); return *symbol_ptr; } + +/*******************************************************************\ + +Function: get_nondet_bool + + Inputs: Desired type (C_bool or plain bool) + + Outputs: nondet expr of that type + + Purpose: + +\*******************************************************************/ + +exprt get_nondet_bool(const typet &type) +{ + // We force this to 0 and 1 and won't consider + // other values. + return typecast_exprt(side_effect_expr_nondett(bool_typet()), type); } /*******************************************************************\ @@ -216,27 +565,38 @@ exprt object_factory( const typet &type, code_blockt &init_code, bool allow_null, - symbol_tablet &symbol_table) + symbol_tablet &symbol_table, + size_t max_nondet_array_length, + const source_locationt &loc, + message_handlert &message_handler) { if(type.id()==ID_pointer) { symbolt &aux_symbol=new_tmp_symbol(symbol_table); - aux_symbol.type=type.subtype(); + aux_symbol.type=type; aux_symbol.is_static_lifetime=true; exprt object=aux_symbol.symbol_expr(); const namespacet ns(symbol_table); - gen_nondet_init(object, init_code, ns); - - // todo: need to pass null, possibly - return address_of_exprt(object); + gen_nondet_init( + object, + init_code, + symbol_table, + loc, + false, + false, + !allow_null, + message_handler, + max_nondet_array_length); + + return object; } else if(type.id()==ID_c_bool) { // We force this to 0 and 1 and won't consider // other values. - return typecast_exprt(side_effect_expr_nondett(bool_typet()), type); + return get_nondet_bool(type); } else return side_effect_expr_nondett(type); diff --git a/src/java_bytecode/java_object_factory.h b/src/java_bytecode/java_object_factory.h index 5797cb20aed..d111948e365 100644 --- a/src/java_bytecode/java_object_factory.h +++ b/src/java_bytecode/java_object_factory.h @@ -9,6 +9,7 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H #define CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H +#include #include #include @@ -16,6 +17,27 @@ exprt object_factory( const typet &type, code_blockt &init_code, bool allow_null, - symbol_tablet &symbol_table); + symbol_tablet &symbol_table, + size_t max_nondet_array_length, + const source_locationt &, + message_handlert &message_handler); + +void gen_nondet_init( + const exprt &expr, + code_blockt &init_code, + symbol_tablet &symbol_table, + const source_locationt &, + bool skip_classid, + bool create_dynamic_objects, + bool assume_non_null, + message_handlert &message_handler, + size_t max_nondet_array_length=5); + + +exprt get_nondet_bool(const typet &); + +symbolt &new_tmp_symbol( + symbol_tablet &symbol_table, + const std::string &prefix="tmp_object_factory"); #endif // CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H diff --git a/src/java_bytecode/java_pointer_casts.cpp b/src/java_bytecode/java_pointer_casts.cpp new file mode 100644 index 00000000000..174bad0a9b1 --- /dev/null +++ b/src/java_bytecode/java_pointer_casts.cpp @@ -0,0 +1,160 @@ +/*******************************************************************\ + +Module: JAVA Pointer Casts + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include +#include +#include + +#include "java_pointer_casts.h" + +/*******************************************************************\ + +Function: clean_deref + + Inputs: pointer + + Outputs: dereferenced pointer + + Purpose: dereference pointer expression + +\*******************************************************************/ + +static exprt clean_deref(const exprt &ptr) +{ + return ptr.id()==ID_address_of + ? ptr.op0() + : dereference_exprt(ptr, ptr.type().subtype()); +} + +/*******************************************************************\ + +Function: find_superclass_with_type + + Inputs: pointer + target type to search + + Outputs: true iff a super class with target type is found + + Purpose: + +\*******************************************************************/ + +bool find_superclass_with_type( + exprt &ptr, + const typet &target_type, + const namespacet &ns) +{ + assert(ptr.type().id()==ID_pointer); + while(true) + { + const typet ptr_base=ns.follow(ptr.type().subtype()); + + if(ptr_base.id()!=ID_struct) + return false; + + const struct_typet &base_struct=to_struct_type(ptr_base); + + if(base_struct.components().empty()) + return false; + + const typet &first_field_type= + ns.follow(base_struct.components()[0].type()); + ptr=clean_deref(ptr); + ptr=member_exprt( + ptr, + base_struct.components()[0].get_name(), + first_field_type); + ptr=address_of_exprt(ptr); + + if(first_field_type==target_type) + return true; + } +} + + +/*******************************************************************\ + +Function: look_through_casts + + Inputs: input expression + + Outputs: recursively search target of typecast + + Purpose: + +\*******************************************************************/ + +static const exprt &look_through_casts(const exprt &in) +{ + if(in.id()==ID_typecast) + { + assert(in.type().id()==ID_pointer); + return look_through_casts(in.op0()); + } + else + return in; +} + + +/*******************************************************************\ + +Function: make_clean_pointer_cast + + Inputs: raw pointer + target type + namespace + + Outputs: cleaned up typecast expression + + Purpose: + +\*******************************************************************/ + +exprt make_clean_pointer_cast( + const exprt &rawptr, + const typet &target_type, + const namespacet &ns) +{ + assert( + target_type.id()==ID_pointer && + "Non-pointer target in make_clean_pointer_cast?"); + + const exprt &ptr=look_through_casts(rawptr); + + if(ptr.type()==target_type) + return ptr; + + if(ptr.type().subtype()==empty_typet() || + target_type.subtype()==empty_typet()) + return typecast_exprt(ptr, target_type); + + const typet &target_base=ns.follow(target_type.subtype()); + + exprt bare_ptr=ptr; + while(bare_ptr.id()==ID_typecast) + { + assert( + bare_ptr.type().id()==ID_pointer && + "Non-pointer in make_clean_pointer_cast?"); + if(bare_ptr.type().subtype()==empty_typet()) + bare_ptr=bare_ptr.op0(); + } + + assert( + bare_ptr.type().id()==ID_pointer && + "Non-pointer in make_clean_pointer_cast?"); + + if(bare_ptr.type()==target_type) + return bare_ptr; + + exprt superclass_ptr=bare_ptr; + if(find_superclass_with_type(superclass_ptr, target_base, ns)) + return superclass_ptr; + + return typecast_exprt(bare_ptr, target_type); +} diff --git a/src/java_bytecode/java_pointer_casts.h b/src/java_bytecode/java_pointer_casts.h new file mode 100644 index 00000000000..7b5d23a3c8a --- /dev/null +++ b/src/java_bytecode/java_pointer_casts.h @@ -0,0 +1,22 @@ +/*******************************************************************\ + +Module: JAVA Pointer Casts + +Author: DiffBlue + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_POINTER_CASTS_H +#define CPROVER_JAVA_BYTECODE_JAVA_POINTER_CASTS_H + +bool find_superclass_with_type( + exprt &ptr, + const typet &target_type, + const namespacet &ns); + +exprt make_clean_pointer_cast( + const exprt &ptr, + const typet &target_type, + const namespacet &ns); + +#endif // CPROVER_JAVA_BYTECODE_JAVA_POINTER_CASTS_H diff --git a/src/java_bytecode/java_types.cpp b/src/java_bytecode/java_types.cpp index 2e1318df274..cd3d23b22bd 100644 --- a/src/java_bytecode/java_types.cpp +++ b/src/java_bytecode/java_types.cpp @@ -7,6 +7,7 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ #include +#include #include #include @@ -384,9 +385,15 @@ typet java_type_from_string(const std::string &src) case '[': // array type { + // If this is a reference array, we generate a plain array[reference] + // with void* members, but note the real type in ID_C_element_type. if(src.size()<=1) return nil_typet(); + char subtype_letter=src[1]; const typet subtype=java_type_from_string(src.substr(1, std::string::npos)); - typet tmp=java_array_type('a'); + if(subtype_letter=='L' || // [L denotes a reference array of some sort. + subtype_letter=='[') // Array-of-arrays + subtype_letter='A'; + typet tmp=java_array_type(std::tolower(subtype_letter)); tmp.subtype().set(ID_C_element_type, subtype); return tmp; } @@ -441,24 +448,24 @@ char java_char_from_type(const typet &type) if(id==ID_signedbv) { - const unsigned int width(type.get_unsigned_int(ID_width)); - if(java_int_type().get_unsigned_int(ID_width) == width) + const size_t width=to_signedbv_type(type).get_width(); + if(to_signedbv_type(java_int_type()).get_width()==width) return 'i'; - else if(java_long_type().get_unsigned_int(ID_width) == width) + else if(to_signedbv_type(java_long_type()).get_width()==width) return 'l'; - else if(java_short_type().get_unsigned_int(ID_width) == width) + else if(to_signedbv_type(java_short_type()).get_width()==width) return 's'; - else if(java_byte_type().get_unsigned_int(ID_width) == width) + else if(to_signedbv_type(java_byte_type()).get_width()==width) return 'b'; } else if(id==ID_unsignedbv) return 'c'; else if(id==ID_floatbv) { - const unsigned int width(type.get_unsigned_int(ID_width)); - if(java_float_type().get_unsigned_int(ID_width) == width) + const size_t width(to_floatbv_type(type).get_width()); + if(to_floatbv_type(java_float_type()).get_width()==width) return 'f'; - else if(java_double_type().get_unsigned_int(ID_width) == width) + else if(to_floatbv_type(java_double_type()).get_width()==width) return 'd'; } else if(id==ID_c_bool) diff --git a/src/java_bytecode/java_utils.cpp b/src/java_bytecode/java_utils.cpp new file mode 100644 index 00000000000..2e84c77f870 --- /dev/null +++ b/src/java_bytecode/java_utils.cpp @@ -0,0 +1,21 @@ +/*******************************************************************\ + +Module: + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include +#include + +#include "java_utils.h" + +bool java_is_array_type(const typet &type) +{ + if(type.id()!=ID_struct) + return false; + return has_prefix(id2string( + to_struct_type(type).get_tag()), + "java::array["); +} diff --git a/src/java_bytecode/java_utils.h b/src/java_bytecode/java_utils.h new file mode 100644 index 00000000000..ec7e6a9ad8e --- /dev/null +++ b/src/java_bytecode/java_utils.h @@ -0,0 +1,16 @@ +/*******************************************************************\ + +Module: + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#include + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_UTILS_H +#define CPROVER_JAVA_BYTECODE_JAVA_UTILS_H + +bool java_is_array_type(const typet &type); + +#endif // CPROVER_JAVA_BYTECODE_JAVA_UTILS_H diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index 82ff934bd2f..e320f329ecf 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -733,3 +733,6 @@ verilog_array low high bswap +java_bytecode_index +java_instanceof +java_super_method_call diff --git a/src/util/json_expr.cpp b/src/util/json_expr.cpp index ab03dc33a60..77f58eb0e18 100644 --- a/src/util/json_expr.cpp +++ b/src/util/json_expr.cpp @@ -45,6 +45,10 @@ json_objectt json(const source_locationt &location) if(!location.get_function().empty()) result["function"]=json_stringt(id2string(location.get_function())); + if(!location.get_java_bytecode_index().empty()) + result["bytecode_index"]= + json_stringt(id2string(location.get_java_bytecode_index())); + return result; } @@ -78,12 +82,14 @@ json_objectt json( else if(type.id()==ID_signedbv) { result["name"]=json_stringt("integer"); - result["width"]=json_numbert(std::to_string(to_signedbv_type(type).get_width())); + result["width"]= + json_numbert(std::to_string(to_signedbv_type(type).get_width())); } else if(type.id()==ID_floatbv) { result["name"]=json_stringt("float"); - result["width"]=json_numbert(std::to_string(to_floatbv_type(type).get_width())); + result["width"]= + json_numbert(std::to_string(to_floatbv_type(type).get_width())); } else if(type.id()==ID_bv) { @@ -104,7 +110,8 @@ json_objectt json( else if(type.id()==ID_fixedbv) { result["name"]=json_stringt("fixed"); - result["width"]=json_numbert(std::to_string(to_fixedbv_type(type).get_width())); + result["width"]= + json_numbert(std::to_string(to_fixedbv_type(type).get_width())); } else if(type.id()==ID_pointer) { diff --git a/src/util/source_location.cpp b/src/util/source_location.cpp index 9406fd61011..7583b40b496 100644 --- a/src/util/source_location.cpp +++ b/src/util/source_location.cpp @@ -31,20 +31,43 @@ std::string source_locationt::as_string(bool print_cwd) const const irep_idt &line=get_line(); const irep_idt &column=get_column(); const irep_idt &function=get_function(); + const irep_idt &bytecode=get_java_bytecode_index(); if(!file.empty()) { - if(dest!="") dest+=' '; + if(dest!="") + dest+=' '; dest+="file "; if(print_cwd) - dest+=concat_dir_file(id2string(get_working_directory()), - id2string(file)); + dest+= + concat_dir_file(id2string(get_working_directory()), id2string(file)); else dest+=id2string(file); } - if(!line.empty()) { if(dest!="") dest+=' '; dest+="line "+id2string(line); } - if(!column.empty()) { if(dest!="") dest+=' '; dest+="column "+id2string(column); } - if(!function.empty()) { if(dest!="") dest+=' '; dest+="function "+id2string(function); } + if(!line.empty()) + { + if(dest!="") + dest+=' '; + dest+="line "+id2string(line); + } + if(!column.empty()) + { + if(dest!="") + dest+=' '; + dest+="column "+id2string(column); + } + if(!function.empty()) + { + if(dest!="") + dest+=' '; + dest+="function "+id2string(function); + } + if(!bytecode.empty()) + { + if(dest!="") + dest+=' '; + dest+="bytecode_index "+id2string(bytecode); + } return dest; } @@ -65,9 +88,8 @@ std::ostream &operator << ( std::ostream &out, const source_locationt &source_location) { - if(source_location.is_nil()) return out; - + if(source_location.is_nil()) + return out; out << source_location.as_string(); - return out; } diff --git a/src/util/source_location.h b/src/util/source_location.h index c34cfb5f9d2..8d1e297b535 100644 --- a/src/util/source_location.h +++ b/src/util/source_location.h @@ -68,6 +68,11 @@ class source_locationt:public irept return get(ID_comment); } + inline const irep_idt &get_java_bytecode_index() const + { + return get(ID_java_bytecode_index); + } + inline void set_file(const irep_idt &file) { set(ID_file, file); @@ -118,6 +123,11 @@ class source_locationt:public irept set(ID_comment, comment); } + inline void set_java_bytecode_index(const irep_idt &index) + { + set(ID_java_bytecode_index, index); + } + inline void set_hide() { set(ID_hide, true); @@ -137,9 +147,6 @@ class source_locationt:public irept std::string as_string(bool print_cwd) const; }; -// will go away -//typedef source_locationt locationt; - std::ostream &operator <<(std::ostream &, const source_locationt &); #endif // CPROVER_UTIL_SOURCE_LOCATION_H