diff --git a/src/java_bytecode/bytecode_info.h b/src/java_bytecode/bytecode_info.h index 538de94ebb1..6376893ab14 100644 --- a/src/java_bytecode/bytecode_info.h +++ b/src/java_bytecode/bytecode_info.h @@ -1,3 +1,7 @@ +#pragma once + +#include + // http://en.wikipedia.org/wiki/Java_bytecode_instruction_listings // The 'result_type' is one of the following: @@ -38,3 +42,7 @@ struct bytecode_infot extern struct bytecode_infot const bytecode_info[]; +typedef uint8_t u1; +typedef uint16_t u2; +typedef uint32_t u4; +typedef uint64_t u8; diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index eae931120c6..ee44fa7c344 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -25,6 +25,8 @@ Author: Daniel Kroening, kroening@kroening.com #include "bytecode_info.h" #include "java_types.h" +#include + namespace { class patternt { @@ -79,21 +81,68 @@ class java_bytecode_convert_methodt:public messaget { public: symbol_exprt symbol_expr; + size_t start_pc; + size_t length; }; - - expanding_vector variables; + + typedef std::vector variablest; + expanding_vector variables; bool method_has_this; + typedef enum instruction_sizet + { + INST_INDEX = 2, INST_INDEX_CONST = 3 + } instruction_sizet; + + // return corresponding reference of variable + variablet &find_variable_for_slot(unsigned number_int, size_t address, + variablest &var_list, instruction_sizet inst_size) + { + size_t var_list_length = var_list.size(); + if(var_list_length > 1) + { + for(variablet &var : var_list) + { + size_t start_pc = var.start_pc; + size_t length = var.length; + if (address + (size_t) inst_size >= start_pc && address < start_pc + length) + return var; + } + // add unnamed local variable to end of list at this index + // with scope from 0 to INT_MAX + // as it is at the end of the vector, it will only be taken into account + // if no other variable is valid + size_t list_length = var_list.size(); + var_list.resize(list_length + 1); + var_list[list_length].start_pc = 0; + var_list[list_length].length = std::numeric_limits::max(); + return var_list[list_length]; + } + else if(var_list_length == 1) + return var_list[0]; + else + { + // return reference to unnamed local variable + // if no local variable is defined for this index + var_list.resize(1); + return var_list[0]; + } + } + // JVM local variables - const exprt variable(const exprt &arg, char type_char) + const exprt variable(const exprt &arg, char type_char, size_t address, instruction_sizet inst_size) { irep_idt number=to_constant_expr(arg).get_value(); std::size_t number_int=safe_string2size_t(id2string(number)); typet t=java_type_from_char(type_char); + variablest &var_list = variables[number_int]; + + // search variable in list for correct frame / address if necessary + variablet &var = find_variable_for_slot(number_int, address, var_list, inst_size); - if(variables[number_int].symbol_expr.get_identifier().empty()) + if(var.symbol_expr.get_identifier().empty()) { // an un-named local variable irep_idt base_name="local"+id2string(number)+type_char; @@ -106,7 +155,7 @@ class java_bytecode_convert_methodt:public messaget } else { - exprt result=variables[number_int].symbol_expr; + exprt result=var.symbol_expr; if(t!=result.type()) result=typecast_exprt(result, t); return result; } @@ -255,22 +304,32 @@ void java_bytecode_convert_methodt::convert( variables.clear(); - // Do the parameters and locals in the variable table, - // which is only available when compiled with -g + // Do the parameters and locals in the variable table, which is available when + // compiled with -g or for methods with many local variables in the latter + // case, different variables can have the same index, depending on the + // context. + // + // to calculate which variable to use, one uses the address of the instruction + // that uses the variable, the size of the instruction and the start_pc / + // length values in the local variable table for(const auto & v : m.local_variable_table) { typet t=java_type_from_string(v.signature); irep_idt identifier=id2string(method_identifier)+"::"+id2string(v.name); symbol_exprt result(identifier, t); result.set(ID_C_base_name, v.name); - variables[v.index].symbol_expr=result; + size_t number_index_entries = variables[v.index].size(); + variables[v.index].resize(number_index_entries + 1); + variables[v.index][number_index_entries].symbol_expr = result; + variables[v.index][number_index_entries].start_pc = v.start_pc; + variables[v.index][number_index_entries].length = v.length; } // set up variables array for(std::size_t i=0, param_index=0; i < parameters.size(); ++i) { - variables[param_index]; + variables[param_index].resize(1); param_index+=get_variable_slots(parameters[i]); } @@ -290,8 +349,8 @@ void java_bytecode_convert_methodt::convert( else { // in the variable table? - base_name=variables[param_index].symbol_expr.get(ID_C_base_name); - identifier=variables[param_index].symbol_expr.get(ID_identifier); + base_name=variables[param_index][0].symbol_expr.get(ID_C_base_name); + identifier=variables[param_index][0].symbol_expr.get(ID_identifier); if(base_name.empty()) { @@ -315,7 +374,7 @@ void java_bytecode_convert_methodt::convert( // add as a JVM variable std::size_t slots=get_variable_slots(parameters[i]); - variables[param_index].symbol_expr=parameter_symbol.symbol_expr(); + variables[param_index][0].symbol_expr=parameter_symbol.symbol_expr(); param_index+=slots; } @@ -468,7 +527,7 @@ codet java_bytecode_convert_methodt::convert_instructions( stackt stack; bool done; }; - + typedef std::map address_mapt; address_mapt address_map; std::set targets; @@ -765,7 +824,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // store value into some local variable assert(op.size()==1 && results.empty()); - exprt var=variable(arg0, statement[0]); + exprt var=variable(arg0, statement[0], i_it->address, INST_INDEX); const bool is_array('a' == statement[0]); @@ -797,7 +856,7 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement==patternt("?load")) { // load a value from a local variable - results[0]=variable(arg0, statement[0]); + results[0]=variable(arg0, statement[0], i_it->address, INST_INDEX); } else if(statement=="ldc" || statement=="ldc_w" || statement=="ldc2" || statement=="ldc2_w") @@ -959,9 +1018,9 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement=="iinc") { code_assignt code_assign; - code_assign.lhs()=variable(arg0, 'i'); + code_assign.lhs()=variable(arg0, 'i', i_it->address, INST_INDEX_CONST); code_assign.rhs()=plus_exprt( - variable(arg0, 'i'), + variable(arg0, 'i', i_it->address, INST_INDEX_CONST), typecast_exprt(arg1, java_int_type())); c=code_assign; } diff --git a/src/java_bytecode/java_bytecode_parse_tree.h b/src/java_bytecode/java_bytecode_parse_tree.h index 550a4ae8584..aed96ed957c 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.h +++ b/src/java_bytecode/java_bytecode_parse_tree.h @@ -14,6 +14,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include "bytecode_info.h" + class java_bytecode_parse_treet { public: @@ -93,11 +95,45 @@ class java_bytecode_parse_treet irep_idt name; std::string signature; std::size_t index; + std::size_t start_pc; + std::size_t length; }; - + typedef std::vector local_variable_tablet; local_variable_tablet local_variable_table; + class verification_type_infot + { + public: + enum verification_type_info_type { TOP, INTEGER, FLOAT, LONG, DOUBLE, + ITEM_NULL, UNINITIALIZED_THIS, + OBJECT, UNINITIALIZED}; + verification_type_info_type type; + u1 tag; + u2 cpool_index; + u2 offset; + }; + + class stack_map_table_entryt + { + public: + enum stack_frame_type { SAME, SAME_LOCALS_ONE_STACK, SAME_LOCALS_ONE_STACK_EXTENDED, + CHOP, SAME_EXTENDED, APPEND, FULL}; + stack_frame_type type; + size_t offset_delta; + size_t chops; + size_t appends; + + typedef std::vector local_verification_type_infot; + typedef std::vector stack_verification_type_infot; + + local_verification_type_infot locals; + stack_verification_type_infot stack; + }; + + typedef std::vector stack_map_tablet; + stack_map_tablet stack_map_table; + virtual void output(std::ostream &out) const; inline methodt():is_native(false), is_abstract(false), is_synchronized(false) diff --git a/src/java_bytecode/java_bytecode_parser.cpp b/src/java_bytecode/java_bytecode_parser.cpp index 6a6255ea7b4..73f9bd82a7b 100644 --- a/src/java_bytecode/java_bytecode_parser.cpp +++ b/src/java_bytecode/java_bytecode_parser.cpp @@ -49,11 +49,6 @@ class java_bytecode_parsert:public parsert java_bytecode_parse_treet parse_tree; - typedef unsigned char u1; - typedef unsigned short u2; - typedef unsigned int u4; - typedef unsigned long long u8; - struct pool_entryt { u1 tag; @@ -124,6 +119,7 @@ class java_bytecode_parsert:public parsert void rmethod_attribute(methodt &method); void rfield_attribute(fieldt &); void rcode_attribute(methodt &method); + void read_verification_type_info(methodt::verification_type_infot &); void rbytecode(methodt::instructionst &); void get_class_refs(); void get_class_refs_rec(const typet &); @@ -193,6 +189,16 @@ class java_bytecode_parsert:public parsert #define CONSTANT_MethodType 16 #define CONSTANT_InvokeDynamic 18 +#define VTYPE_INFO_TOP 0 +#define VTYPE_INFO_INTEGER 1 +#define VTYPE_INFO_FLOAT 2 +#define VTYPE_INFO_LONG 3 +#define VTYPE_INFO_DOUBLE 4 +#define VTYPE_INFO_ITEM_NULL 5 +#define VTYPE_INFO_UNINIT_THIS 6 +#define VTYPE_INFO_OBJECT 7 +#define VTYPE_INFO_UNINIT 8 + /*******************************************************************\ Function: java_bytecode_parsert::parse @@ -1041,9 +1047,9 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) { u2 attribute_name_index=read_u2(); u4 attribute_length=read_u4(); - + irep_idt attribute_name=pool_entry(attribute_name_index).s; - + if(attribute_name=="LineNumberTable") { // address -> instructiont @@ -1051,13 +1057,13 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) instruction_mapt instruction_map; for(methodt::instructionst::iterator - it=method.instructions.begin(); + it=method.instructions.begin(); it!=method.instructions.end(); it++) { instruction_map[it->address]=it; } - + u2 line_number_table_length=read_u2(); for(unsigned i=0; isecond->source_location.set_line(line_number); } @@ -1076,26 +1082,159 @@ void java_bytecode_parsert::rcode_attribute(methodt &method) else if(attribute_name=="LocalVariableTable") { u2 local_variable_table_length=read_u2(); - + method.local_variable_table.resize(local_variable_table_length); for(unsigned i=0; i