diff --git a/regression/cbmc-java/lvt-unexpected/Makefile b/regression/cbmc-java/lvt-unexpected/Makefile new file mode 100644 index 00000000000..8fb160582c0 --- /dev/null +++ b/regression/cbmc-java/lvt-unexpected/Makefile @@ -0,0 +1,7 @@ + +T=unexpected + +all : $T.class + +%.class : %.j + jasmin $< diff --git a/regression/cbmc-java/lvt-unexpected/test.desc b/regression/cbmc-java/lvt-unexpected/test.desc new file mode 100644 index 00000000000..a7efd74bb7e --- /dev/null +++ b/regression/cbmc-java/lvt-unexpected/test.desc @@ -0,0 +1,12 @@ +KNOWNBUG +unexpected.class +--verbosity 10 --show-symbol-table +^EXIT=0$ +^SIGNAL=0$ +-- +-- +The LVT in the only method in unexpected.class does not satisfy the constraints +expected by the bytecode to goto conversion algorithms. Namely, the live range +of parameter x in that method is not preceeded by a store_i instruction, which +is perfectly possibly as per the JVM specs but currently additionally required +in CBMC. diff --git a/regression/cbmc-java/lvt-unexpected/unexpected.class b/regression/cbmc-java/lvt-unexpected/unexpected.class new file mode 100644 index 00000000000..b7078721f99 Binary files /dev/null and b/regression/cbmc-java/lvt-unexpected/unexpected.class differ diff --git a/regression/cbmc-java/lvt-unexpected/unexpected.j b/regression/cbmc-java/lvt-unexpected/unexpected.j new file mode 100644 index 00000000000..91258c12ae5 --- /dev/null +++ b/regression/cbmc-java/lvt-unexpected/unexpected.j @@ -0,0 +1,34 @@ +.class public unexpected +.super java/lang/Object + +.method public static test1(I)I + .limit stack 10 + .limit locals 20 + + ; unexpected in previous code, as arg is the argument of the method and its + ; start_pc is != 0 + .var 0 is arg I from Label1 to Label2 + .var 1 is x I from Label1 to Label2 + + ; say hello + getstatic java/lang/System/out Ljava/io/PrintStream; + ldc "Starting test1!" + invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V + +Label1: + ; x = arg + 3 + iload_0 + iconst_3 + iadd + istore_1 + + ; say bye + getstatic java/lang/System/out Ljava/io/PrintStream; + ldc "Returning from test1!" + invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V + + ; return x + iload_1 + ireturn +Label2: +.end method diff --git a/regression/cbmc-java/method_parmeters1/method_parameters.class b/regression/cbmc-java/method_parmeters1/method_parameters.class new file mode 100644 index 00000000000..866f9b1c2da Binary files /dev/null and b/regression/cbmc-java/method_parmeters1/method_parameters.class differ diff --git a/regression/cbmc-java/method_parmeters1/method_parameters.java b/regression/cbmc-java/method_parmeters1/method_parameters.java new file mode 100644 index 00000000000..10dfba1bbea --- /dev/null +++ b/regression/cbmc-java/method_parmeters1/method_parameters.java @@ -0,0 +1,129 @@ +class method_parameters +{ + void test1() + {} + + void test2(boolean b) + {} + + void test3(byte b) + {} + + void test3a(char c) + {} + + void test4(int i) + {} + + void test5(float f) + {} + + void test6(String s) + {} + + void test7(long l) + {} + + void test8(double l) + {} + + void test9(int i, long l) + { + int local = 123; + } + + void test10(double i, String s) + { + int local = 123; + } + + void test11(int a[][]) + {} + + double test12(double a[][], double d) + { + if (a.length < 1) return 0; + if (a[0].length < 1) return 0; + return d + a[0][0]; + } + + void test12a(double a[][][], int i, double d) + {} + + double test12b(double a[][], double d) + { + return 123.123; + } + + void test12c(double a[][][], double d) + { + } + + void test13(double d, int i, byte b, char c) + {} + + // Same as above but now static + + static void ttest1() + {} + + static void ttest2(boolean b) + {} + + static void ttest3(byte b) + {} + + static void ttest3a(char c) + {} + + static void ttest4(int i) + {} + + static void ttest5(float f) + {} + + static void ttest6(String s) + {} + + static void ttest7(long l) + {} + + static void ttest8(double l) + {} + + static void ttest9(int i, long l) + { + int local = 123; + } + + static void ttest10(double i, String s) + { + int local = 123; + } + + static void ttest11(int a[][]) + {} + + static double ttest12(double a[][], double d) + { + if (a.length < 1) return 0; + if (a[0].length < 1) return 0; + return d + a[0][0]; + } + + static void ttest12a(double a[][][], int i, double d) + {} + + static double ttest12b(double a[][], double d) + { + return 123.123; + } + + static void ttest12c(double a[][][], double d) + { + } + + static void ttest13(double d, int i, byte b, char c) + {} + +} diff --git a/regression/cbmc-java/method_parmeters1/test.desc b/regression/cbmc-java/method_parmeters1/test.desc new file mode 100644 index 00000000000..909aa8ab6b4 --- /dev/null +++ b/regression/cbmc-java/method_parmeters1/test.desc @@ -0,0 +1,9 @@ +CORE +method_parameters.class +--verbosity 10 --show-symbol-table +^EXIT=0$ +^SIGNAL=0$ +-- +-- +The purpose of the test is ensuring that the bytecode -> symbol table AST +generation happens correctly. The generated .class file does not contain LVTs. diff --git a/regression/cbmc-java/method_parmeters2/method_parameters.class b/regression/cbmc-java/method_parmeters2/method_parameters.class new file mode 100644 index 00000000000..5f065c68585 Binary files /dev/null and b/regression/cbmc-java/method_parmeters2/method_parameters.class differ diff --git a/regression/cbmc-java/method_parmeters2/method_parameters.java b/regression/cbmc-java/method_parmeters2/method_parameters.java new file mode 100644 index 00000000000..10dfba1bbea --- /dev/null +++ b/regression/cbmc-java/method_parmeters2/method_parameters.java @@ -0,0 +1,129 @@ +class method_parameters +{ + void test1() + {} + + void test2(boolean b) + {} + + void test3(byte b) + {} + + void test3a(char c) + {} + + void test4(int i) + {} + + void test5(float f) + {} + + void test6(String s) + {} + + void test7(long l) + {} + + void test8(double l) + {} + + void test9(int i, long l) + { + int local = 123; + } + + void test10(double i, String s) + { + int local = 123; + } + + void test11(int a[][]) + {} + + double test12(double a[][], double d) + { + if (a.length < 1) return 0; + if (a[0].length < 1) return 0; + return d + a[0][0]; + } + + void test12a(double a[][][], int i, double d) + {} + + double test12b(double a[][], double d) + { + return 123.123; + } + + void test12c(double a[][][], double d) + { + } + + void test13(double d, int i, byte b, char c) + {} + + // Same as above but now static + + static void ttest1() + {} + + static void ttest2(boolean b) + {} + + static void ttest3(byte b) + {} + + static void ttest3a(char c) + {} + + static void ttest4(int i) + {} + + static void ttest5(float f) + {} + + static void ttest6(String s) + {} + + static void ttest7(long l) + {} + + static void ttest8(double l) + {} + + static void ttest9(int i, long l) + { + int local = 123; + } + + static void ttest10(double i, String s) + { + int local = 123; + } + + static void ttest11(int a[][]) + {} + + static double ttest12(double a[][], double d) + { + if (a.length < 1) return 0; + if (a[0].length < 1) return 0; + return d + a[0][0]; + } + + static void ttest12a(double a[][][], int i, double d) + {} + + static double ttest12b(double a[][], double d) + { + return 123.123; + } + + static void ttest12c(double a[][][], double d) + { + } + + static void ttest13(double d, int i, byte b, char c) + {} + +} diff --git a/regression/cbmc-java/method_parmeters2/test.desc b/regression/cbmc-java/method_parmeters2/test.desc new file mode 100644 index 00000000000..507e8d4ef11 --- /dev/null +++ b/regression/cbmc-java/method_parmeters2/test.desc @@ -0,0 +1,10 @@ +CORE +method_parameters.class +--verbosity 10 --show-symbol-table +^EXIT=0$ +^SIGNAL=0$ +-- +-- +The purpose of the test is ensuring that the bytecode -> symbol table AST +generation happens correctly. The generated .class file DOES contain LVTs, i.e., +it has been compiled with with "javac -g". diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 69fb187aa7b..b9a3cf00063 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -30,6 +30,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_convert_method_class.h" #include "bytecode_info.h" #include "java_types.h" +#include "java_utils.h" #include "java_string_library_preprocess.h" #include @@ -106,26 +107,13 @@ static bool operator==(const irep_idt &what, const patternt &pattern) return pattern==what; } -const size_t SLOTS_PER_INTEGER(1u); -const size_t INTEGER_WIDTH(64u); -static size_t count_slots( - const size_t value, - const code_typet::parametert ¶m) -{ - const std::size_t width(param.type().get_unsigned_int(ID_width)); - return value+SLOTS_PER_INTEGER+width/INTEGER_WIDTH; -} - -static size_t get_variable_slots(const code_typet::parametert ¶m) -{ - return count_slots(0, param); -} - static irep_idt strip_java_namespace_prefix(const irep_idt to_strip) { - const auto to_strip_str=id2string(to_strip); - assert(has_prefix(to_strip_str, "java::")); - return to_strip_str.substr(6, std::string::npos); + const std::string to_strip_str=id2string(to_strip); + const std::string prefix="java::"; + + PRECONDITION(has_prefix(to_strip_str, prefix)); + return to_strip_str.substr(prefix.size(), std::string::npos); } // name contains or @@ -297,25 +285,44 @@ void java_bytecode_convert_methodt::convert( const symbolt &class_symbol, const methodt &m) { + // Construct the fully qualified method name + // (e.g. "my.package.ClassName.myMethodName:(II)I") and query the symbol table + // to retrieve the symbol (constructed by java_bytecode_convert_method_lazy) + // associated to the method const irep_idt method_identifier= id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.signature; method_id=method_identifier; const auto &old_sym=symbol_table.lookup(method_identifier); + // Obtain a std::vector of code_typet::parametert objects from the + // (function) type of the symbol typet member_type=old_sym.type; code_typet &code_type=to_code_type(member_type); method_return_type=code_type.return_type(); code_typet::parameterst ¶meters=code_type.parameters(); + // Determine the number of local variable slots used by the JVM to maintan the + // formal parameters + slots_for_parameters=java_method_parameter_slots(code_type); + + debug() << "Generating codet: class " + << class_symbol.name << ", method " + << m.name << eom; + + // We now set up the local variables for the method parameters variables.clear(); - // find parameter names in the local variable table: + // Find parameter names in the local variable table: for(const auto &v : m.local_variable_table) { - if(v.start_pc!=0) // Local? + // Skip this variable if it is not a method parameter + if(!is_parameter(v)) continue; + // Construct a fully qualified name for the parameter v, + // e.g. my.package.ClassName.myMethodName:(II)I::anIntParam, and then a + // symbol_exprt with the parameter and its type typet t=java_type_from_string(v.signature); std::ostringstream id_oss; id_oss << method_id << "::" << v.name; @@ -323,6 +330,10 @@ void java_bytecode_convert_methodt::convert( symbol_exprt result(identifier, t); result.set(ID_C_base_name, v.name); + // Create a new variablet in the variables vector; in fact this entry will + // be rewritten below in the loop that iterates through the method + // parameters; the only field that seem to be useful to write here is the + // symbol_expr, others will be rewritten variables[v.index].push_back(variablet()); auto &newv=variables[v.index].back(); newv.symbol_expr=result; @@ -330,46 +341,59 @@ void java_bytecode_convert_methodt::convert( newv.length=v.length; } - // set up variables array + // The variables is a expanding_vectort, and the loop above may have expanded + // the vector introducing gaps where the entries are empty vectors. We now + // make sure that the vector of each LV slot contains exactly one variablet, + // possibly default-initialized std::size_t param_index=0; for(const auto ¶m : parameters) { variables[param_index].resize(1); - param_index+=get_variable_slots(param); + param_index+=java_local_variable_slots(param.type()); } + INVARIANT( + param_index==slots_for_parameters, + "java_parameter_count and local computation must agree"); - // assign names to parameters + // Assign names to parameters param_index=0; for(auto ¶m : parameters) { irep_idt base_name, identifier; + // Construct a sensible base name (base_name) and a fully qualified name + // (identifier) for every parameter of the method under translation, + // regardless of whether we have an LVT or not; and assign it to the + // parameter object (which is stored in the type of the symbol, not in the + // symbol table) if(param_index==0 && param.get_this()) { + // my.package.ClassName.myMethodName:(II)I::this base_name="this"; identifier=id2string(method_identifier)+"::"+id2string(base_name); - param.set_base_name(base_name); - param.set_identifier(identifier); } else { - // in the variable table? + // if already present in the LVT ... base_name=variables[param_index][0].symbol_expr.get(ID_C_base_name); identifier=variables[param_index][0].symbol_expr.get(ID_identifier); + // ... then base_name will not be empty if(base_name.empty()) { + // my.package.ClassName.myMethodName:(II)I::argNT, where N is the local + // variable slot where the parameter is stored and T is a character + // indicating the type const typet &type=param.type(); char suffix=java_char_from_type(type); base_name="arg"+std::to_string(param_index)+suffix; identifier=id2string(method_identifier)+"::"+id2string(base_name); } - - param.set_base_name(base_name); - param.set_identifier(identifier); } + param.set_base_name(base_name); + param.set_identifier(identifier); - // add to symbol table + // Build a new symbol for the parameter and add it to the symbol table parameter_symbolt parameter_symbol; parameter_symbol.base_name=base_name; parameter_symbol.mode=ID_java; @@ -377,47 +401,45 @@ void java_bytecode_convert_methodt::convert( parameter_symbol.type=param.type(); symbol_table.add(parameter_symbol); - // add as a JVM variable - std::size_t slots=get_variable_slots(param); + // Add as a JVM local variable variables[param_index][0].symbol_expr=parameter_symbol.symbol_expr(); variables[param_index][0].is_parameter=true; variables[param_index][0].start_pc=0; variables[param_index][0].length=std::numeric_limits::max(); - variables[param_index][0].is_parameter=true; - param_index+=slots; - assert(param_index>0); + param_index+=java_local_variable_slots(param.type()); } + // The parameter slots detected in this function should agree with what + // java_parameter_count() thinks about this method + INVARIANT( + param_index==slots_for_parameters, + "java_parameter_count and local computation must agree"); + const bool is_virtual=!m.is_static && !m.is_final; - #if 0 - class_type.methods().push_back(class_typet::methodt()); - class_typet::methodt &method=class_type.methods().back(); - #else + // Construct a methodt, which lives within the class type; this object is + // never used for anything useful and could be removed class_typet::methodt method; - #endif - method.set_base_name(m.base_name); method.set_name(method_identifier); - method.set(ID_abstract, m.is_abstract); method.set(ID_is_virtual, is_virtual); - + method.type()=member_type; if(is_constructor(method)) method.set(ID_constructor, true); - method.type()=member_type; - // we add the symbol for the method - symbolt method_symbol; - method_symbol.name=method.get_name(); method_symbol.base_name=method.get_base_name(); method_symbol.mode=ID_java; method_symbol.location=m.source_location; method_symbol.location.set_function(method_identifier); + // Set up the pretty name for the method entry in the symbol table. + // The pretty name of a constructor includes the base name of the class + // instead of the internal method name "". For regular methods, it's + // just the base name of the method. if(method.get_base_name()=="") method_symbol.pretty_name=id2string(class_symbol.pretty_name)+"."+ id2string(class_symbol.base_name)+"()"; @@ -428,19 +450,20 @@ void java_bytecode_convert_methodt::convert( method_symbol.type=member_type; if(is_constructor(method)) method_symbol.type.set(ID_constructor, true); + current_method=method_symbol.name; method_has_this=code_type.has_this(); - - tmp_vars.clear(); if((!m.is_abstract) && (!m.is_native)) - method_symbol.value=convert_instructions( - m, code_type, method_symbol.name); + method_symbol.value=convert_instructions(m, code_type, method_symbol.name); // Replace the existing stub symbol with the real deal: const auto s_it=symbol_table.symbols.find(method.get_name()); - assert(s_it!=symbol_table.symbols.end()); + INVARIANT( + s_it!=symbol_table.symbols.end(), + "the symbol was there earlier on this function; it must be there now"); symbol_table.symbols.erase(s_it); + // Insert the method symbol in the symbol table symbol_table.add(method_symbol); } @@ -1138,6 +1161,12 @@ codet java_bytecode_convert_methodt::convert_instructions( } } + // Clean the list of temporary variables created by a call to `tmp_variable`. + // These are local variables in the goto function used to represent temporary + // values of the JVM operand stack, newly allocated objects before the + // constructor is called, ... + tmp_vars.clear(); + // Now that the control flow graph is built, set up our local variables // (these require the graph to determine live ranges) setup_local_variables(method, address_map); diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 17393900c64..8eba408ddff 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -41,7 +41,8 @@ class java_bytecode_convert_methodt:public messaget symbol_table(_symbol_table), max_array_length(_max_array_length), lazy_methods(_lazy_methods), - string_preprocess(_string_preprocess) + string_preprocess(_string_preprocess), + slots_for_parameters(0) { } @@ -61,11 +62,25 @@ class java_bytecode_convert_methodt:public messaget const size_t max_array_length; safe_pointer lazy_methods; + /// Fully qualified name of the method under translation. + /// Initialized by `convert`. + /// Example: "my.package.ClassName.myMethodName:(II)I" irep_idt method_id; + + /// A copy of `method_id` :/ irep_idt current_method; + + /// Return type of the method under conversion. + /// Initialized by `convert`. typet method_return_type; + java_string_library_preprocesst &string_preprocess; + /// Number of local variable slots used by the JVM to pass parameters upon + /// invocation of the method under translation. + /// Initialized in `convert`. + unsigned slots_for_parameters; + public: struct holet { @@ -76,6 +91,7 @@ class java_bytecode_convert_methodt:public messaget struct local_variable_with_holest { local_variablet var; + bool is_parameter; std::vector holes; }; @@ -93,7 +109,7 @@ class java_bytecode_convert_methodt:public messaget variablet() : symbol_expr(), start_pc(0), length(0), is_parameter(false) {} }; - protected: +protected: typedef std::vector variablest; expanding_vectort variables; std::set used_local_names; @@ -147,8 +163,18 @@ class java_bytecode_convert_methodt:public messaget void pop_residue(std::size_t n); void push(const exprt::operandst &o); + /// Determines whether the `method` is a constructor or a static initializer, + /// by checking whether its name equals either or bool is_constructor(const class_typet::methodt &method); + /// Returns true iff the slot index of the local variable of a method (coming + /// from the LVT) is a parameter of that method. Assumes that + /// `slots_for_parameters` is initialized upon call. + bool is_parameter(const local_variablet &v) + { + return v.indexsecond.parsed_class.name.empty()) continue; - debug() << "Converting class " << c_it->first << eom; + debug() << "Generating class/member symbols: " << c_it->first << eom; if(java_bytecode_convert_class( c_it->second, diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp index ee7d061f9d6..508ae02891c 100644 --- a/src/java_bytecode/java_local_variable_table.cpp +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -11,6 +11,7 @@ Author: Chris Smowton, chris.smowton@diffblue.com #include "java_bytecode_convert_method_class.h" #include "java_types.h" +#include "java_utils.h" #include @@ -55,8 +56,9 @@ struct procedure_local_cfg_baset< for(const auto &table_entry : method.exception_table) { auto findit=amap.find(table_entry.start_pc); - assert(findit!=amap.end() && - "Exception table entry doesn't point to an instruction?"); + INVARIANT( + findit!=amap.end(), + "Exception table entry doesn't point to an instruction?"); for(; findit->first=from); + PRECONDITION(to>=from); if(to!=from) var.holes.push_back({from, to-from}); } @@ -230,26 +236,35 @@ static void populate_variable_address_map( idx!=idxlim; ++idx) { - assert((!live_variable_at_address[idx]) && "Local variable table clash?"); + INVARIANT(!live_variable_at_address[idx], "Local variable table clash?"); live_variable_at_address[idx]=&*it; } } } -/// Usually a live variable range begins with a store instruction initialising +/// Populates the `predecessor_map` with a graph from local variable table +/// entries to their predecessors (table entries which may flow together and +/// thus may be considered the same live range). +/// +/// Usually a live variable range begins with a store instruction initializing /// the relevant local variable slot, but instead of or in addition to this, /// control flow edges may exist from bytecode addresses that fall under a table -/// entry which differs, but which has the same variable name and type -/// descriptor. This indicates a split live range, and will be recorded in the -/// predecessor map. -/// \par parameters: `firstvar`-`varlimit`: range of local variable table -/// entries to consider -/// `live_variable_at_address`: map from bytecode address to table entry (drawn -/// from firstvar-varlimit) live at that address -/// `amap`: map from bytecode address to instructions -/// \return Populates `predecessor_map` with a graph from local variable table -/// entries to their predecessors (table entries which may flow together and -/// thus may be considered the same live range). +/// entry which differs (or which fall under no table entry at all), but which +/// has the same variable name and type descriptor. This indicates a split live +/// range, and will be recorded in the predecessor map. +/// +/// \param firstvar: +/// range of local variable table entries to consider +/// \param varlimit: +/// range of local variable table entries to consider +/// \param live_variable_at_address: +/// map from bytecode address to table entry (drawn from firstvar-varlimit) +// live at that address +/// \param amap: +/// map from bytecode address to instructions, this is the CFG of the java +/// method +/// \param [out] predecessor_map: +/// the output of the function, populated as described above static void populate_predecessor_map( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, @@ -261,45 +276,76 @@ static void populate_predecessor_map( messaget msg(msg_handler); for(auto it=firstvar, itend=varlimit; it!=itend; ++it) { - // Parameters are irrelevant to us and shouldn't be changed: - if(it->var.start_pc==0) + // All entries of the "local_variable_table_with_holest" processed in this + // function concern the same Java Local Variable Table slot/register. This + // is because "find_initializers()" has already sorted them. + INVARIANT( + it->var.index==firstvar->var.index, + "all entries are for the same local variable slot"); + + // Parameters are irrelevant to us and shouldn't be changed. This is because + // they cannot have live predecessor ranges: they are initialized by the JVM + // and no other live variable range can flow into them. + if(it->is_parameter) continue; + msg.debug() << "ppm: processing var idx " << it->var.index + << " name '" << it->var.name << "' start-pc " + << it->var.start_pc << " len " << it->var.length + << "; holes " << it->holes.size() << messaget::eom; + // Find the last instruction within the live range: unsigned end_pc=it->var.start_pc+it->var.length; auto amapit=amap.find(end_pc); - assert(amapit!=amap.begin()); + INVARIANT( + amapit!=amap.begin(), + "current bytecode shall not be the first"); auto old_amapit=amapit; --amapit; if(old_amapit==amap.end()) { - assert( - end_pc>amapit->first && + INVARIANT( + end_pc>amapit->first, "Instruction live range doesn't align to instruction boundary?"); } - // Find vartable entries that flow into this one: + // Find vartable entries that flow into this one. For unknown reasons this + // loop iterates backwards, walking back from the last bytecode in the live + // range of variable it to the first one. For each value of the iterator + // "amapit" we search for instructions that jump into amapit's address + // (predecessors) unsigned new_start_pc=it->var.start_pc; for(; amapit->first>=it->var.start_pc; --amapit) { for(auto pred : amapit->second.predecessors) { + // pred is the address (byecode offset) of a instruction that jumps into + // amapit. Compute now a pointer to the variable-with-holes whose index + // equals that of amapit and which was alive on instruction pred, or a + // null pointer if no such variable exists (e.g., because no live range + // covers that instruction) auto pred_var= (predvar.index else if(!pred_var) { - // Flow from out of range? - // Check if this is an initialiser, and if so expand the live range + // Check if this is an initializer, and if so expand the live range // to include it, but don't check its predecessors: auto inst_before_this=amapit; - assert(inst_before_this!=amap.begin()); + INVARIANT( + inst_before_this!=amap.begin(), + "we shall not be on the first bytecode of the method"); --inst_before_this; if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred) { @@ -316,10 +362,17 @@ static void populate_predecessor_map( *(inst_before_this->second.source), it->var.index)) { - throw "local variable table: didn't find initialising store"; + msg.error() << "Local variable table: didn't find initializing " + << "store for predecessor of bytecode at address " + << amapit->first << " (" + << amapit->second.predecessors.size() + << " predecessors)" << msg.eom; + throw "local variable table: unexpected live ranges"; } new_start_pc=pred; } + // 3. Predecessor instruction is a different range associated to the + // same variable slot else { if(pred_var->var.name!=it->var.name || @@ -341,7 +394,7 @@ static void populate_predecessor_map( } } - // If a simple pre-block initialiser was found, + // If a simple pre-block initializer was found, // add it to the live range now: it->var.length+=(it->var.start_pc-new_start_pc); it->var.start_pc=new_start_pc; @@ -352,7 +405,7 @@ static void populate_predecessor_map( /// variable live ranges. /// \par parameters: `merge_vars`: Set of variables we want the common dominator /// for. -/// `dominator_analysis`: Already-initialised dominator tree +/// `dominator_analysis`: Already-initialized dominator tree /// \return Returns the bytecode address of the closest common dominator of all /// given variable table entries. In the worst case the function entry point /// should always satisfy this criterion. @@ -360,7 +413,7 @@ static unsigned get_common_dominator( const std::set &merge_vars, const java_cfg_dominatorst &dominator_analysis) { - assert(!merge_vars.empty()); + PRECONDITION(!merge_vars.empty()); unsigned first_pc=UINT_MAX; for(auto v : merge_vars) @@ -495,8 +548,8 @@ static void merge_variable_table_entries( /// `amap`: Map from bytecode address to instruction /// \return Side-effect: merges variable table entries which flow into one /// another (e.g. there are branches from one live range to another without -/// re-initialising the local slot). -void java_bytecode_convert_methodt::find_initialisers_for_slot( +/// re-initializing the local slot). +void java_bytecode_convert_methodt::find_initializers_for_slot( local_variable_table_with_holest::iterator firstvar, local_variable_table_with_holest::iterator varlimit, const address_mapt &amap, @@ -509,7 +562,7 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( populate_variable_address_map(firstvar, varlimit, live_variable_at_address); // Now find variables that flow together by - // walking backwards to find initialisers + // walking backwards to find initializers // or branches from other live ranges: predecessor_mapt predecessor_map; populate_predecessor_map( @@ -552,7 +605,7 @@ void java_bytecode_convert_methodt::find_initialisers_for_slot( continue; const auto &merge_vars=findit->second; - assert(merge_vars.size()>=2); + INVARIANT(merge_vars.size()>=2, "merging requires at least 2 variables"); merge_variable_table_entries( *merge_into, @@ -587,13 +640,13 @@ static void walk_to_next_index( it1=old_it2; } -/// See `find_initialisers_for_slot` above for more detail. +/// See `find_initializers_for_slot` above for more detail. /// \par parameters: `vars`: Local variable table /// `amap`: Map from bytecode index to instruction /// `dominator_analysis`: Already computed dominator tree for the Java function /// described by `amap` /// \return Combines entries in `vars` which flow together -void java_bytecode_convert_methodt::find_initialisers( +void java_bytecode_convert_methodt::find_initializers( local_variable_table_with_holest &vars, const address_mapt &amap, const java_cfg_dominatorst &dominator_analysis) @@ -608,7 +661,7 @@ void java_bytecode_convert_methodt::find_initialisers( auto itend=vars.end(); walk_to_next_index(it1, it2, itend); for(; it1!=itend; walk_to_next_index(it1, it2, itend)) - find_initialisers_for_slot(it1, it2, amap, dominator_analysis); + find_initializers_for_slot(it1, it2, amap, dominator_analysis); } /// See above @@ -636,7 +689,7 @@ static void cleanup_var_table( vars_with_holes.resize(vars_with_holes.size()-toremove); } -/// See `find_initialisers_for_slot` above for more detail. +/// See `find_initializers_for_slot` above for more detail. /// \par parameters: `m`: Java method /// `amap`: Map from bytecode indices to instructions in `m` /// \return Populates `this->vars_with_holes` equal to @@ -651,14 +704,24 @@ void java_bytecode_convert_methodt::setup_local_variables( method_with_amapt dominator_args(m, amap); dominator_analysis(dominator_args); +#ifdef DEBUG + debug() << "jcm: setup-local-vars: m.is_static " + << m.is_static << " m.signature " << m.signature << eom; + debug() << "jcm: setup-local-vars: lv arg slots " + << slots_for_parameters << eom; + debug() << "jcm: setup-local-vars: lvt size " + << m.local_variable_table.size() << eom; +#endif + // Find out which local variable table entries should be merged: - // Wrap each entry so we have somewhere to record live ranges with holes: + // Wrap each entry so we have a data structure to work during function calls, + // where we record live ranges with holes: std::vector vars_with_holes; for(const auto &v : m.local_variable_table) - vars_with_holes.push_back({v, {}}); + vars_with_holes.push_back({v, is_parameter(v), {}}); // Merge variable records: - find_initialisers(vars_with_holes, amap, dominator_analysis); + find_initializers(vars_with_holes, amap, dominator_analysis); // Clean up removed records from the variable table: cleanup_var_table(vars_with_holes); @@ -673,9 +736,14 @@ void java_bytecode_convert_methodt::setup_local_variables( // length values in the local variable table for(const auto &v : vars_with_holes) { - if(v.var.start_pc==0) // Parameter? + if(v.is_parameter) continue; +#ifdef DEBUG + debug() << "jcm: setup-local-vars: merged variable: idx " << v.var.index + << " name " << v.var.name << " v.var.signature '" + << v.var.signature << "' holes " << v.holes.size() << eom; +#endif typet t=java_type_from_string(v.var.signature); std::ostringstream id_oss; id_oss << method_id << "::" << v.var.start_pc << "::" << v.var.name; @@ -683,6 +751,9 @@ void java_bytecode_convert_methodt::setup_local_variables( symbol_exprt result(identifier, t); result.set(ID_C_base_name, v.var.name); + // Create a new local variable in the variables[] array, the result of + // merging multiple local variables with equal name (parameters excluded) + // into a single local variable with holes variables[v.var.index].push_back(variablet()); auto &newv=variables[v.var.index].back(); newv.symbol_expr=result; @@ -690,6 +761,7 @@ void java_bytecode_convert_methodt::setup_local_variables( newv.length=v.var.length; newv.holes=std::move(v.holes); + // Register the local variable in the symbol table symbolt new_symbol; new_symbol.name=identifier; new_symbol.type=t; diff --git a/src/java_bytecode/java_utils.cpp b/src/java_bytecode/java_utils.cpp index 0754eae13a3..656bfe1d7b2 100644 --- a/src/java_bytecode/java_utils.cpp +++ b/src/java_bytecode/java_utils.cpp @@ -9,8 +9,10 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_utils.h" +#include "java_types.h" bool java_is_array_type(const typet &type) { @@ -20,3 +22,33 @@ bool java_is_array_type(const typet &type) to_struct_type(type).get_tag()), "java::array["); } + +unsigned java_local_variable_slots(const typet &t) +{ + unsigned bitwidth; + + bitwidth=t.get_unsigned_int(ID_width); + INVARIANT( + bitwidth==0 || + bitwidth==8 || + bitwidth==16 || + bitwidth==32 || + bitwidth==64, + "all types constructed in java_types.cpp encode JVM types " + "with these bit widths"); + INVARIANT( + bitwidth!=0 || t.id()==ID_pointer, + "if bitwidth is 0, then this a reference to a class, which is 1 slot"); + + return bitwidth==64 ? 2 : 1; +} + +unsigned java_method_parameter_slots(const code_typet &t) +{ + unsigned slots=0; + + for(const auto &p : t.parameters()) + slots+=java_local_variable_slots(p.type()); + + return slots; +} diff --git a/src/java_bytecode/java_utils.h b/src/java_bytecode/java_utils.h index af69bdad4b9..bd88cb71bb8 100644 --- a/src/java_bytecode/java_utils.h +++ b/src/java_bytecode/java_utils.h @@ -9,9 +9,19 @@ Author: Daniel Kroening, kroening@kroening.com #include +#include "java_bytecode_parse_tree.h" + #ifndef CPROVER_JAVA_BYTECODE_JAVA_UTILS_H #define CPROVER_JAVA_BYTECODE_JAVA_UTILS_H bool java_is_array_type(const typet &type); +/// Returns the number of JVM local variables (slots) taken by a local variable +/// that, when translated to goto, has type \p t. +unsigned java_local_variable_slots(const typet &t); + +/// Returns the the number of JVM local variables (slots) used by the JVM to +/// pass, upon call, the arguments of a Java method whose type is \p t. +unsigned java_method_parameter_slots(const code_typet &t); + #endif // CPROVER_JAVA_BYTECODE_JAVA_UTILS_H