diff --git a/src/java_bytecode/Makefile b/src/java_bytecode/Makefile index 5a9626b1a6d..f13c327ef77 100644 --- a/src/java_bytecode/Makefile +++ b/src/java_bytecode/Makefile @@ -5,7 +5,7 @@ SRC = java_bytecode_language.cpp java_bytecode_parse_tree.cpp \ java_bytecode_typecheck_type.cpp java_bytecode_internal_additions.cpp \ java_root_class.cpp java_bytecode_parser.cpp bytecode_info.cpp \ java_class_loader.cpp jar_file.cpp java_object_factory.cpp \ - java_bytecode_convert_method.cpp + java_bytecode_convert_method.cpp java_local_variable_table.cpp INCLUDES= -I .. diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 8ee9ba3db46..854d50ac9ba 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -6,8 +6,6 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ -#define DEBUG - #ifdef DEBUG #include #endif @@ -21,12 +19,18 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include +#include + #include "java_bytecode_convert_method.h" +#include "java_bytecode_convert_method_class.h" #include "bytecode_info.h" #include "java_types.h" #include #include +#include +#include class patternt { @@ -51,237 +55,6 @@ class patternt const char *p; }; -class java_bytecode_convert_methodt:public messaget -{ -public: - java_bytecode_convert_methodt( - symbol_tablet &_symbol_table, - message_handlert &_message_handler): - messaget(_message_handler), - symbol_table(_symbol_table) - { - } - - typedef java_bytecode_parse_treet::methodt methodt; - typedef java_bytecode_parse_treet::instructiont instructiont; - typedef methodt::instructionst instructionst; - - void operator()(const symbolt &class_symbol, const methodt &method) - { - convert(class_symbol, method); - } - -protected: - symbol_tablet &symbol_table; - - irep_idt current_method; - typet method_return_type; - - class variablet - { - public: - symbol_exprt symbol_expr; - size_t start_pc; - size_t length; - bool is_parameter; - }; - - typedef std::vector variablest; - expanding_vector variables; - std::set used_local_names; - bool method_has_this; - - typedef enum instruction_sizet - { - INST_INDEX=2, INST_INDEX_CONST=3 - } instruction_sizet; - - // return corresponding reference of variable - const variablet &find_variable_for_slot( - size_t address, - variablest &var_list, - instruction_sizet inst_size) - { - for(const variablet &var : var_list) - { - size_t start_pc=var.start_pc; - size_t length=var.length; - if(address+(size_t)inst_size>=start_pc && - address::max(); - return var_list[list_length]; - } - - // JVM local variables - enum variable_cast_argumentt - { - CAST_AS_NEEDED, - NO_CAST - }; - - const exprt variable( - const exprt &arg, - char type_char, - size_t address, - instruction_sizet inst_size, - variable_cast_argumentt do_cast) - { - irep_idt number=to_constant_expr(arg).get_value(); - - std::size_t number_int=safe_string2size_t(id2string(number)); - typet t=java_type_from_char(type_char); - variablest &var_list=variables[number_int]; - - // search variable in list for correct frame / address if necessary - const variablet &var= - find_variable_for_slot(address, var_list, inst_size); - - if(var.symbol_expr.get_identifier().empty()) - { - // an un-named local variable - irep_idt base_name="anonlocal::"+id2string(number)+type_char; - irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); - - symbol_exprt result(identifier, t); - result.set(ID_C_base_name, base_name); - used_local_names.insert(result); - - return result; - } - else - { - exprt result=var.symbol_expr; - if(do_cast==CAST_AS_NEEDED && t!=result.type()) - result=typecast_exprt(result, t); - return result; - } - } - - // temporary variables - std::list tmp_vars; - - symbol_exprt tmp_variable(const std::string &prefix, const typet &type) - { - irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size()); - irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); - - auxiliary_symbolt tmp_symbol; - tmp_symbol.base_name=base_name; - tmp_symbol.is_static_lifetime=false; - tmp_symbol.mode=ID_java; - tmp_symbol.name=identifier; - tmp_symbol.type=type; - symbol_table.add(tmp_symbol); - - symbol_exprt result(identifier, type); - result.set(ID_C_base_name, base_name); - tmp_vars.push_back(result); - - return result; - } - - // JVM program locations - irep_idt label(const irep_idt &address) - { - return "pc"+id2string(address); - } - - // JVM Stack - typedef std::vector stackt; - stackt stack; - - exprt::operandst pop(std::size_t n) - { - if(stack.size() successors; - std::set predecessors; - codet code; - stackt stack; - bool done; - }; - - typedef std::map address_mapt; - - struct block_tree_nodet - { - bool leaf; - std::vector branch_addresses; - std::vector branch; - block_tree_nodet():leaf(false) {} - explicit block_tree_nodet(bool l):leaf(l) {} - static block_tree_nodet get_leaf() { return block_tree_nodet(true); } - }; - - static void replace_goto_target( - codet &repl, - const irep_idt &old_label, - const irep_idt &new_label); - - code_blockt &get_block_for_pcrange( - block_tree_nodet &tree, - code_blockt &this_block, - unsigned address_start, - unsigned address_limit, - unsigned next_block_start_address); - - code_blockt &get_or_create_block_for_pcrange( - block_tree_nodet &tree, - code_blockt &this_block, - unsigned address_start, - unsigned address_limit, - unsigned next_block_start_address, - const address_mapt &amap, - bool allow_merge=true); - - // conversion - void convert(const symbolt &class_symbol, const methodt &); - void convert(const instructiont &); - - codet convert_instructions( - const instructionst &, const code_typet &); - - const bytecode_infot &get_bytecode_info(const irep_idt &statement); -}; - const size_t SLOTS_PER_INTEGER(1u); const size_t INTEGER_WIDTH(64u); static size_t count_slots( @@ -309,7 +82,8 @@ static void cast_if_necessary(binary_relation_exprt &condition) exprt &lhs(condition.lhs()); exprt &rhs(condition.rhs()); const typet &lhs_type(lhs.type()); - if(lhs_type==rhs.type()) return; + if(lhs_type==rhs.type()) + return; rhs=typecast_exprt(rhs, lhs_type); } @@ -320,6 +94,103 @@ static irep_idt strip_java_namespace_prefix(const irep_idt to_strip) return to_strip_str.substr(6, std::string::npos); } +java_bytecode_convert_methodt::java_bytecode_convert_methodt( + symbol_tablet &_symbol_table, + message_handlert &_message_handler) : + messaget(_message_handler), + symbol_table(_symbol_table) +{ +} + +const exprt java_bytecode_convert_methodt::variable( + const exprt &arg, + char type_char, + size_t address, + variable_cast_argumentt do_cast) +{ + irep_idt number=to_constant_expr(arg).get_value(); + + std::size_t number_int=safe_string2size_t(id2string(number)); + typet t=java_type_from_char(type_char); + variablest &var_list=variables[number_int]; + + // search variable in list for correct frame / address if necessary + const variablet &var=find_variable_for_slot(address, var_list); + + if(var.symbol_expr.get_identifier().empty()) + { + // an un-named local variable + irep_idt base_name="anonlocal::"+id2string(number)+type_char; + irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); + + symbol_exprt result(identifier, t); + result.set(ID_C_base_name, base_name); + used_local_names.insert(result); + return result; + } + else + { + exprt result=var.symbol_expr; + if(do_cast==CAST_AS_NEEDED && t!=result.type()) + result=typecast_exprt(result, t); + return result; + } +} + +symbol_exprt java_bytecode_convert_methodt::tmp_variable( + const std::string &prefix, + const typet &type) +{ + irep_idt base_name=prefix+"_tmp"+std::to_string(tmp_vars.size()); + irep_idt identifier=id2string(current_method)+"::"+id2string(base_name); + + auxiliary_symbolt tmp_symbol; + tmp_symbol.base_name=base_name; + tmp_symbol.is_static_lifetime=false; + tmp_symbol.mode=ID_java; + tmp_symbol.name=identifier; + tmp_symbol.type=type; + symbol_table.add(tmp_symbol); + + symbol_exprt result(identifier, type); + result.set(ID_C_base_name, base_name); + tmp_vars.push_back(result); + + return result; +} + +irep_idt java_bytecode_convert_methodt::label(const irep_idt &address) +{ + return "pc"+id2string(address); +} + +exprt::operandst java_bytecode_convert_methodt::pop(std::size_t n) +{ + if(stack.size() working_set; if(!instructions.empty()) working_set.insert(instructions.front().address); @@ -1198,7 +1062,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // store value into some local variable assert(op.size()==1 && results.empty()); - exprt var=variable(arg0, statement[0], i_it->address, INST_INDEX, NO_CAST); + exprt var=variable(arg0, statement[0], i_it->address, NO_CAST); exprt toassign=op[0]; if('a'==statement[0] && toassign.type()!=var.type()) @@ -1229,7 +1093,7 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement==patternt("?load")) { // load a value from a local variable - results[0]=variable(arg0, statement[0], i_it->address, INST_INDEX, CAST_AS_NEEDED); + results[0]=variable(arg0, statement[0], i_it->address, CAST_AS_NEEDED); } else if(statement=="ldc" || statement=="ldc_w" || statement=="ldc2" || statement=="ldc2_w") @@ -1297,7 +1161,7 @@ codet java_bytecode_convert_methodt::convert_instructions( // if(retaddr==5) goto 5; else if(retaddr==10) goto 10; ... assert(op.empty() && results.empty()); code_blockt branches; - auto retvar=variable(arg0, 'a', i_it->address, INST_INDEX, NO_CAST); + auto retvar=variable(arg0, 'a', i_it->address, NO_CAST); assert(!jsr_ret_targets.empty()); for(size_t idx=0, idxlim=jsr_ret_targets.size(); idx!=idxlim; ++idx) { @@ -1435,9 +1299,9 @@ codet java_bytecode_convert_methodt::convert_instructions( else if(statement=="iinc") { code_assignt code_assign; - code_assign.lhs()=variable(arg0, 'i', i_it->address, INST_INDEX_CONST, NO_CAST); + code_assign.lhs()=variable(arg0, 'i', i_it->address, NO_CAST); code_assign.rhs()=plus_exprt( - variable(arg0, 'i', i_it->address, INST_INDEX_CONST, CAST_AS_NEEDED), + variable(arg0, 'i', i_it->address, CAST_AS_NEEDED), typecast_exprt(arg1, java_int_type())); c=code_assign; } @@ -1970,10 +1834,12 @@ codet java_bytecode_convert_methodt::convert_instructions( // Try to recover block structure as indicated in the local variable table: // The block tree node mirrors the block structure of root_block, - // indexing the Java PCs were each subblock starts and ends. + // indexing the Java PCs where each subblock starts and ends. block_tree_nodet root; code_blockt root_block; + // First create a simple flat list of basic blocks. We'll add lexical nesting + // constructs as variable live-ranges require next. bool start_new_block=true; for(const auto &address_pair : address_map) { @@ -2038,13 +1904,13 @@ codet java_bytecode_convert_methodt::convert_instructions( // Skip anonymous variables: if(v.symbol_expr.get_identifier()==irep_idt()) continue; - code_declt d(v.symbol_expr); auto &block=get_block_for_pcrange( root, root_block, v.start_pc, v.start_pc+v.length, std::numeric_limits::max()); + code_declt d(v.symbol_expr); block.operands().insert(block.operands().begin(), d); } } diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h new file mode 100644 index 00000000000..8a456d0a99a --- /dev/null +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -0,0 +1,191 @@ +/*******************************************************************\ + +Module: JAVA Bytecode Language Conversion + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_CLASS_H +#define CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_METHOD_CLASS_H + +#include +#include +#include +#include +#include +#include "java_bytecode_parse_tree.h" + +#include +#include + +class symbol_tablet; +class symbolt; + +class java_bytecode_convert_methodt:public messaget +{ +public: + java_bytecode_convert_methodt( + symbol_tablet &_symbol_table, + message_handlert &_message_handler); + + typedef java_bytecode_parse_treet::methodt methodt; + typedef java_bytecode_parse_treet::instructiont instructiont; + typedef methodt::instructionst instructionst; + typedef methodt::local_variable_tablet local_variable_tablet; + typedef methodt::local_variablet local_variablet; + + void operator()(const symbolt &class_symbol, const methodt &method) + { + convert(class_symbol, method); + } + +protected: + irep_idt method_id; + symbol_tablet &symbol_table; + + irep_idt current_method; + typet method_return_type; + +public: + struct holet + { + unsigned start_pc; + unsigned length; + }; + + struct local_variable_with_holest + { + local_variablet var; + std::vector holes; + }; + + typedef std::vector + local_variable_table_with_holest; + +protected: + class variablet + { + public: + symbol_exprt symbol_expr; + size_t start_pc; + size_t length; + bool is_parameter; + std::vector holes; + variablet() : symbol_expr(), is_parameter(false) {} + }; + + typedef std::vector variablest; + expanding_vector variables; + std::set used_local_names; + bool method_has_this; + + // return corresponding reference of variable + const variablet &find_variable_for_slot(size_t address, variablest &var_list); + + // JVM local variables + enum variable_cast_argumentt + { + CAST_AS_NEEDED, + NO_CAST + }; + + const exprt variable( + const exprt &arg, + char type_char, + size_t address, + variable_cast_argumentt do_cast); + + // temporary variables + std::list tmp_vars; + + symbol_exprt tmp_variable(const std::string &prefix, const typet &type); + + // JVM program locations + irep_idt label(const irep_idt &address); + + // JVM Stack + typedef std::vector stackt; + stackt stack; + + exprt::operandst pop(std::size_t n); + + void push(const exprt::operandst &o); + + struct converted_instructiont + { + converted_instructiont( + const instructionst::const_iterator &it, + const codet &_code):source(it), code(_code), done(false) + {} + + instructionst::const_iterator source; + std::list successors; + std::set predecessors; + codet code; + stackt stack; + bool done; + }; + +public: + typedef std::map address_mapt; + typedef cfg_dominators_templatet + java_cfg_dominatorst; + +protected: + void find_initialisers( + local_variable_table_with_holest& vars, + const address_mapt& amap, + const java_cfg_dominatorst& doms); + + void find_initialisers_for_slot( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const address_mapt& amap, + const java_cfg_dominatorst& doms); + + void setup_local_variables(const methodt& m, const address_mapt& amap); + + struct block_tree_nodet + { + bool leaf; + std::vector branch_addresses; + std::vector branch; + block_tree_nodet():leaf(false) {} + explicit block_tree_nodet(bool l):leaf(l) {} + static block_tree_nodet get_leaf() { return block_tree_nodet(true); } + }; + + static void replace_goto_target( + codet &repl, + const irep_idt &old_label, + const irep_idt &new_label); + + code_blockt &get_block_for_pcrange( + block_tree_nodet &tree, + code_blockt &this_block, + unsigned address_start, + unsigned address_limit, + unsigned next_block_start_address); + + code_blockt &get_or_create_block_for_pcrange( + block_tree_nodet &tree, + code_blockt &this_block, + unsigned address_start, + unsigned address_limit, + unsigned next_block_start_address, + const address_mapt &amap, + bool allow_merge=true); + + // conversion + void convert(const symbolt &class_symbol, const methodt &); + void convert(const instructiont &); + + codet convert_instructions( + const methodt &, + const code_typet &); + + const bytecode_infot &get_bytecode_info(const irep_idt &statement); +}; + +#endif diff --git a/src/java_bytecode/java_local_variable_table.cpp b/src/java_bytecode/java_local_variable_table.cpp new file mode 100644 index 00000000000..d70fd18264e --- /dev/null +++ b/src/java_bytecode/java_local_variable_table.cpp @@ -0,0 +1,850 @@ +/*******************************************************************\ + +Module: Java local variable table processing + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +#include "java_bytecode_convert_method_class.h" +#include "java_types.h" + +#include + +#include +#include + +// Specialise the CFG representation to work over Java instead of GOTO programs. +// This must be done at global scope due to template resolution rules. + +template +struct procedure_local_cfg_baset< + T, + const java_bytecode_convert_methodt::address_mapt, + unsigned> : + public graph > +{ + typedef java_bytecode_convert_methodt::address_mapt address_mapt; + typedef std::map entry_mapt; + entry_mapt entry_map; + + procedure_local_cfg_baset() {} + + void operator()(const address_mapt& amap) + { + for(const auto& inst : amap) + { + // Map instruction PCs onto node indices: + entry_map[inst.first]=this->add_node(); + // Map back: + (*this)[entry_map[inst.first]].PC=inst.first; + } + for(const auto& inst : amap) + { + for(auto succ : inst.second.successors) + this->add_edge(entry_map.at(inst.first), entry_map.at(succ)); + } + } + + unsigned get_first_node(const address_mapt& amap) const + { + return amap.begin()->first; + } + unsigned get_last_node(const address_mapt& amap) const + { + return (--amap.end())->first; + } + unsigned nodes_empty(const address_mapt& amap) const + { + return amap.empty(); + } +}; + +// Grab some class typdefs for brevity: +typedef java_bytecode_convert_methodt::holet + holet; +typedef java_bytecode_convert_methodt::local_variable_with_holest + local_variable_with_holest; +typedef java_bytecode_convert_methodt::local_variable_table_with_holest + local_variable_table_with_holest; +typedef java_bytecode_convert_methodt::address_mapt + address_mapt; +typedef java_bytecode_convert_methodt::java_cfg_dominatorst + java_cfg_dominatorst; + +// Comparators for local variables: + +static bool lt_index( + const local_variable_with_holest& a, + const local_variable_with_holest& b) +{ + return a.var.indexvar.start_pcvar.start_pc; +} + +// The predecessor map, and a top-sorting comparator: + +typedef std::map< + local_variable_with_holest*, + std::set > + predecessor_mapt; + +struct is_predecessor_oft +{ + const predecessor_mapt& order; + + explicit is_predecessor_oft(const predecessor_mapt& _order) : order(_order) {} + + bool operator()( + local_variable_with_holest* a, + local_variable_with_holest* b) const + { + auto findit=order.find(a); + if(findit==order.end()) + return false; + return findit->second.count(b)>0; + } +}; + +// Helper routines for the find-initialisers code below: + +/*******************************************************************\ + +Function: gather_transitive_predecessors + + Inputs: `start`: Variable to find the predecessors of + `predecessor_map`: Map from local variables + to sets of predecessors + Outputs: `result`: populated with all transitive predecessors of + `start` found in `predecessor_map` + + Purpose: See above + +\*******************************************************************/ + +static void gather_transitive_predecessors( + local_variable_with_holest* start, + const predecessor_mapt& predecessor_map, + std::set& result) +{ + if(!result.insert(start).second) + return; + auto findit=predecessor_map.find(start); + if(findit==predecessor_map.end()) + return; + for(const auto pred : findit->second) + gather_transitive_predecessors(pred, predecessor_map, result); +} + +/*******************************************************************\ + +Function: is_store_to_slot + + Inputs: `inst`: Java bytecode instruction + `slotidx`: local variable slot number + + Outputs: Returns true if `inst` is any form of store instruction + targeting slot `slotidx` + + Purpose: See above + +\*******************************************************************/ + +static bool is_store_to_slot( + const java_bytecode_convert_methodt::instructiont& inst, + unsigned slotidx) +{ + const std::string prevstatement=id2string(inst.statement); + if(!(prevstatement.size()>=1 && prevstatement.substr(1, 5)=="store")) + return false; + + std::string storeslot; + if(inst.args.size()==1) + { + // Store with an argument: + const auto& arg=inst.args[0]; + storeslot=id2string(to_constant_expr(arg).get_value()); + } + else + { + // Store shorthands, like "store_0", "store_1" + assert(prevstatement[6]=='_' && prevstatement.size()==8); + storeslot=prevstatement[7]; + assert(isdigit(storeslot[0])); + } + auto storeslotidx=safe_string2unsigned(storeslot); + return storeslotidx==slotidx; +} + +/*******************************************************************\ + +Function: maybe_add_hole + + Inputs: `from`, `to`: bounds of a gap in `var`'s live range, + inclusive and exclusive respectively + + Outputs: Adds a hole to `var`, unless it would be of zero size. + + Purpose: See above + +\*******************************************************************/ + +static void maybe_add_hole( + local_variable_with_holest& var, + unsigned from, + unsigned to) +{ + assert(to>=from); + if(to!=from) + var.holes.push_back({from, to-from}); +} + +/*******************************************************************\ + +Function: populate_variable_address_map + + Inputs: `firstvar`-`varlimit`: range of local variable table + entries to consider + + Outputs: `live_variable_at_address` is populated with a sequence of + local variable table entry pointers, such that + `live_variable_at_address[addr]` yields the unique table + entry covering that address. Asserts if entries overlap. + + Purpose: See above + +\*******************************************************************/ + +static void populate_variable_address_map( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + std::vector& live_variable_at_address) +{ + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + { + if(it->var.start_pc+it->var.length>live_variable_at_address.size()) + live_variable_at_address.resize(it->var.start_pc+it->var.length); + + for(unsigned idx=it->var.start_pc, + idxlim=it->var.start_pc+it->var.length; + idx!=idxlim; + ++idx) + { + assert((!live_variable_at_address[idx]) && "Local variable table clash?"); + live_variable_at_address[idx]=&*it; + } + } +} + +/*******************************************************************\ + +Function: populate_predecessor_map + + Inputs: `firstvar`-`varlimit`: range of local variable table + entries to consider + `live_variable_at_address`: map from bytecode address + to table entry (drawn from firstvar-varlimit) live + at that address + `amap`: map from bytecode address to instructions + + Outputs: Populates `predecessor_map` with a graph from local variable + table entries to their predecessors (table entries which + may flow together and thus may be considered the same live + range). + + Purpose: Usually a live variable range begins with a store + instruction initialising the relevant local variable slot, + but instead of or in addition to this, control flow edges + may exist from bytecode addresses that fall under a + table entry which differs, but which has the same variable + name and type descriptor. This indicates a split live + range, and will be recorded in the predecessor map. + +\*******************************************************************/ + +static void populate_predecessor_map( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const std::vector& live_variable_at_address, + const address_mapt& amap, + predecessor_mapt& predecessor_map, + message_handlert& msg_handler) +{ + messaget msg(msg_handler); + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + { + // Parameters are irrelevant to us and shouldn't be changed: + if(it->var.start_pc==0) + continue; + + // Find the last instruction within the live range: + unsigned end_pc=it->var.start_pc+it->var.length; + auto amapit=amap.find(end_pc); + assert(amapit!=amap.begin()); + auto old_amapit=amapit; + --amapit; + if(old_amapit==amap.end()) + { + assert( + end_pc>amapit->first && + "Instruction live range doesn't align to instruction boundary?"); + } + + // Find vartable entries that flow into this one: + unsigned new_start_pc=it->var.start_pc; + for(; amapit->first>=it->var.start_pc; --amapit) + { + for(auto pred : amapit->second.predecessors) + { + auto pred_var= + (predfirst!=it->var.start_pc || inst_before_this->first!=pred) + { + // These sorts of infeasible edges can occur because jsr + // handling is presently vague (any subroutine is assumed to + // be able to return to any callsite) + msg.warning() << "Local variable table: ignoring flow from " + << "out of range for " << it->var.name << " " + << pred << " -> " << amapit->first + << messaget::eom; + continue; + } + if(!is_store_to_slot( + *(inst_before_this->second.source), + it->var.index)) + { + throw "local variable table: didn't find initialising store"; + } + new_start_pc=pred; + } + else + { + if(pred_var->var.name!=it->var.name || + pred_var->var.signature!=it->var.signature) + { + // These sorts of infeasible edges can occur because + // jsr handling is presently vague (any subroutine is + // assumed to be able to return to any callsite) + msg.warning() << "Local variable table: ignoring flow from " + << "clashing variable for " + << it->var.name << " " << pred << " -> " + << amapit->first << messaget::eom; + continue; + } + // OK, this is a flow from a similar but + // distinct entry in the local var table. + predecessor_map[&*it].insert(pred_var); + } + } + } + + // If a simple pre-block initialiser was found, + // add it to the live range now: + it->var.length+=(it->var.start_pc-new_start_pc); + it->var.start_pc=new_start_pc; + } +} + +/*******************************************************************\ + +Function: get_common_dominator + + Inputs: `merge_vars`: Set of variables we want the common dominator + for. + `dominator_analysis`: Already-initialised dominator tree + + Outputs: Returns the bytecode address of the closest common + dominator of all given variable table entries. + In the worst case the function entry point should always + satisfy this criterion. + + Purpose: Used to find out where to put a variable declaration that + subsumes several variable live ranges. + +\*******************************************************************/ + +static unsigned get_common_dominator( + const std::set& merge_vars, + const java_cfg_dominatorst& dominator_analysis) +{ + assert(!merge_vars.empty()); + + unsigned first_pc=UINT_MAX; + for(auto v : merge_vars) + { + if(v->var.start_pcvar.start_pc; + } + + std::vector candidate_dominators; + for(auto v : merge_vars) + { + const auto& dominator_nodeidx= + dominator_analysis.cfg.entry_map.at(v->var.start_pc); + const auto& this_var_doms= + dominator_analysis.cfg[dominator_nodeidx].dominators; + for(const auto this_var_dom : this_var_doms) + if(this_var_dom<=first_pc) + candidate_dominators.push_back(this_var_dom); + } + std::sort(candidate_dominators.begin(), candidate_dominators.end()); + + // Working from the back, simply find the first PC + // that occurs merge_vars.size() times and therefore + // dominates all vars we seek to merge: + for(auto domit=candidate_dominators.rbegin(), + domitend=candidate_dominators.rend(); + domit!=domitend; + /* Don't increment here */) + { + unsigned repeats=0; + auto dom=*domit; + while(domit!=domitend && *domit==dom) + { + ++domit; + ++repeats; + } + assert(repeats<=merge_vars.size()); + if(repeats==merge_vars.size()) + return dom; + } + + throw "variable live ranges with no common dominator?"; +} + +/*******************************************************************\ + +Function: populate_live_range_holes + + Inputs: `merge_vars`: a set of 2+ variable table entries to merge + `expanded_live_range_start`: address where the merged + variable will be declared + + Outputs: Adds holes to `merge_into`, indicating where gaps in the + variable's live range fall. For example, if the + declaration happens at address 10 and the entries in + `merge_into` have live ranges [(20-30), (40-50)] then + holes will be added at (10-20) and (30-40). + + Purpose: See above + +\*******************************************************************/ + +static void populate_live_range_holes( + local_variable_with_holest& merge_into, + const std::set& merge_vars, + unsigned expanded_live_range_start) +{ + std::vector sorted_by_startpc( + merge_vars.begin(), merge_vars.end()); + std::sort(sorted_by_startpc.begin(), sorted_by_startpc.end(), lt_startpc); + + maybe_add_hole( + merge_into, + expanded_live_range_start, + sorted_by_startpc[0]->var.start_pc); + for(unsigned idx=0; idxvar.start_pc+sorted_by_startpc[idx]->var.length, + sorted_by_startpc[idx+1]->var.start_pc); + } +} + +/*******************************************************************\ + +Function: merge_variable_table_entries + + Inputs: `merge_vars`: a set of 2+ variable table entries to merge + `dominator_analysis`: already-calculated dominator tree + + Outputs: Populates `merge_into` as a combined variable table entry, + with live range holes if the `merge_vars` entries do not + cover a contiguous address range, beginning the combined + live range at the common dominator of all `merge_vars`. + + Purpose: See above + +\*******************************************************************/ + +static void merge_variable_table_entries( + local_variable_with_holest& merge_into, + const std::set& merge_vars, + const java_cfg_dominatorst& dominator_analysis, + std::ostream& debug_out) +{ + // Because we need a lexically-scoped declaration, + // we must have the merged variable + // enter scope both in a block that dominates all entries, and which + // precedes them in program order. + unsigned found_dominator= + get_common_dominator(merge_vars, dominator_analysis); + + // Populate the holes in the live range + // (addresses where the new variable will be in scope, + // but references to this stack slot should not resolve to it + // as it was not visible in the original local variable table) + populate_live_range_holes(merge_into, merge_vars, found_dominator); + + unsigned last_pc=0; + for(auto v : merge_vars) + { + if(v->var.start_pc+v->var.length>last_pc) + last_pc=v->var.start_pc+v->var.length; + } + + // Apply the changes: + merge_into.var.start_pc=found_dominator; + merge_into.var.length=last_pc-found_dominator; + +#ifdef DEBUG + debug_out << "Merged " << merge_vars.size() << " variables named " + << merge_into.var.name << "; new live range " + << merge_into.var.start_pc << "-" + << merge_into.var.start_pc + merge_into.var.length + << messaget::eom; +#endif + + // Nuke the now-subsumed var-table entries: + for(auto& v : merge_vars) + if(v!=&merge_into) + v->var.length=0; +} + +/*******************************************************************\ + +Function: find_initialisers_for_slot + + Inputs: `firstvar`-`varlimit`: sequence of variable table entries, + all of which should concern the same slot index. + `amap`: Map from bytecode address to instruction + + Outputs: Side-effect: merges variable table entries which flow into + one another (e.g. there are branches from one live range + to another without re-initialising the local slot). + + Purpose: Given a sequence of users of the same local variable slot, + this figures out which ones are related by control flow, + and combines them into a single entry with holes, such that + after combination we can create a single + declaration per variable table entry, + placed at the live range's start address, which may + be moved back so that the declaration dominates all uses. + +\*******************************************************************/ + +void java_bytecode_convert_methodt::find_initialisers_for_slot( + local_variable_table_with_holest::iterator firstvar, + local_variable_table_with_holest::iterator varlimit, + const address_mapt& amap, + const java_cfg_dominatorst& dominator_analysis) +{ + // Build a simple map from instruction PC to the variable + // live in this slot at that PC, and a map from each variable + // to variables that flow into it: + std::vector live_variable_at_address; + populate_variable_address_map(firstvar, varlimit, live_variable_at_address); + + // Now find variables that flow together by + // walking backwards to find initialisers + // or branches from other live ranges: + predecessor_mapt predecessor_map; + populate_predecessor_map( + firstvar, + varlimit, + live_variable_at_address, + amap, + predecessor_map, + get_message_handler()); + + // OK, we've established the flows all seem sensible. + // Now merge vartable entries according to the predecessor_map: + + // Take the transitive closure of the predecessor map: + for(auto& kv : predecessor_map) + { + std::set closed_preds; + gather_transitive_predecessors(kv.first, predecessor_map, closed_preds); + kv.second=std::move(closed_preds); + } + + // Top-sort so that we get the bottom variables first: + is_predecessor_oft comp(predecessor_map); + std::vector topsorted_vars; + for(auto it=firstvar, itend=varlimit; it!=itend; ++it) + topsorted_vars.push_back(&*it); + + std::sort(topsorted_vars.begin(), topsorted_vars.end(), comp); + + // Now merge the entries: + for(auto merge_into : topsorted_vars) + { + // Already merged into another variable? + if(merge_into->var.length==0) + continue; + + auto findit=predecessor_map.find(merge_into); + // Nothing to do? + if(findit==predecessor_map.end()) + continue; + + const auto& merge_vars=findit->second; + assert(merge_vars.size()>=2); + + merge_variable_table_entries( + *merge_into, + merge_vars, + dominator_analysis, + status()); + } +} + +/*******************************************************************\ + +Function: walk_to_next_index + + Inputs: `it1` and `it2`, which are iterators into the same vector, + of which `itend` is the end() iterator. + + Outputs: Moves `it1` and `it2` to delimit a sequence of variable + table entries with slot index equal to it2->var.index + on entering this function, or to both equal itend if + it2==itend on entry. + + Purpose: Walk a vector, a contiguous block of entries with equal + slot index at a time. + +\*******************************************************************/ + +static void walk_to_next_index( + local_variable_table_with_holest::iterator& it1, + local_variable_table_with_holest::iterator& it2, + local_variable_table_with_holest::iterator itend) +{ + if(it2==itend) + { + it1=itend; + return; + } + + auto old_it2=it2; + auto index=it2->var.index; + while(it2!=itend && it2->var.index==index) + ++it2; + it1=old_it2; +} + +/*******************************************************************\ + +Function: find_initialisers + + Inputs: `vars`: Local variable table + `amap`: Map from bytecode index to instruction + `dominator_analysis`: Already computed dominator tree for + the Java function described by `amap` + + Outputs: Combines entries in `vars` which flow together + + Purpose: See `find_initialisers_for_slot` above for more detail. + +\*******************************************************************/ + +void java_bytecode_convert_methodt::find_initialisers( + local_variable_table_with_holest& vars, + const address_mapt& amap, + const java_cfg_dominatorst& dominator_analysis) +{ + // Sort table entries by local slot index: + std::sort(vars.begin(), vars.end(), lt_index); + + // For each block of entries using a particular index, + // try to combine them: + auto it1=vars.begin(); + auto it2=it1; + auto itend=vars.end(); + walk_to_next_index(it1, it2, itend); + for(; it1!=itend; walk_to_next_index(it1, it2, itend)) + find_initialisers_for_slot(it1, it2, amap, dominator_analysis); +} + +/*******************************************************************\ + +Function: cleanup_var_table + + Inputs: `vars_with_holes`: variable table + + Outputs: Removes zero-size entries from `vars_with_holes` + + Purpose: See above + +\*******************************************************************/ + +static void cleanup_var_table( + std::vector& vars_with_holes) +{ + size_t toremove=0; + for(size_t i=0; i<(vars_with_holes.size()-toremove); ++i) + { + auto &v=vars_with_holes[i]; + if(v.var.length==0) + { + // Move to end; consider the new element we've swapped in: + ++toremove; + if(i!=vars_with_holes.size()-toremove) // Already where it needs to be? + std::swap(v, vars_with_holes[vars_with_holes.size()-toremove]); + --i; // May make i (size_t)-1, but it will immediately be + // re-incremented as the loop iterates. + } + } + + // Remove un-needed entries. + vars_with_holes.resize(vars_with_holes.size()-toremove); +} + +/*******************************************************************\ + +Function: setup_local_variables + + Inputs: `m`: Java method + `amap`: Map from bytecode indices to instructions in `m` + + Outputs: Populates `this->vars_with_holes` equal to + `this->local_variable_table`, only with variable table + entries that flow together combined. + Also symbol-table registers all locals. + + Purpose: See `find_initialisers_for_slot` above for more detail. + +\*******************************************************************/ + +void java_bytecode_convert_methodt::setup_local_variables( + const methodt& m, + const address_mapt& amap) +{ + // Compute CFG dominator tree + java_cfg_dominatorst dominator_analysis; + dominator_analysis(amap); + + // Find out which local variable table entries should be merged: + // Wrap each entry so we have somewhere to record live ranges with holes: + std::vector vars_with_holes; + for(const auto& v : m.local_variable_table) + vars_with_holes.push_back({v, {}}); + + // Merge variable records: + find_initialisers(vars_with_holes, amap, dominator_analysis); + + // Clean up removed records from the variable table: + cleanup_var_table(vars_with_holes); + + // Do the locals and parameters in the variable table, which is available when + // compiled with -g or for methods with many local variables in the latter + // case, different variables can have the same index, depending on the + // context. + // + // to calculate which variable to use, one uses the address of the instruction + // that uses the variable, the size of the instruction and the start_pc / + // length values in the local variable table + for(const auto & v : vars_with_holes) + { + if(v.var.start_pc==0) // Parameter? + continue; + + typet t=java_type_from_string(v.var.signature); + std::ostringstream id_oss; + id_oss << method_id << "::" << v.var.start_pc << "::" << v.var.name; + irep_idt identifier(id_oss.str()); + symbol_exprt result(identifier, t); + result.set(ID_C_base_name, v.var.name); + + variables[v.var.index].push_back(variablet()); + auto &newv=variables[v.var.index].back(); + newv.symbol_expr=result; + newv.start_pc=v.var.start_pc; + newv.length=v.var.length; + newv.holes=std::move(v.holes); + + symbolt new_symbol; + new_symbol.name=identifier; + new_symbol.type=t; + new_symbol.base_name=v.var.name; + new_symbol.pretty_name=id2string(identifier).substr(6, std::string::npos); + new_symbol.mode=ID_java; + new_symbol.is_type=false; + new_symbol.is_file_local=true; + new_symbol.is_thread_local=true; + new_symbol.is_lvalue=true; + symbol_table.add(new_symbol); + } +} + +/*******************************************************************\ + +Function: find_variable_for_slot + + Inputs: `address`: Address to find a variable table entry for + `var_list`: List of candidates that use the slot we're + interested in + + Outputs: Returns the list entry covering this address (taking live + range holes into account), or creates/returns an anonymous + variable entry if nothing covers `address`. + + Purpose: See above + +\*******************************************************************/ + +const java_bytecode_convert_methodt::variablet & +java_bytecode_convert_methodt::find_variable_for_slot( + size_t address, + variablest &var_list) +{ + for(const variablet &var : var_list) + { + size_t start_pc=var.start_pc; + size_t length=var.length; + if(address>=start_pc && address<(start_pc+length)) + { + bool found_hole=false; + for(auto &hole : var.holes) + if(address>=hole.start_pc && address<(hole.start_pc+hole.length)) + { + found_hole=true; + break; + } + if(found_hole) + continue; + return var; + } + } + // add unnamed local variable to end of list at this index + // with scope from 0 to SIZE_T_MAX + // as it is at the end of the vector, it will only be taken into account + // if no other variable is valid + size_t list_length=var_list.size(); + var_list.resize(list_length+1); + var_list[list_length].start_pc=0; + var_list[list_length].length=std::numeric_limits::max(); + return var_list[list_length]; +}