Skip to content

Java frontend: create String and Class literals earlier #1766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/java_bytecode/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ SRC = bytecode_info.cpp \
java_pointer_casts.cpp \
java_root_class.cpp \
java_string_library_preprocess.cpp \
java_string_literals.cpp \
java_types.cpp \
java_utils.cpp \
generate_java_generic_type.cpp \
Expand Down
2 changes: 2 additions & 0 deletions src/java_bytecode/ci_lazy_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,8 @@ void ci_lazy_methodst::gather_needed_globals(
findit->second.is_static_lifetime)
{
needed.add(findit->second);
// Gather any globals referenced in the initialiser:
gather_needed_globals(findit->second.value, symbol_table, needed);
}
}
else
Expand Down
39 changes: 5 additions & 34 deletions src/java_bytecode/java_bytecode_convert_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1712,41 +1712,12 @@ codet java_bytecode_convert_methodt::convert_instructions(
{
assert(op.empty() && results.size()==1);

// 1) Pushing a String causes a reference to a java.lang.String object
// to be constructed and pushed onto the operand stack.
INVARIANT(
arg0.id() != ID_java_string_literal && arg0.id() != ID_type,
"String and Class literals should have been lowered in "
"generate_constant_global_variables");

// 2) Pushing an int or a float causes a primitive value to be pushed
// onto the stack.

// 3) Pushing a Class constant causes a reference to a java.lang.Class
// to be pushed onto the operand stack

if(arg0.id()==ID_java_string_literal)
{
// these need to be references to java.lang.String
results[0]=arg0;
symbol_typet string_type("java::java.lang.String");
results[0].type()=java_reference_type(string_type);
}
else if(arg0.id()==ID_type)
{
irep_idt class_id=arg0.type().get(ID_identifier);
symbol_typet java_lang_Class("java::java.lang.Class");
symbol_exprt symbol_expr(
id2string(class_id)+"@class_model",
java_lang_Class);
address_of_exprt address_of_expr(symbol_expr);
results[0]=address_of_expr;
}
else if(arg0.id()==ID_constant)
{
results[0]=arg0;
}
else
{
error() << "unexpected ldc argument" << eom;
throw 0;
}
results[0] = arg0;
}
else if(statement=="goto" || statement=="goto_w")
{
Expand Down
130 changes: 130 additions & 0 deletions src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Author: Daniel Kroening, [email protected]
#include "java_entry_point.h"
#include "java_bytecode_parser.h"
#include "java_class_loader.h"
#include "java_string_literals.h"
#include "java_utils.h"
#include <java_bytecode/ci_lazy_methods.h>
#include <java_bytecode/generate_java_generic_type.h>
Expand Down Expand Up @@ -256,6 +257,118 @@ static void infer_opaque_type_fields(
}
}

/// Create if necessary, then return the constant global java.lang.Class symbol
/// for a given class id
/// \param class_id: class identifier
/// \param symbol_table: global symbol table; a symbol may be added
/// \return java.lang.Class typed symbol expression
static symbol_exprt get_or_create_class_literal_symbol(
const irep_idt &class_id, symbol_tablet &symbol_table)
{
symbol_typet java_lang_Class("java::java.lang.Class");
symbol_exprt symbol_expr(
id2string(class_id)+"@class_model",
java_lang_Class);
if(!symbol_table.has_symbol(symbol_expr.get_identifier()))
{
symbolt new_class_symbol;
new_class_symbol.name = symbol_expr.get_identifier();
new_class_symbol.type = symbol_expr.type();
INVARIANT(
has_prefix(id2string(new_class_symbol.name), "java::"),
"class identifier should have 'java::' prefix");
new_class_symbol.base_name =
id2string(new_class_symbol.name).substr(6);
new_class_symbol.mode = ID_java;
new_class_symbol.is_lvalue = true;
new_class_symbol.is_state_var = true;
new_class_symbol.is_static_lifetime = true;
symbol_table.add(new_class_symbol);
}

return symbol_expr;
}

/// Get result of a Java load-constant (ldc) instruction.
/// Possible cases:
/// 1) Pushing a String causes a reference to a java.lang.String object
/// to be constructed and pushed onto the operand stack.
/// 2) Pushing an int or a float causes a primitive value to be pushed
/// onto the stack.
/// 3) Pushing a Class constant causes a reference to a java.lang.Class
/// to be pushed onto the operand stack
/// \param ldc_arg0: raw operand to the ldc opcode
/// \param symbol_table: global symbol table. If the argument `ldc_arg0` is a
/// String or Class constant then a new constant global may be added.
/// \param string_refinement_enabled: true if --refine-strings is enabled, which
/// influences how String literals are structured.
/// \return ldc result
static exprt get_ldc_result(
const exprt &ldc_arg0,
symbol_tablet &symbol_table,
bool string_refinement_enabled)
{
if(ldc_arg0.id() == ID_type)
{
const irep_idt &class_id = ldc_arg0.type().get(ID_identifier);
return
address_of_exprt(
get_or_create_class_literal_symbol(class_id, symbol_table));
}
else if(ldc_arg0.id() == ID_java_string_literal)
{
return
address_of_exprt(
get_or_create_string_literal_symbol(
ldc_arg0, symbol_table, string_refinement_enabled));
}
else
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else is redundant.

{
INVARIANT(
ldc_arg0.id() == ID_constant,
"ldc argument should be constant, string literal or class literal");
return ldc_arg0;
}
}

/// Creates global variables for constants mentioned in a given method. These
/// are either string literals, or class literals (the java.lang.Class instance
/// returned by `(some_reference_typed_expression).class`). The method parse
/// tree is rewritten to directly reference these globals.
/// \param parse_tree: parse tree to search for constant global references
/// \param symbol_table: global symbol table, to which constant globals will be
/// added.
/// \param string_refinement_enabled: true if `--refine-stings` is active,
/// which changes how string literals are structured.
static void generate_constant_global_variables(
java_bytecode_parse_treet &parse_tree,
symbol_tablet &symbol_table,
bool string_refinement_enabled)
{
for(auto &method : parse_tree.parsed_class.methods)
{
for(auto &instruction : method.instructions)
{
// ldc* instructions are Java bytecode "load constant" ops, which can
// retrieve a numeric constant, String literal, or Class literal.
if(instruction.statement == "ldc" ||
instruction.statement == "ldc2" ||
instruction.statement == "ldc_w" ||
instruction.statement == "ldc2_w")
{
INVARIANT(
instruction.args.size() != 0,
"ldc instructions should have an argument");
instruction.args[0] =
get_ldc_result(
instruction.args[0],
symbol_table,
string_refinement_enabled);
}
}
}
}

bool java_bytecode_languaget::typecheck(
symbol_tablet &symbol_table,
const std::string &module)
Expand Down Expand Up @@ -317,6 +430,23 @@ bool java_bytecode_languaget::typecheck(
infer_opaque_type_fields(c.second, symbol_table);
}

// Create global variables for constants (String and Class literals) up front.
// This means that when running with lazy loading, we will be aware of these
// literal globals' existence when __CPROVER_initialize is generated in
// `generate_support_functions`.
const std::size_t before_constant_globals_size = symbol_table.symbols.size();
for(auto &c : java_class_loader.class_map)
{
generate_constant_global_variables(
c.second,
symbol_table,
string_refinement_enabled);
}
status() << "Java: added "
<< (symbol_table.symbols.size() - before_constant_globals_size)
<< " String or Class constant symbols"
<< messaget::eom;

// Now incrementally elaborate methods
// that are reachable from this entry point.
if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE)
Expand Down
1 change: 0 additions & 1 deletion src/java_bytecode/java_bytecode_typecheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class java_bytecode_typecheckt:public typecheckt
void typecheck_code(codet &);
void typecheck_type(typet &);
void typecheck_expr_symbol(symbol_exprt &);
void typecheck_expr_java_string_literal(exprt &);
void typecheck_expr_member(member_exprt &);
void typecheck_expr_java_new(side_effect_exprt &);
void typecheck_expr_java_new_array(side_effect_exprt &);
Expand Down
Loading