Skip to content

Commit b0cb1ee

Browse files
authored
Merge pull request #1766 from smowton/smowton/feature/java-frontend-create-literal-globals-early
Java frontend: create String and Class literals earlier
2 parents a2e3af5 + 8a27950 commit b0cb1ee

8 files changed

+373
-204
lines changed

src/java_bytecode/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ SRC = bytecode_info.cpp \
2424
java_pointer_casts.cpp \
2525
java_root_class.cpp \
2626
java_string_library_preprocess.cpp \
27+
java_string_literals.cpp \
2728
java_types.cpp \
2829
java_utils.cpp \
2930
generate_java_generic_type.cpp \

src/java_bytecode/ci_lazy_methods.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,8 @@ void ci_lazy_methodst::gather_needed_globals(
479479
findit->second.is_static_lifetime)
480480
{
481481
needed.add(findit->second);
482+
// Gather any globals referenced in the initialiser:
483+
gather_needed_globals(findit->second.value, symbol_table, needed);
482484
}
483485
}
484486
else

src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 5 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1712,41 +1712,12 @@ codet java_bytecode_convert_methodt::convert_instructions(
17121712
{
17131713
assert(op.empty() && results.size()==1);
17141714

1715-
// 1) Pushing a String causes a reference to a java.lang.String object
1716-
// to be constructed and pushed onto the operand stack.
1715+
INVARIANT(
1716+
arg0.id() != ID_java_string_literal && arg0.id() != ID_type,
1717+
"String and Class literals should have been lowered in "
1718+
"generate_constant_global_variables");
17171719

1718-
// 2) Pushing an int or a float causes a primitive value to be pushed
1719-
// onto the stack.
1720-
1721-
// 3) Pushing a Class constant causes a reference to a java.lang.Class
1722-
// to be pushed onto the operand stack
1723-
1724-
if(arg0.id()==ID_java_string_literal)
1725-
{
1726-
// these need to be references to java.lang.String
1727-
results[0]=arg0;
1728-
symbol_typet string_type("java::java.lang.String");
1729-
results[0].type()=java_reference_type(string_type);
1730-
}
1731-
else if(arg0.id()==ID_type)
1732-
{
1733-
irep_idt class_id=arg0.type().get(ID_identifier);
1734-
symbol_typet java_lang_Class("java::java.lang.Class");
1735-
symbol_exprt symbol_expr(
1736-
id2string(class_id)+"@class_model",
1737-
java_lang_Class);
1738-
address_of_exprt address_of_expr(symbol_expr);
1739-
results[0]=address_of_expr;
1740-
}
1741-
else if(arg0.id()==ID_constant)
1742-
{
1743-
results[0]=arg0;
1744-
}
1745-
else
1746-
{
1747-
error() << "unexpected ldc argument" << eom;
1748-
throw 0;
1749-
}
1720+
results[0] = arg0;
17501721
}
17511722
else if(statement=="goto" || statement=="goto_w")
17521723
{

src/java_bytecode/java_bytecode_language.cpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Author: Daniel Kroening, [email protected]
3030
#include "java_entry_point.h"
3131
#include "java_bytecode_parser.h"
3232
#include "java_class_loader.h"
33+
#include "java_string_literals.h"
3334
#include "java_utils.h"
3435
#include <java_bytecode/ci_lazy_methods.h>
3536
#include <java_bytecode/generate_java_generic_type.h>
@@ -267,6 +268,118 @@ static void infer_opaque_type_fields(
267268
}
268269
}
269270

271+
/// Create if necessary, then return the constant global java.lang.Class symbol
272+
/// for a given class id
273+
/// \param class_id: class identifier
274+
/// \param symbol_table: global symbol table; a symbol may be added
275+
/// \return java.lang.Class typed symbol expression
276+
static symbol_exprt get_or_create_class_literal_symbol(
277+
const irep_idt &class_id, symbol_tablet &symbol_table)
278+
{
279+
symbol_typet java_lang_Class("java::java.lang.Class");
280+
symbol_exprt symbol_expr(
281+
id2string(class_id)+"@class_model",
282+
java_lang_Class);
283+
if(!symbol_table.has_symbol(symbol_expr.get_identifier()))
284+
{
285+
symbolt new_class_symbol;
286+
new_class_symbol.name = symbol_expr.get_identifier();
287+
new_class_symbol.type = symbol_expr.type();
288+
INVARIANT(
289+
has_prefix(id2string(new_class_symbol.name), "java::"),
290+
"class identifier should have 'java::' prefix");
291+
new_class_symbol.base_name =
292+
id2string(new_class_symbol.name).substr(6);
293+
new_class_symbol.mode = ID_java;
294+
new_class_symbol.is_lvalue = true;
295+
new_class_symbol.is_state_var = true;
296+
new_class_symbol.is_static_lifetime = true;
297+
symbol_table.add(new_class_symbol);
298+
}
299+
300+
return symbol_expr;
301+
}
302+
303+
/// Get result of a Java load-constant (ldc) instruction.
304+
/// Possible cases:
305+
/// 1) Pushing a String causes a reference to a java.lang.String object
306+
/// to be constructed and pushed onto the operand stack.
307+
/// 2) Pushing an int or a float causes a primitive value to be pushed
308+
/// onto the stack.
309+
/// 3) Pushing a Class constant causes a reference to a java.lang.Class
310+
/// to be pushed onto the operand stack
311+
/// \param ldc_arg0: raw operand to the ldc opcode
312+
/// \param symbol_table: global symbol table. If the argument `ldc_arg0` is a
313+
/// String or Class constant then a new constant global may be added.
314+
/// \param string_refinement_enabled: true if --refine-strings is enabled, which
315+
/// influences how String literals are structured.
316+
/// \return ldc result
317+
static exprt get_ldc_result(
318+
const exprt &ldc_arg0,
319+
symbol_tablet &symbol_table,
320+
bool string_refinement_enabled)
321+
{
322+
if(ldc_arg0.id() == ID_type)
323+
{
324+
const irep_idt &class_id = ldc_arg0.type().get(ID_identifier);
325+
return
326+
address_of_exprt(
327+
get_or_create_class_literal_symbol(class_id, symbol_table));
328+
}
329+
else if(ldc_arg0.id() == ID_java_string_literal)
330+
{
331+
return
332+
address_of_exprt(
333+
get_or_create_string_literal_symbol(
334+
ldc_arg0, symbol_table, string_refinement_enabled));
335+
}
336+
else
337+
{
338+
INVARIANT(
339+
ldc_arg0.id() == ID_constant,
340+
"ldc argument should be constant, string literal or class literal");
341+
return ldc_arg0;
342+
}
343+
}
344+
345+
/// Creates global variables for constants mentioned in a given method. These
346+
/// are either string literals, or class literals (the java.lang.Class instance
347+
/// returned by `(some_reference_typed_expression).class`). The method parse
348+
/// tree is rewritten to directly reference these globals.
349+
/// \param parse_tree: parse tree to search for constant global references
350+
/// \param symbol_table: global symbol table, to which constant globals will be
351+
/// added.
352+
/// \param string_refinement_enabled: true if `--refine-stings` is active,
353+
/// which changes how string literals are structured.
354+
static void generate_constant_global_variables(
355+
java_bytecode_parse_treet &parse_tree,
356+
symbol_tablet &symbol_table,
357+
bool string_refinement_enabled)
358+
{
359+
for(auto &method : parse_tree.parsed_class.methods)
360+
{
361+
for(auto &instruction : method.instructions)
362+
{
363+
// ldc* instructions are Java bytecode "load constant" ops, which can
364+
// retrieve a numeric constant, String literal, or Class literal.
365+
if(instruction.statement == "ldc" ||
366+
instruction.statement == "ldc2" ||
367+
instruction.statement == "ldc_w" ||
368+
instruction.statement == "ldc2_w")
369+
{
370+
INVARIANT(
371+
instruction.args.size() != 0,
372+
"ldc instructions should have an argument");
373+
instruction.args[0] =
374+
get_ldc_result(
375+
instruction.args[0],
376+
symbol_table,
377+
string_refinement_enabled);
378+
}
379+
}
380+
}
381+
}
382+
270383
bool java_bytecode_languaget::typecheck(
271384
symbol_tablet &symbol_table,
272385
const std::string &module)
@@ -346,6 +459,23 @@ bool java_bytecode_languaget::typecheck(
346459
infer_opaque_type_fields(c.second, symbol_table);
347460
}
348461

462+
// Create global variables for constants (String and Class literals) up front.
463+
// This means that when running with lazy loading, we will be aware of these
464+
// literal globals' existence when __CPROVER_initialize is generated in
465+
// `generate_support_functions`.
466+
const std::size_t before_constant_globals_size = symbol_table.symbols.size();
467+
for(auto &c : java_class_loader.class_map)
468+
{
469+
generate_constant_global_variables(
470+
c.second,
471+
symbol_table,
472+
string_refinement_enabled);
473+
}
474+
status() << "Java: added "
475+
<< (symbol_table.symbols.size() - before_constant_globals_size)
476+
<< " String or Class constant symbols"
477+
<< messaget::eom;
478+
349479
// Now incrementally elaborate methods
350480
// that are reachable from this entry point.
351481
if(lazy_methods_mode==LAZY_METHODS_MODE_CONTEXT_INSENSITIVE)

src/java_bytecode/java_bytecode_typecheck.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ class java_bytecode_typecheckt:public typecheckt
6767
void typecheck_code(codet &);
6868
void typecheck_type(typet &);
6969
void typecheck_expr_symbol(symbol_exprt &);
70-
void typecheck_expr_java_string_literal(exprt &);
7170
void typecheck_expr_member(member_exprt &);
7271
void typecheck_expr_java_new(side_effect_exprt &);
7372
void typecheck_expr_java_new_array(side_effect_exprt &);

0 commit comments

Comments
 (0)