Skip to content

Commit 2af582b

Browse files
committed
Java class literals: init using a library hook when possible
The Java library definition of java.lang.Class, when nondet initialized, can be quite time consuming due to the enumeration constant dictionary's internal array. Instead, let's give the library a chance to set Class constants (literals like MyClass.class) itself. While we're at it we might as well gain the ability to prove some properties of the literals. We currently supply those class attributes that are easy to come by in the parse tree; with a little more effort in the parser IsLocalClass and IsMemberClass could be determined.
1 parent 34b0ac6 commit 2af582b

20 files changed

+258
-8
lines changed
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
public @interface ExampleAnnotation {
3+
4+
}
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
public enum ExampleEnum {
3+
4+
}
5+
Binary file not shown.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
interface ExampleInterface {
3+
4+
}
1.16 KB
Binary file not shown.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
2+
public class Test {
3+
4+
public static void main() {
5+
6+
assert ExampleAnnotation.class.isAnnotation();
7+
assert ExampleInterface.class.isInterface();
8+
assert ExampleEnum.class.isEnum();
9+
10+
assert char[].class.isArray();
11+
assert short[].class.isArray();
12+
assert int[].class.isArray();
13+
assert long[].class.isArray();
14+
assert float[].class.isArray();
15+
assert double[].class.isArray();
16+
assert boolean[].class.isArray();
17+
assert Object[].class.isArray();
18+
assert Object[][].class.isArray();
19+
20+
// Note use of '==' not '.equals', as we expect the same exact literal,
21+
// which in jbmc always have the same address.
22+
// Note names of array classes are not tested yet, as arrays' types are
23+
// printed as their raw signature, to be addressed separately.
24+
// Note also primitive types (e.g. int.class) are not addresses here, as
25+
// they are created through box types' static initializers (e.g. Integer
26+
// has a static member TYPE that holds the Class for `int.class`)
27+
28+
assert ExampleAnnotation.class.getName() == "ExampleAnnotation";
29+
assert ExampleInterface.class.getName() == "ExampleInterface";
30+
assert ExampleEnum.class.getName() == "ExampleEnum";
31+
32+
}
33+
34+
}
Binary file not shown.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package java.lang;
2+
3+
public class Class {
4+
5+
private String name;
6+
7+
private boolean isAnnotation;
8+
private boolean isArray;
9+
private boolean isInterface;
10+
private boolean isSynthetic;
11+
private boolean isLocalClass;
12+
private boolean isMemberClass;
13+
private boolean isEnum;
14+
15+
public void cproverInitializeClassLiteral(
16+
String name,
17+
boolean isAnnotation,
18+
boolean isArray,
19+
boolean isInterface,
20+
boolean isSynthetic,
21+
boolean isLocalClass,
22+
boolean isMemberClass,
23+
boolean isEnum) {
24+
25+
this.name = name;
26+
this.isAnnotation = isAnnotation;
27+
this.isArray = isArray;
28+
this.isInterface = isInterface;
29+
this.isSynthetic = isSynthetic;
30+
this.isLocalClass = isLocalClass;
31+
this.isMemberClass = isMemberClass;
32+
this.isEnum = isEnum;
33+
34+
}
35+
36+
public String getName() { return name; }
37+
38+
public boolean isAnnotation() { return isAnnotation; }
39+
public boolean isArray() { return isArray; }
40+
public boolean isInterface() { return isInterface; }
41+
public boolean isSynthetic() { return isSynthetic; }
42+
public boolean isLocalClass() { return isLocalClass; }
43+
public boolean isMemberClass() { return isMemberClass; }
44+
public boolean isEnum() { return isEnum; }
45+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Test.class
3+
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
^VERIFICATION SUCCESSFUL$
7+
--
8+
^warning: ignoring
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Test.class
3+
--lazy-methods
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
^VERIFICATION SUCCESSFUL$
7+
--
8+
^warning: ignoring

jbmc/src/java_bytecode/ci_lazy_methods.cpp

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "java_string_library_preprocess.h"
1414
#include "remove_exceptions.h"
1515

16+
#include <util/expr_iterator.h>
1617
#include <util/suffix.h>
1718

1819
#include <goto-programs/resolve_inherited_component.h>
@@ -52,6 +53,30 @@ ci_lazy_methodst::ci_lazy_methodst(
5253
class_hierarchy(symbol_table);
5354
}
5455

56+
/// Checks if an expression refers to any class literals (e.g. MyType.class)
57+
/// These are expressed as ldc instructions in Java bytecode, and as symbols
58+
/// of the form MyType@class_model in GOTO programs.
59+
/// \param expr: expression to check
60+
/// \return true if the expression or any of its subexpressions refer to a
61+
/// class
62+
static bool references_class_model(const exprt &expr)
63+
{
64+
symbol_typet class_type("java::java.lang.Class");
65+
66+
for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
67+
{
68+
if(can_cast_expr<symbol_exprt>(*it) &&
69+
it->type() == class_type &&
70+
has_suffix(
71+
id2string(to_symbol_expr(*it).get_identifier()), "@class_model"))
72+
{
73+
return true;
74+
}
75+
}
76+
77+
return false;
78+
}
79+
5580
/// Uses a simple context-insensitive ('ci') analysis to determine which methods
5681
/// may be reachable from the main entry point. In brief, static methods are
5782
/// reachable if we find a callsite in another reachable site, while virtual
@@ -122,6 +147,7 @@ bool ci_lazy_methodst::operator()(
122147

123148
std::unordered_set<irep_idt> methods_already_populated;
124149
std::unordered_set<exprt, irep_hash> virtual_function_calls;
150+
bool class_initializer_seen = false;
125151

126152
bool any_new_classes = true;
127153
while(any_new_classes)
@@ -149,8 +175,19 @@ bool ci_lazy_methodst::operator()(
149175
// Couldn't convert this function
150176
continue;
151177
}
152-
gather_virtual_callsites(
153-
symbol_table.lookup_ref(mname).value, virtual_function_calls);
178+
const exprt &method_body = symbol_table.lookup_ref(mname).value;
179+
180+
gather_virtual_callsites(method_body, virtual_function_calls);
181+
182+
if(!class_initializer_seen && references_class_model(method_body))
183+
{
184+
class_initializer_seen = true;
185+
irep_idt initializer_signature =
186+
get_java_class_literal_initializer_signature();
187+
if(symbol_table.has_symbol(initializer_signature))
188+
methods_to_convert_later.insert(initializer_signature);
189+
}
190+
154191
any_new_methods=true;
155192
}
156193
}

jbmc/src/java_bytecode/java_bytecode_convert_class.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ void java_bytecode_convert_classt::convert(
265265
class_type.set_tag(c.name);
266266
class_type.set(ID_base_name, c.name);
267267
class_type.set(ID_abstract, c.is_abstract);
268+
class_type.set(ID_is_annotation, c.is_annotation);
269+
class_type.set(ID_interface, c.is_interface);
270+
class_type.set(ID_synthetic, c.is_synthetic);
268271
class_type.set_final(c.is_final);
269272
if(c.is_enum)
270273
{

jbmc/src/java_bytecode/java_bytecode_language.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,8 @@ bool java_bytecode_languaget::generate_support_functions(
785785
get_message_handler(),
786786
assume_inputs_non_null,
787787
object_factory_parameters,
788-
get_pointer_type_selector());
788+
get_pointer_type_selector(),
789+
string_refinement_enabled);
789790
}
790791

791792
/// Uses a simple context-insensitive ('ci') analysis to determine which methods

jbmc/src/java_bytecode/java_bytecode_parse_tree.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ class java_bytecode_parse_treet
204204
bool is_enum=false;
205205
bool is_public=false, is_protected=false, is_private=false;
206206
bool is_final = false;
207+
bool is_interface = false;
208+
bool is_synthetic = false;
209+
bool is_annotation = false;
207210
bool attribute_bootstrapmethods_read = false;
208211
size_t enum_elements=0;
209212

jbmc/src/java_bytecode/java_bytecode_parser.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,11 @@ bool java_bytecode_parsert::parse()
451451
#define ACC_BRIDGE 0x0040
452452
#define ACC_VARARGS 0x0080
453453
#define ACC_NATIVE 0x0100
454+
#define ACC_INTERFACE 0x0200
454455
#define ACC_ABSTRACT 0x0400
455456
#define ACC_STRICT 0x0800
456457
#define ACC_SYNTHETIC 0x1000
458+
#define ACC_ANNOTATION 0x2000
457459
#define ACC_ENUM 0x4000
458460

459461
#ifdef _MSC_VER
@@ -496,6 +498,9 @@ void java_bytecode_parsert::rClassFile()
496498
parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
497499
parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
498500
parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
501+
parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
502+
parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
503+
parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
499504
parsed_class.name=
500505
constant(this_class).type().get(ID_C_base_name);
501506

jbmc/src/java_bytecode/java_entry_point.cpp

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Author: Daniel Kroening, [email protected]
1717
#include <linking/static_lifetime_init.h>
1818

1919
#include "java_object_factory.h"
20+
#include "java_string_literals.h"
2021
#include "java_utils.h"
2122

2223
static void create_initialize(symbol_table_baset &symbol_table)
@@ -65,12 +66,50 @@ static bool should_init_symbol(const symbolt &sym)
6566
return is_java_string_literal_id(sym.name);
6667
}
6768

69+
/// Get the symbol name of java.lang.Class' initializer method. This method
70+
/// should initialize a Class instance with known properties of the type it
71+
/// represents, such as its name, whether it is abstract, or an enumeration,
72+
/// etc. The method may or may not exist in any particular symbol table; it is
73+
/// up to the caller to check.
74+
/// \return Class initializer method's symbol name.
75+
irep_idt get_java_class_literal_initializer_signature()
76+
{
77+
static irep_idt signature =
78+
"java::java.lang.Class.cproverInitializeClassLiteral:"
79+
"(Ljava/lang/String;ZZZZZZZ)V";
80+
return signature;
81+
}
82+
83+
/// If symbol is a class literal, and an appropriate initializer method exists,
84+
/// return a pointer to its symbol. If not, return null.
85+
/// \param symbol: possible class literal symbol
86+
/// \param symbol_table: table to search
87+
/// \return pointer to the initializer method symbol or null
88+
static const symbolt *get_class_literal_initializer(
89+
const symbolt &symbol,
90+
const symbol_table_baset &symbol_table)
91+
{
92+
if(symbol.value.is_not_nil())
93+
return nullptr;
94+
if(symbol.type != symbol_typet("java::java.lang.Class"))
95+
return nullptr;
96+
if(!has_suffix(id2string(symbol.name), "@class_model"))
97+
return nullptr;
98+
return symbol_table.lookup(get_java_class_literal_initializer_signature());
99+
}
100+
101+
static constant_exprt constant_bool(bool val)
102+
{
103+
return from_integer(val ? 1 : 0, java_boolean_type());
104+
}
105+
68106
static void java_static_lifetime_init(
69107
symbol_table_baset &symbol_table,
70108
const source_locationt &source_location,
71109
bool assume_init_pointers_not_null,
72110
const object_factory_parameterst &object_factory_parameters,
73-
const select_pointer_typet &pointer_type_selector)
111+
const select_pointer_typet &pointer_type_selector,
112+
bool string_refinement_enabled)
74113
{
75114
symbolt &initialize_symbol=*symbol_table.get_writeable(INITIALIZE_FUNCTION);
76115
code_blockt &code_block=to_code_block(to_code(initialize_symbol.value));
@@ -88,7 +127,53 @@ static void java_static_lifetime_init(
88127
const symbolt &sym=*symbol_table.lookup(symname);
89128
if(should_init_symbol(sym))
90129
{
91-
if(sym.value.is_nil() && sym.type!=empty_typet())
130+
if(const symbolt *class_literal_init_method =
131+
get_class_literal_initializer(sym, symbol_table))
132+
{
133+
const std::string &name_str = id2string(sym.name);
134+
irep_idt class_name =
135+
name_str.substr(0, name_str.size() - strlen("@class_model"));
136+
const symbolt &class_symbol = symbol_table.lookup_ref(class_name);
137+
138+
bool class_is_array = is_java_array_tag(sym.name);
139+
140+
exprt name_literal(ID_java_string_literal);
141+
name_literal.set(ID_value, to_class_type(class_symbol.type).get_tag());
142+
143+
symbol_exprt class_name_literal =
144+
get_or_create_string_literal_symbol(
145+
name_literal, symbol_table, string_refinement_enabled);
146+
147+
// Call the literal initializer method instead of a nondet initializer:
148+
149+
// For arguments we can't parse yet:
150+
side_effect_expr_nondett nondet_bool(java_boolean_type());
151+
152+
// Argument order is: name, isAnnotation, isArray, isInterface,
153+
// isSynthetic, isLocalClass, isMemberClass, isEnum
154+
155+
code_function_callt initializer_call;
156+
initializer_call.function() = class_literal_init_method->symbol_expr();
157+
158+
code_function_callt::argumentst &args = initializer_call.arguments();
159+
160+
args.push_back(address_of_exprt(sym.symbol_expr())); /* this */
161+
args.push_back(address_of_exprt(class_name_literal));
162+
args.push_back(
163+
constant_bool(class_symbol.type.get_bool(ID_is_annotation)));
164+
args.push_back(constant_bool(class_is_array));
165+
args.push_back(
166+
constant_bool(class_symbol.type.get_bool(ID_interface)));
167+
args.push_back(
168+
constant_bool(class_symbol.type.get_bool(ID_synthetic)));
169+
args.push_back(nondet_bool);
170+
args.push_back(nondet_bool);
171+
args.push_back(
172+
constant_bool(class_symbol.type.get_bool(ID_enumeration)));
173+
174+
code_block.move_to_operands(initializer_call);
175+
}
176+
else if(sym.value.is_nil() && sym.type!=empty_typet())
92177
{
93178
bool allow_null=!assume_init_pointers_not_null;
94179
if(allow_null)
@@ -403,7 +488,8 @@ bool java_entry_point(
403488
message_handlert &message_handler,
404489
bool assume_init_pointers_not_null,
405490
const object_factory_parameterst &object_factory_parameters,
406-
const select_pointer_typet &pointer_type_selector)
491+
const select_pointer_typet &pointer_type_selector,
492+
bool string_refinement_enabled)
407493
{
408494
// check if the entry point is already there
409495
if(symbol_table.symbols.find(goto_functionst::entry_point())!=
@@ -426,7 +512,8 @@ bool java_entry_point(
426512
symbol.location,
427513
assume_init_pointers_not_null,
428514
object_factory_parameters,
429-
pointer_type_selector);
515+
pointer_type_selector,
516+
string_refinement_enabled);
430517

431518
return generate_java_start_function(
432519
symbol,

jbmc/src/java_bytecode/java_entry_point.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ bool java_entry_point(
2525
class message_handlert &message_handler,
2626
bool assume_init_pointers_not_null,
2727
const object_factory_parameterst &object_factory_parameters,
28-
const select_pointer_typet &pointer_type_selector);
28+
const select_pointer_typet &pointer_type_selector,
29+
bool string_refinement_enabled);
2930

3031
struct main_function_resultt
3132
{
@@ -60,6 +61,8 @@ struct main_function_resultt
6061
}
6162
};
6263

64+
irep_idt get_java_class_literal_initializer_signature();
65+
6366
/// Figures out the entry point of the code to verify
6467
main_function_resultt get_main_symbol(
6568
const symbol_table_baset &symbol_table,

0 commit comments

Comments
 (0)