Skip to content

Commit 262affb

Browse files
author
Thomas Kiley
authored
Merge pull request #2350 from thk123/feature/TG-3813/load-specified-methods
[TG-3813] Allow specifying specific methods by regex to be loaded by lazy methods
2 parents 122108a + 783fcea commit 262affb

File tree

14 files changed

+337
-70
lines changed

14 files changed

+337
-70
lines changed
Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
33
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point test.sety
4-
^EXIT=6$
54
^SIGNAL=0$
6-
entry point 'test\.sety' is ambiguous between:
7-
test\.sety:\(I\)V
8-
test\.sety:\(F\)V
5+
CI lazy methods: elaborate java::test\.sety:\(I\)V
6+
CI lazy methods: elaborate java::test\.sety:\(F\)V
97
--
108
--
119
This doesn't work under symex-driven lazy loading because it is incompatible with --lazy-methods

jbmc/regression/jbmc/lazyloading11/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
3-
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V' --lazy-methods-extra-entry-point 'test.sety:(F)V'
3+
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V' --lazy-methods-extra-entry-point 'test.sety:\(F\)V'
44
^EXIT=0$
55
^SIGNAL=0$
66
VERIFICATION SUCCESSFUL

jbmc/regression/jbmc/lazyloading7/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
3-
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(I)V'
3+
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(I\)V'
44
^EXIT=0$
55
^SIGNAL=0$
66
VERIFICATION SUCCESSFUL

jbmc/regression/jbmc/lazyloading8/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CORE symex-driven-lazy-loading-expected-failure
22
test.class
3-
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:(F)V'
3+
--lazy-methods --verbosity 10 --function test.f --lazy-methods-extra-entry-point 'test.sety:\(F\)V'
44
^EXIT=0$
55
^SIGNAL=0$
66
VERIFICATION SUCCESSFUL

jbmc/src/java_bytecode/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ SRC = bytecode_info.cpp \
3232
java_string_literals.cpp \
3333
java_types.cpp \
3434
java_utils.cpp \
35+
load_method_by_regex.cpp \
3536
mz_zip_archive.cpp \
3637
remove_exceptions.cpp \
3738
remove_instanceof.cpp \

jbmc/src/java_bytecode/ci_lazy_methods.cpp

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ ci_lazy_methodst::ci_lazy_methodst(
3434
const symbol_tablet &symbol_table,
3535
const irep_idt &main_class,
3636
const std::vector<irep_idt> &main_jar_classes,
37-
const std::vector<irep_idt> &lazy_methods_extra_entry_points,
37+
const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
3838
java_class_loadert &java_class_loader,
3939
const std::vector<irep_idt> &extra_instantiated_classes,
4040
const select_pointer_typet &pointer_type_selector,
@@ -103,8 +103,13 @@ bool ci_lazy_methodst::operator()(
103103

104104
// Add any extra entry points specified; we should elaborate these in the
105105
// same way as the main function.
106-
std::vector<irep_idt> extra_entry_points=lazy_methods_extra_entry_points;
107-
resolve_method_names(extra_entry_points, symbol_table);
106+
std::vector<irep_idt> extra_entry_points;
107+
for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
108+
{
109+
const auto &extra_methods = extra_function_generator(symbol_table);
110+
extra_entry_points.insert(
111+
extra_entry_points.end(), extra_methods.begin(), extra_methods.end());
112+
}
108113
methods_to_convert_later.insert(
109114
extra_entry_points.begin(), extra_entry_points.end());
110115

@@ -355,54 +360,6 @@ ci_lazy_methodst::entry_point_methods(const symbol_tablet &symbol_table)
355360
return methods_to_convert_later;
356361
}
357362

358-
/// Translates the given list of method names from human-readable to
359-
/// internal syntax.
360-
/// Expands any wildcards (entries ending in '.*') in the given method
361-
/// list to include all non-static methods defined on the given class.
362-
/// \param [in, out] methods: List of methods to expand. Any wildcard entries
363-
/// will be deleted and the expanded entries appended to the end.
364-
/// \param symbol_table: global symbol table
365-
void ci_lazy_methodst::resolve_method_names(
366-
std::vector<irep_idt> &methods,
367-
const symbol_tablet &symbol_table)
368-
{
369-
std::vector<irep_idt> new_methods;
370-
for(const irep_idt &method : methods)
371-
{
372-
const std::string &method_str=id2string(method);
373-
if(!has_suffix(method_str, ".*"))
374-
{
375-
std::string error_message;
376-
irep_idt internal_name=
377-
resolve_friendly_method_name(
378-
method_str,
379-
symbol_table,
380-
error_message);
381-
if(internal_name==irep_idt())
382-
throw "entry point "+error_message;
383-
new_methods.push_back(internal_name);
384-
}
385-
else
386-
{
387-
irep_idt classname="java::"+method_str.substr(0, method_str.length()-2);
388-
if(!symbol_table.has_symbol(classname))
389-
throw "wildcard entry point '"+method_str+"': unknown class";
390-
391-
for(const auto &name_symbol : symbol_table.symbols)
392-
{
393-
if(name_symbol.second.type.id()!=ID_code)
394-
continue;
395-
if(!to_code_type(name_symbol.second.type).has_this())
396-
continue;
397-
if(has_prefix(id2string(name_symbol.first), id2string(classname)))
398-
new_methods.push_back(name_symbol.first);
399-
}
400-
}
401-
}
402-
403-
methods=std::move(new_methods);
404-
}
405-
406363
/// Build up a list of methods whose type may be passed around reachable
407364
/// from the entry point.
408365
/// \param entry_points: list of fully-qualified function names that

jbmc/src/java_bytecode/ci_lazy_methods.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,17 @@ typedef std::function<
9191
bool(const irep_idt &function_id, ci_lazy_methods_neededt)>
9292
method_convertert;
9393

94+
typedef std::function<std::vector<irep_idt>(const symbol_tablet &)>
95+
load_extra_methodst;
96+
9497
class ci_lazy_methodst:public messaget
9598
{
9699
public:
97100
ci_lazy_methodst(
98101
const symbol_tablet &symbol_table,
99102
const irep_idt &main_class,
100103
const std::vector<irep_idt> &main_jar_classes,
101-
const std::vector<irep_idt> &lazy_methods_extra_entry_points,
104+
const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
102105
java_class_loadert &java_class_loader,
103106
const std::vector<irep_idt> &extra_instantiated_classes,
104107
const select_pointer_typet &pointer_type_selector,
@@ -112,10 +115,6 @@ class ci_lazy_methodst:public messaget
112115
const method_convertert &method_converter);
113116

114117
private:
115-
void resolve_method_names(
116-
std::vector<irep_idt> &methods,
117-
const symbol_tablet &symbol_table);
118-
119118
void initialize_instantiated_classes(
120119
const std::unordered_set<irep_idt> &entry_points,
121120
const namespacet &ns,
@@ -149,7 +148,7 @@ class ci_lazy_methodst:public messaget
149148
class_hierarchyt class_hierarchy;
150149
irep_idt main_class;
151150
std::vector<irep_idt> main_jar_classes;
152-
std::vector<irep_idt> lazy_methods_extra_entry_points;
151+
const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points;
153152
java_class_loadert &java_class_loader;
154153
const std::vector<irep_idt> &extra_instantiated_classes;
155154
const select_pointer_typet &pointer_type_selector;

jbmc/src/java_bytecode/java_bytecode_language.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Author: Daniel Kroening, [email protected]
3737
#include "ci_lazy_methods.h"
3838

3939
#include "expr2java.h"
40+
#include "load_method_by_regex.h"
4041

4142
/// Consume options that are java bytecode specific.
4243
/// \param Command:line options
@@ -94,10 +95,11 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
9495

9596
const std::list<std::string> &extra_entry_points=
9697
cmd.get_values("lazy-methods-extra-entry-point");
97-
lazy_methods_extra_entry_points.insert(
98-
lazy_methods_extra_entry_points.end(),
98+
std::transform(
9999
extra_entry_points.begin(),
100-
extra_entry_points.end());
100+
extra_entry_points.end(),
101+
std::back_inserter(extra_methods),
102+
build_load_method_by_regex);
101103

102104
if(cmd.isset("java-cp-include-files"))
103105
{
@@ -815,7 +817,7 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion(
815817
symbol_table,
816818
main_class,
817819
main_jar_classes,
818-
lazy_methods_extra_entry_points,
820+
extra_methods,
819821
java_class_loader,
820822
java_load_classes,
821823
get_pointer_type_selector(),

jbmc/src/java_bytecode/java_bytecode_language.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ Author: Daniel Kroening, [email protected]
6262
" --lazy-methods-extra-entry-point METHODNAME\n" /* NOLINT(*) */ \
6363
" treat METHODNAME as a possible program entry point for\n" /* NOLINT(*) */ \
6464
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
65-
" A '.*' wildcard is allowed to specify all class members\n"
65+
" METHODNAME can be a regex that will be matched against\n" /* NOLINT(*) */ \
66+
" all symbols. If missing a java:: prefix will be added\n" /* NOLINT(*) */ \
67+
" If no descriptor is found, all overloads of a method will\n"/* NOLINT(*) */ \
68+
" also be added." /* NOLINT(*) */
6669
// clang-format on
6770

6871
class symbolt;
@@ -173,7 +176,6 @@ class java_bytecode_languaget:public languaget
173176
size_t max_user_array_length; // max size for user code created arrays
174177
method_bytecodet method_bytecode;
175178
lazy_methods_modet lazy_methods_mode;
176-
std::vector<irep_idt> lazy_methods_extra_entry_points;
177179
bool string_refinement_enabled;
178180
bool throw_runtime_exceptions;
179181
bool assert_uncaught_exceptions;
@@ -195,6 +197,8 @@ class java_bytecode_languaget:public languaget
195197
class_hierarchyt class_hierarchy;
196198
// List of classes to never load
197199
std::unordered_set<std::string> no_load_classes;
200+
201+
std::vector<load_extra_methodst> extra_methods;
198202
};
199203

200204
std::unique_ptr<languaget> new_java_bytecode_language();
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
2+
/*******************************************************************\
3+
4+
Module: Java Bytecode
5+
6+
Author: Diffblue Ltd.
7+
8+
\*******************************************************************/
9+
10+
#include "load_method_by_regex.h"
11+
12+
#include <regex>
13+
14+
#include <util/symbol_table.h>
15+
16+
/// For a given user provided pattern, return a regex, having dealt with the
17+
/// cases where the user has not prefixed with java:: or suffixed with the
18+
/// descriptor
19+
/// \param pattern: The user provided pattern
20+
/// \return The regex to match with
21+
static std::regex build_regex_from_pattern(const std::string &pattern)
22+
{
23+
std::string modified_pattern = pattern;
24+
if(does_pattern_miss_descriptor(pattern))
25+
modified_pattern += R"(:\(.*\).*)";
26+
27+
if(!has_prefix(pattern, "java::"))
28+
modified_pattern = "java::" + modified_pattern;
29+
30+
return std::regex{modified_pattern};
31+
}
32+
33+
/// Identify if a parameter includes a part that will match a descriptor. That
34+
/// is, does it have a colon separtor.
35+
/// \param pattern: The user provided pattern
36+
/// \return True if no descriptor is found (that is, the only : relates to the
37+
/// java:: prefix.
38+
bool does_pattern_miss_descriptor(const std::string &pattern)
39+
{
40+
const size_t descriptor_index = pattern.rfind(':');
41+
if(descriptor_index == std::string::npos)
42+
return true;
43+
44+
static const std::string java_prefix = "java::";
45+
return descriptor_index == java_prefix.length() - 1 &&
46+
has_prefix(pattern, java_prefix);
47+
}
48+
49+
/// Create a lambda that returns the symbols that the given pattern should be
50+
/// loaded.If the pattern doesn't include a colon for matching the descriptor,
51+
/// append a `:\(.*\).*` to the regex. Note this will mean all overloaded
52+
/// methods will be marked as extra entry points for CI lazy loading.
53+
/// If the pattern doesn't include the java:: prefix, prefix that
54+
/// \param pattern: The user provided pattern
55+
/// \return The lambda to execute.
56+
std::function<std::vector<irep_idt>(const symbol_tablet &symbol_table)>
57+
build_load_method_by_regex(const std::string &pattern)
58+
{
59+
std::regex regex = build_regex_from_pattern(pattern);
60+
61+
return [=](const symbol_tablet &symbol_table) {
62+
std::vector<irep_idt> matched_methods;
63+
for(const auto &symbol : symbol_table.symbols)
64+
{
65+
if(
66+
symbol.second.is_function() &&
67+
std::regex_match(id2string(symbol.first), regex))
68+
{
69+
matched_methods.push_back(symbol.first);
70+
}
71+
}
72+
return matched_methods;
73+
};
74+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*******************************************************************\
2+
3+
Module: Java Bytecode
4+
5+
Author: Diffblue Ltd.
6+
7+
\*******************************************************************/
8+
9+
/// \file
10+
/// Process a pattern to use as a regex for selecting extra entry points for
11+
/// ci_lazy_methodst
12+
13+
#ifndef CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H
14+
#define CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H
15+
16+
#include <java_bytecode/ci_lazy_methods.h>
17+
18+
class symbol_tablet;
19+
20+
std::function<std::vector<irep_idt>(const symbol_tablet &symbol_table)>
21+
build_load_method_by_regex(const std::string &pattern);
22+
23+
bool does_pattern_miss_descriptor(const std::string &pattern);
24+
25+
#endif // CPROVER_JAVA_BYTECODE_LOAD_METHOD_BY_REGEX_H

jbmc/unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ SRC += java_bytecode/goto-programs/class_hierarchy_output.cpp \
2121
java_bytecode/java_types/java_generic_symbol_type.cpp \
2222
java_bytecode/java_types/java_type_from_string.cpp \
2323
java_bytecode/java_utils_test.cpp \
24+
java_bytecode/load_method_by_regex.cpp \
2425
java_bytecode/inherited_static_fields/inherited_static_fields.cpp \
2526
pointer-analysis/custom_value_set_analysis.cpp \
2627
solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \

0 commit comments

Comments
 (0)