Skip to content

Commit 040542b

Browse files
committed
Java frontend: support lambda autoboxing against generic types
It's possible to use a primitive-typed method-reference to implement an appropriately specialised generic functional interface (e.g. something that takes an integer can satisfy Consumer<Byte> by direct method reference without needing to introduce a stub method to do the conversion). Here we implement that by unboxing using the java.lang.Number interface where necessary, thereby catching all valid widening numeric casts at the same time (Short.intValue for example will do the widening and deliver the required primitive type). When boxing we always box to the primitive type's corresponding boxed type, as this is all the compiler seems to allow (i.e. it won't allow satisfying Producer<Long> using a function that returns an int, even though that is type-safe). Note this solution uses virtual dispatch a little more often than is needed: in some simple cases the class file contains enough information to know that the Object we're unboxing must in practice be an Integer (for example) and so no dispatch is necessary. That doesn't account for all cases where a generic interface is implemented however, so I did not pursue this possibility.
1 parent 70f9729 commit 040542b

File tree

11 files changed

+197
-46
lines changed

11 files changed

+197
-46
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
public class Test {
2+
3+
interface TakesT<T> {
4+
public Object take(T t);
5+
}
6+
7+
interface YieldsT<T> {
8+
public T yield();
9+
}
10+
11+
interface TakesInt {
12+
public Object take(int i);
13+
}
14+
15+
interface TakesLong {
16+
public Object take(long l);
17+
}
18+
19+
interface YieldsInt {
20+
public int yield();
21+
}
22+
23+
public static Object takesLong(long l) { return l; }
24+
25+
public static short yieldsShort() { return (short)1; }
26+
27+
public static void test() {
28+
29+
// Implement a generic interface using a primitive-typed function
30+
// that requires us to unbox the Short then widen it:
31+
TakesT<Short> takesShort = Test::takesLong;
32+
assert takesShort.take((short)1).equals(1L);
33+
34+
// Surprisingly, the following case doesn't compile despite
35+
// being type-safe and symmetrical to the one above:
36+
// YieldsT<Long> yieldsLong = Test::yieldsShort;
37+
38+
// So instead, here's a simpler case, that just requires us
39+
// to box the return value, not widen it then box:
40+
YieldsT<Short> yieldsShort = Test::yieldsShort;
41+
assert yieldsShort.yield() == (short)1;
42+
43+
// Now test the reverse: using an instantiated generic to
44+
// implement a primitive-typed interface:
45+
46+
TakesT<Long> takesLong = x -> x + 1;
47+
// Again this doesn't compile:
48+
// TakesInt takesInt = takesLong::take;
49+
// So here's a simpler example that only boxes to implement the
50+
// primitive interface, rather than boxes then widens:
51+
TakesLong takesPrimitiveLong = takesLong::take;
52+
assert takesPrimitiveLong.take(1L).equals(2L);
53+
54+
// And finally, using an instantiated generic to satisfy a primitive
55+
// return value. This one does work with the conversion requiring both
56+
// to unbox the value and to widen it.
57+
YieldsT<Short> yieldsShortGeneric = () -> (short)1;
58+
YieldsInt yieldsInt = yieldsShortGeneric::yield;
59+
assert yieldsInt.yield() == 1;
60+
61+
// Check we got to the end:
62+
assert false;
63+
64+
}
65+
66+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
CORE
2+
Test
3+
--function Test.test --cp `../../../../scripts/format_classpath.sh . ../../../lib/java-models-library/target/core-models.jar`
4+
\[java::Test\.test:\(\)V\.assertion\.1\] line 32 assertion at file Test\.java line 32 function java::Test\.test:\(\)V bytecode-index 15: SUCCESS
5+
\[java::Test\.test:\(\)V\.assertion\.2\] line 41 assertion at file Test\.java line 41 function java::Test\.test:\(\)V bytecode-index 29: SUCCESS
6+
\[java::Test\.test:\(\)V\.assertion\.3\] line 52 assertion at file Test\.java line 52 function java::Test\.test:\(\)V bytecode-index 50: SUCCESS
7+
\[java::Test\.test:\(\)V\.assertion\.4\] line 59 assertion at file Test\.java line 59 function java::Test\.test:\(\)V bytecode-index 68: SUCCESS
8+
\[java::Test\.test:\(\)V\.assertion\.5\] line 62 assertion at file Test\.java line 62 function java::Test\.test:\(\)V bytecode-index 74: FAILURE
9+
^EXIT=10$
10+
^SIGNAL=0$

jbmc/src/java_bytecode/java_utils.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,33 +72,41 @@ get_java_primitive_type_info(const typet &maybe_primitive_type)
7272
type_info_by_primitive_type = {
7373
{java_boolean_type(),
7474
{"java::java.lang.Boolean",
75-
"java::java.lang.Boolean.valueOf:(Z)Ljava/lang/Boolean;"}},
75+
"java::java.lang.Boolean.valueOf:(Z)Ljava/lang/Boolean;",
76+
"java::java.lang.Boolean.booleanValue:()Z"}},
7677
{java_byte_type(),
7778
{"java::java.lang.Byte",
78-
"java::java.lang.Byte.valueOf:(B)Ljava/lang/Byte;"}},
79+
"java::java.lang.Byte.valueOf:(B)Ljava/lang/Byte;",
80+
"java::java.lang.Number.byteValue:()B"}},
7981
{java_char_type(),
8082
{"java::java.lang.Character",
8183
"java::java.lang.Character.valueOf:(C)"
82-
"Ljava/lang/Character;"}},
84+
"Ljava/lang/Character;",
85+
"java::java.lang.Character.charValue:()C"}},
8386
{java_double_type(),
8487
{"java::java.lang.Double",
8588
"java::java.lang.Double.valueOf:(D)"
86-
"Ljava/lang/Double;"}},
89+
"Ljava/lang/Double;",
90+
"java::java.lang.Number.doubleValue:()D"}},
8791
{java_float_type(),
8892
{"java::java.lang.Float",
8993
"java::java.lang.Float.valueOf:(F)"
90-
"Ljava/lang/Float;"}},
94+
"Ljava/lang/Float;",
95+
"java::java.lang.Number.floatValue:()F"}},
9196
{java_int_type(),
9297
{"java::java.lang.Integer",
9398
"java::java.lang.Integer.valueOf:(I)"
94-
"Ljava/lang/Integer;"}},
99+
"Ljava/lang/Integer;",
100+
"java::java.lang.Number.intValue:()I"}},
95101
{java_long_type(),
96102
{"java::java.lang.Long",
97-
"java::java.lang.Long.valueOf:(J)Ljava/lang/Long;"}},
103+
"java::java.lang.Long.valueOf:(J)Ljava/lang/Long;",
104+
"java::java.lang.Number.longValue:()J"}},
98105
{java_short_type(),
99106
{"java::java.lang.Short",
100107
"java::java.lang.Short.valueOf:(S)"
101-
"Ljava/lang/Short;"}}};
108+
"Ljava/lang/Short;",
109+
"java::java.lang.Number.shortValue:()S"}}};
102110

103111
auto found = type_info_by_primitive_type.find(maybe_primitive_type);
104112
return found == type_info_by_primitive_type.end() ? nullptr : &found->second;

jbmc/src/java_bytecode/java_utils.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ struct java_primitive_type_infot
4040
/// Full identifier of the boxed type's factory method that takes the
4141
/// corresponding primitive as its sole argument
4242
const irep_idt boxed_type_factory_method;
43+
/// Full identifier of the most general boxed-type method that yields this
44+
/// type. For most primitives that means the corresponding method on
45+
/// java.lang.Number; for Character and Boolean it means the method on their
46+
/// specific boxed type.
47+
const irep_idt unboxing_function_name;
4348
};
4449

4550
/// If \p primitive_type is a Java primitive type, return information about it,

jbmc/src/java_bytecode/lambda_synthesis.cpp

Lines changed: 100 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -610,12 +610,78 @@ static symbol_exprt instantiate_new_object(
610610
return new_instance_var;
611611
}
612612

613+
/// If \p maybe_boxed_type is a boxed primitive return its unboxing method;
614+
/// otherwise return empty.
615+
static optionalt<irep_idt> get_unboxing_method(const typet &maybe_boxed_type)
616+
{
617+
const irep_idt &boxed_type_id =
618+
to_struct_tag_type(maybe_boxed_type.subtype()).get_identifier();
619+
const java_boxed_type_infot *boxed_type_info =
620+
get_boxed_type_info_by_name(boxed_type_id);
621+
return boxed_type_info ? boxed_type_info->unboxing_function_name
622+
: optionalt<irep_idt>{};
623+
}
624+
625+
/// Produce a class_method_descriptor_exprt or symbol_exprt for
626+
/// \p function_symbol depending on whether virtual dispatch could be required
627+
/// (i.e., if it is non-static and its 'this' parameter is a non-final type)
628+
exprt make_function_expr(
629+
const symbolt &function_symbol,
630+
const symbol_tablet &symbol_table)
631+
{
632+
const auto &method_type = to_java_method_type(function_symbol.type);
633+
if(!method_type.has_this())
634+
return function_symbol.symbol_expr();
635+
const irep_idt &declared_on_class_id =
636+
to_struct_tag_type(method_type.get_this()->type().subtype())
637+
.get_identifier();
638+
const auto &this_symbol = symbol_table.lookup_ref(declared_on_class_id);
639+
if(to_java_class_type(this_symbol.type).get_final())
640+
return function_symbol.symbol_expr();
641+
642+
// Neither final nor static; make a class_method_descriptor_exprt that will
643+
// trigger remove_virtual_functions to produce a virtual dispatch table:
644+
645+
const std::string &function_name = id2string(function_symbol.name);
646+
const auto method_name_start_idx = function_name.rfind('.');
647+
const irep_idt mangled_method_name =
648+
function_name.substr(method_name_start_idx + 1);
649+
650+
return class_method_descriptor_exprt{function_symbol.type,
651+
mangled_method_name,
652+
declared_on_class_id,
653+
function_symbol.base_name};
654+
}
655+
613656
/// If \p expr needs (un)boxing to satisfy \p required_type, add the required
614657
/// symbols to \p symbol_table and code to \p code_block, then return an
615658
/// expression giving the adjusted expression. Otherwise return \p expr
616659
/// unchanged. \p role is a suggested name prefix for any temporary variable
617660
/// needed; \p function_id is the id of the function any created code it
618661
/// added to.
662+
///
663+
/// Regarding the apparent behaviour of the Java compiler / runtime with regard
664+
/// to adapting generic methods to/from primtitive types:
665+
///
666+
/// When unboxing, it appears to permit widening numeric conversions at the
667+
/// same time. For example, implementing Consumer<Short> by a function of
668+
/// type long -> void is possible, as the generated function will look like
669+
/// impl(Object o) { realfunc(((Number)o).longValue()); }
670+
///
671+
/// On the other hand when boxing to satisfy a generic interface type this is
672+
/// not permitted: in theory we should be able to implement Producer<Long> by a
673+
/// function of type () -> int, generating boxing code like
674+
/// impl() { return Long.valueOf(realfunc()); }
675+
///
676+
/// However it appears there is no way to convey to the lambda metafactory
677+
/// that a Long is really required rather than an Integer (the obvious
678+
/// conversion from int), so the compiler forbids this and requires that only
679+
/// simple boxing is performed.
680+
///
681+
/// Therefore when unboxing we cast to Number first, while when boxing and the
682+
/// target type is not a specific boxed type (i.e. the target is Object or
683+
/// Number etc) then we use the primitive type as our cue regarding the boxed
684+
/// type to produce.
619685
exprt box_or_unbox_type_if_necessary(
620686
exprt expr,
621687
const typet &required_type,
@@ -635,44 +701,40 @@ exprt box_or_unbox_type_if_necessary(
635701

636702
// One is a pointer, the other a primitive -- box or unbox as necessary, and
637703
// check the types are consistent:
638-
if(original_is_pointer)
639-
{
640-
const irep_idt &boxed_type_id =
641-
to_struct_tag_type(original_type.subtype()).get_identifier();
642-
const auto *boxed_type_info = get_boxed_type_info_by_name(boxed_type_id);
643-
INVARIANT(
644-
boxed_type_info != nullptr,
645-
"Only boxed primitive types should participate in a pointer vs."
646-
" primitive type disagreement");
647-
648-
symbol_exprt fresh_local = create_and_declare_local(
649-
function_id, role + "_unboxed", required_type, symbol_table, code_block);
650-
const symbolt &unboxing_function_symbol =
651-
symbol_table.lookup_ref(boxed_type_info->unboxing_function_name);
652-
code_block.add(code_function_callt{
653-
fresh_local, unboxing_function_symbol.symbol_expr(), {expr}});
654-
655-
return std::move(fresh_local);
656-
}
657-
else
658-
{
659-
const auto *primitive_type_info =
660-
get_java_primitive_type_info(original_type);
661-
INVARIANT(
662-
primitive_type_info != nullptr,
663-
"A Java non-pointer type involved in a type disagreement should"
664-
" be a primitive");
665-
666-
symbol_exprt fresh_local = create_and_declare_local(
667-
function_id, role + "_boxed", required_type, symbol_table, code_block);
668-
const symbolt &boxed_type_factory_method =
669-
symbol_table.lookup_ref(primitive_type_info->boxed_type_factory_method);
670-
671-
code_block.add(code_function_callt{
672-
fresh_local, boxed_type_factory_method.symbol_expr(), {expr}});
673-
674-
return std::move(fresh_local);
675-
}
704+
705+
const auto *primitive_type_info = get_java_primitive_type_info(
706+
original_is_pointer ? required_type : original_type);
707+
INVARIANT(
708+
primitive_type_info != nullptr,
709+
"A Java non-pointer type involved in a type disagreement should"
710+
" be a primitive");
711+
712+
const irep_idt fresh_local_name =
713+
role + (original_is_pointer ? "_unboxed" : "_boxed");
714+
715+
const symbol_exprt fresh_local = create_and_declare_local(
716+
function_id, fresh_local_name, required_type, symbol_table, code_block);
717+
718+
const irep_idt transform_function_id =
719+
original_is_pointer
720+
? get_unboxing_method(original_type) // Use static type if known
721+
.value_or(primitive_type_info->unboxing_function_name)
722+
: primitive_type_info->boxed_type_factory_method;
723+
724+
const symbolt &transform_function_symbol =
725+
symbol_table.lookup_ref(transform_function_id);
726+
727+
const typet &transform_function_param_type =
728+
to_code_type(transform_function_symbol.type).parameters()[0].type();
729+
const exprt cast_expr =
730+
typecast_exprt::conditional_cast(expr, transform_function_param_type);
731+
732+
code_block.add(code_function_callt{
733+
fresh_local,
734+
make_function_expr(transform_function_symbol, symbol_table),
735+
{expr}});
736+
737+
return std::move(fresh_local);
676738
}
677739

678740
/// Box or unbox expr as per \ref box_or_unbox_type_if_necessary, then cast the

0 commit comments

Comments
 (0)