Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion kmir/src/kmir/decoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@
BoolT,
EnumT,
Initialized,
IntegerLength,
IntT,
IntTy,
Multiple,
Pointer,
PrimitiveInt,
PtrT,
RefT,
Single,
StrT,
StructT,
UintT,
WrappingRange,
)
from .value import (
NO_SIZE,
Expand All @@ -43,7 +46,7 @@

from pyk.kast import KInner

from .ty import FieldsShape, IntegerLength, LayoutShape, MachineSize, Scalar, TagEncoding, Ty, TypeMetadata, UintTy
from .ty import FieldsShape, LayoutShape, MachineSize, Scalar, TagEncoding, Ty, TypeMetadata, UintTy
from .value import MetadataSize


Expand Down Expand Up @@ -419,5 +422,10 @@ def _extract_tag(*, data: bytes, tag_offset: MachineSize, tag: Scalar) -> tuple[
tag_data = data[tag_offset.in_bytes : tag_offset.in_bytes + length.value]
tag_value = int.from_bytes(tag_data, byteorder='little', signed=False)
return tag_value, length
# special case: niche-encoded optional pointer, None == 0x00000000
case Initialized(value=Pointer(), valid_range=WrappingRange(start=1, end=0)) if (
data == b'\x00\x00\x00\x00\x00\x00\x00\x00'
):
return 0, IntegerLength.I64
case _:
raise ValueError(f'Unsupported tag: {tag}')
111 changes: 110 additions & 1 deletion kmir/src/kmir/kdist/mir-semantics/rt/decoding.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,10 @@ Known element sizes for common types:

### Enum decoding

Enum decoding is for now restricted to enums wihout any fields.
Enum decoding is for now restricted to a few special cases.

#### Enums without any fields
If there are no fields, the enum can be decoded by using their data as the discriminant.

```k
rule #decodeValue(
Expand Down Expand Up @@ -289,6 +292,112 @@ Enum decoding is for now restricted to enums wihout any fields.
requires TAG =/=Int DISCRIMINANT
```

#### Enums with two variants

Having two variants with possibly zero or one field each is a very common case,
it includes a number of standard library types such as `Option` and `Result`.

The following rules are specialised to the case of encoding an `Option`.
An important distinction here is whether or not the tag is niche-encoded.
If the option holds data that has all-zero as a possible value, a separate tag is used, usually as the first field.
In both cases we expect the tag to be in the single shared field, and the discriminant to be just 0 and 1.

```k
rule #decodeValue(
BYTES
, typeInfoEnumType(...
name: _
, adtDef: _
, discriminants: discriminant(0) discriminant(1) .Discriminants
, fields: (.Tys : (FIELD_TYPE .Tys) : .Tyss)
, layout:
someLayoutShape(layoutShape(...
fields: fieldsShapeArbitrary(mk(... offsets: machineSize(0) .MachineSizes))
, variants:
variantsShapeMultiple(
mk(...
tag: scalarInitialized(
mk(...
value: primitiveInt(mk(... length: TAG_WIDTH, signed: _))
, validRange: _RANGE
)
)
, tagEncoding: tagEncodingDirect
, tagField: 0
, variants: _VARIANTS
)
)
, abi: _ABI
, abiAlign: _ABI_ALIGN
, size: _SIZE
))
) #as ENUM_TYPE
)
=> #decodeOptionTag01(BYTES, TAG_WIDTH, FIELD_TYPE, ENUM_TYPE)

syntax Evaluation ::= #decodeOptionTag01 ( Bytes , IntegerLength , Ty , TypeInfo ) [function, total]
// --------------------------------------------------------------------------------------
rule #decodeOptionTag01(BYTES, _LEN, _TY, _ENUM_TYPE)
=> Aggregate(variantIdx(0), .List)
requires 0 ==Int BYTES[0] // expect only 0 or 1 as tags, so higher bytes do not matter
[preserves-definedness]
rule #decodeOptionTag01(BYTES, LEN, TY, _ENUM_TYPE)
=> Aggregate(variantIdx(1), ListItem(#decodeValue(substrBytes(BYTES, #byteLength(LEN), lengthBytes(BYTES)), lookupTy(TY))))
requires 1 ==Int BYTES[0] // expect only 0 or 1 as tags, so higher bytes do not matter
[preserves-definedness]
rule #decodeOptionTag01(BYTES, _LEN, _TY, ENUM_TYPE)
=> UnableToDecode(BYTES, ENUM_TYPE)
[owise]

syntax Int ::= #byteLength ( IntegerLength ) [function, total]
// -----------------------------------------------------------
rule #byteLength(integerLengthI8 ) => 1
rule #byteLength(integerLengthI16 ) => 2
rule #byteLength(integerLengthI32 ) => 4
rule #byteLength(integerLengthI64 ) => 8
rule #byteLength(integerLengthI128) => 16

```

If the option holds a reference or pointer, no extra tag field is required.
The tag is niche-encoded with an all-zero value representing `None`.
However, in this case only a `None` can actually be decoded.
Any pointer or reference would have a very different encoding in KMIR, not a non-zero address.

```k
rule #decodeValue(
BYTES
, typeInfoEnumType(...
name: _
, adtDef: _
, discriminants: discriminant(0) discriminant(1) .Discriminants
, fields: (.Tys : (_FIELD_TYPE .Tys) : .Tyss)
, layout:
someLayoutShape(layoutShape(...
fields: fieldsShapeArbitrary(mk(... offsets: machineSize(0) .MachineSizes))
, variants:
variantsShapeMultiple(
mk(...
tag: scalarInitialized(
mk(...
value: primitivePointer(_ADDR_SPACE)
, validRange: wrappingRange(1, 0)
)
)
, tagEncoding: tagEncodingNiche() // FIXME incomplete data model in the AST
, tagField: 0
, variants: _VARIANTS
)
)
, abi: _ABI
, abiAlign: _ABI_ALIGN
, size: _SIZE
))
)
)
=> Aggregate(variantIdx(0), .List)
requires 0 ==Int Bytes2Int(BYTES, LE, Unsigned)
```

## Array Allocations

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Aggregate ( variantIdx ( 0 ) , .List )
181 changes: 181 additions & 0 deletions kmir/src/tests/integration/data/decode-value/enum-option-ref-none.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
{
"bytes": [
0,
0,
0,
0,
0,
0,
0,
0
],
"types": [
[
0,
{
"PrimitiveType": {
"Uint": "U8"
}
}
],
[
11,
{
"RefType": {
"pointee_type": 0,
"layout": {
"fields": "Primitive",
"variants": {
"Single": {
"index": 0
}
},
"abi": {
"Scalar": {
"Initialized": {
"value": {
"Pointer": 0
},
"valid_range": {
"start": 1,
"end": 18446744073709551615
}
}
}
},
"abi_align": 8,
"size": {
"num_bits": 64
}
}
}
}
]
],
"typeInfo": {
"EnumType": {
"name": "std::option::Option<&u8>",
"adt_def": 24,
"discriminants": [
0,
1
],
"fields": [
[],
[
4
]
],
"layout": {
"fields": {
"Arbitrary": {
"offsets": [
{
"num_bits": 0
}
]
}
},
"variants": {
"Multiple": {
"tag": {
"Initialized": {
"value": {
"Pointer": 0
},
"valid_range": {
"start": 1,
"end": 0
}
}
},
"tag_encoding": {
"Niche": {
"untagged_variant": 1,
"niche_variants": {
"start": 0,
"end": 0
},
"niche_start": 0
}
},
"tag_field": 0,
"variants": [
{
"fields": {
"Arbitrary": {
"offsets": []
}
},
"variants": {
"Single": {
"index": 0
}
},
"abi": {
"Aggregate": {
"sized": true
}
},
"abi_align": 1,
"size": {
"num_bits": 0
}
},
{
"fields": {
"Arbitrary": {
"offsets": [
{
"num_bits": 0
}
]
}
},
"variants": {
"Single": {
"index": 1
}
},
"abi": {
"Scalar": {
"Initialized": {
"value": {
"Pointer": 0
},
"valid_range": {
"start": 1,
"end": 18446744073709551615
}
}
}
},
"abi_align": 8,
"size": {
"num_bits": 64
}
}
]
}
},
"abi": {
"Scalar": {
"Initialized": {
"value": {
"Pointer": 0
},
"valid_range": {
"start": 1,
"end": 0
}
}
}
},
"abi_align": 8,
"size": {
"num_bits": 64
}
}
}
}
}
32 changes: 32 additions & 0 deletions kmir/src/tests/integration/data/exec-smir/allocs/option_consts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
const OPT_NO_U32: Option<u32> = None;
const OPT_A_U64: Option<u64> = Some(42);
const OPT_NO_REF: Option<&[u32;3]> = None;

fn main() {
let a = 1;
let result = sub_(0,a);
assert_eq!(result, OPT_NO_U32);
assert_eq!(OPT_A_U64, Some(41 + a as u64));
assert!(OPT_NO_REF.is_none());
let arr0 = [0u32; 3];
assert_eq!(opt_ref(&arr0), None);
let arr = [a;3];
assert_eq!(opt_ref(&arr), Some(&arr));
}

// basically checked_sub
fn sub_(a:u32, b:u32) -> Option<u32> {
if a < b {
None // this becomes an Operand::Constant
} else {
Some(a - b)
}
}

fn opt_ref(arr: &[u32;3]) -> Option<&[u32;3]> {
if arr[0] == 0 {
None
} else {
Some(arr)
}
}
Loading