Skip to content

Commit c8b7465

Browse files
committed
Byte extract lowering: support extracting non-byte-aligned arrays
Alignment to bytes may be true for ANSI C, but isn't the case for our C front-end (which supports arrays of single bits), and not true for the overall framework in general. This required fixing alignment assumptions both in unpack_array_vector (which turns an arbitrary type into an array of bytes) as well as lower_byte_extract_array_vector (which picks an array/vector of arbitrary subtype from an array of bytes as prepared by unpack_array_vector).
1 parent 6f3c16c commit c8b7465

File tree

2 files changed

+165
-54
lines changed

2 files changed

+165
-54
lines changed

src/util/lower_byte_operators.cpp

Lines changed: 143 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -563,29 +563,32 @@ static exprt unpack_array_vector(
563563
// refine the number of elements to extract in case the element width is known
564564
// and a multiple of bytes; otherwise we will expand the entire array/vector
565565
optionalt<mp_integer> num_elements = src_size;
566-
if(element_bits > 0 && element_bits % bits_per_byte == 0)
566+
if(element_bits > 0)
567567
{
568568
if(!num_elements.has_value())
569569
{
570570
// turn bytes into elements, rounding up
571-
num_elements = (*max_bytes + el_bytes - 1) / el_bytes;
571+
num_elements =
572+
(*max_bytes * bits_per_byte + element_bits - 1) / element_bits;
572573
}
573574

574575
if(offset_bytes.has_value())
575576
{
576577
// compute offset as number of elements
577-
first_element = *offset_bytes / el_bytes;
578-
// insert offset_bytes-many nil bytes into the output array
578+
first_element = *offset_bytes * bits_per_byte / element_bits;
579+
// insert offset_bytes-many zero bytes into the output array
579580
byte_operands.resize(
580581
numeric_cast_v<std::size_t>(std::min(
581-
*offset_bytes - (*offset_bytes % el_bytes),
582-
*num_elements * el_bytes)),
582+
(*offset_bytes * bits_per_byte -
583+
((*offset_bytes * bits_per_byte) % element_bits)) /
584+
bits_per_byte,
585+
*num_elements * element_bits / bits_per_byte)),
583586
from_integer(0, bv_typet{bits_per_byte}));
584587
}
585588
}
586589

587590
// the maximum number of bytes is an upper bound in case the size of the
588-
// array/vector is unknown; if element_bits was usable above this will
591+
// array/vector is unknown; if element_bits was non-zero above this will
589592
// have been turned into a number of elements already
590593
if(!num_elements)
591594
num_elements = *max_bytes;
@@ -595,42 +598,90 @@ static exprt unpack_array_vector(
595598
? to_array_type(src_simp.type()).index_type()
596599
: to_vector_type(src_simp.type()).index_type();
597600

598-
for(mp_integer i = first_element; i < *num_elements; ++i)
601+
if(element_bits % bits_per_byte == 0)
599602
{
600-
exprt element;
601-
602-
if(
603-
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
604-
i < src_simp.operands().size())
603+
for(mp_integer i = first_element; i < *num_elements; ++i)
605604
{
606-
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
607-
element = src_simp.operands()[index_int];
605+
exprt element;
606+
607+
if(
608+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
609+
i < src_simp.operands().size())
610+
{
611+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
612+
element = src_simp.operands()[index_int];
613+
}
614+
else
615+
{
616+
element = index_exprt(src_simp, from_integer(i, index_type));
617+
}
618+
619+
// recursively unpack each element so that we eventually just have an
620+
// array of bytes left
621+
622+
const optionalt<mp_integer> element_max_bytes =
623+
max_bytes
624+
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
625+
: optionalt<mp_integer>{};
626+
const std::size_t element_max_bytes_int =
627+
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
628+
: el_bytes;
629+
630+
exprt sub = unpack_rec(
631+
element, little_endian, {}, element_max_bytes, bits_per_byte, ns, true);
632+
exprt::operandst sub_operands = instantiate_byte_array(
633+
sub, 0, element_max_bytes_int, bits_per_byte, ns);
634+
byte_operands.insert(
635+
byte_operands.end(), sub_operands.begin(), sub_operands.end());
636+
637+
if(max_bytes && byte_operands.size() >= *max_bytes)
638+
break;
608639
}
609-
else
640+
}
641+
else
642+
{
643+
const std::size_t element_bits_int =
644+
numeric_cast_v<std::size_t>(element_bits);
645+
646+
exprt::operandst elements;
647+
for(mp_integer i = *num_elements - 1; i >= first_element; --i)
610648
{
611-
element = index_exprt(src_simp, from_integer(i, index_type));
649+
if(
650+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
651+
i < src_simp.operands().size())
652+
{
653+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
654+
elements.push_back(typecast_exprt::conditional_cast(
655+
src_simp.operands()[index_int], bv_typet{element_bits_int}));
656+
}
657+
else
658+
{
659+
elements.push_back(typecast_exprt::conditional_cast(
660+
index_exprt(src_simp, from_integer(i, index_type)),
661+
bv_typet{element_bits_int}));
662+
}
612663
}
613664

614-
// recursively unpack each element so that we eventually just have an array
615-
// of bytes left
616-
617-
const optionalt<mp_integer> element_max_bytes =
618-
max_bytes
619-
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
620-
: optionalt<mp_integer>{};
621-
const std::size_t element_max_bytes_int =
622-
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
623-
: el_bytes;
624-
625-
exprt sub = unpack_rec(
626-
element, little_endian, {}, element_max_bytes, bits_per_byte, ns, true);
627-
exprt::operandst sub_operands =
628-
instantiate_byte_array(sub, 0, element_max_bytes_int, bits_per_byte, ns);
665+
const std::size_t merged_bit_width = numeric_cast_v<std::size_t>(
666+
(*num_elements - first_element) * element_bits);
667+
if(!little_endian)
668+
std::reverse(elements.begin(), elements.end());
669+
concatenation_exprt merged{std::move(elements), bv_typet{merged_bit_width}};
670+
671+
exprt merged_as_bytes =
672+
unpack_rec(merged, little_endian, {}, max_bytes, bits_per_byte, ns, true);
673+
674+
const std::size_t merged_byte_width =
675+
(merged_bit_width + bits_per_byte - 1) / bits_per_byte;
676+
const std::size_t max_bytes_int =
677+
max_bytes.has_value()
678+
? std::min(numeric_cast_v<std::size_t>(*max_bytes), merged_byte_width)
679+
: merged_byte_width;
680+
681+
exprt::operandst sub_operands = instantiate_byte_array(
682+
merged_as_bytes, 0, max_bytes_int, bits_per_byte, ns);
629683
byte_operands.insert(
630684
byte_operands.end(), sub_operands.begin(), sub_operands.end());
631-
632-
if(max_bytes && byte_operands.size() >= *max_bytes)
633-
break;
634685
}
635686

636687
const std::size_t size = byte_operands.size();
@@ -1120,17 +1171,55 @@ static exprt lower_byte_extract_array_vector(
11201171
for(std::size_t i = 0; i < *num_elements; ++i)
11211172
#pragma GCC diagnostic pop
11221173
{
1123-
plus_exprt new_offset(
1124-
unpacked.offset(),
1125-
from_integer(
1126-
i * element_bits / src.get_bits_per_byte(),
1127-
unpacked.offset().type()));
1174+
if(element_bits % src.get_bits_per_byte() == 0)
1175+
{
1176+
plus_exprt new_offset(
1177+
unpacked.offset(),
1178+
from_integer(
1179+
i * element_bits / src.get_bits_per_byte(),
1180+
unpacked.offset().type()));
11281181

1129-
byte_extract_exprt tmp(unpacked);
1130-
tmp.type() = subtype;
1131-
tmp.offset() = new_offset;
1182+
byte_extract_exprt tmp(unpacked);
1183+
tmp.type() = subtype;
1184+
tmp.offset() = new_offset;
11321185

1133-
operands.push_back(lower_byte_extract(tmp, ns));
1186+
operands.push_back(lower_byte_extract(tmp, ns));
1187+
}
1188+
else
1189+
{
1190+
const mp_integer first_index =
1191+
i * element_bits / src.get_bits_per_byte();
1192+
const mp_integer last_index =
1193+
((i + 1) * element_bits + src.get_bits_per_byte() - 1) /
1194+
src.get_bits_per_byte();
1195+
1196+
exprt::operandst to_concat;
1197+
to_concat.reserve(
1198+
numeric_cast_v<std::size_t>(last_index - first_index));
1199+
for(mp_integer j = first_index; j < last_index; ++j)
1200+
{
1201+
to_concat.push_back(index_exprt{
1202+
unpacked.op(),
1203+
plus_exprt{
1204+
unpacked.offset(), from_integer(j, unpacked.offset().type())}});
1205+
}
1206+
1207+
const std::size_t base = numeric_cast_v<std::size_t>(
1208+
(i * element_bits) % src.get_bits_per_byte());
1209+
const std::size_t last =
1210+
numeric_cast_v<std::size_t>(base + element_bits - 1);
1211+
bv_typet concat_type{src.get_bits_per_byte() * to_concat.size()};
1212+
endianness_mapt endianness_map(
1213+
concat_type, src.id() == ID_byte_extract_little_endian, ns);
1214+
const auto bounds = map_bounds(endianness_map, base, last);
1215+
extractbits_exprt element{
1216+
concatenation_exprt{to_concat, std::move(concat_type)},
1217+
from_integer(bounds.ub, size_type()),
1218+
from_integer(bounds.lb, size_type()),
1219+
subtype};
1220+
1221+
operands.push_back(element);
1222+
}
11341223
}
11351224

11361225
exprt result;
@@ -1154,6 +1243,10 @@ static exprt lower_byte_extract_array_vector(
11541243
std::to_string(array_comprehension_index_counter),
11551244
array_type.index_type()};
11561245

1246+
PRECONDITION_WITH_DIAGNOSTICS(
1247+
element_bits % src.get_bits_per_byte() == 0,
1248+
"byte extracting non-byte-aligned arrays requires constant size");
1249+
11571250
plus_exprt new_offset{
11581251
unpacked.offset(),
11591252
typecast_exprt::conditional_cast(
@@ -1300,17 +1393,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
13001393
{
13011394
const typet &subtype = to_type_with_subtype(src.type()).subtype();
13021395

1303-
// consider ways of dealing with arrays of unknown subtype size or with a
1304-
// subtype size that does not fit byte boundaries; currently we fall back to
1305-
// stitching together consecutive elements down below
13061396
auto element_bits = pointer_offset_bits(subtype, ns);
1307-
if(
1308-
element_bits.has_value() && *element_bits >= 1 &&
1309-
*element_bits % src.get_bits_per_byte() == 0)
1310-
{
1311-
return lower_byte_extract_array_vector(
1312-
src, unpacked, subtype, *element_bits, ns);
1313-
}
1397+
PRECONDITION_WITH_DIAGNOSTICS(
1398+
element_bits.has_value() && *element_bits >= 1,
1399+
"byte extracting arrays of unknown/zero subtype is not yet implemented");
1400+
1401+
return lower_byte_extract_array_vector(
1402+
src, unpacked, subtype, *element_bits, ns);
13141403
}
13151404
else if(src.type().id() == ID_complex)
13161405
{

unit/util/lower_byte_operators.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ TEST_CASE("byte extract and bits", "[core][util][lowering][byte_extract]")
6262
bit_array_type};
6363
const exprt lower_be1 = lower_byte_extract(be1, ns);
6464
REQUIRE(lower_be1 == *array_of_bits);
65+
66+
const byte_extract_exprt be2{
67+
little_endian ? ID_byte_extract_little_endian
68+
: ID_byte_extract_big_endian,
69+
*array_of_bits,
70+
from_integer(0, index_type()),
71+
config.ansi_c.char_width,
72+
u16};
73+
const exprt lower_be2 = lower_byte_extract(be2, ns);
74+
REQUIRE(lower_be2 == sixteen_bits);
6575
}
6676

6777
GIVEN("Big endian")
@@ -89,6 +99,16 @@ TEST_CASE("byte extract and bits", "[core][util][lowering][byte_extract]")
8999
bit_array_type};
90100
const exprt lower_be1 = lower_byte_extract(be1, ns);
91101
REQUIRE(lower_be1 == *array_of_bits);
102+
103+
const byte_extract_exprt be2{
104+
little_endian ? ID_byte_extract_little_endian
105+
: ID_byte_extract_big_endian,
106+
*array_of_bits,
107+
from_integer(0, index_type()),
108+
config.ansi_c.char_width,
109+
u16};
110+
const exprt lower_be2 = lower_byte_extract(be2, ns);
111+
REQUIRE(lower_be2 == sixteen_bits);
92112
}
93113
}
94114

@@ -270,6 +290,7 @@ SCENARIO("byte_extract_lowering", "[core][util][lowering][byte_extract]")
270290
union_typet({{"compA", u32}, {"compB", u64}}),
271291
c_enum_typet(u16),
272292
c_enum_typet(unsignedbv_typet(128)),
293+
array_typet{bv_typet{1}, from_integer(128, size_type())},
273294
array_typet(u8, size),
274295
array_typet(s32, size),
275296
array_typet(u64, size),
@@ -426,6 +447,7 @@ SCENARIO("byte_update_lowering", "[core][util][lowering][byte_update]")
426447
union_typet({{"compA", u32}, {"compB", u64}}),
427448
c_enum_typet(u16),
428449
c_enum_typet(unsignedbv_typet(128)),
450+
array_typet{bv_typet{1}, from_integer(128, size_type())},
429451
array_typet(u8, size),
430452
array_typet(s32, size),
431453
array_typet(u64, size),

0 commit comments

Comments
 (0)