Skip to content

Commit 627c2c7

Browse files
committed
Byte extract lowering: support extracting non-byte-aligned arrays
Alignment to bytes may be true for ANSI C, but isn't the case for our C front-end (which supports arrays of single bits), and not true for the overall framework in general. This required fixing alignment assumptions both in unpack_array_vector (which turns an arbitrary type into an array of bytes) as well as lower_byte_extract_array_vector (which picks an array/vector of arbitrary subtype from an array of bytes as prepared by unpack_array_vector).
1 parent b22ded7 commit 627c2c7

File tree

2 files changed

+156
-50
lines changed

2 files changed

+156
-50
lines changed

src/solvers/lowering/byte_operators.cpp

Lines changed: 132 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -535,69 +535,117 @@ static exprt unpack_array_vector(
535535
// refine the number of elements to extract in case the element width is known
536536
// and a multiple of bytes; otherwise we will expand the entire array/vector
537537
optionalt<mp_integer> num_elements = src_size;
538-
if(element_bits > 0 && element_bits % 8 == 0)
538+
if(element_bits > 0)
539539
{
540540
if(!num_elements.has_value())
541541
{
542542
// turn bytes into elements, rounding up
543-
num_elements = (*max_bytes + el_bytes - 1) / el_bytes;
543+
num_elements = (*max_bytes * 8 + element_bits - 1) / element_bits;
544544
}
545545

546546
if(offset_bytes.has_value())
547547
{
548548
// compute offset as number of elements
549-
first_element = *offset_bytes / el_bytes;
550-
// insert offset_bytes-many nil bytes into the output array
549+
first_element = *offset_bytes * 8 / element_bits;
550+
// insert offset_bytes-many zero bytes into the output array
551551
byte_operands.resize(
552-
numeric_cast_v<std::size_t>(*offset_bytes - (*offset_bytes % el_bytes)),
552+
numeric_cast_v<std::size_t>(
553+
(*offset_bytes * 8 - ((*offset_bytes * 8) % element_bits)) / 8),
553554
from_integer(0, bv_typet{8}));
554555
}
555556
}
556557

557558
// the maximum number of bytes is an upper bound in case the size of the
558-
// array/vector is unknown; if element_bits was usable above this will
559+
// array/vector is unknown; if element_bits was non-zero above this will
559560
// have been turned into a number of elements already
560561
if(!num_elements)
561562
num_elements = *max_bytes;
562563

563564
const exprt src_simp = simplify_expr(src, ns);
564565

565-
for(mp_integer i = first_element; i < *num_elements; ++i)
566+
if(element_bits % 8 == 0)
566567
{
567-
exprt element;
568-
569-
if(
570-
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
571-
i < src_simp.operands().size())
568+
for(mp_integer i = first_element; i < *num_elements; ++i)
572569
{
573-
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
574-
element = src_simp.operands()[index_int];
570+
exprt element;
571+
572+
if(
573+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
574+
i < src_simp.operands().size())
575+
{
576+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
577+
element = src_simp.operands()[index_int];
578+
}
579+
else
580+
{
581+
element = index_exprt(src_simp, from_integer(i, index_type()));
582+
}
583+
584+
// recursively unpack each element so that we eventually just have an
585+
// array of bytes left
586+
587+
const optionalt<mp_integer> element_max_bytes =
588+
max_bytes
589+
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
590+
: optionalt<mp_integer>{};
591+
const std::size_t element_max_bytes_int =
592+
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
593+
: el_bytes;
594+
595+
exprt sub =
596+
unpack_rec(element, little_endian, {}, element_max_bytes, ns, true);
597+
exprt::operandst sub_operands =
598+
instantiate_byte_array(sub, 0, element_max_bytes_int, ns);
599+
byte_operands.insert(
600+
byte_operands.end(), sub_operands.begin(), sub_operands.end());
601+
602+
if(max_bytes && byte_operands.size() >= *max_bytes)
603+
break;
575604
}
576-
else
605+
}
606+
else
607+
{
608+
const std::size_t element_bits_int =
609+
numeric_cast_v<std::size_t>(element_bits);
610+
611+
exprt::operandst elements;
612+
for(mp_integer i = *num_elements - 1; i >= first_element; --i)
577613
{
578-
element = index_exprt(src_simp, from_integer(i, c_index_type()));
614+
if(
615+
(src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
616+
i < src_simp.operands().size())
617+
{
618+
const std::size_t index_int = numeric_cast_v<std::size_t>(i);
619+
elements.push_back(typecast_exprt::conditional_cast(
620+
src_simp.operands()[index_int], bv_typet{element_bits_int}));
621+
}
622+
else
623+
{
624+
elements.push_back(typecast_exprt::conditional_cast(
625+
index_exprt(src_simp, from_integer(i, c_index_type())),
626+
bv_typet{element_bits_int}));
627+
}
579628
}
580629

581-
// recursively unpack each element so that we eventually just have an array
582-
// of bytes left
630+
const std::size_t merged_bit_width = numeric_cast_v<std::size_t>(
631+
(*num_elements - first_element) * element_bits);
632+
if(!little_endian)
633+
std::reverse(elements.begin(), elements.end());
634+
concatenation_exprt merged{std::move(elements), bv_typet{merged_bit_width}};
583635

584-
const optionalt<mp_integer> element_max_bytes =
585-
max_bytes
586-
? std::min(mp_integer{el_bytes}, *max_bytes - byte_operands.size())
587-
: optionalt<mp_integer>{};
588-
const std::size_t element_max_bytes_int =
589-
element_max_bytes ? numeric_cast_v<std::size_t>(*element_max_bytes)
590-
: el_bytes;
636+
exprt merged_as_bytes =
637+
unpack_rec(merged, little_endian, {}, max_bytes, ns, true);
638+
639+
const std::size_t merged_byte_width = (merged_bit_width + 7) / 8;
640+
const std::size_t max_bytes_int =
641+
max_bytes.has_value()
642+
? std::min(numeric_cast_v<std::size_t>(*max_bytes), merged_byte_width)
643+
: merged_byte_width;
591644

592-
exprt sub =
593-
unpack_rec(element, little_endian, {}, element_max_bytes, ns, true);
594645
exprt::operandst sub_operands =
595-
instantiate_byte_array(sub, 0, element_max_bytes_int, ns);
646+
instantiate_byte_array(merged_as_bytes, 0, max_bytes_int, ns);
596647
byte_operands.insert(
597648
byte_operands.end(), sub_operands.begin(), sub_operands.end());
598-
599-
if(max_bytes && byte_operands.size() >= *max_bytes)
600-
break;
601649
}
602650

603651
const std::size_t size = byte_operands.size();
@@ -1000,15 +1048,51 @@ static exprt lower_byte_extract_array_vector(
10001048
operands.reserve(*num_elements);
10011049
for(std::size_t i = 0; i < *num_elements; ++i)
10021050
{
1003-
plus_exprt new_offset(
1004-
unpacked.offset(),
1005-
from_integer(i * element_bits / 8, unpacked.offset().type()));
1051+
if(element_bits % 8 == 0)
1052+
{
1053+
plus_exprt new_offset(
1054+
unpacked.offset(),
1055+
from_integer(i * element_bits / 8, unpacked.offset().type()));
10061056

1007-
byte_extract_exprt tmp(unpacked);
1008-
tmp.type() = subtype;
1009-
tmp.offset() = new_offset;
1057+
byte_extract_exprt tmp(unpacked);
1058+
tmp.type() = subtype;
1059+
tmp.offset() = new_offset;
10101060

1011-
operands.push_back(lower_byte_extract(tmp, ns));
1061+
operands.push_back(lower_byte_extract(tmp, ns));
1062+
}
1063+
else
1064+
{
1065+
const mp_integer first_index = i * element_bits / 8;
1066+
const mp_integer last_index = ((i + 1) * element_bits + 7) / 8;
1067+
1068+
exprt::operandst to_concat;
1069+
to_concat.reserve(
1070+
numeric_cast_v<std::size_t>(last_index - first_index));
1071+
for(mp_integer j = first_index; j < last_index; ++j)
1072+
{
1073+
to_concat.push_back(index_exprt{
1074+
unpacked.op(),
1075+
plus_exprt{
1076+
unpacked.offset(), from_integer(j, unpacked.offset().type())}});
1077+
}
1078+
1079+
const std::size_t base =
1080+
numeric_cast_v<std::size_t>((i * element_bits) % 8);
1081+
const std::size_t last =
1082+
numeric_cast_v<std::size_t>(base + element_bits - 1);
1083+
endianness_mapt endianness_map(
1084+
bv_typet{8 * to_concat.size()},
1085+
src.id() == ID_byte_extract_little_endian,
1086+
ns);
1087+
const auto bounds = map_bounds(endianness_map, base, last);
1088+
extractbits_exprt element{
1089+
concatenation_exprt{to_concat, bv_typet{8 * to_concat.size()}},
1090+
from_integer(bounds.ub, size_type()),
1091+
from_integer(bounds.lb, size_type()),
1092+
subtype};
1093+
1094+
operands.push_back(element);
1095+
}
10121096
}
10131097

10141098
exprt result;
@@ -1032,6 +1116,10 @@ static exprt lower_byte_extract_array_vector(
10321116
std::to_string(array_comprehension_index_counter),
10331117
c_index_type()};
10341118

1119+
PRECONDITION_WITH_DIAGNOSTICS(
1120+
element_bits % 8 == 0,
1121+
"byte extracting non-byte-aligned arrays requires constant size");
1122+
10351123
plus_exprt new_offset{
10361124
unpacked.offset(),
10371125
typecast_exprt::conditional_cast(
@@ -1169,15 +1257,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
11691257
{
11701258
const typet &subtype = to_type_with_subtype(src.type()).subtype();
11711259

1172-
// consider ways of dealing with arrays of unknown subtype size or with a
1173-
// subtype size that does not fit byte boundaries; currently we fall back to
1174-
// stitching together consecutive elements down below
11751260
auto element_bits = pointer_offset_bits(subtype, ns);
1176-
if(element_bits.has_value() && *element_bits >= 1 && *element_bits % 8 == 0)
1177-
{
1178-
return lower_byte_extract_array_vector(
1179-
src, unpacked, subtype, *element_bits, ns);
1180-
}
1261+
PRECONDITION_WITH_DIAGNOSTICS(
1262+
element_bits.has_value() && *element_bits >= 1,
1263+
"byte extracting arrays of unknown/zero subtype is not yet implemented");
1264+
1265+
return lower_byte_extract_array_vector(
1266+
src, unpacked, subtype, *element_bits, ns);
11811267
}
11821268
else if(src.type().id() == ID_complex)
11831269
{

unit/solvers/lowering/byte_operators.cpp

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]")
6262
bit_array_type};
6363
const exprt lower_be1 = lower_byte_extract(be1, ns);
6464
REQUIRE(lower_be1 == *array_of_bits);
65+
66+
const byte_extract_exprt be2{
67+
little_endian ? ID_byte_extract_little_endian
68+
: ID_byte_extract_big_endian,
69+
*array_of_bits,
70+
from_integer(0, index_type()),
71+
u16};
72+
const exprt lower_be2 = lower_byte_extract(be2, ns);
73+
REQUIRE(lower_be2 == sixteen_bits);
6574
}
6675

6776
GIVEN("Big endian")
@@ -87,6 +96,15 @@ TEST_CASE("byte extract and bits", "[core][solvers][lowering][byte_extract]")
8796
bit_array_type};
8897
const exprt lower_be1 = lower_byte_extract(be1, ns);
8998
REQUIRE(lower_be1 == *array_of_bits);
99+
100+
const byte_extract_exprt be2{
101+
little_endian ? ID_byte_extract_little_endian
102+
: ID_byte_extract_big_endian,
103+
*array_of_bits,
104+
from_integer(0, index_type()),
105+
u16};
106+
const exprt lower_be2 = lower_byte_extract(be2, ns);
107+
REQUIRE(lower_be2 == sixteen_bits);
90108
}
91109
}
92110

@@ -255,13 +273,15 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
255273
std::vector<typet> types = {
256274
struct_typet({{"comp1", u16}, {"comp2", u16}}),
257275
struct_typet({{"comp1", u32}, {"comp2", u64}}),
258-
struct_typet({{"comp1", u32},
259-
{"compX", c_bit_field_typet(u8, 4)},
260-
{"pad", c_bit_field_typet(u8, 4)},
261-
{"comp2", u8}}),
276+
struct_typet(
277+
{{"comp1", u32},
278+
{"compX", c_bit_field_typet(u8, 4)},
279+
{"pad", c_bit_field_typet(u8, 4)},
280+
{"comp2", u8}}),
262281
union_typet({{"compA", u32}, {"compB", u64}}),
263282
c_enum_typet(u16),
264283
c_enum_typet(unsignedbv_typet(128)),
284+
array_typet{bv_typet{1}, from_integer(128, size_type())},
265285
array_typet(u8, size),
266286
array_typet(s32, size),
267287
array_typet(u64, size),

0 commit comments

Comments
 (0)