From 41811bab142f54ebd417a5f56a76dfb4e7307236 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 30 May 2025 12:52:17 +0000 Subject: [PATCH] Value set: lift offset from numeric constants to expressions We can safely track arbitrary expressions as pointer offsets rather than limit ourselves to just constant offsets (and then treating all other expressions as "unknown"). --- src/goto-symex/goto_symex_state.cpp | 28 +------ src/goto-symex/shadow_memory_util.cpp | 2 +- src/pointer-analysis/value_set.cpp | 112 ++++++++++++++++---------- src/pointer-analysis/value_set.h | 25 ++---- 4 files changed, 75 insertions(+), 92 deletions(-) diff --git a/src/goto-symex/goto_symex_state.cpp b/src/goto-symex/goto_symex_state.cpp index 9ac435c89a4..435b7ce4cea 100644 --- a/src/goto-symex/goto_symex_state.cpp +++ b/src/goto-symex/goto_symex_state.cpp @@ -26,8 +26,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "goto_symex_can_forward_propagate.h" #include "symex_target_equation.h" -static void get_l1_name(exprt &expr); - goto_symex_statet::goto_symex_statet( const symex_targett::sourcet &_source, std::size_t max_field_sensitive_array_size, @@ -123,20 +121,7 @@ renamedt goto_symex_statet::assignment( else propagation.erase_if_exists(l1_identifier); - { - // update value sets - exprt l1_rhs(rhs); - get_l1_name(l1_rhs); - - const ssa_exprt l1_lhs = remove_level_2(lhs); - if(run_validation_checks) - { - DATA_INVARIANT(!check_renaming_l1(l1_lhs), "lhs renaming failed on l1"); - DATA_INVARIANT(!check_renaming_l1(l1_rhs), "rhs renaming failed on l1"); - } - - value_set.assign(l1_lhs, l1_rhs, ns, rhs_is_simplified, is_shared); - } + value_set.assign(lhs, rhs, ns, rhs_is_simplified, is_shared); #ifdef DEBUG std::cout << "Assigning " << l1_identifier << '\n'; @@ -783,17 +768,6 @@ void goto_symex_statet::rename( l1_type_entry.first->second=type; } -static void get_l1_name(exprt &expr) -{ - // do not reset the type ! - - if(is_ssa_expr(expr)) - to_ssa_expr(expr).remove_level_2(); - else - Forall_operands(it, expr) - get_l1_name(*it); -} - /// Dumps the current state of symex, printing the function name and location /// number for each stack frame in the currently active thread. /// This is for use from the debugger or in debug code; please don't delete it diff --git a/src/goto-symex/shadow_memory_util.cpp b/src/goto-symex/shadow_memory_util.cpp index ebc6dde0867..045c1edd4a6 100644 --- a/src/goto-symex/shadow_memory_util.cpp +++ b/src/goto-symex/shadow_memory_util.cpp @@ -981,7 +981,7 @@ normalize(const object_descriptor_exprt &expr, const namespacet &ns) { return expr; } - if(expr.offset().id() == ID_unknown) + if(!expr.offset().is_constant()) { return expr; } diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 2c41b0484b6..96b8b089b4b 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -23,14 +23,13 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include #ifdef DEBUG -#include -#include -#include +# include #endif #include "add_failed_symbols.h" @@ -184,7 +183,7 @@ void value_sett::output(std::ostream &out, const std::string &indent) const stream << "<" << format(o) << ", "; if(o_it->second) - stream << *o_it->second; + stream << format(*o_it->second); else stream << '*'; @@ -261,7 +260,7 @@ exprt value_sett::to_expr(const object_map_dt::value_type &it) const od.object()=object; if(it.second) - od.offset() = from_integer(*it.second, c_index_type()); + od.offset() = *it.second; od.type()=od.object().type(); @@ -352,7 +351,7 @@ bool value_sett::eval_pointer_offset( it=object_map.begin(); it!=object_map.end(); it++) - if(!it->second) + if(!it->second || !it->second->is_constant()) return false; else { @@ -362,7 +361,8 @@ bool value_sett::eval_pointer_offset( if(!ptr_offset.has_value()) return false; - *ptr_offset += *it->second; + *ptr_offset += + numeric_cast_v(to_constant_expr(*it->second)); if(mod && *ptr_offset != previous_offset) return false; @@ -556,8 +556,11 @@ void value_sett::get_value_set_rec( } else if(expr.id()==ID_symbol) { - auto entry_index = get_index_of_symbol( - to_symbol_expr(expr).get_identifier(), expr.type(), suffix, ns); + const symbol_exprt expr_l1 = is_ssa_expr(expr) + ? remove_level_2(to_ssa_expr(expr)) + : to_symbol_expr(expr); + auto entry_index = + get_index_of_symbol(expr_l1.get_identifier(), expr.type(), suffix, ns); if(entry_index.has_value()) make_union(dest, find_entry(*entry_index)->object_map); @@ -623,7 +626,7 @@ void value_sett::get_value_set_rec( insert( dest, exprt(ID_null_object, to_pointer_type(expr.type()).base_type()), - mp_integer{0}); + from_integer(0, c_index_type())); } else if( expr.type().id() == ID_unsignedbv || expr.type().id() == ID_signedbv) @@ -655,7 +658,10 @@ void value_sett::get_value_set_rec( if(op.is_zero()) { - insert(dest, exprt(ID_null_object, empty_typet{}), mp_integer{0}); + insert( + dest, + exprt(ID_null_object, empty_typet{}), + from_integer(0, c_index_type())); } else { @@ -696,7 +702,7 @@ void value_sett::get_value_set_rec( throw expr.id_string()+" expected to have at least two operands"; object_mapt pointer_expr_set; - std::optional i; + std::optional additional_offset; // special case for plus/minus and exactly one pointer std::optional ptr_operand; @@ -704,7 +710,6 @@ void value_sett::get_value_set_rec( expr.type().id() == ID_pointer && (expr.id() == ID_plus || expr.id() == ID_minus)) { - bool non_const_offset = false; for(const auto &op : expr.operands()) { if(op.type().id() == ID_pointer) @@ -717,24 +722,20 @@ void value_sett::get_value_set_rec( ptr_operand = op; } - else if(!non_const_offset) + else { - auto offset = numeric_cast(op); - if(!offset.has_value()) - { - i.reset(); - non_const_offset = true; - } + if(!additional_offset.has_value()) + additional_offset = op; else { - if(!i.has_value()) - i = mp_integer{0}; - i = *i + *offset; + additional_offset = plus_exprt{ + *additional_offset, + typecast_exprt::conditional_cast(op, additional_offset->type())}; } } } - if(ptr_operand.has_value() && i.has_value()) + if(ptr_operand.has_value() && additional_offset.has_value()) { typet pointer_base_type = to_pointer_type(ptr_operand->type()).base_type(); @@ -745,18 +746,22 @@ void value_sett::get_value_set_rec( if(!size.has_value() || (*size) == 0) { - i.reset(); + additional_offset.reset(); } else { - *i *= *size; + additional_offset = mult_exprt{ + *additional_offset, from_integer(*size, additional_offset->type())}; if(expr.id()==ID_minus) { DATA_INVARIANT( to_minus_expr(expr).lhs() == *ptr_operand, "unexpected subtraction of pointer from integer"); - i->negate(); + DATA_INVARIANT( + additional_offset->type().id() != ID_unsignedbv, + "offset type must support negation"); + additional_offset = unary_minus_exprt{*additional_offset}; } } } @@ -790,8 +795,15 @@ void value_sett::get_value_set_rec( offsett offset = it->second; // adjust by offset - if(offset && i.has_value()) - *offset += *i; + if(offset && additional_offset.has_value()) + { + offset = simplify_expr( + plus_exprt{ + *offset, + typecast_exprt::conditional_cast( + *additional_offset, offset->type())}, + ns); + } else offset.reset(); @@ -871,7 +883,7 @@ void value_sett::get_value_set_rec( dynamic_object.set_instance(location_number); dynamic_object.valid()=true_exprt(); - insert(dest, dynamic_object, mp_integer{0}); + insert(dest, dynamic_object, from_integer(0, c_index_type())); } else if(statement==ID_cpp_new || statement==ID_cpp_new_array) @@ -884,7 +896,7 @@ void value_sett::get_value_set_rec( dynamic_object.set_instance(location_number); dynamic_object.valid()=true_exprt(); - insert(dest, dynamic_object, mp_integer{0}); + insert(dest, dynamic_object, from_integer(0, c_index_type())); } else insert(dest, exprt(ID_unknown, original_type)); @@ -1331,12 +1343,17 @@ void value_sett::get_reference_set_rec( expr.id()==ID_string_constant || expr.id()==ID_array) { + exprt l1_expr = + is_ssa_expr(expr) ? remove_level_2(to_ssa_expr(expr)) : expr; + if( expr.type().id() == ID_array && to_array_type(expr.type()).element_type().id() == ID_array) - insert(dest, expr); + { + insert(dest, l1_expr); + } else - insert(dest, expr, mp_integer{0}); + insert(dest, l1_expr, from_integer(0, c_index_type())); return; } @@ -1365,7 +1382,7 @@ void value_sett::get_reference_set_rec( const index_exprt &index_expr=to_index_expr(expr); const exprt &array=index_expr.array(); - const exprt &offset=index_expr.index(); + const exprt &index = index_expr.index(); DATA_INVARIANT( array.type().id() == ID_array, "index takes array-typed operand"); @@ -1393,22 +1410,24 @@ void value_sett::get_reference_set_rec( from_integer(0, c_index_type())); offsett o = a_it->second; - const auto i = numeric_cast(offset); - if(offset.is_zero()) - { - } - else if(i.has_value() && o) + if(!index.is_zero() && o.has_value()) { auto size = pointer_offset_size(array_type.element_type(), ns); if(!size.has_value() || *size == 0) o.reset(); else - *o = *i * (*size); + { + o = simplify_expr( + plus_exprt{ + *o, + typecast_exprt::conditional_cast( + mult_exprt{index, from_integer(*size, index.type())}, + o->type())}, + ns); + } } - else - o.reset(); insert(dest, deref_index_expr, o); } @@ -1658,7 +1677,9 @@ void value_sett::assign_rec( if(lhs.id()==ID_symbol) { - const irep_idt &identifier=to_symbol_expr(lhs).get_identifier(); + const symbol_exprt lhs_l1 = + is_ssa_expr(lhs) ? remove_level_2(to_ssa_expr(lhs)) : to_symbol_expr(lhs); + const irep_idt &identifier = lhs_l1.get_identifier(); update_entry( entryt{identifier, suffix}, lhs.type(), values_rhs, add_to_sets); @@ -1864,8 +1885,11 @@ void value_sett::apply_code_rec( (lhs_type.id() == ID_array && to_array_type(lhs_type).element_type().id() == ID_pointer)) { + const symbol_exprt lhs_l1 = is_ssa_expr(lhs) + ? remove_level_2(to_ssa_expr(lhs)) + : to_symbol_expr(lhs); // assign the address of the failed object - if(auto failed = get_failed_symbol(to_symbol_expr(lhs), ns)) + if(auto failed = get_failed_symbol(to_symbol_expr(lhs_l1), ns)) { address_of_exprt address_of_expr(*failed, to_pointer_type(lhs.type())); assign(lhs, address_of_expr, ns, false, false); diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index 5c71946acd8..73119a8ffb6 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -12,15 +12,14 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_POINTER_ANALYSIS_VALUE_SET_H #define CPROVER_POINTER_ANALYSIS_VALUE_SET_H -#include - -#include #include #include #include "object_numbering.h" #include "value_sets.h" +#include + class namespacet; class xmlt; @@ -76,9 +75,9 @@ class value_sett /// in value sets. static object_numberingt object_numbering; - /// Represents the offset into an object: either a unique integer offset, - /// or an unknown value, represented by `!offset`. - typedef std::optional offsett; + /// Represents the offset into an object: either some (possibly constant) + /// expression, or an unknown value, represented by `!offset`. + typedef std::optional offsett; /// Represents a set of expressions (`exprt` instances) with corresponding /// offsets (`offsett` instances). This is the RHS set of a single row of @@ -140,20 +139,6 @@ class value_sett return insert(dest, object_numbering.number(src), offsett()); } - /// Adds an expression to an object map, with known offset. If the - /// destination map has an existing element for the same expression - /// with a differing offset its offset is marked unknown. - /// \param dest: object map to update - /// \param src: expression to add - /// \param offset_value: offset into `src` - bool insert( - object_mapt &dest, - const exprt &src, - const mp_integer &offset_value) const - { - return insert(dest, object_numbering.number(src), offsett(offset_value)); - } - /// Adds a numbered expression and offset to an object map. If the /// destination map has an existing element for the same expression /// with a differing offset its offset is marked unknown.