Skip to content

Improve the handling of if expressions representing character arrays #1157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2 changes: 0 additions & 2 deletions src/solvers/refinement/string_constraint_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,6 @@ class string_constraint_generatort

static constant_exprt constant_char(int i, const typet &char_type);

// Used by string refinement
void add_axioms_for_if_array(const exprt &lhs, const if_exprt &expr);

private:
// The integer with the longest string is Integer.MIN_VALUE which is -2^31,
Expand Down
65 changes: 22 additions & 43 deletions src/solvers/refinement/string_constraint_generator_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ Author: Romain Brenguier, [email protected]
#include <util/arith_tools.h>
#include <util/pointer_predicates.h>
#include <util/ssa_expr.h>
#include <iostream>

unsigned string_constraint_generatort::next_symbol_id=1;

Expand Down Expand Up @@ -232,6 +231,25 @@ string_exprt string_constraint_generatort::add_axioms_for_refined_string(
else if(string.id()==ID_struct)
{
const string_exprt &s=to_string_expr(string);
INVARIANT(
s.length().id()==ID_symbol || s.length().id()==ID_constant,
"string length should be a symbol or a constant");
irep_idt content_id=s.content().id();
INVARIANT(
content_id==ID_symbol || content_id==ID_array || content_id==ID_if,
"string content should be a symbol, a constant array, or an if");
if(content_id==ID_if)
{
// If the string content is an if expression, we add axioms ensuring
// the content is the same as the content in the 'true' branch when the
// condition holds and the 'false' branch otherwise.
if_exprt if_expr=to_if_expr(s.content());
string_exprt str_true=add_axioms_for_refined_string(
string_exprt(s.length(), if_expr.true_case(), type));
string_exprt str_false=add_axioms_for_refined_string(
string_exprt(s.length(), if_expr.false_case(), type));
return add_axioms_for_if(if_exprt(if_expr.cond(), str_true, str_false));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new check for the universal invariants will restrict what can be in if_expr.cond(), i.e. no array accesses (with the current implementation of add_axioms_for_if at least).

}
add_default_axioms(s);
return s;
}
Expand Down Expand Up @@ -267,56 +285,17 @@ string_exprt string_constraint_generatort::add_axioms_for_if(
implies_exprt(expr.cond(), res.axiom_for_has_same_length_as(t)));
symbol_exprt qvar=fresh_univ_index("QA_string_if_true", index_type);
equal_exprt qequal(res[qvar], t[qvar]);
axioms.push_back(string_constraintt(qvar, t.length(), expr.cond(), qequal));
string_constraintt sc1(qvar, t.length(), implies_exprt(expr.cond(), qequal));
axioms.push_back(sc1);
axioms.push_back(
implies_exprt(not_exprt(expr.cond()), res.axiom_for_has_same_length_as(f)));
symbol_exprt qvar2=fresh_univ_index("QA_string_if_false", index_type);
equal_exprt qequal2(res[qvar2], f[qvar2]);
string_constraintt sc2(qvar2, f.length(), not_exprt(expr.cond()), qequal2);
string_constraintt sc2(qvar2, f.length(), or_exprt(expr.cond(), qequal2));
axioms.push_back(sc2);
return res;
}

/// Add axioms enforcing that the content of the first array is equal to
/// the true case array if the condition is true and
/// the else case array otherwise.
/// \param lhs: an expression
/// \param expr: an if expression of type array
void string_constraint_generatort::add_axioms_for_if_array(
const exprt &lhs, const if_exprt &expr)
{
PRECONDITION(lhs.type()==expr.type());
PRECONDITION(expr.type().id()==ID_array);
exprt t=expr.true_case();
exprt f=expr.false_case();
INVARIANT(t.type()==f.type(), "branches of if_exprt should have same type");
const array_typet &type=to_array_type(t.type());
const typet &index_type=type.size().type();
const exprt max_length=from_integer(max_string_length, index_type);

// We add axioms:
// a1 : forall qvar<max_length, cond => lhs[qvar] = t[qvar]
// a1 : forall qvar2<max_length, !cond => lhs[qvar] = f[qvar]
symbol_exprt qvar=fresh_univ_index("QA_array_if_true", index_type);
equal_exprt qequal(index_exprt(lhs, qvar), index_exprt(t, qvar));

// In case t is a constant array of fixed length is does not make sense
// to talk about indexes exceeding this length
exprt upper_bound_t=
(t.id()==ID_array)?from_integer(t.operands().size(), index_type):max_length;
string_constraintt a1(qvar, upper_bound_t, expr.cond(), qequal);
axioms.push_back(a1);

symbol_exprt qvar2=fresh_univ_index("QA_array_if_false", index_type);
equal_exprt qequal2(index_exprt(lhs, qvar2), index_exprt(f, qvar2));
// In case f is a constant array of fixed length is does not make sense
// to talk about indexes exceeding this length
exprt upper_bound_f=
(f.id()==ID_array)?from_integer(f.operands().size(), index_type):max_length;
string_constraintt a2(qvar2, upper_bound_f, not_exprt(expr.cond()), qequal2);
axioms.push_back(a2);
}

/// if a symbol representing a string is present in the symbol_to_string table,
/// returns the corresponding string, if the symbol is not yet present, creates
/// a new string with the correct type depending on whether the mode is java or
Expand Down
110 changes: 85 additions & 25 deletions src/solvers/refinement/string_refinement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,28 +117,71 @@ void string_refinementt::add_instantiations()
}
}

/// List the simple expressions on which the expression depends in the
/// `symbol_resolve` map. A simple expression is either a symbol or a
/// constant array
/// \param expr: an expression
static void depends_in_symbol_map(const exprt &expr, std::vector<exprt> &accu)
{
if(expr.id()==ID_if)
{
if_exprt if_expr=to_if_expr(expr);
depends_in_symbol_map(if_expr.true_case(), accu);
depends_in_symbol_map(if_expr.false_case(), accu);
}
else if(expr.id()==ID_struct)
{
string_exprt str=to_string_expr(expr);
depends_in_symbol_map(str.content(), accu);
}
else
{
INVARIANT(
expr.id()==ID_symbol || expr.id()==ID_array || expr.id()==ID_array_of,
"leaf in symbol resolve should be a symbol or a constant array");
accu.push_back(expr);
}
}

/// keeps a map of symbols to expressions, such as none of the mapped values
/// exist as a key
/// \param lhs: a symbol expression
/// \param rhs: an expression to map it to
/// \param rhs: an expression to map it to, which should be either a symbol
/// a string_exprt, an array_exprt, an array_of_exprt or an
/// if_exprt with branches of the previous kind
void string_refinementt::add_symbol_to_symbol_map(
const exprt &lhs, const exprt &rhs)
{
PRECONDITION(lhs.id()==ID_symbol);
PRECONDITION(rhs.id()==ID_symbol ||
rhs.id()==ID_array ||
rhs.id()==ID_array_of ||
rhs.id()==ID_if ||
(rhs.id()==ID_struct &&
refined_string_typet::is_refined_string_type(rhs.type())));

// We insert the mapped value of the rhs, if it exists.
auto it=symbol_resolve.find(rhs);
const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs;

symbol_resolve[lhs]=new_rhs;
reverse_symbol_resolve[new_rhs].push_back(lhs);

const std::list<exprt> &symbols_to_update_with_new_rhs(
reverse_symbol_resolve[lhs]);
for(exprt item : symbols_to_update_with_new_rhs)
// List the leaves of new_rhs
std::vector<exprt> leaves;
depends_in_symbol_map(new_rhs, leaves);

const auto &symbols_to_update_with_new_rhs=reverse_symbol_resolve[lhs];

// We need to update all the symbols which depend on lhs
for(const exprt &item : symbols_to_update_with_new_rhs)
replace_expr(symbol_resolve, symbol_resolve[item]);

// Every time a symbol at the leaves is updated we need to update lhs
// and the symbols that depend on it
for(const auto &leaf : leaves)
{
symbol_resolve[item]=new_rhs;
reverse_symbol_resolve[new_rhs].push_back(item);
reverse_symbol_resolve[leaf].push_back(lhs);
for(const exprt &item : symbols_to_update_with_new_rhs)
reverse_symbol_resolve[leaf].push_back(item);
}
}

Expand All @@ -158,9 +201,6 @@ void string_refinementt::set_char_array_equality(
index_exprt arraycell(rhs, from_integer(i, index_type));
equal_exprt arrayeq(arraycell, rhs.operands()[i]);
add_lemma(arrayeq, false);
#if 0
generator.axioms.push_back(arrayeq);
#endif
}
}
// At least for Java (as it is currently pre-processed), we need not consider
Expand Down Expand Up @@ -205,7 +245,8 @@ bool string_refinementt::is_char_array(const typet &type) const
/// add lemmas to the solver corresponding to the given equation
/// \param lhs: left hand side of an equality expression
/// \param rhs: right and side of the equality
/// \return false if the lemmas were added successfully, true otherwise
/// \return true if the assignemnt needs to be handled by the parent class
/// via `set_to`
bool string_refinementt::add_axioms_for_string_assigns(
const exprt &lhs, const exprt &rhs)
{
Expand All @@ -225,8 +266,8 @@ bool string_refinementt::add_axioms_for_string_assigns(
}
else if(rhs.id()==ID_if)
{
generator.add_axioms_for_if_array(lhs, to_if_expr(rhs));
return false;
add_symbol_to_symbol_map(lhs, rhs);
return true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not quite clear why we return true here. I'd say the documentation of the enclosing function should be updated to mention that we expect the equality to be sent to super::set_to depending on that return value. Some comments should probably also be added in set_to to illustrate what is happening. This is not directly related to that PR and should be done in a separate commit, but I think it should be nevertheless done, because for the moment it is not clear to me that this solution works.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added some comments in the documentation of this function, but yes we need to clarify what is the behaviour of set_to

}
else
{
Expand Down Expand Up @@ -632,7 +673,7 @@ void string_refinementt::add_lemma(
/// \return an array expression or an array_of_exprt
exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const
{
exprt arr_val=get_array(arr);
exprt arr_val=simplify_expr(get_array(arr), ns);
exprt size_val=supert::get(size);
size_val=simplify_expr(size_val, ns);
typet char_type=arr.type().subtype();
Expand Down Expand Up @@ -1412,6 +1453,30 @@ void string_refinementt::update_index_set(const std::vector<exprt> &cur)
update_index_set(axiom);
}

/// An expression representing an array of characters can be in the form of an
/// if expression for instance `cond?array1:(cond2:array2:array3)`.
/// We return all the array expressions contained in `array_expr`.
/// \param array_expr : an expression representing an array
/// \return a vector containing symbols and constant arrays contained in the
/// expression
static std::vector<exprt> sub_arrays(const exprt &array_expr)
{
if(array_expr.id()==ID_if)
{
std::vector<exprt> res1=sub_arrays(to_if_expr(array_expr).true_case());
std::vector<exprt> res2=sub_arrays(to_if_expr(array_expr).false_case());
res1.insert(res1.end(), res2.begin(), res2.end());
return res1;
}
else
{
INVARIANT(
array_expr.id()==ID_symbol || array_expr.id()==ID_array,
"character arrays should be symbol, constant array, or if expression");
return std::vector<exprt>(1, array_expr);
}
}

/// add to the index set all the indices that appear in the formula and the
/// upper bound minus one
/// \par parameters: a string constraint
Expand All @@ -1423,17 +1488,13 @@ void string_refinementt::add_to_index_set(const exprt &s, exprt i)
mp_integer mpi;
to_integer(i, mpi);
if(mpi<0)
{
debug() << "add_to_index_set : ignoring negative number " << mpi << eom;
return;
}
}
if(index_set[s].insert(i).second)
{
debug() << "adding to index set of " << from_expr(ns, "", s)
<< ": " << from_expr(ns, "", i) << eom;
current_index_set[s].insert(i);
}

std::vector<exprt> subs=sub_arrays(s);
for(const auto &sub : subs)
if(index_set[sub].insert(i).second)
current_index_set[sub].insert(i);
}

void string_refinementt::initial_index_set(const string_constraintt &axiom)
Expand Down Expand Up @@ -1504,7 +1565,6 @@ void string_refinementt::update_index_set(const exprt &formula)
}
}


// Will be used to visit an expression and return the index used
// with the given char array that contains qvar
class find_index_visitort: public const_expr_visitort
Expand Down