Skip to content

Commit 14a52dc

Browse files
Move add_axioms_for_to_lower/upper_case to builtin functions
1 parent 7a19c43 commit 14a52dc

6 files changed

+129
-157
lines changed

src/solvers/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -275,11 +275,11 @@ allocates a new string before calling a primitive.
275275
\copybrief add_axioms_for_substring(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool)
276276
\link add_axioms_for_substring(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool) More... \endlink
277277
* `cprover_string_to_lower_case` :
278-
\copybrief add_axioms_for_to_lower_case
279-
\link add_axioms_for_to_lower_case More... \endlink
278+
\copybrief string_to_lower_case_builtin_functiont::constraints
279+
\link string_to_lower_case_builtin_functiont::constraints More... \endlink
280280
* `cprover_string_to_upper_case` :
281-
\copybrief add_axioms_for_to_upper_case
282-
\link add_axioms_for_to_upper_case More... \endlink
281+
\copybrief string_to_upper_case_builtin_functiont::constraints(symbol_generatort&)
282+
\link string_to_upper_case_builtin_functiont::constraints(symbol_generatort&) More... \endlink
283283
* `cprover_string_trim` :
284284
\copybrief add_axioms_for_trim(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool)
285285
\link add_axioms_for_trim(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &array_pool) More... \endlink

src/solvers/refinement/string_builtin_function.cpp

Lines changed: 91 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -315,13 +315,82 @@ optionalt<exprt> string_to_lower_case_builtin_functiont::eval(
315315
return make_string(input_opt.value(), type);
316316
}
317317

318+
/// Expression which is true for uppercase characters of the Basic Latin and
319+
/// Latin-1 supplement of unicode.
320+
static exprt is_upper_case(const exprt &character)
321+
{
322+
const exprt char_A = from_integer('A', character.type());
323+
const exprt char_Z = from_integer('Z', character.type());
324+
exprt::operandst upper_case;
325+
// Characters between 'A' and 'Z' are upper-case
326+
upper_case.push_back(
327+
and_exprt(
328+
binary_relation_exprt(char_A, ID_le, character),
329+
binary_relation_exprt(character, ID_le, char_Z)));
330+
331+
// Characters between 0xc0 (latin capital A with grave)
332+
// and 0xd6 (latin capital O with diaeresis) are upper-case
333+
upper_case.push_back(
334+
and_exprt(
335+
binary_relation_exprt(
336+
from_integer(0xc0, character.type()), ID_le, character),
337+
binary_relation_exprt(
338+
character, ID_le, from_integer(0xd6, character.type()))));
339+
340+
// Characters between 0xd8 (latin capital O with stroke)
341+
// and 0xde (latin capital thorn) are upper-case
342+
upper_case.push_back(
343+
and_exprt(
344+
binary_relation_exprt(
345+
from_integer(0xd8, character.type()), ID_le, character),
346+
binary_relation_exprt(
347+
character, ID_le, from_integer(0xde, character.type()))));
348+
return disjunction(upper_case);
349+
}
350+
351+
/// Expression which is true for lower_case characters of the Basic Latin and
352+
/// Latin-1 supplement of unicode.
353+
static exprt is_lower_case(const exprt &character)
354+
{
355+
return is_upper_case(
356+
minus_exprt(character, from_integer(0x20, character.type())));
357+
}
358+
359+
/// Set of constraints ensuring `result` corresponds to `input` in which
360+
/// uppercase characters have been converted to lowercase.
361+
/// These constraints are:
362+
/// 1. result.length = input.length && return_code = 0
363+
/// 2. forall i < input.length.
364+
/// result[i] = is_upper_case(input[i]) ? input[i] + diff : input[i]
365+
///
366+
/// Where `diff` is the difference between lower case and upper case
367+
/// characters: `diff = 'a'-'A' = 0x20` and `is_upper_case` is true for the
368+
/// upper case characters of Basic Latin and Latin-1 supplement of unicode.
318369
string_constraintst string_to_lower_case_builtin_functiont::constraints(
319370
string_constraint_generatort &generator) const
320371
{
321-
auto pair =
322-
add_axioms_for_to_lower_case(generator.fresh_symbol, result, input);
323-
pair.second.existential.push_back(equal_exprt(pair.first, return_code));
324-
return pair.second;
372+
// \todo for now, only characters in Basic Latin and Latin-1 supplement
373+
// are supported (up to 0x100), we should add others using case mapping
374+
// information from the UnicodeData file.
375+
string_constraintst constraints;
376+
constraints.existential.push_back(length_constraint());
377+
constraints.universal.push_back([&] {
378+
const symbol_exprt idx =
379+
generator.fresh_symbol("QA_lower_case", result.length().type());
380+
const exprt conditional_convert = [&] {
381+
// The difference between upper-case and lower-case for the basic
382+
// latin and latin-1 supplement is 0x20.
383+
const typet &char_type = result.type().subtype();
384+
const exprt diff = from_integer(0x20, char_type);
385+
const exprt converted =
386+
equal_exprt(result[idx], plus_exprt(input[idx], diff));
387+
const exprt non_converted = equal_exprt(result[idx], input[idx]);
388+
return if_exprt(is_upper_case(input[idx]), converted, non_converted);
389+
}();
390+
return string_constraintt(
391+
idx, zero_if_negative(result.length()), conditional_convert);
392+
}());
393+
return constraints;
325394
}
326395

327396
optionalt<exprt> string_to_upper_case_builtin_functiont::eval(
@@ -349,13 +418,26 @@ optionalt<exprt> string_to_upper_case_builtin_functiont::eval(
349418
/// is_lower_case(str[i]) ? res[i] = str[i] - 0x20 : res[i] = str[i]
350419
///
351420
/// \param fresh_symbol: generator of fresh symbols
421+
/// \return set of constraints
352422
string_constraintst string_to_upper_case_builtin_functiont::constraints(
353-
string_constraint_generatort &generator) const
423+
symbol_generatort &fresh_symbol) const
354424
{
355-
auto pair =
356-
add_axioms_for_to_upper_case(generator.fresh_symbol, result, input);
357-
pair.second.existential.push_back(equal_exprt(pair.first, return_code));
358-
return pair.second;
425+
string_constraintst constraints;
426+
constraints.existential.push_back(length_constraint());
427+
constraints.universal.push_back([&] {
428+
const symbol_exprt idx =
429+
fresh_symbol("QA_upper_case", result.length().type());
430+
const typet &char_type = input.content().type().subtype();
431+
const exprt converted =
432+
minus_exprt(input[idx], from_integer(0x20, char_type));
433+
return string_constraintt(
434+
idx,
435+
zero_if_negative(result.length()),
436+
equal_exprt(
437+
result[idx],
438+
if_exprt(is_lower_case(input[idx]), converted, input[idx])));
439+
}());
440+
return constraints;
359441
}
360442

361443
std::vector<mp_integer> string_insertion_builtin_functiont::eval(

src/solvers/refinement/string_builtin_function.h

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <vector>
88
#include <util/optional.h>
99
#include <util/string_expr.h>
10+
#include "string_constraint_generator.h"
1011

1112
class array_poolt;
1213
struct string_constraintst;
@@ -211,7 +212,9 @@ class string_to_lower_case_builtin_functiont
211212

212213
exprt length_constraint() const override
213214
{
214-
return equal_exprt(result.length(), input.length());
215+
return and_exprt(
216+
equal_exprt(result.length(), input.length()),
217+
equal_exprt(return_code, from_integer(0, return_code.type())));
215218
};
216219
};
217220

@@ -229,6 +232,17 @@ class string_to_upper_case_builtin_functiont
229232
{
230233
}
231234

235+
string_to_upper_case_builtin_functiont(
236+
exprt return_code,
237+
array_string_exprt result,
238+
array_string_exprt input)
239+
: string_transformation_builtin_functiont(
240+
std::move(return_code),
241+
std::move(result),
242+
std::move(input))
243+
{
244+
}
245+
232246
optionalt<exprt>
233247
eval(const std::function<exprt(const exprt &)> &get_value) const override;
234248

@@ -237,12 +251,20 @@ class string_to_upper_case_builtin_functiont
237251
return "to_upper_case";
238252
}
239253

254+
string_constraintst constraints(class symbol_generatort &fresh_symbol) const;
255+
256+
/// \copydoc constraints(class symbol_generatort&)
240257
string_constraintst
241-
constraints(string_constraint_generatort &generator) const override;
258+
constraints(string_constraint_generatort &generator) const override
259+
{
260+
return constraints(generator.fresh_symbol);
261+
};
242262

243263
exprt length_constraint() const override
244264
{
245-
return equal_exprt(result.length(), input.length());
265+
return and_exprt(
266+
equal_exprt(result.length(), input.length()),
267+
equal_exprt(return_code, from_integer(0, return_code.type())));
246268
};
247269
};
248270

src/solvers/refinement/string_constraint_generator.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -485,15 +485,6 @@ std::pair<exprt, string_constraintst> add_axioms_for_substring(
485485
const function_application_exprt &f,
486486
array_poolt &array_pool);
487487

488-
std::pair<exprt, string_constraintst> add_axioms_for_to_lower_case(
489-
symbol_generatort &fresh_symbol,
490-
const array_string_exprt &res,
491-
const array_string_exprt &str);
492-
493-
std::pair<exprt, string_constraintst> add_axioms_for_to_upper_case(
494-
symbol_generatort &fresh_symbol,
495-
const array_string_exprt &res,
496-
const array_string_exprt &expr);
497488
std::pair<exprt, string_constraintst> add_axioms_for_trim(
498489
symbol_generatort &fresh_symbol,
499490
const function_application_exprt &f,

src/solvers/refinement/string_constraint_generator_format.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Date: May 2017
1919
#include <util/std_expr.h>
2020
#include <util/unicode.h>
2121

22+
#include "string_builtin_function.h"
2223
#include "string_constraint_generator.h"
2324

2425
// Format specifier describes how a value should be printed.
@@ -338,10 +339,14 @@ add_axioms_for_format_specifier(
338339
array_pool,
339340
message,
340341
ns);
341-
auto upper_case_result = add_axioms_for_to_upper_case(
342-
fresh_symbol, res, format_specifier_result.first);
343-
merge(upper_case_result.second, std::move(format_specifier_result.second));
344-
return {res, std::move(upper_case_result.second)};
342+
343+
const exprt return_code_upper_case =
344+
fresh_symbol("return_code_upper_case", get_return_code_type());
345+
const string_to_upper_case_builtin_functiont upper_case_function(
346+
return_code_upper_case, res, format_specifier_result.first);
347+
auto upper_case_constraints = upper_case_function.constraints(fresh_symbol);
348+
merge(upper_case_constraints, std::move(format_specifier_result.second));
349+
return {res, std::move(upper_case_constraints)};
345350
}
346351
case format_specifiert::OCTAL_INTEGER:
347352
/// \todo Conversion of octal is not implemented.

src/solvers/refinement/string_constraint_generator_transformation.cpp

Lines changed: 0 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -245,134 +245,6 @@ std::pair<exprt, string_constraintst> add_axioms_for_trim(
245245
return {from_integer(0, f.type()), constraints};
246246
}
247247

248-
/// Expression which is true for uppercase characters of the Basic Latin and
249-
/// Latin-1 supplement of unicode.
250-
static exprt is_upper_case(const exprt &character)
251-
{
252-
const exprt char_A = from_integer('A', character.type());
253-
const exprt char_Z = from_integer('Z', character.type());
254-
exprt::operandst upper_case;
255-
// Characters between 'A' and 'Z' are upper-case
256-
upper_case.push_back(
257-
and_exprt(
258-
binary_relation_exprt(char_A, ID_le, character),
259-
binary_relation_exprt(character, ID_le, char_Z)));
260-
261-
// Characters between 0xc0 (latin capital A with grave)
262-
// and 0xd6 (latin capital O with diaeresis) are upper-case
263-
upper_case.push_back(
264-
and_exprt(
265-
binary_relation_exprt(
266-
from_integer(0xc0, character.type()), ID_le, character),
267-
binary_relation_exprt(
268-
character, ID_le, from_integer(0xd6, character.type()))));
269-
270-
// Characters between 0xd8 (latin capital O with stroke)
271-
// and 0xde (latin capital thorn) are upper-case
272-
upper_case.push_back(
273-
and_exprt(
274-
binary_relation_exprt(
275-
from_integer(0xd8, character.type()), ID_le, character),
276-
binary_relation_exprt(
277-
character, ID_le, from_integer(0xde, character.type()))));
278-
return disjunction(upper_case);
279-
}
280-
281-
/// Expression which is true for lower_case characters of the Basic Latin and
282-
/// Latin-1 supplement of unicode.
283-
static exprt is_lower_case(const exprt &character)
284-
{
285-
return is_upper_case(
286-
minus_exprt(character, from_integer(0x20, character.type())));
287-
}
288-
289-
/// Add axioms ensuring `res` corresponds to `str` in which uppercase characters
290-
/// have been converted to lowercase.
291-
/// These axioms are:
292-
/// 1. res.length = str.length
293-
/// 2. forall i < str.length.
294-
/// res[i] = is_upper_case(str[i]) ? str[i] + diff : str[i]
295-
///
296-
/// Where `diff` is the difference between lower case and upper case
297-
/// characters: `diff = 'a'-'A' = 0x20` and `is_upper_case` is true for the
298-
/// upper case characters of Basic Latin and Latin-1 supplement of unicode.
299-
std::pair<exprt, string_constraintst> add_axioms_for_to_lower_case(
300-
symbol_generatort &fresh_symbol,
301-
const array_string_exprt &res,
302-
const array_string_exprt &str)
303-
{
304-
string_constraintst constraints;
305-
const typet &char_type = res.type().subtype();
306-
const typet &index_type = res.length().type();
307-
const exprt char_A = from_integer('A', char_type);
308-
const exprt char_Z = from_integer('Z', char_type);
309-
310-
// \todo for now, only characters in Basic Latin and Latin-1 supplement
311-
// are supported (up to 0x100), we should add others using case mapping
312-
// information from the UnicodeData file.
313-
314-
constraints.existential.push_back(equal_exprt(res.length(), str.length()));
315-
316-
constraints.universal.push_back([&] {
317-
const symbol_exprt idx = fresh_symbol("QA_lower_case", index_type);
318-
const exprt conditional_convert = [&] {
319-
// The difference between upper-case and lower-case for the basic latin and
320-
// latin-1 supplement is 0x20.
321-
const exprt diff = from_integer(0x20, char_type);
322-
const exprt converted = equal_exprt(res[idx], plus_exprt(str[idx], diff));
323-
const exprt non_converted = equal_exprt(res[idx], str[idx]);
324-
return if_exprt(is_upper_case(str[idx]), converted, non_converted);
325-
}();
326-
327-
return string_constraintt(
328-
idx, zero_if_negative(res.length()), conditional_convert);
329-
}());
330-
331-
return {from_integer(0, get_return_code_type()), std::move(constraints)};
332-
}
333-
334-
/// Add axioms ensuring `res` corresponds to `str` in which lowercase characters
335-
/// of Basic Latin and Latin-1 supplement of unicode have been converted to
336-
/// uppercase.
337-
///
338-
/// These axioms are:
339-
/// 1. res.length = str.length
340-
/// 2. forall i < str.length.
341-
/// is_lower_case(str[i]) ? res[i] = str[i] - 0x20 : res[i] = str[i]
342-
///
343-
/// \param fresh_symbol: generator of fresh symbols
344-
/// \param res: array of characters expression
345-
/// \param str: array of characters expression
346-
/// \return integer expression which is different from `0` when there is an
347-
/// exception to signal
348-
std::pair<exprt, string_constraintst> add_axioms_for_to_upper_case(
349-
symbol_generatort &fresh_symbol,
350-
const array_string_exprt &res,
351-
const array_string_exprt &str)
352-
{
353-
string_constraintst constraints;
354-
const typet &char_type = str.content().type().subtype();
355-
const typet &index_type = str.length().type();
356-
exprt char_a = from_integer('a', char_type);
357-
exprt char_A = from_integer('A', char_type);
358-
exprt char_z = from_integer('z', char_type);
359-
360-
constraints.existential.push_back(equal_exprt(res.length(), str.length()));
361-
362-
constraints.universal.push_back([&] {
363-
const symbol_exprt idx = fresh_symbol("QA_upper_case", index_type);
364-
const exprt converted =
365-
minus_exprt(str[idx], from_integer(0x20, char_type));
366-
return string_constraintt(
367-
idx,
368-
zero_if_negative(res.length()),
369-
equal_exprt(
370-
res[idx], if_exprt(is_lower_case(str[idx]), converted, str[idx])));
371-
}());
372-
373-
return {from_integer(0, get_return_code_type()), std::move(constraints)};
374-
}
375-
376248
/// Convert two expressions to pair of chars
377249
/// If both expressions are characters, return pair of them
378250
/// If both expressions are 1-length strings, return first character of each

0 commit comments

Comments
 (0)