diff --git a/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.class b/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.class index c7f02bedbb2..96733a2b713 100644 Binary files a/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.class and b/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.class differ diff --git a/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.java b/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.java index e9fe0292317..c06e96a5ced 100644 --- a/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.java +++ b/regression/jbmc-strings/StringMiscellaneous04/StringMiscellaneous04.java @@ -1,40 +1,52 @@ public class StringMiscellaneous04 { - public static void main(String[] args) - { - String s1 = "diffblue"; - String s2 = "TESTGENERATION"; - String s3 = " automated "; - - assert s1.equals("diffblue"); - assert s2.equals("TESTGENERATION"); - assert s3.equals(" automated "); - - System.out.printf( - "Replace 'f' with 'F' in s1: %s\n\n", s1.replace('f', 'F')); - String tmp=s1.replace('f', 'F'); - assert tmp.equals("diFFblue"); - - tmp=s1.toUpperCase(); - assert tmp.equals("DIFFBLUE"); - - tmp=s2.toLowerCase(); - assert tmp.equals("testgeneration"); - - tmp=s3.trim(); - assert tmp.equals("automated"); - - // test toCharArray method - char[] charArray = s1.toCharArray(); - System.out.print("s1 as a character array = "); - - int i=0; - for (char character : charArray) - { - assert character=="diffblue".charAt(i); - ++i; - } - - System.out.println(); - } + // This is a model of the String.toCharArray method + public static char[] toCharArray(String s) + { + int length=s.length(); + assert(length<10); + char arr[]=new char[s.length()]; + // We arbitrarily limit the loop unfolding to 10 + for(int i=0; i= 4) + { + sb.append(str, 2, 4); + String res = sb.toString(); + assert(res.startsWith(init)); + assert(res.endsWith(str.substring(2, 
4))); + assert(res.length() == init.length() + 2); + assert(!res.equals("foobarfuz")); + } + } + } diff --git a/regression/strings-smoke-tests/java_append_string/test_substring.desc b/regression/strings-smoke-tests/java_append_string/test_substring.desc new file mode 100644 index 00000000000..d62a4a8be0a --- /dev/null +++ b/regression/strings-smoke-tests/java_append_string/test_substring.desc @@ -0,0 +1,10 @@ +CORE +test_append_string.class +--refine-strings --string-max-length 10 --function test_append_string.check --java-assume-inputs-non-null +^EXIT=10$ +^SIGNAL=0$ +^\[.*assertion.*\].* line 22.* SUCCESS$ +^\[.*assertion.*\].* line 23.* SUCCESS$ +^\[.*assertion.*\].* line 24.* SUCCESS$ +^\[.*assertion.*\].* line 25.* FAILURE$ +-- diff --git a/regression/strings-smoke-tests/java_char_array/test.desc b/regression/strings-smoke-tests/java_char_array/test.desc index 5c8a13eddd3..c5d89dd2810 100644 --- a/regression/strings-smoke-tests/java_char_array/test.desc +++ b/regression/strings-smoke-tests/java_char_array/test.desc @@ -3,11 +3,12 @@ test_char_array.class --refine-strings --string-max-length 1000 ^EXIT=10$ ^SIGNAL=0$ -.*assertion.* test_char_array.java line 9 .* SUCCESS$ -.*assertion.* test_char_array.java line 10 .* SUCCESS$ -.*assertion.* test_char_array.java line 11 .* SUCCESS$ -.*assertion.* test_char_array.java line 13 .* FAILURE$ -.*assertion.* test_char_array.java line 15 .* FAILURE$ -.*assertion.* test_char_array.java line 17 .* FAILURE$ +.*assertion.* test_char_array.java line 7 .* SUCCESS$ +.*assertion.* test_char_array.java line 21 .* SUCCESS$ +.*assertion.* test_char_array.java line 22 .* SUCCESS$ +.*assertion.* test_char_array.java line 23 .* SUCCESS$ +.*assertion.* test_char_array.java line 25 .* FAILURE$ +.*assertion.* test_char_array.java line 27 .* FAILURE$ +.*assertion.* test_char_array.java line 29 .* FAILURE$ ^VERIFICATION FAILED$ -- diff --git a/regression/strings-smoke-tests/java_char_array/test_char_array.class 
b/regression/strings-smoke-tests/java_char_array/test_char_array.class index e509fe289d3..709157cf470 100644 Binary files a/regression/strings-smoke-tests/java_char_array/test_char_array.class and b/regression/strings-smoke-tests/java_char_array/test_char_array.class differ diff --git a/regression/strings-smoke-tests/java_char_array/test_char_array.java b/regression/strings-smoke-tests/java_char_array/test_char_array.java index e0efe1777e7..34d6f009e55 100644 --- a/regression/strings-smoke-tests/java_char_array/test_char_array.java +++ b/regression/strings-smoke-tests/java_char_array/test_char_array.java @@ -1,19 +1,31 @@ public class test_char_array { - public static void main(int i) - { - String s = "abc"; - char [] str = s.toCharArray(); - char c = str[2]; - char a = s.charAt(0); - assert(str.length == 3); - assert(a == 'a'); - assert(c == 'c'); - if(i==0) - assert(str.length != 3); - if(i==2) - assert(a != 'a'); - if(i==3) - assert(c != 'c'); - } + // This is a model of the String.toCharArray method + public static char[] toCharArray(String s) + { + int length = s.length(); + assert(length < 5); + char arr[] = new char[s.length()]; + // We arbitrarily limit the loop unfolding to 5 + for(int i = 0; i < length && i < 5; i++) + arr[i] = s.charAt(i); + return arr; + } + + public static void main(int i) + { + String s = "abc"; + char [] str = toCharArray(s); + char c = str[2]; + char a = s.charAt(0); + assert(str.length == 3); + assert(a == 'a'); + assert(c == 'c'); + if(i == 0) + assert(str.length != 3); + if(i == 2) + assert(a != 'a'); + if(i == 3) + assert(c != 'c'); + } } diff --git a/regression/strings-smoke-tests/java_if/test.class b/regression/strings-smoke-tests/java_if/test.class index 11958bd6ce9..f8d7bfeb855 100644 Binary files a/regression/strings-smoke-tests/java_if/test.class and b/regression/strings-smoke-tests/java_if/test.class differ diff --git a/regression/strings-smoke-tests/java_if/test.desc b/regression/strings-smoke-tests/java_if/test.desc 
index 382365ad33b..1be275e233e 100644 --- a/regression/strings-smoke-tests/java_if/test.desc +++ b/regression/strings-smoke-tests/java_if/test.desc @@ -3,7 +3,7 @@ test.class --refine-strings --string-max-length 100 ^EXIT=10$ ^SIGNAL=0$ -^\[.*assertion.1\].* line 12.* SUCCESS$ +^\[.*assertion.1\].* line 11.* SUCCESS$ ^\[.*assertion.2\].* line 13.* FAILURE$ -- $ignoring\s*char\s*array diff --git a/regression/strings-smoke-tests/java_if/test.java b/regression/strings-smoke-tests/java_if/test.java index 9760eb7bb2a..a125b73b47f 100644 --- a/regression/strings-smoke-tests/java_if/test.java +++ b/regression/strings-smoke-tests/java_if/test.java @@ -1,16 +1,16 @@ public class test { - public static String main() + public static String main(int i) { Object t[] = new Object[5]; t[0] = "world!"; StringBuilder s = new StringBuilder("Hello "); if(t[0] instanceof String) - { s.append((String) t[0]); - } - assert(s.toString().equals("Hello world!")); - assert(!s.toString().equals("Hello world!")); + if(i == 0) + assert(s.toString().equals("Hello world!")); + else + assert(!s.toString().equals("Hello world!")); return s.toString(); } } diff --git a/regression/strings-smoke-tests/java_insert_char_array/test.desc b/regression/strings-smoke-tests/java_insert_char_array/test.desc index 6fc7a12f439..472ebd69890 100644 --- a/regression/strings-smoke-tests/java_insert_char_array/test.desc +++ b/regression/strings-smoke-tests/java_insert_char_array/test.desc @@ -1,7 +1,8 @@ CORE test_insert_char_array.class --refine-strings --string-max-length 1000 -^EXIT=0$ +^EXIT=10$ ^SIGNAL=0$ -^VERIFICATION SUCCESSFUL$ +assertion.* file test_insert_char_array.java line 20 .* SUCCESS$ +assertion.* file test_insert_char_array.java line 22 .* FAILURE$ -- diff --git a/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.class b/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.class index 3c0b5329230..d0edb7e3e24 100644 Binary files 
a/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.class and b/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.class differ diff --git a/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.java b/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.java index 2c2840df672..ab28b86ffdc 100644 --- a/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.java +++ b/regression/strings-smoke-tests/java_insert_char_array/test_insert_char_array.java @@ -1,14 +1,24 @@ public class test_insert_char_array { - public static void main(/*String[] argv*/) - { - StringBuilder sb = new StringBuilder("ad"); - char[] array = new char[2]; - array[0] = 'b'; - array[1] = 'c'; - sb.insert(1, array); - String s = sb.toString(); - System.out.println(s); - assert(s.equals("abcd")); - } + public static void insert(StringBuilder sb, int offset, char[] arr) + { + assert(arr.length<5); + for(int i=0; isecond.symbol_expr()); @@ -118,9 +121,9 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) // Create a new symbol: symbolt new_symbol; - new_symbol.name=escaped_symbol_name; + new_symbol.name = escaped_symbol_name_with_prefix; new_symbol.type=string_type; - new_symbol.base_name="Literal"; + new_symbol.base_name = escaped_symbol_name; new_symbol.pretty_name=value; new_symbol.mode=ID_java; new_symbol.is_type=false; @@ -140,14 +143,19 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) if(string_refinement_enabled) { struct_exprt literal_init(new_symbol.type); - literal_init.move_to_operands(jlo_init); + literal_init.operands().resize(jls_struct.components().size()); + const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object"); + literal_init.operands()[jlo_nb] = jlo_init; + + const std::size_t length_nb = jls_struct.component_number("length"); + const typet &length_type = 
jls_struct.components()[length_nb].type(); + const exprt length = from_integer(id2string(value).size(), length_type); + literal_init.operands()[length_nb] = length; // Initialize the string with a constant utf-16 array: symbolt array_symbol; - array_symbol.name=escaped_symbol_name+"_constarray"; - array_symbol.type=array_typet( - java_char_type(), infinity_exprt(java_int_type())); - array_symbol.base_name="Literal_constarray"; + array_symbol.name = escaped_symbol_name_with_prefix + "_constarray"; + array_symbol.base_name = escaped_symbol_name + "_constarray"; array_symbol.pretty_name=value; array_symbol.mode=ID_java; array_symbol.is_type=false; @@ -155,19 +163,41 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr) // These are basically const global data: array_symbol.is_static_lifetime=true; array_symbol.is_state_var=true; - auto literal_array=utf16_to_array( - utf8_to_utf16_little_endian(id2string(value))); - array_symbol.value=literal_array; + array_symbol.value = + utf16_to_array(utf8_to_utf16_little_endian(id2string(value))); + array_symbol.type = array_symbol.value.type(); if(symbol_table.add(array_symbol)) throw "failed to add constarray symbol to symbol table"; - literal_init.copy_to_operands( - from_integer(literal_array.operands().size(), - jls_struct.components()[1].type())); - literal_init.copy_to_operands( - address_of_exprt(array_symbol.symbol_expr())); - + const symbol_exprt array_expr = array_symbol.symbol_expr(); + const address_of_exprt array_pointer( + index_exprt(array_expr, from_integer(0, java_int_type()))); + + const std::size_t data_nb = jls_struct.component_number("data"); + literal_init.operands()[data_nb] = array_pointer; + + // Associate array with pointer + symbolt return_symbol; + return_symbol.name = escaped_symbol_name_with_prefix + "_return_value"; + return_symbol.base_name = escaped_symbol_name + "_return_value"; + return_symbol.pretty_name = + escaped_symbol_name.length() > 10 + ? 
escaped_symbol_name.substr(0, 10) + "..._return_value" + : escaped_symbol_name + "_return_value"; + return_symbol.mode = ID_java; + return_symbol.is_type = false; + return_symbol.is_lvalue = true; + return_symbol.is_static_lifetime = true; + return_symbol.is_state_var = true; + return_symbol.value = make_function_application( + ID_cprover_associate_array_to_pointer_func, + {array_symbol.value, array_pointer}, + java_int_type(), + symbol_table); + return_symbol.type = return_symbol.value.type(); + if(symbol_table.add(return_symbol)) + throw "failed to add return symbol to symbol table"; new_symbol.value=literal_init; } else if(jls_struct.components().size()>=1 && diff --git a/src/java_bytecode/java_object_factory.cpp b/src/java_bytecode/java_object_factory.cpp index 9324093dc82..330c5d19a3c 100644 --- a/src/java_bytecode/java_object_factory.cpp +++ b/src/java_bytecode/java_object_factory.cpp @@ -31,6 +31,8 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_types.h" #include "java_utils.h" +#include "java_string_library_preprocess.h" +#include "java_root_class.h" static symbolt &new_tmp_symbol( symbol_tablet &symbol_table, @@ -460,8 +462,9 @@ class recursion_set_entryt /// Initialize a recursion-set entry owner operating on a given set. /// Initially it does not own any set entry. /// \param _recursion_set: set to operate on. - recursion_set_entryt(std::unordered_set &_recursion_set): - recursion_set(_recursion_set) + explicit recursion_set_entryt( + std::unordered_set &_recursion_set) + : recursion_set(_recursion_set) { } /// Removes erase_entry (if set) from the controlled set. 
@@ -494,6 +497,157 @@ class recursion_set_entryt } }; +/// Get max value for an integer type +/// \param type: +/// Type to find maximum value for +/// \return Maximum integer value +static mp_integer max_value(const typet &type) +{ + if(type.id() == ID_signedbv) + return to_signedbv_type(type).largest(); + else if(type.id() == ID_unsignedbv) + return to_unsignedbv_type(type).largest(); + UNREACHABLE; +} + +/// Initialize a nondeterministic String structure +/// \param obj: struct to initialize, must have been declared using +/// code of the form: +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// struct java.lang.String { struct \@java.lang.Object; +/// int length; char *data; } tmp_object_factory$1; +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// \param max_nondet_string_length: maximum length of strings to initialize +/// \param loc: location in the source +/// \param symbol_table: the symbol table +/// \return code for initialization of the strings +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// int tmp_object_factory$1; +/// tmp_object_factory$1 = NONDET(int); +/// __CPROVER_assume(tmp_object_factory$1 >= 0); +/// __CPROVER_assume(tmp_object_factory$1 <= max_nondet_string_length); +/// char nondet_infinite_array$2[INFINITY()]; +/// nondet_infinite_array$2 = NONDET(char [INFINITY()]); +/// cprover_associate_array_to_pointer_func +/// (nondet_infinite_array$2, &nondet_infinite_array$2[0]); +/// cprover_associate_length_to_array_func +/// (nondet_infinite_array$2, tmp_object_factory$1); +/// arg = { .\@java.lang.Object={ .\@class_identifier="java.lang.String", +/// .\@lock=false }, .length=tmp_object_factory$1, +/// .data=nondet_infinite_array$2 }; +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// Unit tests in `unit/java_bytecode/java_object_factory/` ensure +/// it is the case. 
+codet initialize_nondet_string_struct( + const exprt &obj, + const std::size_t &max_nondet_string_length, + const source_locationt &loc, + symbol_tablet &symbol_table) +{ + PRECONDITION( + java_string_library_preprocesst::implements_java_char_sequence(obj.type())); + + const namespacet ns(symbol_table); + code_blockt code; + + // `obj` is `*expr` + const struct_typet &struct_type = to_struct_type(ns.follow(obj.type())); + const irep_idt &class_id = struct_type.get_tag(); + + // @clsid = String and @lock = false: + const symbol_typet jlo_symbol("java::java.lang.Object"); + const struct_typet &jlo_type = to_struct_type(ns.follow(jlo_symbol)); + struct_exprt jlo_init(jlo_symbol); + java_root_class_init(jlo_init, jlo_type, false, class_id); + + struct_exprt struct_expr(obj.type()); + struct_expr.copy_to_operands(jlo_init); + + // In case the type for string was not added to the symbol table, + // (typically when string refinement is not activated), `struct_type` + // just contains the standard Object field and no length and data fields. 
+ if(struct_type.has_component("length")) + { + // length_expr = nondet(int); + const symbolt length_sym = + new_tmp_symbol(symbol_table, loc, java_int_type()); + const symbol_exprt length_expr = length_sym.symbol_expr(); + const side_effect_expr_nondett nondet_length(length_expr.type()); + code.add(code_declt(length_expr)); + code.add(code_assignt(length_expr, nondet_length)); + + // assume (length_expr >= 0); + code.add( + code_assumet( + binary_relation_exprt( + length_expr, ID_ge, from_integer(0, java_int_type())))); + + // assume (length_expr <= max_input_length) + if(max_nondet_string_length <= max_value(length_expr.type())) + { + exprt max_length = + from_integer(max_nondet_string_length, length_expr.type()); + code.add( + code_assumet(binary_relation_exprt(length_expr, ID_le, max_length))); + } + + // data_expr = nondet(char[INFINITY]) // we use infinity for variable size + exprt data_expr = make_nondet_infinite_char_array(symbol_table, loc, code); + + struct_expr.copy_to_operands(length_expr); + + const address_of_exprt array_pointer( + index_exprt(data_expr, from_integer(0, java_int_type()))); + struct_expr.copy_to_operands(array_pointer); + + add_pointer_to_array_association( + array_pointer, data_expr, symbol_table, loc, code); + + add_array_to_length_association( + data_expr, length_expr, symbol_table, loc, code); + } + + // tmp_object = struct_expr; + code.add(code_assignt(obj, struct_expr)); + return code; +} + +/// Add code for the initialization of a string using a nondeterministic +/// content and association of its address to the pointer `expr`. 
+/// \param expr: pointer to be affected +/// \param max_nondet_string_length: maximum length of strings to initialize +/// \param symbol_table: the symbol table +/// \param loc: location in the source +/// \param [out] code: code block in which initialization code is added +/// \return false if code was added, true to signal an error when the given +/// object does not implement CharSequence or does not have data and +/// length fields, in which case it should be initialized another way. +static bool add_nondet_string_pointer_initialization( + const exprt &expr, + const std::size_t &max_nondet_string_length, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code) +{ + const namespacet ns(symbol_table); + const dereference_exprt obj(expr, expr.type().subtype()); + const struct_typet &struct_type = + to_struct_type(ns.follow(to_symbol_type(obj.type()))); + + if(!struct_type.has_component("data") || !struct_type.has_component("length")) + return true; + + allocate_dynamic_object_with_decl(expr, symbol_table, loc, code); + + code.add( + initialize_nondet_string_struct( + dereference_exprt(expr, struct_type), + max_nondet_string_length, + loc, + symbol_table)); + return false; +} + /// Initializes a pointer \p expr of type \p pointer_type to a primitive-typed /// value or an object tree. 
It allocates child objects as necessary and /// nondet-initializes their members, or if MUST_UPDATE_IN_PLACE is set, @@ -534,7 +688,6 @@ void java_object_factoryt::gen_nondet_pointer_init( const update_in_placet &update_in_place) { PRECONDITION(expr.type().id()==ID_pointer); - const pointer_typet &replacement_pointer_type= pointer_type_selector.convert_pointer_type(pointer_type, ns); @@ -623,27 +776,34 @@ void java_object_factoryt::gen_nondet_pointer_init( // vector of assignments that create a new object (recursively initializes it) // and asign to `expr` the address of such object code_blockt non_null_inst; - gen_pointer_target_init( - non_null_inst, - expr, - subtype, - alloc_type, - depth, - update_in_placet::NO_UPDATE_IN_PLACE); + + if( + java_string_library_preprocesst::implements_java_char_sequence_pointer( + expr.type())) + { + add_nondet_string_pointer_initialization( + expr, + object_factory_parameters.max_nondet_string_length, + symbol_table, + loc, + assignments); + } + else + { + gen_pointer_target_init( + non_null_inst, + expr, + subtype, + alloc_type, + depth, + update_in_placet::NO_UPDATE_IN_PLACE); + } auto set_null_inst=get_null_assignment(expr, pointer_type); - // Determine whether the pointer can be null. In particular: - // - the 'data' of a String should not be null. - // - the pointers inside the java.lang.Class class shall not be null - bool not_null= - !allow_null || - ((class_identifier=="java.lang.String" || - class_identifier=="java.lang.StringBuilder" || - class_identifier=="java.lang.StringBuffer" || - class_identifier=="java.lang.CharSequence") && - subtype.id()==ID_array) || - class_identifier=="java.lang.Class"; + // Determine whether the pointer can be null. 
In particular the pointers + // inside the java.lang.Class class shall not be null + const bool not_null = !allow_null || class_identifier == "java.lang.Class"; // Alternatively, if this is a void* we *must* initialise with null: // (This can currently happen for some cases of #exception_value) @@ -750,19 +910,6 @@ symbol_exprt java_object_factoryt::gen_nondet_subtype_pointer_init( return new_symbol.symbol_expr(); } -/// Get max value for an integral type -/// \param type: -/// Type to find maximum value for -/// \return Maximum integral valu -static size_t max_value(const typet& type) -{ - if(type.id()==ID_signedbv) - return std::numeric_limits::max(); - else if(type.id()==ID_unsignedbv) - return std::numeric_limits::max(); - UNREACHABLE; -} - /// Initializes an object tree rooted at `expr`, allocating child objects as /// necessary and nondet-initializes their members, or if MUST_UPDATE_IN_PLACE /// is set, re-initializes already-allocated objects. @@ -866,23 +1013,6 @@ void java_object_factoryt::gen_nondet_struct_init( true, // allow_null always true for sub-objects depth, substruct_in_place); - - if(name=="length") - { - if(class_identifier=="java.lang.String" || - class_identifier=="java.lang.StringBuffer" || - class_identifier=="java.lang.StringBuilder") - { - if(object_factory_parameters.max_nondet_string_length <= - max_value(me.type())) - { - exprt max_length=from_integer( - object_factory_parameters.max_nondet_string_length, me.type()); - assignments.add(code_assumet( - binary_relation_exprt(me, ID_le, max_length))); - } - } - } } } } diff --git a/src/java_bytecode/java_object_factory.h b/src/java_bytecode/java_object_factory.h index 3e1c06c52e1..e731c70998b 100644 --- a/src/java_bytecode/java_object_factory.h +++ b/src/java_bytecode/java_object_factory.h @@ -152,4 +152,10 @@ void allocate_dynamic_object_with_decl( const source_locationt &loc, code_blockt &output_code); +codet initialize_nondet_string_struct( + const exprt &obj, + const std::size_t 
&max_nondet_string_length, + const source_locationt &loc, + symbol_tablet &symbol_table); + #endif // CPROVER_JAVA_BYTECODE_JAVA_OBJECT_FACTORY_H diff --git a/src/java_bytecode/java_string_library_preprocess.cpp b/src/java_bytecode/java_string_library_preprocess.cpp index 73200f1fee7..eaac8aaf6fc 100644 --- a/src/java_bytecode/java_string_library_preprocess.cpp +++ b/src/java_bytecode/java_string_library_preprocess.cpp @@ -29,6 +29,20 @@ Date: April 2017 #include "java_utils.h" #include "java_string_library_preprocess.h" +#include "java_root_class.h" + +/// \return tag of a struct prefixed by "java::" or symbolic tag +/// empty string if not symbol or struct +irep_idt get_tag(const typet &type) +{ + /// \todo Use follow instead of assuming tag to symbol relationship. + if(type.id() == ID_symbol) + return to_symbol_type(type).get_identifier(); + else if(type.id() == ID_struct) + return irep_idt("java::" + id2string(to_struct_type(type).get_tag())); + else + return ""; +} /// \param type: a type /// \param tag: a string @@ -37,17 +51,7 @@ Date: April 2017 bool java_string_library_preprocesst::java_type_matches_tag( const typet &type, const std::string &tag) { - if(type.id()==ID_symbol) - { - irep_idt tag_id=to_symbol_type(type).get_identifier(); - return tag_id=="java::"+tag; - } - else if(type.id()==ID_struct) - { - irep_idt tag_id=to_struct_type(type).get_tag(); - return tag_id==tag; - } - return false; + return irep_idt("java::" + tag) == get_tag(type); } /// \param type: a type @@ -199,11 +203,7 @@ void java_string_library_preprocesst::add_string_type( string_type.components()[1].type()=string_length_type(); string_type.components()[2].set_name("data"); string_type.components()[2].set_pretty_name("data"); - // Use a pointer-to-unbounded-array instead of a pointer-to-char. - // Saves some casting in the string refinement algorithm but may - // be unnecessary. 
- string_type.components()[2].type()=java_reference_type( - array_typet(java_char_type(), infinity_exprt(string_length_type()))); + string_type.components()[2].type() = pointer_type(java_char_type()); string_type.add_base(symbol_typet("java::java.lang.Object")); if(class_name!="java.lang.CharSequence") { @@ -248,23 +248,6 @@ symbol_exprt java_string_library_preprocesst::fresh_array( return array_symbol.symbol_expr(); } -/// declare a function with the given name and type -/// \param function_name: a name -/// \param type: a type -/// \param symbol_table: symbol table -void java_string_library_preprocesst::declare_function( - irep_idt function_name, const typet &type, symbol_tablet &symbol_table) -{ - auxiliary_symbolt func_symbol; - func_symbol.base_name=function_name; - func_symbol.pretty_name=function_name; - func_symbol.is_static_lifetime=false; - func_symbol.mode=ID_java; - func_symbol.name=function_name; - func_symbol.type=type; - symbol_table.add(func_symbol); -} - /// calls string_refine_preprocesst::process_operands with a list of parameters. 
/// \param params: a list of function parameters /// \param loc: location in the source @@ -292,21 +275,27 @@ exprt::operandst java_string_library_preprocesst::process_parameters( /// sequence /// \param loc: location in the source /// \param symbol_table: symbol table -/// \param init_code: code block, in which declaration of some arguments may be -/// added -/// \return the processed operand -exprt java_string_library_preprocesst::convert_exprt_to_string_exprt( +/// \param init_code: code block, in which declaration will be added: +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// char *cprover_string_content; +/// int cprover_string_length; +/// cprover_string_length = a->length; +/// cprover_string_content = a->data; +/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +/// \return the processed operand: +/// {content=cprover_string_content, length=cprover_string_length} +refined_string_exprt +java_string_library_preprocesst::convert_exprt_to_string_exprt( const exprt &expr_to_process, const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &init_code) { - PRECONDITION(implements_java_char_sequence(expr_to_process.type())); - string_exprt string_expr=fresh_string_expr(loc, symbol_table, init_code); + PRECONDITION(implements_java_char_sequence_pointer(expr_to_process.type())); + const refined_string_exprt string_expr = + decl_string_expr(loc, symbol_table, init_code); code_assign_java_string_to_string_expr( - string_expr, expr_to_process, symbol_table, init_code); - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, init_code); - init_code.add(code_assignt(string_expr_sym, string_expr)); + string_expr, expr_to_process, loc, symbol_table, init_code); return string_expr; } @@ -330,7 +319,7 @@ exprt::operandst java_string_library_preprocesst::process_operands( exprt::operandst ops; for(const auto &p : operands) { - if(implements_java_char_sequence(p.type())) + if(implements_java_char_sequence_pointer(p.type())) ops.push_back( 
convert_exprt_to_string_exprt(p, loc, symbol_table, init_code)); else if(is_java_char_array_pointer_type(p.type())) @@ -358,17 +347,18 @@ exprt::operandst code_blockt &init_code) { PRECONDITION(operands.size()==2); + exprt::operandst ops; const exprt &op0=operands[0]; - PRECONDITION(implements_java_char_sequence(op0.type())); + const exprt &op1 = operands[1]; + PRECONDITION(implements_java_char_sequence_pointer(op0.type())); - exprt::operandst ops; ops.push_back( convert_exprt_to_string_exprt(op0, loc, symbol_table, init_code)); // TODO: Manage the case where we have a non-String Object (this should // probably be handled upstream. At any rate, the following code should be // protected with assertions on the type of op1. - typecast_exprt tcast(operands[1], to_pointer_type(op0.type())); + typecast_exprt tcast(op1, to_pointer_type(op0.type())); ops.push_back( convert_exprt_to_string_exprt(tcast, loc, symbol_table, init_code)); return ops; @@ -378,8 +368,7 @@ exprt::operandst /// \param type: a type containing a "data" component /// \param symbol_table: symbol table /// \return type of the "data" component -typet java_string_library_preprocesst::get_data_type( - const typet &type, const symbol_tablet &symbol_table) +static typet get_data_type(const typet &type, const symbol_tablet &symbol_table) { PRECONDITION(type.id()==ID_struct || type.id()==ID_symbol); if(type.id()==ID_symbol) @@ -397,8 +386,8 @@ typet java_string_library_preprocesst::get_data_type( /// \param type: a type containing a "length" component /// \param symbol_table: symbol table /// \return type of the "length" component -typet java_string_library_preprocesst::get_length_type( - const typet &type, const symbol_tablet &symbol_table) +static typet +get_length_type(const typet &type, const symbol_tablet &symbol_table) { PRECONDITION(type.id()==ID_struct || type.id()==ID_symbol); if(type.id()==ID_symbol) @@ -416,19 +405,17 @@ typet java_string_library_preprocesst::get_length_type( /// \param expr: an 
expression of structured type with length component /// \param symbol_table: symbol table /// \return expression representing the "length" member -exprt java_string_library_preprocesst::get_length( - const exprt &expr, const symbol_tablet &symbol_table) +static exprt get_length(const exprt &expr, const symbol_tablet &symbol_table) { return member_exprt( expr, "length", get_length_type(expr.type(), symbol_table)); } /// access data member -/// \param expr: an expression of structured type with length component +/// \param expr: an expression of structured type with data component /// \param symbol_table: symbol table /// \return expression representing the "data" member -exprt java_string_library_preprocesst::get_data( - const exprt &expr, const symbol_tablet &symbol_table) +static exprt get_data(const exprt &expr, const symbol_tablet &symbol_table) { return member_exprt(expr, "data", get_data_type(expr.type(), symbol_table)); } @@ -440,39 +427,32 @@ exprt java_string_library_preprocesst::get_data( /// \param symbol_table: symbol table /// \param code: code block, in which some assignments will be added /// \return a string expression -string_exprt java_string_library_preprocesst::replace_char_array( +refined_string_exprt java_string_library_preprocesst::replace_char_array( const exprt &array_pointer, const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code) { - refined_string_typet ref_type=refined_string_type; + // array is *array_pointer dereference_exprt array= checked_dereference(array_pointer, array_pointer.type().subtype()); - exprt array_data=get_data(array, symbol_table); - // `deref_array` is *(array_pointer->data)` - // No null-pointer-exception check here since all array structures - // have non-null data - const typet &content_type=ref_type.get_content_type(); - dereference_exprt deref_array(array_data, array_data.type().subtype()); - - // lhs_deref <- convert_pointer_to_char_array(*(array_pointer->data)) - symbolt 
sym_char_array=get_fresh_aux_symbol( - content_type, "char_array", "char_array", loc, ID_java, symbol_table); + // array_data is array_pointer-> data + const exprt array_data = get_data(array, symbol_table); + symbolt sym_char_array = get_fresh_aux_symbol( + array_data.type(), "char_array", "char_array", loc, ID_java, symbol_table); symbol_exprt char_array=sym_char_array.symbol_expr(); - code.add(code_assign_function_application( - char_array, - ID_cprover_string_array_of_char_pointer_func, - {deref_array}, - symbol_table)); + // char_array = array_pointer->data + code.add(code_assignt(char_array, array_data)); // string_expr is `{ rhs->length; string_array }` - string_exprt string_expr( + refined_string_exprt string_expr( get_length(array, symbol_table), char_array, refined_string_type); - // string_expr_sym <- { rhs->length; string_array } - symbol_exprt string_expr_sym= - fresh_string(refined_string_type, loc, symbol_table); - code.add(code_assignt(string_expr_sym, string_expr)); + + dereference_exprt inf_array( + char_array, array_typet(java_char_type(), infinity_exprt(java_int_type()))); + + add_pointer_to_array_association( + string_expr.content(), inf_array, symbol_table, loc, code); return string_expr; } @@ -492,29 +472,37 @@ symbol_exprt java_string_library_preprocesst::fresh_string( return string_symbol.symbol_expr(); } -/// add symbols with prefix cprover_string_length and cprover_string_data and -/// construct a string_expr from them. -/// \param loc: a location in the program -/// \param symbol_table: symbol table -/// \param code: code block to which allocation instruction will be added -/// \return a new string_expr -string_exprt java_string_library_preprocesst::fresh_string_expr( - const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code) +/// Add declaration of a refined string expr whose content and length are +/// fresh symbols. 
+/// \param loc: source location +/// \param symbol_table: the symbol table +/// \param code [out] : code block to which the declaration is added +/// \return refined string expr with fresh content and length symbols +refined_string_exprt java_string_library_preprocesst::decl_string_expr( + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code) { - refined_string_typet type=refined_string_type; - symbolt sym_length=get_fresh_aux_symbol( - type.get_index_type(), + symbolt sym_length = get_fresh_aux_symbol( + index_type, "cprover_string_length", "cprover_string_length", loc, ID_java, symbol_table); symbol_exprt length_field=sym_length.symbol_expr(); - symbol_exprt content_field=fresh_array( - type.get_content_type(), loc, symbol_table); - string_exprt str(length_field, content_field, type); - code.add(code_declt(length_field)); + pointer_typet array_type = pointer_type(java_char_type()); + symbolt sym_content = get_fresh_aux_symbol( + array_type, + "cprover_string_content", + "cprover_string_content", + loc, + ID_java, + symbol_table); + symbol_exprt content_field = sym_content.symbol_expr(); code.add(code_declt(content_field)); + refined_string_exprt str(length_field, content_field, refined_string_type); + code.add(code_declt(length_field)); return str; } @@ -523,19 +511,32 @@ string_exprt java_string_library_preprocesst::fresh_string_expr( /// \param loc: a location in the program /// \param symbol_table: symbol table /// \param code: code block to which allocation instruction will be added -/// \return a new expression of refined string type -exprt java_string_library_preprocesst::fresh_string_expr_symbol( - const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code) +/// \return a new string_expr +refined_string_exprt java_string_library_preprocesst::make_nondet_string_expr( + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code) { - symbolt sym=get_fresh_aux_symbol( - refined_string_type, - 
"cprover_string", - "cprover_string", - loc, - ID_java, - symbol_table); - code.add(code_declt(sym.symbol_expr())); - return sym.symbol_expr(); + const refined_string_exprt str = decl_string_expr(loc, symbol_table, code); + + side_effect_expr_nondett nondet_length(str.length().type()); + code.add(code_assignt(str.length(), nondet_length)); + + exprt nondet_array_expr = + make_nondet_infinite_char_array(symbol_table, loc, code); + + address_of_exprt array_pointer( + index_exprt(nondet_array_expr, from_integer(0, java_int_type()))); + + add_pointer_to_array_association( + array_pointer, nondet_array_expr, symbol_table, loc, code); + + add_array_to_length_association( + nondet_array_expr, str.length(), symbol_table, loc, code); + + code.add(code_assignt(str.content(), array_pointer)); + + return refined_string_exprt(str.length(), str.content()); } /// declare a new String and allocate it @@ -551,7 +552,6 @@ exprt java_string_library_preprocesst::allocate_fresh_string( code_blockt &code) { exprt str=fresh_string(type, loc, symbol_table); - code.add(code_declt(str)); allocate_dynamic_object_with_decl(str, symbol_table, loc, code); return str; } @@ -574,29 +574,6 @@ exprt java_string_library_preprocesst::allocate_fresh_array( return array; } -/// \param function_name: the name of the function -/// \param arguments: a list of arguments -/// \param type: return type of the function -/// \param symbol_table: a symbol table -/// \return a function application representing: `function_name(arguments)` -exprt java_string_library_preprocesst::make_function_application( - const irep_idt &function_name, - const exprt::operandst &arguments, - const typet &type, - symbol_tablet &symbol_table) -{ - // Names of function to call - std::string fun_name=id2string(function_name); - - // Declaring the function - declare_function(fun_name, type, symbol_table); - - // Function application - function_application_exprt call(symbol_exprt(fun_name), type); - call.arguments()=arguments; - return 
call; -} - /// assign the result of a function call /// \param lhs: an expression /// \param function_name: the name of the function @@ -606,7 +583,7 @@ exprt java_string_library_preprocesst::make_function_application( /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// lhs = (arguments) /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -codet java_string_library_preprocesst::code_assign_function_application( +static codet code_assign_function_application( const exprt &lhs, const irep_idt &function_name, const exprt::operandst &arguments, @@ -637,56 +614,144 @@ codet java_string_library_preprocesst::code_return_function_application( return code_returnt(fun_app); } -/// \param string_expr: a string expression -/// \param function_name: the name of the function -/// \param arguments: arguments of the function -/// \param symbol_table: symbol table -/// \return return the following code: -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// str.length = _length(arguments) -/// str.data = _data(arguments) -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -codet java_string_library_preprocesst::code_assign_function_to_string_expr( - const string_exprt &string_expr, - const irep_idt &function_name, - const exprt::operandst &arguments, - symbol_tablet &symbol_table) +/// Declare a fresh symbol of type array of character with infinite size. 
+/// \param symbol_table: the symbol table +/// \param loc: source location +/// \param code [out] : code block where the declaration gets added +/// \return created symbol expression +exprt make_nondet_infinite_char_array( + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code) { - // Names of function to call - std::string fun_name_length=id2string(function_name)+"_length"; - std::string fun_name_data=id2string(function_name)+"_data"; + const array_typet array_type( + java_char_type(), infinity_exprt(java_int_type())); + const symbolt data_sym = get_fresh_aux_symbol( + array_type, + "nondet_infinite_array", + "nondet_infinite_array", + loc, + ID_java, + symbol_table); + const symbol_exprt data_expr = data_sym.symbol_expr(); + code.add(code_declt(data_expr)); + side_effect_expr_nondett nondet_data(data_expr.type()); + code.add(code_assignt(data_expr, nondet_data)); + return data_expr; +} - // Assignments - codet assign_fun_length=code_assign_function_application( - string_expr.length(), fun_name_length, arguments, symbol_table); - codet assign_fun_data=code_assign_function_application( - string_expr.content(), fun_name_data, arguments, symbol_table); +/// Add a call to a primitive of the string solver, letting it know that +/// `pointer` points to the first character of `array`. 
+/// \param pointer: a character pointer expression +/// \param array: a character array expression +/// \param symbol_table: the symbol table +/// \param loc: source location +/// \param code [out] : code block to which declaration and calls get added +void add_pointer_to_array_association( + const exprt &pointer, + const exprt &array, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code) +{ + PRECONDITION(array.type().id() == ID_array); + PRECONDITION(pointer.type().id() == ID_pointer); + symbolt &return_sym = get_fresh_aux_symbol( + java_int_type(), + "return_array", + "return_array", + loc, + ID_java, + symbol_table); + exprt return_expr = return_sym.symbol_expr(); + code.add(code_declt(return_expr)); + code.add( + code_assign_function_application( + return_expr, + ID_cprover_associate_array_to_pointer_func, + {array, pointer}, + symbol_table)); +} - return code_blockt({assign_fun_length, assign_fun_data}); +/// Add a call to a primitive of the string solver, letting it know that +/// the actual length of `array` is `length`. 
+/// \param array: infinite size character array expression +/// \param length: integer expression +/// \param symbol_table: the symbol table +/// \param loc: source location +/// \param code [out] : code block to which declaration and calls get added +void add_array_to_length_association( + const exprt &array, + const exprt &length, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code) +{ + symbolt &return_sym = get_fresh_aux_symbol( + java_int_type(), + "return_array", + "return_array", + loc, + ID_java, + symbol_table); + const exprt return_expr = return_sym.symbol_expr(); + code.add(code_declt(return_expr)); + code.add( + code_assign_function_application( + return_expr, + ID_cprover_associate_length_to_array_func, + {array, length}, + symbol_table)); } +/// Create a refined_string_exprt `str` whose content and length are fresh +/// symbols, calls the string primitive with name `function_name`. +/// In the arguments of the primitive `str` takes the place of the result and +/// the following arguments are given by parameter `arguments. 
/// \param function_name: the name of the function /// \param arguments: arguments of the function -/// \param loc: a location in the program +/// \param loc: source location /// \param symbol_table: symbol table -/// \param code: code block in which we add instructions -/// \return return a string expr str and add the following code: +/// \param [out] code: gets added the following code: /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// array = str.data -/// str.length = _length(arguments) -/// str.data = _data(arguments) +/// int return_code; +/// int str.length; +/// char str.data[str.length] +/// return_code = (str.length, str.data, arguments) /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -string_exprt java_string_library_preprocesst:: - string_expr_of_function_application( - const irep_idt &function_name, - const exprt::operandst &arguments, - const source_locationt &loc, - symbol_tablet &symbol_table, - code_blockt &code) +/// \return refined string expression `str` +refined_string_exprt java_string_library_preprocesst::string_expr_of_function( + const irep_idt &function_name, + const exprt::operandst &arguments, + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code) { - string_exprt string_expr=fresh_string_expr(loc, symbol_table, code); - code.add(code_assign_function_to_string_expr( - string_expr, function_name, arguments, symbol_table)); + // int return_code; + symbolt return_code_sym = get_fresh_aux_symbol( + java_int_type(), + std::string("return_code_") + function_name.c_str(), + std::string("return_code_") + function_name.c_str(), + loc, + ID_java, + symbol_table); + const exprt return_code = return_code_sym.symbol_expr(); + code.add(code_declt(return_code)); + + const refined_string_exprt string_expr = + make_nondet_string_expr(loc, symbol_table, code); + + // args is { str.length, str.content, arguments... 
} + exprt::operandst args; + args.push_back(string_expr.length()); + args.push_back(string_expr.content()); + args.insert(args.end(), arguments.begin(), arguments.end()); + + // return_code = _data(args) + code.add( + code_assign_function_application( + return_code, function_name, args, symbol_table)); + return string_expr; } @@ -705,27 +770,22 @@ codet java_string_library_preprocesst::code_assign_components_to_java_string( const exprt &rhs_length, symbol_tablet &symbol_table) { - PRECONDITION(implements_java_char_sequence(lhs.type())); + PRECONDITION(implements_java_char_sequence_pointer(lhs.type())); dereference_exprt deref=checked_dereference(lhs, lhs.type().subtype()); - code_blockt code; - // A String has a field Object with @clsid = String and @lock = false: const symbolt &jlo_symbol=*symbol_table.lookup("java::java.lang.Object"); const struct_typet &jlo_struct=to_struct_type(jlo_symbol.type); struct_exprt jlo_init(jlo_struct); - jlo_init.copy_to_operands(constant_exprt( - "java::java.lang.String", jlo_struct.components()[0].type())); - jlo_init.copy_to_operands(from_integer(0, jlo_struct.components()[1].type())); + irep_idt clsid = get_tag(lhs.type().subtype()); + java_root_class_init(jlo_init, jlo_struct, false, clsid); struct_exprt struct_rhs(deref.type()); struct_rhs.copy_to_operands(jlo_init); struct_rhs.copy_to_operands(rhs_length); struct_rhs.copy_to_operands(rhs_array); - code.add(code_assignt( - checked_dereference(lhs, lhs.type().subtype()), struct_rhs)); - - return code; + return code_assignt( + checked_dereference(lhs, lhs.type().subtype()), struct_rhs); } /// Produce code for an assignemnt of a string expr to a Java string. 
@@ -738,61 +798,30 @@ codet java_string_library_preprocesst::code_assign_components_to_java_string( /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ codet java_string_library_preprocesst::code_assign_string_expr_to_java_string( const exprt &lhs, - const string_exprt &rhs, + const refined_string_exprt &rhs, symbol_tablet &symbol_table) { return code_assign_components_to_java_string( - lhs, address_of_exprt(rhs.content()), rhs.length(), symbol_table); -} - -/// Produce code for an assignment of a string from a string expr. -/// \param lhs: an expression representing a java string -/// \param rhs: a string expression -/// \param loc: a location in the program -/// \param symbol_table: symbol table -/// \return return the following code: -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// data = new array[]; -/// *data = rhs.data; -/// lhs = { {Object} , length=rhs.length, data=data} -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -codet java_string_library_preprocesst:: - code_assign_string_expr_to_new_java_string( - const exprt &lhs, - const string_exprt &rhs, - const source_locationt &loc, - symbol_tablet &symbol_table) -{ - PRECONDITION(implements_java_char_sequence(lhs.type())); - dereference_exprt deref=checked_dereference(lhs, lhs.type().subtype()); - - code_blockt code; - exprt new_array=allocate_fresh_array( - get_data_type(deref.type(), symbol_table), loc, symbol_table, code); - code.add(code_assignt( - dereference_exprt(new_array, new_array.type().subtype()), rhs.content())); - - code.add(code_assign_components_to_java_string( - lhs, new_array, rhs.length(), symbol_table)); - - return code; + lhs, rhs.content(), rhs.length(), symbol_table); } /// \param lhs: a string expression /// \param rhs: an expression representing a java string +/// \param loc: source location /// \param symbol_table: symbol table -/// \param code: code block that gets appended the following code: +/// \param [out] code: code block that gets appended the following 
code: /// ~~~~~~~~~~~~~~~~~~~~~~ /// lhs.length=rhs->length -/// lhs.data=*(rhs->data) +/// lhs.data=rhs->data /// ~~~~~~~~~~~~~~~~~~~~~~ void java_string_library_preprocesst::code_assign_java_string_to_string_expr( - const string_exprt &lhs, + const refined_string_exprt &lhs, const exprt &rhs, + const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code) { - PRECONDITION(implements_java_char_sequence(rhs.type())); + PRECONDITION(implements_java_char_sequence_pointer(rhs.type())); typet deref_type; if(rhs.type().subtype().id()==ID_symbol) @@ -801,40 +830,37 @@ void java_string_library_preprocesst::code_assign_java_string_to_string_expr( else deref_type=rhs.type().subtype(); - dereference_exprt deref=checked_dereference(rhs, deref_type); + const dereference_exprt deref = checked_dereference(rhs, deref_type); // Fields of the string object - exprt rhs_length=get_length(deref, symbol_table); - exprt member_data=get_data(deref, symbol_table); - dereference_exprt rhs_data(member_data, member_data.type().subtype()); + const exprt rhs_length = get_length(deref, symbol_table); // Assignments code.add(code_assignt(lhs.length(), rhs_length)); - - // We always assume data of a String is not null - not_exprt data_not_null(equal_exprt( - member_data, null_pointer_exprt(to_pointer_type(member_data.type())))); - code.add(code_assumet(data_not_null)); - code.add(code_assignt(lhs.content(), rhs_data)); + const exprt data_as_array = get_data(deref, symbol_table); + code.add(code_assignt(lhs.content(), data_as_array)); } -/// \param lhs: an expression representing a java string +/// Create a string expression whose value is given by a literal /// \param s: the literal to be assigned +/// \param loc: location in the source /// \param symbol_table: symbol table -/// \return return the following code: +/// \param [out] code: gets added the following: /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// tmp_string = "" -/// lhs = (string_expr) tmp_string +/// lhs = 
cprover_string_literal_func(tmp_string) /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -codet java_string_library_preprocesst:: - code_assign_string_literal_to_string_expr( - const string_exprt &lhs, - const std::string &s, - symbol_tablet &symbol_table) +/// \return a new refined string +refined_string_exprt +java_string_library_preprocesst::string_literal_to_string_expr( + const std::string &s, + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code) { - constant_exprt expr(s, string_typet()); - return code_assign_function_to_string_expr( - lhs, ID_cprover_string_literal_func, {expr}, symbol_table); + const constant_exprt expr(s, string_typet()); + return string_expr_of_function( + ID_cprover_string_literal_func, {expr}, loc, symbol_table, code); } /// Used to provide code for the Java String.equals(Object) function. @@ -896,56 +922,47 @@ codet java_string_library_preprocesst::make_float_to_string_code( // For each possible case with have a condition and a string_exprt std::vector condition_list; - std::vector string_expr_list; + std::vector string_expr_list; // Case of computerized scientific notation condition_list.push_back(binary_relation_exprt(arg, ID_ge, zero)); - string_exprt sci_notation=fresh_string_expr(loc, symbol_table, code); - exprt sci_notation_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assign_function_to_string_expr( - sci_notation, + refined_string_exprt sci_notation = string_expr_of_function( ID_cprover_string_of_float_scientific_notation_func, {arg}, - symbol_table)); - // Assign string_expr_sym = { string_expr_length, string_expr_content } - code.add(code_assignt(sci_notation_sym, sci_notation)); + loc, + symbol_table, + code); string_expr_list.push_back(sci_notation); // Subcase of negative scientific notation condition_list.push_back(binary_relation_exprt(arg, ID_lt, zero)); - string_exprt neg_sci_notation=fresh_string_expr(loc, symbol_table, code); - exprt 
neg_sci_notation_sym=fresh_string_expr_symbol(loc, symbol_table, code); - string_exprt minus_sign=fresh_string_expr(loc, symbol_table, code); - code.add(code_assign_string_literal_to_string_expr( - minus_sign, "-", symbol_table)); - code.add(code_assign_function_to_string_expr( - neg_sci_notation, + refined_string_exprt minus_sign = + string_literal_to_string_expr("-", loc, symbol_table, code); + refined_string_exprt neg_sci_notation = string_expr_of_function( ID_cprover_string_concat_func, {minus_sign, sci_notation}, - symbol_table)); - code.add(code_assignt(neg_sci_notation_sym, neg_sci_notation)); + loc, + symbol_table, + code); string_expr_list.push_back(neg_sci_notation); // Case of NaN condition_list.push_back(isnan_exprt(arg)); - string_exprt nan=fresh_string_expr(loc, symbol_table, code); - code.add(code_assign_string_literal_to_string_expr( - nan, "NaN", symbol_table)); + refined_string_exprt nan = + string_literal_to_string_expr("NaN", loc, symbol_table, code); string_expr_list.push_back(nan); // Case of Infinity extractbit_exprt is_neg(arg, float_spec.width()-1); condition_list.push_back(and_exprt(isinf_exprt(arg), not_exprt(is_neg))); - string_exprt infinity=fresh_string_expr(loc, symbol_table, code); - code.add(code_assign_string_literal_to_string_expr( - infinity, "Infinity", symbol_table)); + refined_string_exprt infinity = + string_literal_to_string_expr("Infinity", loc, symbol_table, code); string_expr_list.push_back(infinity); // Case -Infinity - string_exprt minus_infinity=fresh_string_expr(loc, symbol_table, code); condition_list.push_back(and_exprt(isinf_exprt(arg), is_neg)); - code.add(code_assign_string_literal_to_string_expr( - minus_infinity, "-Infinity", symbol_table)); + refined_string_exprt minus_infinity = + string_literal_to_string_expr("-Infinity", loc, symbol_table, code); string_expr_list.push_back(minus_infinity); // Case of simple notation @@ -963,11 +980,8 @@ codet java_string_library_preprocesst::make_float_to_string_code( 
binary_relation_exprt(arg, ID_lt, bound_sup)); condition_list.push_back(is_simple_float); - string_exprt simple_notation=fresh_string_expr(loc, symbol_table, code); - exprt simple_notation_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assign_function_to_string_expr( - simple_notation, ID_cprover_string_of_float_func, {arg}, symbol_table)); - code.add(code_assignt(simple_notation_sym, simple_notation)); + refined_string_exprt simple_notation = string_expr_of_function( + ID_cprover_string_of_float_func, {arg}, loc, symbol_table, code); string_expr_list.push_back(simple_notation); // Case of a negative number in simple notation @@ -976,15 +990,12 @@ codet java_string_library_preprocesst::make_float_to_string_code( binary_relation_exprt(arg, ID_gt, unary_minus_exprt(bound_sup))); condition_list.push_back(is_neg_simple_float); - string_exprt neg_simple_notation=fresh_string_expr(loc, symbol_table, code); - exprt neg_simple_notation_sym= - fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assign_function_to_string_expr( - neg_simple_notation, + refined_string_exprt neg_simple_notation = string_expr_of_function( ID_cprover_string_concat_func, {minus_sign, simple_notation}, - symbol_table)); - code.add(code_assignt(neg_simple_notation_sym, simple_notation)); + loc, + symbol_table, + code); string_expr_list.push_back(neg_simple_notation); // Combining all cases @@ -1000,8 +1011,8 @@ codet java_string_library_preprocesst::make_float_to_string_code( { code_ifthenelset ife; ife.cond()=condition_list[i]; - ife.then_case()=code_assign_string_expr_to_new_java_string( - str, string_expr_list[i], loc, symbol_table); + ife.then_case() = code_assign_string_expr_to_java_string( + str, string_expr_list[i], symbol_table); ife.else_case()=tmp_code; tmp_code=ife; } @@ -1050,16 +1061,13 @@ codet java_string_library_preprocesst::make_init_function_from_call( exprt::operandst args=process_parameters(params, loc, symbol_table, code); // string_expr <- 
function(arg1) - string_exprt string_expr=string_expr_of_function_application( - function_name, args, loc, symbol_table, code); + refined_string_exprt string_expr = + string_expr_of_function(function_name, args, loc, symbol_table, code); // arg_this <- string_expr - code.add(code_assign_string_expr_to_new_java_string( - arg_this, string_expr, loc, symbol_table)); - - // string_expr_sym <- {string_expr.length, string_expr.content} - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); + code.add( + code_assign_string_expr_to_java_string( + arg_this, string_expr, symbol_table)); return code; } @@ -1109,68 +1117,6 @@ codet java_string_library_preprocesst::make_assign_function_from_call( return code; } -/// Used to provide our own implementation of the -/// `java.lang.String.toCharArray:()[C` function. -/// \param type: type of the function called -/// \param loc: location in the source -/// \param symbol_table: the symbol table -/// \return Code corresponding to -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -/// lhs = new java::array[char] -/// string_expr = {length=this->length, content=*(this->data)} -/// data = new char[] -/// *data = string_expr.content -/// lhs->data = &data[0] -/// lhs->length = string_expr.length -/// return lhs -/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -codet java_string_library_preprocesst::make_string_to_char_array_code( - const code_typet &type, - const source_locationt &loc, - symbol_tablet &symbol_table) -{ - code_blockt code; - PRECONDITION(!type.parameters().empty()); - const code_typet::parametert &p=type.parameters()[0]; - symbol_exprt string_argument(p.get_identifier(), p.type()); - PRECONDITION(implements_java_char_sequence(string_argument.type())); - - // lhs = new java::array[char] - exprt lhs=allocate_fresh_array( - type.return_type(), loc, symbol_table, code); - - // string_expr = {this->length, this->data} - string_exprt string_expr=fresh_string_expr(loc, 
symbol_table, code); - code_assign_java_string_to_string_expr( - string_expr, string_argument, symbol_table, code); - exprt string_expr_sym=fresh_string_expr_symbol( - loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); - - // data = new char[] - exprt data=allocate_fresh_array( - java_reference_type(string_expr.content().type()), loc, symbol_table, code); - - // *data = string_expr.content - dereference_exprt deref_data(data, data.type().subtype()); - code.add(code_assignt(deref_data, string_expr.content())); - - // lhs->data = &data[0] - dereference_exprt deref_lhs=checked_dereference(lhs, lhs.type().subtype()); - exprt lhs_data=get_data(deref_lhs, symbol_table); - index_exprt first_elt( - deref_data, from_integer(0, java_int_type()), java_char_type()); - code.add(code_assignt(lhs_data, address_of_exprt(first_elt))); - - // lhs->length = string_expr.length - exprt lhs_length=get_length(deref_lhs, symbol_table); - code.add(code_assignt(lhs_length, string_expr.length())); - - // return lhs - code.add(code_returnt(lhs)); - return code; -} - /// Adds to the code an assignment of the form /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /// type_name tmp_type_name @@ -1357,7 +1303,7 @@ exprt java_string_library_preprocesst::make_argument_for_format( code.add(code_declt(field_expr)); } else - field_expr=fresh_string_expr(loc, symbol_table, code); + field_expr = make_nondet_string_expr(loc, symbol_table, code); field_exprs.push_back(field_expr); arg_i_struct.copy_to_operands(field_expr); @@ -1394,12 +1340,9 @@ exprt java_string_library_preprocesst::make_argument_for_format( code_assign_java_string_to_string_expr( to_string_expr(field_expr), arg_i_as_string, + loc, symbol_table, code_not_null); - exprt arg_i_string_expr_sym=fresh_string_expr_symbol( - loc, symbol_table, code_not_null); - code_not_null.add(code_assignt( - arg_i_string_expr_sym, to_string_expr(field_expr))); } else if(name==ID_int || name==ID_float || name==ID_char || 
name==ID_boolean) { @@ -1461,15 +1404,13 @@ codet java_string_library_preprocesst::make_string_format_code( processed_args.push_back(make_argument_for_format( args[1], i, structured_type, loc, symbol_table, code)); - string_exprt string_expr=fresh_string_expr(loc, symbol_table, code); - code.add(code_assign_function_to_string_expr( - string_expr, ID_cprover_string_format_func, processed_args, symbol_table)); - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); + refined_string_exprt string_expr = string_expr_of_function( + ID_cprover_string_format_func, processed_args, loc, symbol_table, code); exprt java_string=allocate_fresh_string( type.return_type(), loc, symbol_table, code); - code.add(code_assign_string_expr_to_new_java_string( - java_string, string_expr, loc, symbol_table)); + code.add( + code_assign_string_expr_to_java_string( + java_string, string_expr, symbol_table)); code.add(code_returnt(java_string)); return code; } @@ -1513,35 +1454,29 @@ codet java_string_library_preprocesst::make_object_get_class_code( string_typet()); // string_expr = cprover_string_literal(this->@class_identifier) - string_exprt string_expr=fresh_string_expr(loc, symbol_table, code); - code.add( - code_assign_function_to_string_expr( - string_expr, - ID_cprover_string_literal_func, - {class_identifier}, - symbol_table)); - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); + refined_string_exprt string_expr = string_expr_of_function( + ID_cprover_string_literal_func, + {class_identifier}, + loc, + symbol_table, + code); // string_expr1 = substr(string_expr, 6) // We do this to remove the "java::" prefix - string_exprt string_expr1=fresh_string_expr(loc, symbol_table, code); - code.add( - code_assign_function_to_string_expr( - string_expr1, - ID_cprover_string_substring_func, - {string_expr, from_integer(6, java_int_type())}, - 
symbol_table)); - exprt string_expr_sym1=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym1, string_expr1)); + refined_string_exprt string_expr1 = string_expr_of_function( + ID_cprover_string_substring_func, + {string_expr, from_integer(6, java_int_type())}, + loc, + symbol_table, + code); // string1 = (String*) string_expr pointer_typet string_ptr_type=java_reference_type( symbol_table.lookup_ref("java::java.lang.String").type); exprt string1=allocate_fresh_string(string_ptr_type, loc, symbol_table, code); code.add( - code_assign_string_expr_to_new_java_string( - string1, string_expr1, loc, symbol_table)); + code_assign_string_expr_to_java_string( + string1, string_expr1, symbol_table)); // > class1 = Class.forName(string1) code_function_callt fun_call; @@ -1611,17 +1546,13 @@ codet java_string_library_preprocesst:: type.parameters(), loc, symbol_table, code); // String expression that will hold the result - string_exprt string_expr=string_expr_of_function_application( - function_name, arguments, loc, symbol_table, code); - - // Assign string_expr to symbol to keep track of it - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); + refined_string_exprt string_expr = + string_expr_of_function(function_name, arguments, loc, symbol_table, code); // Assign to string exprt str=allocate_fresh_string(type.return_type(), loc, symbol_table, code); - code.add(code_assign_string_expr_to_new_java_string( - str, string_expr, loc, symbol_table)); + code.add( + code_assign_string_expr_to_java_string(str, string_expr, symbol_table)); // Return value code.add(code_returnt(str)); @@ -1650,21 +1581,18 @@ codet java_string_library_preprocesst::make_copy_string_code( code_blockt code; // String expression that will hold the result - string_exprt string_expr=fresh_string_expr(loc, symbol_table, code); + refined_string_exprt string_expr = decl_string_expr(loc, 
symbol_table, code); // Assign the argument to string_expr code_typet::parametert op=type.parameters()[0]; symbol_exprt arg0(op.get_identifier(), op.type()); - code_assign_java_string_to_string_expr(string_expr, arg0, symbol_table, code); - - // Assign string_expr to string_expr_sym - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); + code_assign_java_string_to_string_expr( + string_expr, arg0, loc, symbol_table, code); // Allocate and assign the string exprt str=allocate_fresh_string(type.return_type(), loc, symbol_table, code); - code.add(code_assign_string_expr_to_new_java_string( - str, string_expr, loc, symbol_table)); + code.add( + code_assign_string_expr_to_java_string(str, string_expr, symbol_table)); // Return value code.add(code_returnt(str)); @@ -1691,21 +1619,72 @@ codet java_string_library_preprocesst::make_copy_constructor_code( code_blockt code; // String expression that will hold the result - string_exprt string_expr=fresh_string_expr(loc, symbol_table, code); + refined_string_exprt string_expr = decl_string_expr(loc, symbol_table, code); // Assign argument to a string_expr code_typet::parameterst params=type.parameters(); symbol_exprt arg1(params[1].get_identifier(), params[1].type()); - code_assign_java_string_to_string_expr(string_expr, arg1, symbol_table, code); + code_assign_java_string_to_string_expr( + string_expr, arg1, loc, symbol_table, code); + + // Assign string_expr to `this` object + symbol_exprt arg_this(params[0].get_identifier(), params[0].type()); + code.add( + code_assign_string_expr_to_java_string( + arg_this, string_expr, symbol_table)); + + return code; +} + +/// Used to provide code for constructor from a char array. 
+/// The implementation is similar to substring except the 3rd argument is a +/// count instead of end index +/// \param type: type of the function call +/// \param loc: location in the program_invocation_name +/// \param symbol_table: symbol table +/// \return code implementing String intitialization from a char array and +/// arguments offset and end. +codet java_string_library_preprocesst::make_init_from_array_code( + const code_typet &type, + const source_locationt &loc, + symbol_tablet &symbol_table) +{ + // Code for the output + code_blockt code; - // Assign string_expr to symbol to keep track of it - exprt string_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(string_expr_sym, string_expr)); + code_typet::parameterst params = type.parameters(); + PRECONDITION(params.size() == 4); + exprt::operandst args = + process_parameters(type.parameters(), loc, symbol_table, code); + INVARIANT( + args.size() == 4, "process_parameters preserves number of arguments"); + + /// \todo this assumes the array to be constant between all calls to + /// string primitives, which may not be true in general. 
+ refined_string_exprt string_arg = to_string_expr(args[1]); + add_pointer_to_array_association( + string_arg.content(), + dereference_exprt( + string_arg.content(), + array_typet(java_char_type(), infinity_exprt(java_int_type()))), + symbol_table, + loc, + code); + + // The third argument is `count`, whereas the third argument of substring + // is `end` which corresponds to `offset+count` + refined_string_exprt string_expr = string_expr_of_function( + ID_cprover_string_substring_func, + {args[1], args[2], plus_exprt(args[2], args[3])}, + loc, + symbol_table, + code); // Assign string_expr to `this` object symbol_exprt arg_this(params[0].get_identifier(), params[0].type()); - code.add(code_assign_string_expr_to_new_java_string( - arg_this, string_expr, loc, symbol_table)); + code.add( + code_assign_string_expr_to_java_string( + arg_this, string_expr, symbol_table)); return code; } @@ -1725,28 +1704,11 @@ codet java_string_library_preprocesst::make_string_length_code( const source_locationt &loc, symbol_tablet &symbol_table) { - // Code for the output - code_blockt code; - code_typet::parameterst params=type.parameters(); symbol_exprt arg_this(params[0].get_identifier(), params[0].type()); dereference_exprt deref= checked_dereference(arg_this, arg_this.type().subtype()); - - // Create a new string_exprt to be picked up by the solver - string_exprt str_expr=fresh_string_expr(loc, symbol_table, code); - - // Assign this to str_expr - code_assign_java_string_to_string_expr( - str_expr, arg_this, symbol_table, code); - - // Assign str_expr to str_expr_sym for that expression to be present in the - // symbol table in order to be processed by the string solver - exprt str_expr_sym=fresh_string_expr_symbol(loc, symbol_table, code); - code.add(code_assignt(str_expr_sym, str_expr)); - code.add(code_returnt(get_length(deref, symbol_table))); - - return code; + return code_returnt(get_length(deref, symbol_table)); } /// Should be called to provide code for string functions 
that are used in the @@ -1837,12 +1799,12 @@ void java_string_library_preprocesst::initialize_conversion_table() std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); - cprover_equivalent_to_java_constructor - ["java::java.lang.String.:([C)V"]= - ID_cprover_string_copy_func; - cprover_equivalent_to_java_constructor - ["java::java.lang.String.:([CII)V"]= - ID_cprover_string_copy_func; + conversion_table["java::java.lang.String.:([CII)V"] = std::bind( + &java_string_library_preprocesst::make_init_from_array_code, + this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3); cprover_equivalent_to_java_constructor ["java::java.lang.String.:()V"]= ID_cprover_string_empty_string_func; @@ -1962,14 +1924,6 @@ void java_string_library_preprocesst::initialize_conversion_table() cprover_equivalent_to_java_string_returning_function ["java::java.lang.String.substring:(I)Ljava/lang/String;"]= ID_cprover_string_substring_func; - conversion_table - ["java::java.lang.String.toCharArray:()[C"]= - std::bind( - &java_string_library_preprocesst::make_string_to_char_array_code, - this, - std::placeholders::_1, - std::placeholders::_2, - std::placeholders::_3); cprover_equivalent_to_java_string_returning_function ["java::java.lang.String.toLowerCase:()Ljava/lang/String;"]= ID_cprover_string_to_lower_case_func; @@ -1993,12 +1947,6 @@ void java_string_library_preprocesst::initialize_conversion_table() cprover_equivalent_to_java_string_returning_function ["java::java.lang.String.valueOf:(C)Ljava/lang/String;"]= ID_cprover_string_of_char_func; - cprover_equivalent_to_java_string_returning_function - ["java::java.lang.String.valueOf:([C)Ljava/lang/String;"]= - ID_cprover_string_copy_func; - cprover_equivalent_to_java_string_returning_function - ["java::java.lang.String.valueOf:([CII)Ljava/lang/String;"]= - ID_cprover_string_copy_func; conversion_table ["java::java.lang.String.valueOf:(D)Ljava/lang/String;"]= std::bind( @@ -2039,11 +1987,10 @@ void 
java_string_library_preprocesst::initialize_conversion_table() ["java::java.lang.StringBuilder.append:(C)Ljava/lang/StringBuilder;"]= ID_cprover_string_concat_char_func; cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuilder.append:([C)" - "Ljava/lang/StringBuilder;"]= + ["java::java.lang.StringBuilder.append:([C)Ljava/lang/StringBuilder;"] = ID_cprover_string_concat_func; cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuilder.append:(D)Ljava/lang/StringBuilder;"]= + ["java::java.lang.StringBuilder.append:(D)Ljava/lang/StringBuilder;"] = ID_cprover_string_concat_double_func; cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuilder.append:(Ljava/lang/CharSequence;II)" @@ -2085,12 +2032,6 @@ void java_string_library_preprocesst::initialize_conversion_table() cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuilder.insert:(IC)Ljava/lang/StringBuilder;"]= ID_cprover_string_insert_char_func; - cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuilder.insert:(I[C)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_func; - cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuilder.insert:(I[CII)Ljava/lang/StringBuilder;"]= - ID_cprover_string_insert_func; cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuilder.insert:(IZ)Ljava/lang/StringBuilder;"]= ID_cprover_string_insert_bool_func; @@ -2149,10 +2090,6 @@ void java_string_library_preprocesst::initialize_conversion_table() cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuffer.append:(C)Ljava/lang/StringBuffer;"]= ID_cprover_string_concat_char_func; - cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuffer.append:([C)" - "Ljava/lang/StringBuffer;"]= - ID_cprover_string_concat_func; cprover_equivalent_to_java_assign_and_return_function 
["java::java.lang.StringBuffer.append:(D)Ljava/lang/StringBuffer;"]= ID_cprover_string_concat_double_func; @@ -2170,7 +2107,10 @@ void java_string_library_preprocesst::initialize_conversion_table() "Ljava/lang/StringBuffer;"]= ID_cprover_string_concat_func; cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuffer.append:(Z)Ljava/lang/StringBuffer;"]= + ["java::java.lang.StringBuffer.append:(Ljava/lang/StringBuffer;)" + "Ljava/lang/StringBuffer;"] = ID_cprover_string_concat_func; + cprover_equivalent_to_java_assign_and_return_function + ["java::java.lang.StringBuffer.append:(Z)Ljava/lang/StringBuffer;"] = ID_cprover_string_concat_bool_func; cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuffer.appendCodePoint:(I)" @@ -2197,12 +2137,6 @@ void java_string_library_preprocesst::initialize_conversion_table() cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuffer.insert:(IC)Ljava/lang/StringBuffer;"]= ID_cprover_string_insert_char_func; - cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuffer.insert:(I[C)Ljava/lang/StringBuffer;"]= - ID_cprover_string_insert_func; - cprover_equivalent_to_java_assign_and_return_function - ["java::java.lang.StringBuffer.insert:(I[CII)Ljava/lang/StringBuffer;"]= - ID_cprover_string_insert_func; cprover_equivalent_to_java_assign_and_return_function ["java::java.lang.StringBuffer.insert:(II)Ljava/lang/StringBuffer;"]= ID_cprover_string_insert_int_func; diff --git a/src/java_bytecode/java_string_library_preprocess.h b/src/java_bytecode/java_string_library_preprocess.h index cff80bea63f..b02af6bfbcd 100644 --- a/src/java_bytecode/java_string_library_preprocess.h +++ b/src/java_bytecode/java_string_library_preprocess.h @@ -33,8 +33,12 @@ Date: March 2017 class java_string_library_preprocesst:public messaget { public: - java_string_library_preprocesst(): - refined_string_type(java_int_type(), java_char_type()) {} + 
java_string_library_preprocesst() + : char_type(java_char_type()), + index_type(java_int_type()), + refined_string_type(index_type, char_type) + { + } void initialize_conversion_table(); void initialize_refined_string_type(); @@ -52,6 +56,21 @@ class java_string_library_preprocesst:public messaget void add_string_type(const irep_idt &class_name, symbol_tablet &symbol_table); bool is_known_string_type(irep_idt class_name); + static bool implements_java_char_sequence_pointer(const typet &type) + { + return is_java_char_sequence_pointer_type(type) + || is_java_string_builder_pointer_type(type) + || is_java_string_buffer_pointer_type(type) + || is_java_string_pointer_type(type); + } + static bool implements_java_char_sequence(const typet &type) + { + return is_java_char_sequence_type(type) + || is_java_string_builder_type(type) + || is_java_string_buffer_type(type) + || is_java_string_type(type); + } + private: // We forbid copies of the object java_string_library_preprocesst( @@ -68,17 +87,11 @@ class java_string_library_preprocesst:public messaget static bool is_java_char_sequence_pointer_type(const typet &type); static bool is_java_char_array_type(const typet &type); static bool is_java_char_array_pointer_type(const typet &type); - static bool implements_java_char_sequence(const typet &type) - { - return - is_java_char_sequence_pointer_type(type) || - is_java_string_builder_pointer_type(type) || - is_java_string_buffer_pointer_type(type) || - is_java_string_pointer_type(type); - } character_refine_preprocesst character_preprocess; + const typet char_type; + const typet index_type; const refined_string_typet refined_string_type; typedef @@ -156,16 +169,6 @@ class java_string_library_preprocesst:public messaget const source_locationt &loc, symbol_tablet &symbol_table); - // Auxiliary functions - codet code_for_scientific_notation( - const exprt &arg, - const ieee_float_spect &float_spec, - const string_exprt &string_expr, - const exprt &tmp_string, - const 
refined_string_typet &refined_string_type, - const source_locationt &loc, - symbol_tablet &symbol_table); - // Helper functions exprt::operandst process_parameters( const code_typet::parameterst ¶ms, @@ -173,14 +176,15 @@ class java_string_library_preprocesst:public messaget symbol_tablet &symbol_table, code_blockt &init_code); - friend exprt convert_exprt_to_string_exprt_unit_test( - java_string_library_preprocesst &preprocess, - const exprt &deref, - const source_locationt &loc, - symbol_tablet &symbol_table, - code_blockt &init_code); + // Friending this function for unit testing convert_exprt_to_string_exprt + friend refined_string_exprt convert_exprt_to_string_exprt_unit_test( + java_string_library_preprocesst &preprocess, + const exprt &deref, + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &init_code); - exprt convert_exprt_to_string_exprt( + refined_string_exprt convert_exprt_to_string_exprt( const exprt &deref, const source_locationt &loc, symbol_tablet &symbol_table, @@ -198,22 +202,12 @@ class java_string_library_preprocesst:public messaget symbol_tablet &symbol_table, code_blockt &init_code); - string_exprt replace_char_array( + refined_string_exprt replace_char_array( const exprt &array_pointer, const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code); - void declare_function( - irep_idt function_name, const typet &type, symbol_tablet &symbol_table); - - typet get_data_type( - const typet &type, const symbol_tablet &symbol_table); - typet get_length_type( - const typet &type, const symbol_tablet &symbol_table); - exprt get_data(const exprt &expr, const symbol_tablet &symbol_table); - exprt get_length(const exprt &expr, const symbol_tablet &symbol_table); - symbol_exprt fresh_string( const typet &type, const source_locationt &loc, @@ -224,12 +218,12 @@ class java_string_library_preprocesst:public messaget const source_locationt &loc, symbol_tablet &symbol_table); - string_exprt fresh_string_expr( + 
refined_string_exprt decl_string_expr( const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code); - exprt fresh_string_expr_symbol( + refined_string_exprt make_nondet_string_expr( const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code); @@ -246,31 +240,13 @@ class java_string_library_preprocesst:public messaget symbol_tablet &symbol_table, code_blockt &code); - exprt make_function_application( - const irep_idt &function_name, - const exprt::operandst &arguments, - const typet &type, - symbol_tablet &symbol_table); - - codet code_assign_function_application( - const exprt &lhs, - const irep_idt &function_name, - const exprt::operandst &arguments, - symbol_tablet &symbol_table); - codet code_return_function_application( const irep_idt &function_name, const exprt::operandst &arguments, const typet &type, symbol_tablet &symbol_table); - codet code_assign_function_to_string_expr( - const string_exprt &string_expr, - const irep_idt &function_name, - const exprt::operandst &arguments, - symbol_tablet &symbol_table); - - string_exprt string_expr_of_function_application( + refined_string_exprt string_expr_of_function( const irep_idt &function_name, const exprt::operandst &arguments, const source_locationt &loc, @@ -284,24 +260,22 @@ class java_string_library_preprocesst:public messaget symbol_tablet &symbol_table); codet code_assign_string_expr_to_java_string( - const exprt &lhs, const string_exprt &rhs, symbol_tablet &symbol_table); - - codet code_assign_string_expr_to_new_java_string( const exprt &lhs, - const string_exprt &rhs, - const source_locationt &loc, + const refined_string_exprt &rhs, symbol_tablet &symbol_table); void code_assign_java_string_to_string_expr( - const string_exprt &lhs, + const refined_string_exprt &lhs, const exprt &rhs, + const source_locationt &loc, symbol_tablet &symbol_table, code_blockt &code); - codet code_assign_string_literal_to_string_expr( - const string_exprt &lhs, + refined_string_exprt 
string_literal_to_string_expr( const std::string &s, - symbol_tablet &symbol_table); + const source_locationt &loc, + symbol_tablet &symbol_table, + code_blockt &code); codet make_function_from_call( const irep_idt &function_name, @@ -349,6 +323,30 @@ class java_string_library_preprocesst:public messaget code_blockt &code); exprt get_object_at_index(const exprt &argv, int index); + + codet make_init_from_array_code( + const code_typet &type, + const source_locationt &loc, + symbol_tablet &symbol_table); }; +exprt make_nondet_infinite_char_array( + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code); + +void add_pointer_to_array_association( + const exprt &pointer, + const exprt &array, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code); + +void add_array_to_length_association( + const exprt &array, + const exprt &length, + symbol_tablet &symbol_table, + const source_locationt &loc, + code_blockt &code); + #endif // CPROVER_JAVA_BYTECODE_JAVA_STRING_LIBRARY_PREPROCESS_H diff --git a/src/java_bytecode/java_utils.cpp b/src/java_bytecode/java_utils.cpp index e3b20a51224..8184394e3b5 100644 --- a/src/java_bytecode/java_utils.cpp +++ b/src/java_bytecode/java_utils.cpp @@ -251,3 +251,47 @@ void java_add_components_to_class( components.push_back(component); } } + +/// Declare a function with the given name and type. +/// \param function_name: a name +/// \param type: a type +/// \param symbol_table: symbol table +void declare_function( + irep_idt function_name, + const typet &type, + symbol_tablet &symbol_table) +{ + auxiliary_symbolt func_symbol; + func_symbol.base_name=function_name; + func_symbol.pretty_name=function_name; + func_symbol.is_static_lifetime=false; + func_symbol.mode=ID_java; + func_symbol.name=function_name; + func_symbol.type=type; + symbol_table.add(func_symbol); +} + +/// Create a function application expression. 
+/// \param function_name: the name of the function +/// \param arguments: a list of arguments +/// \param type: return type of the function +/// \param symbol_table: a symbol table +/// \return a function application expression representing: +/// `function_name(arguments)` +exprt make_function_application( + const irep_idt &function_name, + const exprt::operandst &arguments, + const typet &type, + symbol_tablet &symbol_table) +{ + // Names of function to call + std::string fun_name=id2string(function_name); + + // Declaring the function + declare_function(fun_name, type, symbol_table); + + // Function application + function_application_exprt call(symbol_exprt(fun_name), type); + call.arguments()=arguments; + return call; +} diff --git a/src/java_bytecode/java_utils.h b/src/java_bytecode/java_utils.h index 774bb350572..1ed4cdcbb3d 100644 --- a/src/java_bytecode/java_utils.h +++ b/src/java_bytecode/java_utils.h @@ -81,4 +81,15 @@ size_t find_closing_delimiter( char open_char, char close_char); +void declare_function( + irep_idt function_name, + const typet &type, + symbol_tablet &symbol_table); + +exprt make_function_application( + const irep_idt &function_name, + const exprt::operandst &arguments, + const typet &type, + symbol_tablet &symbol_table); + #endif // CPROVER_JAVA_BYTECODE_JAVA_UTILS_H diff --git a/src/solvers/refinement/expr_cast.h b/src/solvers/refinement/expr_cast.h index 87531bd8dd1..6ab795860ea 100644 --- a/src/solvers/refinement/expr_cast.h +++ b/src/solvers/refinement/expr_cast.h @@ -45,10 +45,10 @@ struct expr_cast_implt final } }; -template<> -struct expr_cast_implt final +template <> +struct expr_cast_implt final { - optionalt operator()(const exprt &expr) const + optionalt operator()(const exprt &expr) const { if(is_refined_string_type(expr.type())) return to_string_expr(expr); diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h index f2a19153fe6..5172c277d1b 100644 --- 
a/src/solvers/refinement/string_constraint.h +++ b/src/solvers/refinement/string_constraint.h @@ -1,7 +1,7 @@ /*******************************************************************\ -Module: Defines string constraints. These are formulas talking about strings. - We implemented two forms of constraints: `string_constraintt` +Module: Defines string constraints. These are formulas talking about strings. + We implemented two forms of constraints: `string_constraintt` are formulas of the form $\forall univ_var \in [lb,ub[. prem => body$, and not_contains_constraintt of the form: $\forall x in [lb,ub[. p(x) => \exists y in [lb,ub[. s1[x+y] != s2[y]$. @@ -13,9 +13,9 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// \file /// Defines string constraints. These are formulas talking about strings. We /// implemented two forms of constraints: `string_constraintt` are formulas -/// of the form $\forall univ_var \in [lb,ub[. prem => body$, and -/// not_contains_constraintt of the form: $\forall x in [lb,ub[. p(x) => -/// \exists y in [lb,ub[. s1[x+y] != s2[y]$. +/// of the form \f$\forall univ\_var \in [lb,ub[. prem => body\f$, and +/// not_contains_constraintt of the form: \f$\forall x \in [lb,ub[. p(x) => +/// \exists y \in [lb,ub[. s1[x+y] \ne s2[y]\f$. 
#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_H #define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_H @@ -167,9 +167,9 @@ class string_not_contains_constraintt: public exprt exprt premise, exprt exists_bound_inf, exprt exists_bound_sup, - const string_exprt &s0, - const string_exprt &s1): - exprt(ID_string_not_contains_constraint) + const array_string_exprt &s0, + const array_string_exprt &s1) + : exprt(ID_string_not_contains_constraint) { copy_to_operands(univ_lower_bound, univ_bound_sup, premise); copy_to_operands(exists_bound_inf, exists_bound_sup, s0); @@ -201,14 +201,14 @@ class string_not_contains_constraintt: public exprt return operands()[4]; } - const string_exprt &s0() const + const array_string_exprt &s0() const { - return to_string_expr(operands()[5]); + return to_array_string_expr(operands()[5]); } - const string_exprt &s1() const + const array_string_exprt &s1() const { - return to_string_expr(operands()[6]); + return to_array_string_expr(operands()[6]); } }; diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index ff5b4040005..8d1c9c75546 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -59,7 +59,7 @@ class string_constraint_generatort final const std::vector &get_index_symbols() const; /// Set of strings that have been created by the generator - const std::set &get_created_strings() const; + const std::set &get_created_strings() const; exprt get_witness_of( const string_not_contains_constraintt &c, @@ -72,34 +72,42 @@ class string_constraint_generatort final const irep_idt &prefix, const typet &type=bool_typet()); symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); - /// Maps unresolved symbols to the string_exprt that was created for them - string_exprt add_axioms_for_refined_string(const exprt &expr); exprt add_axioms_for_function_application( const 
function_application_exprt &expr); symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type); - /// remove functions applications and create the necessary axioms - /// \par parameters: an expression containing function applications - /// \return an expression containing no function application - exprt substitute_function_applications(const exprt& expr); + const std::map &get_arrays_of_pointers() const + { + return arrays_of_pointers_; + } + + exprt get_length_of_string_array(const array_string_exprt &s) const; + + // Type used by primitives to signal errors + const signedbv_typet get_return_code_type() + { + return signedbv_typet(32); + } private: symbol_exprt fresh_boolean(const irep_idt &prefix); - string_exprt fresh_string(const refined_string_typet &type); - string_exprt get_string_expr(const exprt &expr); + array_string_exprt + fresh_string(const typet &index_type, const typet &char_type); + array_string_exprt get_string_expr(const exprt &expr); plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - string_exprt find_or_add_string_of_symbol( - const symbol_exprt &sym, - const refined_string_typet &ref_type); + array_string_exprt associate_char_array_to_char_pointer( + const exprt &char_pointer, + const typet &char_array_type); static constant_exprt constant_char(int i, const typet &char_type); - static irep_idt extract_java_string(const symbol_exprt &s); + array_string_exprt + char_array_of_pointer(const exprt &pointer, const exprt &length); - void add_default_axioms(const string_exprt &s); + void add_default_axioms(const array_string_exprt &s); exprt axiom_for_is_positive_index(const exprt &x); // The following functions add axioms for the returned value @@ -122,97 +130,92 @@ class string_constraint_generatort final exprt add_axioms_for_is_empty(const function_application_exprt &f); exprt add_axioms_for_is_prefix( - const string_exprt &prefix, const string_exprt &str, const exprt &offset); + const array_string_exprt 
&prefix, + const array_string_exprt &str, + const exprt &offset); exprt add_axioms_for_is_prefix( const function_application_exprt &f, bool swap_arguments=false); exprt add_axioms_for_is_suffix( const function_application_exprt &f, bool swap_arguments=false); exprt add_axioms_for_length(const function_application_exprt &f); - string_exprt add_axioms_for_empty_string(const function_application_exprt &f); - string_exprt add_axioms_for_char_set(const function_application_exprt &expr); - string_exprt add_axioms_for_copy(const function_application_exprt &f); - string_exprt add_axioms_for_concat( - const string_exprt &s1, const string_exprt &s2); - string_exprt add_axioms_for_concat_substr( - const string_exprt &s1, - const string_exprt &s2, + exprt add_axioms_for_empty_string(const function_application_exprt &f); + exprt add_axioms_for_char_set(const function_application_exprt &f); + exprt add_axioms_for_copy(const function_application_exprt &f); + exprt add_axioms_for_concat( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2); + exprt add_axioms_for_concat_char( + const array_string_exprt &res, + const array_string_exprt &s1, + const exprt &c); + exprt add_axioms_for_concat_char(const function_application_exprt &f); + exprt add_axioms_for_concat_substr( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, const exprt &start_index, const exprt &end_index); - string_exprt add_axioms_for_concat(const function_application_exprt &f); - string_exprt add_axioms_for_concat_int(const function_application_exprt &f); - string_exprt add_axioms_for_concat_long(const function_application_exprt &f); - string_exprt add_axioms_for_concat_bool(const function_application_exprt &f); - string_exprt add_axioms_for_concat_char(const function_application_exprt &f); - string_exprt add_axioms_for_concat_char( - const string_exprt &string_expr, const exprt &char_expr); - string_exprt 
add_axioms_for_concat_double( - const function_application_exprt &f); - string_exprt add_axioms_for_concat_float(const function_application_exprt &f); - string_exprt add_axioms_for_concat_code_point( - const function_application_exprt &f); - string_exprt add_axioms_for_constant( - irep_idt sval, const refined_string_typet &ref_type); - string_exprt add_axioms_for_delete( - const string_exprt &str, const exprt &start, const exprt &end); - string_exprt add_axioms_for_delete(const function_application_exprt &expr); - string_exprt add_axioms_for_delete_char_at( - const function_application_exprt &expr); - string_exprt add_axioms_for_format(const function_application_exprt &f); - string_exprt add_axioms_for_format( + exprt add_axioms_for_concat(const function_application_exprt &f); + exprt add_axioms_for_concat_code_point(const function_application_exprt &f); + exprt add_axioms_for_constant(const array_string_exprt &res, irep_idt sval); + exprt add_axioms_for_delete( + const array_string_exprt &res, + const array_string_exprt &str, + const exprt &start, + const exprt &end); + exprt add_axioms_for_delete(const function_application_exprt &f); + exprt add_axioms_for_delete_char_at(const function_application_exprt &expr); + exprt add_axioms_for_format(const function_application_exprt &f); + exprt add_axioms_for_format( + const array_string_exprt &res, const std::string &s, - const exprt::operandst &args, - const refined_string_typet &ref_type); - exprt add_axioms_for_format_specifier_is_correct( - const function_application_exprt &expr); - bool add_axioms_for_format_specifier_is_correct( - const std::string &s); + const exprt::operandst &args); - string_exprt add_axioms_for_format_specifier( + array_string_exprt add_axioms_for_format_specifier( const format_specifiert &fs, const struct_exprt &arg, - const refined_string_typet &ref_type); - - string_exprt add_axioms_for_insert( - const string_exprt &s1, const string_exprt &s2, const exprt &offset); - string_exprt 
add_axioms_for_insert(const function_application_exprt &f); - string_exprt add_axioms_for_insert_int(const function_application_exprt &f); - string_exprt add_axioms_for_insert_long(const function_application_exprt &f); - string_exprt add_axioms_for_insert_bool(const function_application_exprt &f); - string_exprt add_axioms_for_insert_char(const function_application_exprt &f); - string_exprt add_axioms_for_insert_double( - const function_application_exprt &f); - string_exprt add_axioms_for_insert_float(const function_application_exprt &f); - string_exprt add_axioms_from_literal(const function_application_exprt &f); - string_exprt add_axioms_from_int(const function_application_exprt &f); - string_exprt add_axioms_from_int( + const typet &index_type, + const typet &char_type); + + exprt add_axioms_for_insert( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &offset); + exprt add_axioms_for_insert(const function_application_exprt &f); + exprt add_axioms_for_insert_int(const function_application_exprt &f); + exprt add_axioms_for_insert_bool(const function_application_exprt &f); + exprt add_axioms_for_insert_char(const function_application_exprt &f); + exprt add_axioms_for_insert_float(const function_application_exprt &f); + exprt add_axioms_for_insert_double(const function_application_exprt &f); + exprt add_axioms_from_literal(const function_application_exprt &f); + exprt add_axioms_from_int(const function_application_exprt &f); + exprt add_axioms_from_int( + const array_string_exprt &res, const exprt &input_int, - const refined_string_typet &ref_type, - size_t max_size=0); - string_exprt add_axioms_from_int_with_radix( + size_t max_size = 0); + exprt add_axioms_from_int_with_radix( + const array_string_exprt &res, const exprt &input_int, const exprt &radix, - const refined_string_typet &ref_type, - size_t max_size=0); - string_exprt add_axioms_from_int_hex( - const exprt &i, const refined_string_typet 
&ref_type); - string_exprt add_axioms_from_int_hex(const function_application_exprt &f); - string_exprt add_axioms_from_long(const function_application_exprt &f); - string_exprt add_axioms_from_long(const exprt &i, size_t max_size); - string_exprt add_axioms_from_bool(const function_application_exprt &f); - string_exprt add_axioms_from_bool( - const exprt &i, const refined_string_typet &ref_type); - string_exprt add_axioms_from_char(const function_application_exprt &f); - string_exprt add_axioms_from_char( - const exprt &i, const refined_string_typet &ref_type); + size_t max_size = 0); + exprt add_axioms_from_int_hex(const array_string_exprt &res, const exprt &i); + exprt add_axioms_from_int_hex(const function_application_exprt &f); + exprt add_axioms_from_long(const function_application_exprt &f); + exprt add_axioms_from_bool(const function_application_exprt &f); + exprt add_axioms_from_bool(const array_string_exprt &res, const exprt &i); + exprt add_axioms_from_char(const function_application_exprt &f); + exprt add_axioms_from_char(const array_string_exprt &res, const exprt &i); exprt add_axioms_for_index_of( - const string_exprt &str, + const array_string_exprt &str, const exprt &c, const exprt &from_index); // Add axioms corresponding to the String.indexOf:(String;I) java function exprt add_axioms_for_index_of_string( - const string_exprt &haystack, - const string_exprt &needle, + const array_string_exprt &haystack, + const array_string_exprt &needle, const exprt &from_index); // Add axioms corresponding to the String.indexOf java functions @@ -220,13 +223,13 @@ class string_constraint_generatort final // Add axioms corresponding to the String.lastIndexOf:(String;I) java function exprt add_axioms_for_last_index_of_string( - const string_exprt &haystack, - const string_exprt &needle, + const array_string_exprt &haystack, + const array_string_exprt &needle, const exprt &from_index); // Add axioms corresponding to the String.lastIndexOf:(CI) java function exprt 
add_axioms_for_last_index_of( - const string_exprt &str, + const array_string_exprt &str, const exprt &c, const exprt &from_index); @@ -238,44 +241,45 @@ class string_constraint_generatort final // and minus infinity the string are "Infinity" and "-Infinity respectively // otherwise the string contains only characters in [0123456789.] and '-' at // the start for negative number - string_exprt add_axioms_for_string_of_float( - const function_application_exprt &f); - string_exprt add_axioms_for_string_of_float( - const exprt &f, const refined_string_typet &ref_type); - - string_exprt add_axioms_for_fractional_part( - const exprt &i, size_t max_size, const refined_string_typet &ref_type); - string_exprt add_axioms_from_float_scientific_notation( - const exprt &f, const refined_string_typet &ref_type); - string_exprt add_axioms_from_float_scientific_notation( + exprt add_axioms_for_string_of_float(const function_application_exprt &f); + exprt + add_axioms_for_string_of_float(const array_string_exprt &res, const exprt &f); + exprt add_axioms_for_fractional_part( + const array_string_exprt &res, + const exprt &i, + size_t max_size); + exprt add_axioms_from_float_scientific_notation( + const array_string_exprt &res, + const exprt &f); + exprt add_axioms_from_float_scientific_notation( const function_application_exprt &f); // Add axioms corresponding to the String.valueOf(D) java function // TODO: the specifications is only partial - string_exprt add_axioms_from_double(const function_application_exprt &f); + exprt add_axioms_from_double(const function_application_exprt &f); - string_exprt add_axioms_for_replace(const function_application_exprt &f); - string_exprt add_axioms_for_set_length(const function_application_exprt &f); + exprt add_axioms_for_replace(const function_application_exprt &f); + exprt add_axioms_for_set_length(const function_application_exprt &f); // TODO: the specification may not be correct for the case where the // string is shorter than end. 
An actual java program should throw an // exception in that case - string_exprt add_axioms_for_substring( - const string_exprt &str, const exprt &start, const exprt &end); - string_exprt add_axioms_for_substring(const function_application_exprt &expr); - - string_exprt add_axioms_for_to_lower_case( - const function_application_exprt &expr); - string_exprt add_axioms_for_to_upper_case( - const function_application_exprt &expr); - string_exprt add_axioms_for_to_upper_case( - const string_exprt &expr); - string_exprt add_axioms_for_trim(const function_application_exprt &expr); - - string_exprt add_axioms_for_code_point( - const exprt &code_point, const refined_string_typet &ref_type); - exprt add_axioms_for_char_pointer(const function_application_exprt &fun); - string_exprt add_axioms_for_if(const if_exprt &expr); + exprt add_axioms_for_substring( + const array_string_exprt &res, + const array_string_exprt &str, + const exprt &start, + const exprt &end); + exprt add_axioms_for_substring(const function_application_exprt &f); + exprt add_axioms_for_to_lower_case(const function_application_exprt &f); + exprt add_axioms_for_to_upper_case(const function_application_exprt &f); + exprt add_axioms_for_to_upper_case( + const array_string_exprt &res, + const array_string_exprt &expr); + exprt add_axioms_for_trim(const function_application_exprt &f); + + exprt add_axioms_for_code_point( + const array_string_exprt &res, + const exprt &code_point); exprt add_axioms_for_char_literal(const function_application_exprt &f); // Add axioms corresponding the String.codePointCount java function @@ -295,19 +299,18 @@ class string_constraint_generatort final const exprt &input_int, const typet &type, const bool strict_formatting, - const string_exprt &str, + const array_string_exprt &str, const std::size_t max_string_length, const exprt &radix, const unsigned long radix_ul); void add_axioms_for_correct_number_format( const exprt &input_int, - const string_exprt &str, + const 
array_string_exprt &str, const exprt &radix_as_char, const unsigned long radix_ul, const std::size_t max_size, const bool strict_formatting); exprt add_axioms_for_parse_int(const function_application_exprt &f); - exprt add_axioms_for_to_char_array(const function_application_exprt &f); exprt add_axioms_for_compare_to(const function_application_exprt &f); // Add axioms corresponding to the String.intern java function @@ -315,14 +318,9 @@ class string_constraint_generatort final // string pointers symbol_exprt add_axioms_for_intern(const function_application_exprt &f); - // assert that the number of argument is equal to nb and extract them - static const function_application_exprt::argumentst &args( - const function_application_exprt &expr, size_t nb) - { - const function_application_exprt::argumentst &args=expr.arguments(); - PRECONDITION(args.size()==nb); - return args; - } + exprt associate_array_to_pointer(const function_application_exprt &f); + + exprt associate_length_to_array(const function_application_exprt &f); // Helper functions static exprt int_of_hex_char(const exprt &chr); @@ -330,8 +328,6 @@ class string_constraint_generatort final static exprt is_low_surrogate(const exprt &chr); static exprt character_equals_ignore_case( exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); - static bool is_constant_string(const string_exprt &expr); - static string_exprt empty_string(const refined_string_typet &ref_type); unsigned long to_integer_or_default(const exprt &expr, unsigned long def); // MEMBERS @@ -340,31 +336,27 @@ class string_constraint_generatort final // Used to store information about witnesses for not_contains constraints std::map witness; private: - // The integer with the longest string is Integer.MIN_VALUE which is -2^31, - // that is -2147483648 so takes 11 characters to write. - // The long with the longest string is Long.MIN_VALUE which is -2^63, - // approximately -9.223372037*10^18 so takes 20 characters to write. 
- CBMC_CONSTEXPR static const std::size_t MAX_INTEGER_LENGTH=11; - CBMC_CONSTEXPR static const std::size_t MAX_LONG_LENGTH=20; - CBMC_CONSTEXPR static const std::size_t MAX_FLOAT_LENGTH=15; - CBMC_CONSTEXPR static const std::size_t MAX_DOUBLE_LENGTH=30; - std::set m_created_strings; - unsigned m_symbol_count=0; - const messaget m_message; - const bool m_force_printable_characters; - - std::vector m_axioms; - std::map m_unresolved_symbols; - std::vector m_boolean_symbols; - std::vector m_index_symbols; - std::map m_function_application_cache; - const namespacet m_ns; + std::set created_strings; + unsigned symbol_count=0; + const messaget message; + const bool force_printable_characters; + + std::vector axioms; + std::vector boolean_symbols; + std::vector index_symbols; + const namespacet ns; // To each string on which hash_code was called we associate a symbol // representing the return value of the hash_code function. - std::map m_hash_code_of_string; + std::map hash_code_of_string; // Pool used for the intern method - std::map m_intern_of_string; + std::map intern_of_string; + + // associate arrays to char pointers + std::map arrays_of_pointers_; + + // associate length to arrays of infinite size + std::map length_of_array_; }; exprt is_digit_with_radix( diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp index 55b914d1aa7..a2e67a3ea82 100644 --- a/src/solvers/refinement/string_constraint_generator_code_points.cpp +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -12,23 +12,16 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include -/******************************************************************* \ - -Function: string_constraint_generatort::add_axioms_for_code_point - - Inputs: an expression representing a java code point - - Outputs: a new string expression - - Purpose: add axioms for the conversion of an integer representing 
a java - code point to a utf-16 string - -\*******************************************************************/ - -string_exprt string_constraint_generatort::add_axioms_for_code_point( - const exprt &code_point, const refined_string_typet &ref_type) +/// add axioms for the conversion of an integer representing a java +/// code point to a utf-16 string +/// \param res: array of characters corresponding to the result fo the function +/// \param code_point: an expression representing a java code point +/// \return integer expression equal to zero +exprt string_constraint_generatort::add_axioms_for_code_point( + const array_string_exprt &res, + const exprt &code_point) { - string_exprt res=fresh_string(ref_type); + const typet &char_type = res.content().type().subtype(); const typet &type=code_point.type(); PRECONDITION(type.id()==ID_signedbv); @@ -48,36 +41,36 @@ string_exprt string_constraint_generatort::add_axioms_for_code_point( binary_relation_exprt small(code_point, ID_lt, hex010000); implies_exprt a1(small, res.axiom_for_has_length(1)); - m_axioms.push_back(a1); + axioms.push_back(a1); implies_exprt a2(not_exprt(small), res.axiom_for_has_length(2)); - m_axioms.push_back(a2); + axioms.push_back(a2); - typecast_exprt code_point_as_char(code_point, ref_type.get_char_type()); + typecast_exprt code_point_as_char(code_point, char_type); implies_exprt a3(small, equal_exprt(res[0], code_point_as_char)); - m_axioms.push_back(a3); + axioms.push_back(a3); plus_exprt first_char( hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400)); implies_exprt a4( not_exprt(small), - equal_exprt(res[0], typecast_exprt(first_char, ref_type.get_char_type()))); - m_axioms.push_back(a4); + equal_exprt(res[0], typecast_exprt(first_char, char_type))); + axioms.push_back(a4); plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400)); implies_exprt a5( not_exprt(small), - equal_exprt(res[1], typecast_exprt(second_char, ref_type.get_char_type()))); - m_axioms.push_back(a5); + 
equal_exprt(res[1], typecast_exprt(second_char, char_type))); + axioms.push_back(a5); - return res; + return from_integer(0, get_return_code_type()); } /// the output is true when the character is a high surrogate for UTF-16 /// encoding, see https://en.wikipedia.org/wiki/UTF-16 for more explenation /// about the encoding; this is true when the character is in the range /// 0xD800..0xDBFF -/// \par parameters: a character expression +/// \param chr: a character expression /// \return a Boolean expression exprt string_constraint_generatort::is_high_surrogate(const exprt &chr) { @@ -90,7 +83,7 @@ exprt string_constraint_generatort::is_high_surrogate(const exprt &chr) /// encoding, see https://en.wikipedia.org/wiki/UTF-16 for more explenation /// about the encoding; this is true when the character is in the range /// 0xDC00..0xDFFF -/// \par parameters: a character expression +/// \param chr: a character expression /// \return a Boolean expression exprt string_constraint_generatort::is_low_surrogate(const exprt &chr) { @@ -104,8 +97,9 @@ exprt string_constraint_generatort::is_low_surrogate(const exprt &chr) /// https://en.wikipedia.org/wiki/UTF-16 for more explenation about the /// encoding; the operation we perform is: /// pair_value=0x10000+(((char1%0x0800)*0x0400)+char2%0x0400) -/// \par parameters: two character expressions and a return type -/// char1 and char2 should be of type return_type +/// \param char1: a character expression +/// \param char2: a character expression +/// \param return_type: type of the expression to return /// \return an integer expression of type return_type exprt pair_value(exprt char1, exprt char2, typet return_type) { @@ -119,30 +113,31 @@ exprt pair_value(exprt char1, exprt char2, typet return_type) } /// add axioms corresponding to the String.codePointAt java function -/// \par parameters: function application with two arguments: a string and an +/// \param f: function application with arguments a string and an /// index /// \return 
a integer expression corresponding to a code point exprt string_constraint_generatort::add_axioms_for_code_point_at( const function_application_exprt &f) { - typet return_type=f.type(); + const typet &return_type = f.type(); PRECONDITION(return_type.id()==ID_signedbv); - string_exprt str=get_string_expr(args(f, 2)[0]); - const exprt &pos=args(f, 2)[1]; + PRECONDITION(f.arguments().size() == 2); + const array_string_exprt str = get_string_expr(f.arguments()[0]); + const exprt &pos = f.arguments()[1]; - symbol_exprt result=fresh_symbol("char", return_type); - exprt index1=from_integer(1, str.length().type()); + const symbol_exprt result = fresh_symbol("char", return_type); + const exprt index1 = from_integer(1, str.length().type()); const exprt &char1=str[pos]; const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)]; - exprt char1_as_int=typecast_exprt(char1, return_type); - exprt char2_as_int=typecast_exprt(char2, return_type); - exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt is_low=is_low_surrogate( - str[plus_exprt_with_overflow_check(pos, index1)]); - exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); - - m_axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); - m_axioms.push_back( + const typecast_exprt char1_as_int(char1, return_type); + const typecast_exprt char2_as_int(char2, return_type); + const exprt pair = pair_value(char1_as_int, char2_as_int, return_type); + const exprt is_low = + is_low_surrogate(str[plus_exprt_with_overflow_check(pos, index1)]); + const and_exprt return_pair(is_high_surrogate(str[pos]), is_low); + + axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); + axioms.push_back( implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int))); return result; } @@ -159,42 +154,43 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before( typet return_type=f.type(); PRECONDITION(return_type.id()==ID_signedbv); symbol_exprt 
result=fresh_symbol("char", return_type); - string_exprt str=get_string_expr(args[0]); + array_string_exprt str = get_string_expr(args[0]); const exprt &char1= str[minus_exprt(args[1], from_integer(2, str.length().type()))]; const exprt &char2= str[minus_exprt(args[1], from_integer(1, str.length().type()))]; - exprt char1_as_int=typecast_exprt(char1, return_type); - exprt char2_as_int=typecast_exprt(char2, return_type); + const typecast_exprt char1_as_int(char1, return_type); + const typecast_exprt char2_as_int(char2, return_type); - exprt pair=pair_value(char1_as_int, char2_as_int, return_type); - exprt return_pair=and_exprt( + const exprt pair = pair_value(char1_as_int, char2_as_int, return_type); + const and_exprt return_pair( is_high_surrogate(char1), is_low_surrogate(char2)); - m_axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); - m_axioms.push_back( + axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); + axioms.push_back( implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int))); return result; } /// add axioms giving approximate bounds on the result of the /// String.codePointCount java function -/// \par parameters: function application with three arguments: a string and two -/// indexes +/// \param f: function application with three arguments string `str`, integer +/// `begin` and integer `end`. 
/// \return an integer expression exprt string_constraint_generatort::add_axioms_for_code_point_count( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 3)[0]); - const exprt &begin=args(f, 3)[1]; - const exprt &end=args(f, 3)[2]; + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt str = get_string_expr(f.arguments()[0]); + const exprt &begin = f.arguments()[1]; + const exprt &end = f.arguments()[2]; const typet &return_type=f.type(); - symbol_exprt result=fresh_symbol("code_point_count", return_type); - minus_exprt length(end, begin); - div_exprt minimum(length, from_integer(2, length.type())); - m_axioms.push_back(binary_relation_exprt(result, ID_le, length)); - m_axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); + const symbol_exprt result = fresh_symbol("code_point_count", return_type); + const minus_exprt length(end, begin); + const div_exprt minimum(length, from_integer(2, length.type())); + axioms.push_back(binary_relation_exprt(result, ID_le, length)); + axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); return result; } @@ -202,23 +198,22 @@ exprt string_constraint_generatort::add_axioms_for_code_point_count( /// add axioms giving approximate bounds on the result of the /// String.offsetByCodePointCount java function. We approximate the result by /// saying the result is between index + offset and index + 2 * offset -/// \par parameters: function application with three arguments: a string and two -/// indexes +/// \param f: function application with arguments string `str`, integer `index` +/// and integer `offset`. 
/// \return a new string expression exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 3)[0]); - const exprt &index=args(f, 3)[1]; - const exprt &offset=args(f, 3)[2]; + PRECONDITION(f.arguments().size() == 3); + const exprt &index = f.arguments()[1]; + const exprt &offset = f.arguments()[2]; const typet &return_type=f.type(); - symbol_exprt result=fresh_symbol("offset_by_code_point", return_type); + const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type); - exprt minimum=plus_exprt_with_overflow_check(index, offset); - exprt maximum=plus_exprt_with_overflow_check( - index, plus_exprt_with_overflow_check(offset, offset)); - m_axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); - m_axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); + const exprt minimum = plus_exprt_with_overflow_check(index, offset); + const exprt maximum = plus_exprt_with_overflow_check(minimum, offset); + axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); + axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); return result; } diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp index 7bc276cd24a..22f73351317 100644 --- a/src/solvers/refinement/string_constraint_generator_comparison.cpp +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -23,11 +23,13 @@ exprt string_constraint_generatort::add_axioms_for_equals( const function_application_exprt &f) { PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool); + PRECONDITION(f.arguments().size() == 2); + + array_string_exprt s1 = get_string_expr(f.arguments()[0]); + array_string_exprt s2 = get_string_expr(f.arguments()[1]); symbol_exprt eq=fresh_boolean("equal"); typecast_exprt tc_eq(eq, f.type()); - string_exprt s1=get_string_expr(args(f, 2)[0]); - 
string_exprt s2=get_string_expr(args(f, 2)[1]); typet index_type=s1.length().type(); // We want to write: @@ -35,15 +37,15 @@ exprt string_constraint_generatort::add_axioms_for_equals( // We add axioms: // a1 : eq => s1.length=s2.length // a2 : forall i s1[i]=s2[i] - // a3 : !eq => s1.length!=s2.length - // || (witness (s1.length!=s2.length && witness=-1) + // || (0<=witness |s1|=|s2| @@ -116,32 +117,33 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( // eq => char_equal_ignore_case(s1[qvar],s2[qvar]); // a3 : !eq => |s1|!=s2 || (0 <=witness<|s1| &&!char_equal_ignore_case) - implies_exprt a1(eq, s1.axiom_for_has_same_length_as(s2)); - m_axioms.push_back(a1); + const implies_exprt a1(eq, equal_exprt(s1.length(), s2.length())); + axioms.push_back(a1); - symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case", index_type); - exprt constr2=character_equals_ignore_case( - s1[qvar], s2[qvar], char_a, char_A, char_Z); - string_constraintt a2(qvar, s1.length(), eq, constr2); - m_axioms.push_back(a2); + const symbol_exprt qvar = + fresh_univ_index("QA_equal_ignore_case", index_type); + const exprt constr2 = + character_equals_ignore_case(s1[qvar], s2[qvar], char_a, char_A, char_Z); + const string_constraintt a2(qvar, s1.length(), eq, constr2); + axioms.push_back(a2); - symbol_exprt witness=fresh_exist_index( - "witness_unequal_ignore_case", index_type); - exprt zero=from_integer(0, witness.type()); - and_exprt bound_witness( + const symbol_exprt witness = + fresh_exist_index("witness_unequal_ignore_case", index_type); + const exprt zero = from_integer(0, witness.type()); + const and_exprt bound_witness( binary_relation_exprt(witness, ID_lt, s1.length()), binary_relation_exprt(witness, ID_ge, zero)); - exprt witness_eq=character_equals_ignore_case( + const exprt witness_eq = character_equals_ignore_case( s1[witness], s2[witness], char_a, char_A, char_Z); - not_exprt witness_diff(witness_eq); - implies_exprt a3( + const not_exprt 
witness_diff(witness_eq); + const implies_exprt a3( not_exprt(eq), or_exprt( notequal_exprt(s1.length(), s2.length()), and_exprt(bound_witness, witness_diff))); - m_axioms.push_back(a3); + axioms.push_back(a3); - return tc_eq; + return typecast_exprt(eq, f.type()); } /// add axioms stating that if two strings are equal then their hash codes are @@ -151,13 +153,14 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case( exprt string_constraint_generatort::add_axioms_for_hash_code( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 1)[0]); - typet return_type=f.type(); - typet index_type=str.length().type(); + PRECONDITION(f.arguments().size() == 1); + const array_string_exprt str = get_string_expr(f.arguments()[0]); + const typet &return_type = f.type(); + const typet &index_type = str.length().type(); - auto pair=m_hash_code_of_string.insert( + auto pair=hash_code_of_string.insert( std::make_pair(str, fresh_symbol("hash", return_type))); - exprt hash=pair.first->second; + const exprt hash = pair.first->second; // for each string s. either: // c1: hash(str)=hash(s) @@ -165,19 +168,17 @@ exprt string_constraint_generatort::add_axioms_for_hash_code( // c3: (|str|==|s| && exists i<|s|. 
s[i]!=str[i]) // WARNING: the specification may be incomplete - for(auto it : m_hash_code_of_string) + for(auto it : hash_code_of_string) { - symbol_exprt i=fresh_exist_index("index_hash", index_type); - equal_exprt c1(it.second, hash); - not_exprt c2(equal_exprt(it.first.length(), str.length())); - and_exprt c3( + const symbol_exprt i = fresh_exist_index("index_hash", index_type); + const equal_exprt c1(it.second, hash); + const notequal_exprt c2(it.first.length(), str.length()); + const and_exprt c3( equal_exprt(it.first.length(), str.length()), and_exprt( - not_exprt(equal_exprt(str[i], it.first[i])), - and_exprt( - str.axiom_for_length_gt(i), - axiom_for_is_positive_index(i)))); - m_axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); + notequal_exprt(str[i], it.first[i]), + and_exprt(str.axiom_for_length_gt(i), axiom_for_is_positive_index(i)))); + axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); } return hash; } @@ -188,11 +189,13 @@ exprt string_constraint_generatort::add_axioms_for_hash_code( exprt string_constraint_generatort::add_axioms_for_compare_to( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 2)[0]); - string_exprt s2=get_string_expr(args(f, 2)[1]); + PRECONDITION(f.arguments().size() == 2); const typet &return_type=f.type(); - symbol_exprt res=fresh_symbol("compare_to", return_type); - typet index_type=s1.length().type(); + PRECONDITION(return_type.id() == ID_signedbv); + const array_string_exprt &s1 = get_string_expr(f.arguments()[0]); + const array_string_exprt &s2 = get_string_expr(f.arguments()[1]); + const symbol_exprt res = fresh_symbol("compare_to", return_type); + const typet &index_type = s1.length().type(); // In the lexicographic comparison, x is the first point where the two // strings differ. 
@@ -207,47 +210,45 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) // a4 : forall i' s1[i]=s2[i] - PRECONDITION(return_type.id()==ID_signedbv); - - equal_exprt res_null=equal_exprt(res, from_integer(0, return_type)); - implies_exprt a1(res_null, s1.axiom_for_has_same_length_as(s2)); - m_axioms.push_back(a1); + const equal_exprt res_null(res, from_integer(0, return_type)); + const implies_exprt a1(res_null, equal_exprt(s1.length(), s2.length())); + axioms.push_back(a1); - symbol_exprt i=fresh_univ_index("QA_compare_to", index_type); - string_constraintt a2(i, s1.length(), res_null, equal_exprt(s1[i], s2[i])); - m_axioms.push_back(a2); + const symbol_exprt i = fresh_univ_index("QA_compare_to", index_type); + const string_constraintt a2( + i, s1.length(), res_null, equal_exprt(s1[i], s2[i])); + axioms.push_back(a2); - symbol_exprt x=fresh_exist_index("index_compare_to", index_type); - equal_exprt ret_char_diff( + const symbol_exprt x = fresh_exist_index("index_compare_to", index_type); + const equal_exprt ret_char_diff( res, minus_exprt( - typecast_exprt(s1[x], return_type), - typecast_exprt(s2[x], return_type))); - equal_exprt ret_length_diff( + typecast_exprt(s1[x], return_type), typecast_exprt(s2[x], return_type))); + const equal_exprt ret_length_diff( res, minus_exprt( typecast_exprt(s1.length(), return_type), typecast_exprt(s2.length(), return_type))); - or_exprt guard1( - and_exprt(s1.axiom_for_length_le(s2), s1.axiom_for_length_gt(x)), - and_exprt(s1.axiom_for_length_ge(s2), s2.axiom_for_length_gt(x))); - and_exprt cond1(ret_char_diff, guard1); - or_exprt guard2( - and_exprt(s2.axiom_for_length_gt(s1), s1.axiom_for_has_length(x)), - and_exprt(s1.axiom_for_length_gt(s2), s2.axiom_for_has_length(x))); - and_exprt cond2(ret_length_diff, guard2); - - implies_exprt a3( + const or_exprt guard1( + and_exprt(s1.axiom_for_length_le(s2.length()), s1.axiom_for_length_gt(x)), + 
and_exprt(s1.axiom_for_length_ge(s2.length()), s2.axiom_for_length_gt(x))); + const and_exprt cond1(ret_char_diff, guard1); + const or_exprt guard2( + and_exprt(s2.axiom_for_length_gt(s1.length()), s1.axiom_for_has_length(x)), + and_exprt(s1.axiom_for_length_gt(s2.length()), s2.axiom_for_has_length(x))); + const and_exprt cond2(ret_length_diff, guard2); + + const implies_exprt a3( not_exprt(res_null), and_exprt( binary_relation_exprt(x, ID_ge, from_integer(0, return_type)), or_exprt(cond1, cond2))); - m_axioms.push_back(a3); + axioms.push_back(a3); - symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type); - string_constraintt a4( + const symbol_exprt i2 = fresh_univ_index("QA_compare_to", index_type); + const string_constraintt a4( i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2])); - m_axioms.push_back(a4); + axioms.push_back(a4); return res; } @@ -259,45 +260,43 @@ exprt string_constraint_generatort::add_axioms_for_compare_to( symbol_exprt string_constraint_generatort::add_axioms_for_intern( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 1)[0]); + PRECONDITION(f.arguments().size() == 1); + const array_string_exprt str = get_string_expr(f.arguments()[0]); // For now we only enforce content equality and not pointer equality const typet &return_type=f.type(); + const typet index_type = str.length().type(); - typet index_type=str.length().type(); - - auto pair=m_intern_of_string.insert( + auto pair=intern_of_string.insert( std::make_pair(str, fresh_symbol("pool", return_type))); - symbol_exprt intern=pair.first->second; + const symbol_exprt intern = pair.first->second; // intern(str)=s_0 || s_1 || ... // for each string s. // intern(str)=intern(s) || |str|!=|s| // || (|str|==|s| &&exists i<|s|. 
s[i]!=str[i]) - exprt disj=false_exprt(); - for(auto it : m_intern_of_string) - disj=or_exprt( - disj, equal_exprt(intern, it.second)); - - m_axioms.push_back(disj); - + exprt::operandst disj; + for(auto it : intern_of_string) + disj.push_back(equal_exprt(intern, it.second)); + axioms.push_back(disjunction(disj)); // WARNING: the specification may be incomplete or incorrect - for(auto it : m_intern_of_string) + for(auto it : intern_of_string) if(it.second!=str) { symbol_exprt i=fresh_exist_index("index_intern", index_type); - m_axioms.push_back( + axioms.push_back( or_exprt( equal_exprt(it.second, intern), or_exprt( - not_exprt(str.axiom_for_has_same_length_as(it.first)), + notequal_exprt(str.length(), it.first.length()), and_exprt( - str.axiom_for_has_same_length_as(it.first), + equal_exprt(str.length(), it.first.length()), and_exprt( - not_exprt(equal_exprt(str[i], it.first[i])), - and_exprt(str.axiom_for_length_gt(i), - axiom_for_is_positive_index(i))))))); + notequal_exprt(str[i], it.first[i]), + and_exprt( + str.axiom_for_length_gt(i), + axiom_for_is_positive_index(i))))))); } return intern; diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index 3f4f607addd..8a85267e803 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -13,28 +13,27 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include -/// Add axioms enforcing that the returned string expression is equal to the -/// concatenation of s1 with the substring of s2 starting at index start_index -/// and ending at index end_index. -/// -/// If start_index >= end_index, the value returned is s1. -/// If end_index > |s2| and/or start_index < 0, the appended string will be of -/// length end_index - start_index and padded with non-deterministic values. 
+/// Add axioms enforcing that `res` is the concatenation of `s1` with +/// the substring of `s2` starting at index `start_index` and ending +/// at index `end_index`. /// +/// If `start_index >= end_index`, the value returned is `s1`. +/// If `end_index > |s2|` and/or `start_index < 0`, the appended string will +/// be of length `end_index - start_index` and padded with non-deterministic +/// values. +/// \param res: an array of character /// \param s1: string expression /// \param s2: string expression /// \param start_index: expression representing an integer /// \param end_index: expression representing an integer /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_substr( - const string_exprt &s1, - const string_exprt &s2, +exprt string_constraint_generatort::add_axioms_for_concat_substr( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, const exprt &start_index, const exprt &end_index) { - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt res=fresh_string(ref_type); - // We add axioms: // a1 : end_index > start_index => |res|=|s1|+ end_index - start_index // a2 : end_index <= start_index => res = s1 @@ -46,35 +45,69 @@ string_exprt string_constraint_generatort::add_axioms_for_concat_substr( exprt res_length=plus_exprt_with_overflow_check( s1.length(), minus_exprt(end_index, start_index)); implies_exprt a1(prem, equal_exprt(res.length(), res_length)); - m_axioms.push_back(a1); + axioms.push_back(a1); implies_exprt a2(not_exprt(prem), equal_exprt(res.length(), s1.length())); - m_axioms.push_back(a2); + axioms.push_back(a2); symbol_exprt idx=fresh_univ_index("QA_index_concat", res.length().type()); string_constraintt a3(idx, s1.length(), equal_exprt(s1[idx], res[idx])); - m_axioms.push_back(a3); + axioms.push_back(a3); symbol_exprt idx2=fresh_univ_index("QA_index_concat2", res.length().type()); equal_exprt res_eq( 
res[plus_exprt(idx2, s1.length())], s2[plus_exprt(start_index, idx2)]); string_constraintt a4(idx2, minus_exprt(end_index, start_index), res_eq); - m_axioms.push_back(a4); + axioms.push_back(a4); - return res; + // We should have a enum type for the possible error codes + return from_integer(0, res.length().type()); } -/// Add axioms to say that the returned string expression is equal to the -/// concatenation of the two string expressions given as input. -/// +/// Add axioms enforcing that `res` is the concatenation of `s1` with +/// character `c`. +/// \param res: string expression +/// \param s1: string expression +/// \param c: character expression +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_for_concat_char( + const array_string_exprt &res, + const array_string_exprt &s1, + const exprt &c) +{ + // We add axioms: + // a1 : |res|=|s1|+1 + // a2 : forall i<|s1|. res[i]=s1[i] + // a3 : res[|s1|]=c + + const typet &index_type = res.length().type(); + const equal_exprt a1( + res.length(), plus_exprt(s1.length(), from_integer(1, index_type))); + axioms.push_back(a1); + + symbol_exprt idx = fresh_univ_index("QA_index_concat_char", index_type); + string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); + axioms.push_back(a2); + + equal_exprt a3(res[s1.length()], c); + axioms.push_back(a3); + + // We should have a enum type for the possible error codes + return from_integer(0, get_return_code_type()); +} + +/// Add axioms to say that `res` is equal to the concatenation of `s1` and `s2`. 
+/// \param res: string_expression corresponding to the result /// \param s1: the string expression to append to /// \param s2: the string expression to append to the first one -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat( - const string_exprt &s1, const string_exprt &s2) +/// \return an integer expression +exprt string_constraint_generatort::add_axioms_for_concat( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2) { exprt index_zero=from_integer(0, s2.length().type()); - return add_axioms_for_concat_substr(s1, s2, index_zero, s2.length()); + return add_axioms_for_concat_substr(res, s1, s2, index_zero, s2.length()); } /// Add axioms enforcing that the returned string expression is equal to the @@ -87,117 +120,51 @@ string_exprt string_constraint_generatort::add_axioms_for_concat( /// /// \param f: function application with two string arguments /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat( +exprt string_constraint_generatort::add_axioms_for_concat( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - PRECONDITION(args.size()>=2); - string_exprt s1=get_string_expr(args[0]); - string_exprt s2=get_string_expr(args[1]); - if(args.size()!=2) - { - PRECONDITION(args.size()==4); - return add_axioms_for_concat_substr(s1, s2, args[2], args[3]); - } - return add_axioms_for_concat(s1, s2); + PRECONDITION(args.size() == 4 || args.size() == 6); + const array_string_exprt s1 = get_string_expr(args[2]); + const array_string_exprt s2 = get_string_expr(args[3]); + const array_string_exprt res = char_array_of_pointer(args[1], args[0]); + if(args.size() == 6) + return add_axioms_for_concat_substr(res, s1, s2, args[4], args[5]); + else // args.size()==4 + return add_axioms_for_concat(res, s1, s2); } -/// Add axioms corresponding to the StringBuilder.append(I) java function 
-/// \param f: function application with two arguments: a string and an -/// integer -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_int( +/// Add axioms enforcing that the string represented by the two first +/// expressions is equal to the concatenation of the string argument and +/// the character argument of the function application. +/// \param f: function application with a length, pointer, string and character +/// argument. +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_for_concat_char( const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=get_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_int(args(f, 2)[1], ref_type); - return add_axioms_for_concat(s1, s2); -} - -/// Add axioms corresponding to the StringBuilder.append(J) java function -/// \param f: function application with two arguments: a string and an -/// integer of type long -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_long( - const function_application_exprt &f) -{ - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=get_string_expr(args(f, 2)[0]); - string_exprt s2=add_axioms_from_int(args(f, 2)[1], ref_type); - return add_axioms_for_concat(s1, s2); -} - -/// Add axioms corresponding to the StringBuilder.append(Z) java function -/// \param f: function application two arguments: a string and a bool -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_bool( - const function_application_exprt &f) -{ - string_exprt s1=get_string_expr(args(f, 2)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt s2=add_axioms_from_bool(args(f, 2)[1], ref_type); - return add_axioms_for_concat(s1, s2); -} - -/// Add axioms corresponding to the 
StringBuilder.append(C) java function -/// \param f: function application with two arguments: a string and a char -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_char( - const function_application_exprt &f) -{ - string_exprt s1=get_string_expr(args(f, 2)[0]); - return add_axioms_for_concat_char(s1, args(f, 2)[1]); -} - -/// Add axioms corresponding to adding the character char at the end of -/// string_expr. -/// \param string_expr: a string expression -/// \param char' a character expression -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_char( - const string_exprt &string_expr, const exprt &char_expr) -{ - const refined_string_typet &ref_type= - to_refined_string_type(string_expr.type()); - string_exprt s2=add_axioms_from_char(char_expr, ref_type); - return add_axioms_for_concat(string_expr, s2); -} - -/// Add axioms corresponding to the StringBuilder.append(D) java function -/// \param f: function application with two arguments: a string and a double -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_double( - const function_application_exprt &f) -{ - string_exprt s1=get_string_expr(args(f, 2)[0]); - PRECONDITION(is_refined_string_type(f.type())); - refined_string_typet ref_type=to_refined_string_type(f.type()); - string_exprt s2=add_axioms_for_string_of_float(args(f, 2)[1], ref_type); - return add_axioms_for_concat(s1, s2); -} - -/// Add axioms corresponding to the StringBuilder.append(F) java function -/// \param f: function application with two arguments: a string and a float -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_float( - const function_application_exprt &f) -{ - string_exprt s1=get_string_expr(args(f, 2)[0]); - PRECONDITION(is_refined_string_type(f.type())); - refined_string_typet ref_type=to_refined_string_type(f.type()); - 
string_exprt s2=add_axioms_for_string_of_float(args(f, 2)[1], ref_type); - return add_axioms_for_concat(s1, s2); + const function_application_exprt::argumentst &args = f.arguments(); + PRECONDITION(args.size() == 4); + const array_string_exprt s1 = get_string_expr(args[2]); + const exprt &c = args[3]; + const array_string_exprt res = char_array_of_pointer(args[1], args[0]); + return add_axioms_for_concat_char(res, s1, c); } /// Add axioms corresponding to the StringBuilder.appendCodePoint(I) function /// \param f: function application with two arguments: a string and a code point -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( +/// \return an expression +exprt string_constraint_generatort::add_axioms_for_concat_code_point( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 2)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt s2=add_axioms_for_code_point(args(f, 2)[1], ref_type); - return add_axioms_for_concat(s1, s2); + PRECONDITION(f.arguments().size() == 4); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt s1 = get_string_expr(f.arguments()[2]); + const typet &char_type = s1.content().type().subtype(); + const typet &index_type = s1.length().type(); + const array_string_exprt code_point = fresh_string(index_type, char_type); + const exprt return_code1 = + add_axioms_for_code_point(code_point, f.arguments()[3]); + return add_axioms_for_concat(res, s1, code_point); } diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp index d701d27a583..18fbb876e4e 100644 --- a/src/solvers/refinement/string_constraint_generator_constants.cpp +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -17,12 +17,15 @@ Author: Romain Brenguier, 
romain.brenguier@diffblue.com /// add axioms saying the returned string expression should be equal to the /// string constant -/// \par parameters: a string constant -/// \return a string expression -string_exprt string_constraint_generatort::add_axioms_for_constant( - irep_idt sval, const refined_string_typet &ref_type) +/// \param res: array of characters for the result +/// \param sval: a string constant +/// \return integer expression equal to zero +exprt string_constraint_generatort::add_axioms_for_constant( + const array_string_exprt &res, + irep_idt sval) { - string_exprt res=fresh_string(ref_type); + const typet &index_type = res.length().type(); + const typet &char_type = res.content().type().subtype(); std::string c_str=id2string(sval); std::wstring str; @@ -37,42 +40,29 @@ string_exprt string_constraint_generatort::add_axioms_for_constant( for(std::size_t i=0; i=2); const typet &type=int_expr.type(); - string_exprt res=fresh_string(ref_type); - exprt ten=from_integer(10, type); - const typet &char_type=ref_type.get_char_type(); - const typet &index_type=ref_type.get_index_type(); - exprt zero_char=constant_char('0', char_type); - exprt nine_char=constant_char('9', char_type); - exprt max=from_integer(max_size, index_type); + const typet &char_type = res.content().type().subtype(); + const typet &index_type = res.length().type(); + const exprt ten = from_integer(10, type); + const exprt zero_char = constant_char('0', char_type); + const exprt nine_char = constant_char('9', char_type); + const exprt max = from_integer(max_size, index_type); // We add axioms: // a1 : 2 <= |res| <= max_size @@ -237,7 +249,7 @@ string_exprt string_constraint_generatort::add_axioms_for_fractional_part( and_exprt a1(res.axiom_for_length_gt(1), res.axiom_for_length_le(max)); - m_axioms.push_back(a1); + axioms.push_back(a1); equal_exprt starts_with_dot(res[0], from_integer('.', char_type)); @@ -275,37 +287,38 @@ string_exprt 
string_constraint_generatort::add_axioms_for_fractional_part( } exprt a2=conjunction(digit_constraints); - m_axioms.push_back(a2); + axioms.push_back(a2); equal_exprt a3(int_expr, sum); - m_axioms.push_back(a3); + axioms.push_back(a3); - return res; + return from_integer(0, signedbv_typet(32)); } /// Add axioms to write the float in scientific notation. /// -/// A float is represented as $f = m * 2^e$ where $0 <= m < 2$ is the -/// significand and $-126 <= e <= 127$ is the exponent. -/// We want an alternate representation by finding $n$ and -/// $d$ such that $f=n*10^d$. We can estimate $d$ the following way: -/// $d ~= log_10(f/n) ~= log_10(m) + log_10(2) * e - log_10(n)$ -/// $d = floor(log_10(2) * e)$ -/// Then $n$ can be expressed by the equation: -/// $log_10(n) = log_10(m) + log_10(2) * e - d$ -/// $n = f / 10^d = m * 2^e / 10^d = m * 2^e / 10^(floor(log_10(2) * e))$ +/// A float is represented as \f$f = m * 2^e\f$ where \f$0 <= m < 2\f$ is the +/// significand and \f$-126 <= e <= 127\f$ is the exponent. +/// We want an alternate representation by finding \f$n\f$ and +/// \f$d\f$ such that \f$f=n*10^d\f$. We can estimate \f$d\f$ the following way: +/// \f$d ~= log_10(f/n) ~= log_10(m) + log_10(2) * e - log_10(n)\f$ +/// \f$d = floor(log_10(2) * e)\f$ +/// Then \f$n\f$ can be expressed by the equation: +/// \f$log_10(n) = log_10(m) + log_10(2) * e - d\f$ +/// \f$n = f / 10^d = m * 2^e / 10^d = m * 2^e / 10^(floor(log_10(2) * e))\f$ /// TODO: For now we only consider single precision. 
+/// \param res: string expression representing the float in scientific notation /// \param f: a float expression, which is positive -/// \param max_size: a maximal size for the string -/// \param ref_type: a type for refined strings -/// \return a string expression representing the float in scientific notation -string_exprt string_constraint_generatort:: - add_axioms_from_float_scientific_notation( - const exprt &f, const refined_string_typet &ref_type) +/// \return a integer expression different from 0 to signal an exception +exprt string_constraint_generatort::add_axioms_from_float_scientific_notation( + const array_string_exprt &res, + const exprt &f) { - ieee_float_spect float_spec=ieee_float_spect::single_precision(); - typet float_type=float_spec.to_type(); - signedbv_typet int_type(32); + const ieee_float_spect float_spec = ieee_float_spect::single_precision(); + const typet float_type = float_spec.to_type(); + const signedbv_typet int_type(32); + const typet &index_type = res.length().type(); + const typet &char_type = res.content().type().subtype(); // This is used for rounding float to integers. 
exprt round_to_zero_expr=from_integer(ieee_floatt::ROUND_TO_ZERO, int_type); @@ -406,8 +419,10 @@ string_exprt string_constraint_generatort:: dec_significand); dec_significand_int=round_expr_to_zero(dec_significand); - string_exprt string_expr_integer_part= - add_axioms_from_int(dec_significand_int, ref_type, 3); + array_string_exprt string_expr_integer_part = + fresh_string(index_type, char_type); + exprt return_code1 = + add_axioms_from_int(string_expr_integer_part, dec_significand_int, 3); minus_exprt fractional_part( dec_significand, floatbv_of_int_expr(dec_significand_int, float_spec)); @@ -421,37 +436,51 @@ string_exprt string_constraint_generatort:: exprt max_non_exponent_notation=from_integer(100000, shifted_float.type()); // fractional_part_shifted is floor(f * 100000) % 100000 - mod_exprt fractional_part_shifted(shifted_float, max_non_exponent_notation); + const mod_exprt fractional_part_shifted( + shifted_float, max_non_exponent_notation); - string_exprt string_fractional_part=add_axioms_for_fractional_part( - fractional_part_shifted, 6, ref_type); + array_string_exprt string_fractional_part = + fresh_string(index_type, char_type); + const exprt return_code2 = add_axioms_for_fractional_part( + string_fractional_part, fractional_part_shifted, 6); // string_expr_with_fractional_part = // concat(string_with_do, string_fractional_part) - string_exprt string_expr_with_fractional_part=add_axioms_for_concat( - string_expr_integer_part, string_fractional_part); + const array_string_exprt string_expr_with_fractional_part = + fresh_string(index_type, char_type); + const exprt return_code3 = add_axioms_for_concat( + string_expr_with_fractional_part, + string_expr_integer_part, + string_fractional_part); // string_expr_with_E = concat(string_fraction, string_lit_E) - string_exprt string_expr_with_E=add_axioms_for_concat_char( - string_expr_with_fractional_part, - from_integer('E', ref_type.get_char_type())); + const array_string_exprt stringE = 
fresh_string(index_type, char_type); + const exprt return_code4 = add_axioms_for_constant(stringE, "E"); + const array_string_exprt string_expr_with_E = + fresh_string(index_type, char_type); + const exprt return_code5 = add_axioms_for_concat( + string_expr_with_E, string_expr_with_fractional_part, stringE); // exponent_string = string_of_int(decimal_exponent) - string_exprt exponent_string= - add_axioms_from_int(decimal_exponent, ref_type, 3); + const array_string_exprt exponent_string = + fresh_string(index_type, char_type); + const exprt return_code6 = + add_axioms_from_int(exponent_string, decimal_exponent, 3); // string_expr = concat(string_expr_with_E, exponent_string) - return add_axioms_for_concat(string_expr_with_E, exponent_string); + return add_axioms_for_concat(res, string_expr_with_E, exponent_string); } /// Add axioms corresponding to the scientific representation of floating point /// values /// \param f: a function application expression -/// \return a new string expression -string_exprt string_constraint_generatort:: - add_axioms_from_float_scientific_notation( - const function_application_exprt &f) +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_from_float_scientific_notation( + const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - return add_axioms_from_float_scientific_notation(args(f, 1)[0], ref_type); + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const exprt &arg = f.arguments()[2]; + return add_axioms_from_float_scientific_notation(res, arg); } diff --git a/src/solvers/refinement/string_constraint_generator_format.cpp b/src/solvers/refinement/string_constraint_generator_format.cpp index ae672e6a01c..85f738182af 100644 --- a/src/solvers/refinement/string_constraint_generator_format.cpp +++ b/src/solvers/refinement/string_constraint_generator_format.cpp @@ -253,42 
+253,56 @@ static exprt get_component_in_struct( /// specifier. /// \param fs: a format specifier /// \param arg: a struct containing the possible value of the argument to format -/// \param ref_type: a type for refined string type +/// \param index_type: type for indexes in strings +/// \param char_type: type of characters /// \return String expression representing the output of String.format. -string_exprt string_constraint_generatort::add_axioms_for_format_specifier( +array_string_exprt +string_constraint_generatort::add_axioms_for_format_specifier( const format_specifiert &fs, const struct_exprt &arg, - const refined_string_typet &ref_type) + const typet &index_type, + const typet &char_type) { + const array_string_exprt res = fresh_string(index_type, char_type); + exprt return_code; switch(fs.conversion) { case format_specifiert::DECIMAL_INTEGER: - return add_axioms_from_int(get_component_in_struct(arg, ID_int), ref_type); + return_code = + add_axioms_from_int(res, get_component_in_struct(arg, ID_int)); + return res; case format_specifiert::HEXADECIMAL_INTEGER: - return add_axioms_from_int_hex( - get_component_in_struct(arg, ID_int), ref_type); + return_code = + add_axioms_from_int_hex(res, get_component_in_struct(arg, ID_int)); + return res; case format_specifiert::SCIENTIFIC: - return add_axioms_from_float_scientific_notation( - get_component_in_struct(arg, ID_float), ref_type); + add_axioms_from_float_scientific_notation( + res, get_component_in_struct(arg, ID_float)); + return res; case format_specifiert::DECIMAL_FLOAT: - return add_axioms_for_string_of_float( - get_component_in_struct(arg, ID_float), ref_type); + add_axioms_for_string_of_float(res, get_component_in_struct(arg, ID_float)); + return res; case format_specifiert::CHARACTER: - return add_axioms_from_char( - get_component_in_struct(arg, ID_char), ref_type); + return_code = + add_axioms_from_char(res, get_component_in_struct(arg, ID_char)); + return res; case format_specifiert::BOOLEAN: - return 
add_axioms_from_bool( - get_component_in_struct(arg, ID_boolean), ref_type); + return_code = + add_axioms_from_bool(res, get_component_in_struct(arg, ID_boolean)); + return res; case format_specifiert::STRING: return get_string_expr(get_component_in_struct(arg, "string_expr")); case format_specifiert::HASHCODE: - return add_axioms_from_int( - get_component_in_struct(arg, "hashcode"), ref_type); + return_code = + add_axioms_from_int(res, get_component_in_struct(arg, "hashcode")); + return res; case format_specifiert::LINE_SEPARATOR: // TODO: the constant should depend on the system: System.lineSeparator() - return add_axioms_for_constant("\n", ref_type); + return_code = add_axioms_for_constant(res, "\n"); + return res; case format_specifiert::PERCENT_SIGN: - return add_axioms_for_constant("%", ref_type); + return_code = add_axioms_for_constant(res, "%"); + return res; case format_specifiert::SCIENTIFIC_UPPER: case format_specifiert::GENERAL_UPPER: case format_specifiert::HEXADECIMAL_FLOAT_UPPER: @@ -300,9 +314,10 @@ string_exprt string_constraint_generatort::add_axioms_for_format_specifier( { string_constraint_generatort::format_specifiert fs_lower=fs; fs_lower.conversion=tolower(fs.conversion); - string_exprt lower_case=add_axioms_for_format_specifier( - fs_lower, arg, ref_type); - return add_axioms_for_to_upper_case(lower_case); + const array_string_exprt lower_case = + add_axioms_for_format_specifier(fs_lower, arg, index_type, char_type); + add_axioms_for_to_upper_case(res, lower_case); + return res; } case format_specifiert::OCTAL_INTEGER: // TODO: conversion of octal not implemented @@ -313,12 +328,12 @@ string_exprt string_constraint_generatort::add_axioms_for_format_specifier( case format_specifiert::DATE_TIME: // TODO: DateTime not implemented // For all these unimplemented cases we return a non-deterministic string - m_message.warning() << "unimplemented format specifier: " << fs.conversion - << m_message.eom; - return fresh_string(ref_type); + 
message.warning() << "unimplemented format specifier: " << fs.conversion + << message.eom; + return fresh_string(index_type, char_type); default: - m_message.error() << "invalid format specifier: " << fs.conversion - << m_message.eom; + message.error() << "invalid format specifier: " << fs.conversion + << message.eom; INVARIANT( false, "format specifier must belong to [bBhHsScCdoxXeEfgGaAtT%n]"); throw 0; @@ -327,18 +342,20 @@ string_exprt string_constraint_generatort::add_axioms_for_format_specifier( /// Parse `s` and add axioms ensuring the output corresponds to the output of /// String.format. +/// \param res: string expression for the result of the format function /// \param s: a format string /// \param args: a vector of arguments -/// \param ref_type: a type for refined string type -/// \return String expression representing the output of String.format. -string_exprt string_constraint_generatort::add_axioms_for_format( +/// \return code, 0 on success +exprt string_constraint_generatort::add_axioms_for_format( + const array_string_exprt &res, const std::string &s, - const exprt::operandst &args, - const refined_string_typet &ref_type) + const exprt::operandst &args) { const std::vector format_strings=parse_format_string(s); - std::vector intermediary_strings; + std::vector intermediary_strings; std::size_t arg_count=0; + const typet &char_type = res.content().type().subtype(); + const typet &index_type = res.length().type(); for(const format_elementt &fe : format_strings) if(fe.is_format_specifier()) @@ -366,21 +383,33 @@ string_exprt string_constraint_generatort::add_axioms_for_format( } } intermediary_strings.push_back( - add_axioms_for_format_specifier(fs, arg, ref_type)); + add_axioms_for_format_specifier(fs, arg, index_type, char_type)); } else - intermediary_strings.push_back( - add_axioms_for_constant( - fe.get_format_text().get_content(), ref_type)); + { + const array_string_exprt str = fresh_string(index_type, char_type); + const exprt return_code = + 
add_axioms_for_constant(str, fe.get_format_text().get_content()); + intermediary_strings.push_back(str); + } if(intermediary_strings.empty()) - return empty_string(ref_type); + return to_array_string_expr( + array_exprt(array_typet(char_type, from_integer(0, index_type)))); auto it=intermediary_strings.begin(); - string_exprt str=*(it++); + array_string_exprt str = *(it++); + exprt return_code = from_integer(0, signedbv_typet(32)); for(; it!=intermediary_strings.end(); ++it) - str=add_axioms_for_concat(str, *it); - return str; + { + const array_string_exprt fresh = fresh_string(index_type, char_type); + return_code = + bitor_exprt(return_code, add_axioms_for_concat(fresh, str, *it)); + str = fresh; + } + // Copy + add_axioms_for_substring(res, str, from_integer(0, index_type), str.length()); + return return_code; } /// Construct a string from a constant array. @@ -413,12 +442,13 @@ std::string utf16_constant_array_to_java( /// String.format function on the given arguments, assuming the first argument /// in the function application is a constant. Otherwise the first argument is /// returned. 
-string_exprt string_constraint_generatort::add_axioms_for_format( +exprt string_constraint_generatort::add_axioms_for_format( const function_application_exprt &f) { - PRECONDITION(!f.arguments().empty()); - string_exprt s1=get_string_expr(f.arguments()[0]); - const refined_string_typet &ref_type=to_refined_string_type(f.type()); + PRECONDITION(f.arguments().size() >= 3); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt s1 = get_string_expr(f.arguments()[2]); unsigned int length; if(s1.length().id()==ID_constant && @@ -428,15 +458,14 @@ string_exprt string_constraint_generatort::add_axioms_for_format( std::string s=utf16_constant_array_to_java( to_array_expr(s1.content()), length); // List of arguments after s - std::vector args( - std::next(f.arguments().begin()), f.arguments().end()); - return add_axioms_for_format(s, args, ref_type); + std::vector args(f.arguments().begin() + 3, f.arguments().end()); + return add_axioms_for_format(res, s, args); } else { - m_message.warning() + message.warning() << "ignoring format function with non constant first argument" - << m_message.eom; - return fresh_string(ref_type); + << message.eom; + return from_integer(1, f.type()); } } diff --git a/src/solvers/refinement/string_constraint_generator_indexof.cpp b/src/solvers/refinement/string_constraint_generator_indexof.cpp index 977b9832dbf..a0cea95abdd 100644 --- a/src/solvers/refinement/string_constraint_generator_indexof.cpp +++ b/src/solvers/refinement/string_constraint_generator_indexof.cpp @@ -22,7 +22,9 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// \param from_index: an expression representing an index in the string /// \return a integer expression exprt string_constraint_generatort::add_axioms_for_index_of( - const string_exprt &str, const exprt &c, const exprt &from_index) + const array_string_exprt &str, + const exprt &c, + const exprt &from_index) { const typet 
&index_type=str.length().type(); symbol_exprt index=fresh_exist_index("index_of", index_type); @@ -39,22 +41,22 @@ exprt string_constraint_generatort::add_axioms_for_index_of( and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, str.length())); - m_axioms.push_back(a1); + axioms.push_back(a1); equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); - m_axioms.push_back(a2); + axioms.push_back(a2); implies_exprt a3( contains, and_exprt( binary_relation_exprt(from_index, ID_le, index), equal_exprt(str[index], c))); - m_axioms.push_back(a3); + axioms.push_back(a3); symbol_exprt n=fresh_univ_index("QA_index_of", index_type); string_constraintt a4( n, from_index, index, contains, not_exprt(equal_exprt(str[n], c))); - m_axioms.push_back(a4); + axioms.push_back(a4); symbol_exprt m=fresh_univ_index("QA_index_of", index_type); string_constraintt a5( @@ -63,7 +65,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( str.length(), not_exprt(contains), not_exprt(equal_exprt(str[m], c))); - m_axioms.push_back(a5); + axioms.push_back(a5); return index; } @@ -77,8 +79,8 @@ exprt string_constraint_generatort::add_axioms_for_index_of( /// \return an integer expression representing the first index of needle in /// haystack after from_index, or -1 if there is none exprt string_constraint_generatort::add_axioms_for_index_of_string( - const string_exprt &haystack, - const string_exprt &needle, + const array_string_exprt &haystack, + const array_string_exprt &needle, const exprt &from_index) { const typet &index_type=haystack.length().type(); @@ -101,12 +103,12 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( binary_relation_exprt(from_index, ID_le, offset), binary_relation_exprt( offset, ID_le, minus_exprt(haystack.length(), needle.length())))); - m_axioms.push_back(a1); + axioms.push_back(a1); equal_exprt a2( not_exprt(contains), equal_exprt(offset, from_integer(-1, index_type))); - 
m_axioms.push_back(a2); + axioms.push_back(a2); symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); string_constraintt a3( @@ -114,7 +116,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( needle.length(), contains, equal_exprt(haystack[plus_exprt(qvar, offset)], needle[qvar])); - m_axioms.push_back(a3); + axioms.push_back(a3); // string_not contains_constraintt are formulas of the form: // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y] @@ -126,7 +128,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( needle.length(), haystack, needle); - m_axioms.push_back(a4); + axioms.push_back(a4); string_not_contains_constraintt a5( from_index, @@ -138,7 +140,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( needle.length(), haystack, needle); - m_axioms.push_back(a5); + axioms.push_back(a5); return offset; } @@ -153,8 +155,8 @@ exprt string_constraint_generatort::add_axioms_for_index_of_string( /// \return an integer expression representing the last index of needle in /// haystack before or at from_index, or -1 if there is none exprt string_constraint_generatort::add_axioms_for_last_index_of_string( - const string_exprt &haystack, - const string_exprt &needle, + const array_string_exprt &haystack, + const array_string_exprt &needle, const exprt &from_index) { const typet &index_type=haystack.length().type(); @@ -181,17 +183,17 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( binary_relation_exprt( offset, ID_le, minus_exprt(haystack.length(), needle.length())), binary_relation_exprt(offset, ID_le, from_index))); - m_axioms.push_back(a1); + axioms.push_back(a1); equal_exprt a2( not_exprt(contains), equal_exprt(offset, from_integer(-1, index_type))); - m_axioms.push_back(a2); + axioms.push_back(a2); symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); equal_exprt constr3(haystack[plus_exprt(qvar, offset)], needle[qvar]); 
string_constraintt a3(qvar, needle.length(), contains, constr3); - m_axioms.push_back(a3); + axioms.push_back(a3); // end_index is min(from_index, |str| - |substring|) minus_exprt length_diff(haystack.length(), needle.length()); @@ -208,7 +210,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( needle.length(), haystack, needle); - m_axioms.push_back(a4); + axioms.push_back(a4); string_not_contains_constraintt a5( from_integer(0, index_type), @@ -218,7 +220,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of_string( needle.length(), haystack, needle); - m_axioms.push_back(a5); + axioms.push_back(a5); return offset; } @@ -231,23 +233,19 @@ exprt string_constraint_generatort::add_axioms_for_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=get_string_expr(args[0]); + PRECONDITION(args.size() == 2 || args.size() == 3); + const array_string_exprt str = get_string_expr(args[0]); const exprt &c=args[1]; - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - PRECONDITION(f.type()==ref_type.get_index_type()); - exprt from_index; - - if(args.size()==2) - from_index=from_integer(0, ref_type.get_index_type()); - else if(args.size()==3) - from_index=args[2]; - else - UNREACHABLE; + const typet &index_type = str.length().type(); + const typet &char_type = str.content().type().subtype(); + PRECONDITION(f.type() == index_type); + const exprt from_index = + args.size() == 2 ? 
from_integer(0, index_type) : args[2]; if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_index_of( - str, typecast_exprt(c, ref_type.get_char_type()), from_index); + str, typecast_exprt(c, char_type), from_index); } else { @@ -255,7 +253,7 @@ exprt string_constraint_generatort::add_axioms_for_index_of( is_refined_string_type(c.type()), string_refinement_invariantt("c can only be a (un)signedbv or a refined " "string and the (un)signedbv case is already handled")); - string_exprt sub=get_string_expr(c); + array_string_exprt sub = get_string_expr(c); return add_axioms_for_index_of_string(str, sub, from_index); } } @@ -269,10 +267,11 @@ exprt string_constraint_generatort::add_axioms_for_index_of( /// \return an integer expression representing the last index of c in str before /// or at from_index, or -1 if there is none exprt string_constraint_generatort::add_axioms_for_last_index_of( - const string_exprt &str, const exprt &c, const exprt &from_index) + const array_string_exprt &str, + const exprt &c, + const exprt &from_index) { - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &index_type=ref_type.get_index_type(); + const typet &index_type = str.length().type(); symbol_exprt index=fresh_exist_index("last_index_of", index_type); symbol_exprt contains=fresh_boolean("contains_in_last_index_of"); @@ -289,17 +288,17 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( and_exprt a1( binary_relation_exprt(index, ID_ge, minus1), binary_relation_exprt(index, ID_lt, from_index_plus_one)); - m_axioms.push_back(a1); + axioms.push_back(a1); equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); - m_axioms.push_back(a2); + axioms.push_back(a2); implies_exprt a3( contains, and_exprt( binary_relation_exprt(from_index, ID_ge, index), equal_exprt(str[index], c))); - m_axioms.push_back(a3); + axioms.push_back(a3); symbol_exprt n=fresh_univ_index("QA_last_index_of1", index_type); 
string_constraintt a4( @@ -308,7 +307,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( from_index_plus_one, contains, not_exprt(equal_exprt(str[n], c))); - m_axioms.push_back(a4); + axioms.push_back(a4); symbol_exprt m=fresh_univ_index("QA_last_index_of2", index_type); string_constraintt a5( @@ -316,7 +315,7 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( from_index_plus_one, not_exprt(contains), not_exprt(equal_exprt(str[m], c))); - m_axioms.push_back(a5); + axioms.push_back(a5); return index; } @@ -330,27 +329,25 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - string_exprt str=get_string_expr(args[0]); - exprt c=args[1]; - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - exprt from_index; - PRECONDITION(f.type()==ref_type.get_index_type()); - - if(args.size()==2) - from_index=minus_exprt(str.length(), from_integer(1, str.length().type())); - else if(args.size()==3) - from_index=args[2]; - else - UNREACHABLE; + PRECONDITION(args.size() == 2 || args.size() == 3); + const array_string_exprt str = get_string_expr(args[0]); + const exprt c = args[1]; + const typet &index_type = str.length().type(); + const typet &char_type = str.content().type().subtype(); + PRECONDITION(f.type() == index_type); + + const exprt from_index = + args.size() == 2 ? 
minus_exprt(str.length(), from_integer(1, index_type)) + : args[2]; if(c.type().id()==ID_unsignedbv || c.type().id()==ID_signedbv) { return add_axioms_for_last_index_of( - str, typecast_exprt(c, ref_type.get_char_type()), from_index); + str, typecast_exprt(c, char_type), from_index); } else { - string_exprt sub=get_string_expr(c); + const array_string_exprt sub = get_string_expr(c); return add_axioms_for_last_index_of_string(str, sub, from_index); } } diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index 85ba2837c80..e0fa0db911e 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -13,129 +13,156 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include /// add axioms stating that the result correspond to the first string where we -/// inserted the second one at possition offset +/// inserted the second one at position offset /// \par parameters: two string expression and an integer offset -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert( - const string_exprt &s1, const string_exprt &s2, const exprt &offset) +/// \return an expression whose value would be different from zero if there is +/// an exception to signal +exprt string_constraint_generatort::add_axioms_for_insert( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &offset) { PRECONDITION(offset.type()==s1.length().type()); - string_exprt pref=add_axioms_for_substring( - s1, from_integer(0, offset.type()), offset); - string_exprt suf=add_axioms_for_substring(s1, offset, s1.length()); - string_exprt concat1=add_axioms_for_concat(pref, s2); - return add_axioms_for_concat(concat1, suf); + const typet &index_type = s1.length().type(); + const typet &char_type = s1.content().type().subtype(); + 
array_string_exprt pref = fresh_string(index_type, char_type); + exprt return_code1 = + add_axioms_for_substring(pref, s1, from_integer(0, offset.type()), offset); + array_string_exprt suf = fresh_string(index_type, char_type); + exprt return_code2 = add_axioms_for_substring(suf, s1, offset, s1.length()); + array_string_exprt concat1 = fresh_string(index_type, char_type); + exprt return_code3 = add_axioms_for_concat(concat1, pref, s2); + exprt return_code4 = add_axioms_for_concat(res, concat1, suf); + return if_exprt( + equal_exprt(return_code1, from_integer(0, return_code1.type())), + if_exprt( + equal_exprt(return_code2, from_integer(0, return_code1.type())), + if_exprt( + equal_exprt(return_code3, from_integer(0, return_code1.type())), + return_code4, + return_code3), + return_code2), + return_code1); } /// add axioms corresponding to the StringBuilder.insert(int, CharSequence) and /// StringBuilder.insert(int, CharSequence, int, int) java functions -/// \par parameters: function application with three arguments: two strings and -/// an index +/// \param f: function application with arguments integer `|res|`, char pointer +/// `&res[0]`, refined string `s1`, refined string `s2`, +/// integer `offset` /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert( +exprt string_constraint_generatort::add_axioms_for_insert( const function_application_exprt &f) { - PRECONDITION(f.arguments().size()>=3); - string_exprt s1=get_string_expr(f.arguments()[0]); - string_exprt s2=get_string_expr(f.arguments()[2]); - const exprt &offset=f.arguments()[1]; - if(f.arguments().size()==5) + PRECONDITION(f.arguments().size() == 5 || f.arguments().size() == 7); + array_string_exprt s1 = get_string_expr(f.arguments()[2]); + array_string_exprt s2 = get_string_expr(f.arguments()[4]); + array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const exprt &offset = f.arguments()[3]; + if(f.arguments().size() == 7) { 
- const exprt &start=f.arguments()[3]; - const exprt &end=f.arguments()[4]; - string_exprt substring=add_axioms_for_substring(s2, start, end); - return add_axioms_for_insert(s1, substring, offset); + const exprt &start = f.arguments()[5]; + const exprt &end = f.arguments()[6]; + const typet &char_type = s1.content().type().subtype(); + const typet &index_type = s1.length().type(); + array_string_exprt substring = fresh_string(index_type, char_type); + exprt return_code1 = add_axioms_for_substring(substring, s2, start, end); + exprt return_code2 = add_axioms_for_insert(res, s1, substring, offset); + return if_exprt( + equal_exprt(return_code1, from_integer(0, return_code1.type())), + return_code2, + return_code1); } - else + else // 5 arguments { - INVARIANT( - f.arguments().size()==3, - string_refinement_invariantt("f must have 2 or 5 arguments and the case " - "of 5 arguments is already handled")); - return add_axioms_for_insert(s1, s2, offset); + return add_axioms_for_insert(res, s1, s2, offset); } } - /// add axioms corresponding to the StringBuilder.insert(I) java function -/// \par parameters: function application with three arguments: a string, an -/// integer -/// offset, and an integer -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert_int( - const function_application_exprt &f) -{ - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=get_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_int(args(f, 3)[2], ref_type); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); -} - -/// add axioms corresponding to the StringBuilder.insert(J) java function -/// \par parameters: function application with three arguments: a string, an -/// integer -/// offset and a long -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert_long( +/// \param f: function application with three arguments: a string, an +/// integer offset, 
and an integer +/// \return an expression +exprt string_constraint_generatort::add_axioms_for_insert_int( const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=get_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_int(args(f, 3)[2], ref_type); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); + PRECONDITION(f.arguments().size() == 5); + const array_string_exprt s1 = get_string_expr(f.arguments()[2]); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const exprt &offset = f.arguments()[3]; + const typet &index_type = s1.length().type(); + const typet &char_type = s1.content().type().subtype(); + array_string_exprt s2 = fresh_string(index_type, char_type); + exprt return_code = add_axioms_from_int(s2, f.arguments()[4]); + return add_axioms_for_insert(res, s1, s2, offset); } /// add axioms corresponding to the StringBuilder.insert(Z) java function -/// \par parameters: function application with three arguments: a string, an -/// integer -/// offset, and a Boolean +/// \param f: function application with three arguments: a string, an +/// integer offset, and a Boolean /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert_bool( +exprt string_constraint_generatort::add_axioms_for_insert_bool( const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - string_exprt s1=get_string_expr(args(f, 3)[0]); - string_exprt s2=add_axioms_from_bool(args(f, 3)[2], ref_type); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); + PRECONDITION(f.arguments().size() == 5); + const array_string_exprt s1 = get_string_expr(f.arguments()[0]); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const exprt &offset = f.arguments()[3]; + const typet &index_type = s1.length().type(); + const typet &char_type = 
s1.content().type().subtype(); + array_string_exprt s2 = fresh_string(index_type, char_type); + exprt return_code = add_axioms_from_bool(s2, f.arguments()[4]); + return add_axioms_for_insert(res, s1, s2, offset); } /// add axioms corresponding to the StringBuilder.insert(C) java function -/// \par parameters: function application with three arguments: a string, an -/// integer -/// offset, and a character -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert_char( +/// \param f: function application with three arguments: a string, an +/// integer offset, and a character +/// \return an expression +exprt string_constraint_generatort::add_axioms_for_insert_char( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 3)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt s2=add_axioms_from_char(args(f, 3)[2], ref_type); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); + PRECONDITION(f.arguments().size() == 5); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt s1 = get_string_expr(f.arguments()[2]); + const exprt &offset = f.arguments()[3]; + const typet &index_type = s1.length().type(); + const typet &char_type = s1.content().type().subtype(); + array_string_exprt s2 = fresh_string(index_type, char_type); + exprt return_code = add_axioms_from_char(s2, f.arguments()[4]); + return add_axioms_for_insert(res, s1, s2, offset); } /// add axioms corresponding to the StringBuilder.insert(D) java function -/// \par parameters: function application with three arguments: a string, an -/// integer -/// offset, and a double -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert_double( +/// \param f: function application with three arguments: a string, an +/// integer offset, and a double +/// \return a string expression +exprt 
string_constraint_generatort::add_axioms_for_insert_double( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 3)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt s2=add_axioms_for_string_of_float(args(f, 3)[2], ref_type); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); + PRECONDITION(f.arguments().size() == 5); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt s1 = get_string_expr(f.arguments()[2]); + const exprt &offset = f.arguments()[3]; + const typet &index_type = s1.length().type(); + const typet &char_type = s1.content().type().subtype(); + const array_string_exprt s2 = fresh_string(index_type, char_type); + const exprt return_code = + add_axioms_for_string_of_float(s2, f.arguments()[4]); + return add_axioms_for_insert(res, s1, s2, offset); } /// add axioms corresponding to the StringBuilder.insert(F) java function -/// \par parameters: function application with three arguments: a string, an -/// integer -/// offset, and a float +/// \param f: function application with three arguments: a string, an +/// integer offset, and a float /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_insert_float( +exprt string_constraint_generatort::add_axioms_for_insert_float( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 3)[0]); - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt s2=add_axioms_for_string_of_float(args(f, 3)[2], ref_type); - return add_axioms_for_insert(s1, s2, args(f, 3)[1]); + return add_axioms_for_insert_double(f); } diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 2eca6940c7b..d7ff6d98c86 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ 
b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -28,32 +28,35 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include string_constraint_generatort::string_constraint_generatort( - const string_constraint_generatort::infot& info, const namespacet& ns): - max_string_length(info.string_max_length), - m_force_printable_characters(info.string_printable), - m_ns(ns) { } + const string_constraint_generatort::infot &info, + const namespacet &ns) + : max_string_length(info.string_max_length), + force_printable_characters(info.string_printable), + ns(ns) +{ +} const std::vector &string_constraint_generatort::get_axioms() const { - return m_axioms; + return axioms; } const std::vector & string_constraint_generatort::get_index_symbols() const { - return m_index_symbols; + return index_symbols; } const std::vector & string_constraint_generatort::get_boolean_symbols() const { - return m_boolean_symbols; + return boolean_symbols; } -const std::set & +const std::set & string_constraint_generatort::get_created_strings() const { - return m_created_strings; + return created_strings; } /// generate constant character expression with character type. 
@@ -76,7 +79,7 @@ symbol_exprt string_constraint_generatort::fresh_symbol( const irep_idt &prefix, const typet &type) { std::ostringstream buf; - buf << "string_refinement#" << prefix << "#" << ++m_symbol_count; + buf << "string_refinement#" << prefix << "#" << ++symbol_count; irep_idt name(buf.str()); return symbol_exprt(name, type); } @@ -97,7 +100,7 @@ symbol_exprt string_constraint_generatort::fresh_exist_index( const irep_idt &prefix, const typet &type) { symbol_exprt s=fresh_symbol(prefix, type); - m_index_symbols.push_back(s); + index_symbols.push_back(s); return s; } @@ -108,7 +111,7 @@ symbol_exprt string_constraint_generatort::fresh_boolean( const irep_idt &prefix) { symbol_exprt b=fresh_symbol(prefix, bool_typet()); - m_boolean_symbols.push_back(b); + boolean_symbols.push_back(b); return b; } @@ -134,43 +137,172 @@ plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( implies_exprt no_overflow(equal_exprt(neg1, neg2), equal_exprt(neg1, neg_sum)); - m_axioms.push_back(no_overflow); + axioms.push_back(no_overflow); return sum; } +/// Associate an actual finite length to infinite arrays +/// \param s: array expression representing a string +/// \return expression for the length of `s` +exprt string_constraint_generatort::get_length_of_string_array( + const array_string_exprt &s) const +{ + if(s.length() == infinity_exprt(s.length().type())) + { + auto it = length_of_array_.find(s); + if(it != length_of_array_.end()) + return it->second; + } + return s.length(); +} + /// construct a string expression whose length and content are new variables /// \par parameters: a type for string /// \return a string expression -string_exprt string_constraint_generatort::fresh_string( - const refined_string_typet &type) +array_string_exprt string_constraint_generatort::fresh_string( + const typet &index_type, + const typet &char_type) { - symbol_exprt length=fresh_symbol("string_length", type.get_index_type()); - symbol_exprt 
content=fresh_symbol("string_content", type.get_content_type()); - string_exprt str(length, content, type); - m_created_strings.insert(str); + symbol_exprt length = fresh_symbol("string_length", index_type); + array_typet array_type(char_type, length); + symbol_exprt content = fresh_symbol("string_content", array_type); + array_string_exprt str = to_array_string_expr(content); + created_strings.insert(str); add_default_axioms(str); return str; } -/// casts an expression to a string expression, or fetches the actual -/// string_exprt in the case of a symbol. -/// \par parameters: an expression of refined string type -/// \return a string expression -string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) +// Associate a char array to a char pointer. The size of the char array is a +// variable with no constraint. +array_string_exprt +string_constraint_generatort::associate_char_array_to_char_pointer( + const exprt &char_pointer, + const typet &char_array_type) { - PRECONDITION(is_refined_string_type(expr.type())); - - if(expr.id()==ID_symbol) + PRECONDITION(char_pointer.type().id() == ID_pointer); + PRECONDITION(char_array_type.id() == ID_array); + PRECONDITION( + char_array_type.subtype().id() == ID_unsignedbv || + char_array_type.subtype().id() == ID_signedbv); + std::string symbol_name; + if( + char_pointer.id() == ID_address_of && + (to_address_of_expr(char_pointer).object().id() == ID_index) && + char_pointer.op0().op0().id() == ID_array) { - return find_or_add_string_of_symbol( - to_symbol_expr(expr), - to_refined_string_type(expr.type())); + // Do not replace constant arrays + return to_array_string_expr( + to_index_expr(to_address_of_expr(char_pointer).object()).array()); + } + else if(char_pointer.id() == ID_address_of) + { + symbol_name = "char_array_of_address"; + } + else if(char_pointer.id() == ID_if) + { + const if_exprt &if_expr = to_if_expr(char_pointer); + const array_string_exprt t = associate_char_array_to_char_pointer( + 
if_expr.true_case(), char_array_type); + const array_string_exprt f = associate_char_array_to_char_pointer( + if_expr.false_case(), char_array_type); + array_typet array_type( + char_array_type.subtype(), + if_exprt( + if_expr.cond(), + to_array_type(t.type()).size(), + to_array_type(f.type()).size())); + return to_array_string_expr(if_exprt(if_expr.cond(), t, f, array_type)); + } + else if(char_pointer.id() == ID_symbol) + symbol_name = "char_array_symbol"; + else if(char_pointer.id() == ID_member) + symbol_name = "char_array_member"; + else if( + char_pointer.id() == ID_constant && + to_constant_expr(char_pointer).get_value() == ID_NULL) + { + // TODO: is this useful? + array_typet array_type( + char_array_type.subtype(), + from_integer(0, to_array_type(char_array_type).size().type())); + symbol_exprt array_sym = fresh_symbol("char_array_null", array_type); + return to_array_string_expr(array_sym); } else + symbol_name = "unknown_char_array"; + + array_string_exprt array_sym = + to_array_string_expr(fresh_symbol(symbol_name, char_array_type)); + auto insert_result = + arrays_of_pointers_.insert(std::make_pair(char_pointer, array_sym)); + array_string_exprt result = to_array_string_expr(insert_result.first->second); + add_default_axioms(result); + return result; +} + +/// Associate a char array to a char pointer. +/// Insert in `arrays_of_pointers_` a binding from `ptr` to `arr`. +/// If the length of `arr` is infinite, we create a new integer symbol and add +/// a binding from `arr` to this length in `length_of_array_`. +/// This also adds the default axioms for `arr`. +/// \param f: a function application with argument a character array `arr` and +/// a character pointer `ptr`. +exprt string_constraint_generatort::associate_array_to_pointer( + const function_application_exprt &f) +{ + PRECONDITION(f.arguments().size() == 2); + + /// \todo: we allow expression of the form of `arr[0]` instead of `arr` as + /// the array argument but this could go away. 
+ array_string_exprt array_expr = to_array_string_expr( + f.arguments()[0].id() == ID_index ? to_index_expr(f.arguments()[0]).array() + : f.arguments()[0]); + + const exprt &pointer_expr = f.arguments()[1]; + + const auto &length = array_expr.length(); + if(length == infinity_exprt(length.type())) { - return to_string_expr(expr); + auto pair = length_of_array_.insert( + std::make_pair(array_expr, fresh_symbol("string_length", length.type()))); + array_expr.length() = pair.first->second; } + + // TODO should use a function for that + arrays_of_pointers_.insert(std::make_pair(pointer_expr, array_expr)); + // TODO should go inside function + add_default_axioms(to_array_string_expr(array_expr)); + return from_integer(0, f.type()); +} + +/// Associate an integer length to a char array. +/// This adds an axiom ensuring that `arr.length` and `length` are equal. +/// \param f: a function application with argument a character array `arr` and +/// a integer `length`. +/// \return integer expression equal to 0 +exprt string_constraint_generatort::associate_length_to_array( + const function_application_exprt &f) +{ + PRECONDITION(f.arguments().size() == 2); + array_string_exprt array_expr = to_array_string_expr(f.arguments()[0]); + const exprt &new_length = f.arguments()[1]; + + const auto &length = get_length_of_string_array(array_expr); + axioms.push_back(equal_exprt(length, new_length)); + return from_integer(0, f.type()); +} + +/// casts an expression to a string expression, or fetches the actual +/// string_exprt in the case of a symbol. 
+/// \param expr: an expression of refined string type +/// \return a string expression +array_string_exprt +string_constraint_generatort::get_string_expr(const exprt &expr) +{ + PRECONDITION(is_refined_string_type(expr.type())); + const refined_string_exprt &str = to_string_expr(expr); + return char_array_of_pointer(str.content(), str.length()); } /// adds standard axioms about the length of the string and its content: * its @@ -181,14 +313,18 @@ string_exprt string_constraint_generatort::get_string_expr(const exprt &expr) /// \return a string expression that is linked to the argument through axioms /// that are added to the list void string_constraint_generatort::add_default_axioms( - const string_exprt &s) + const array_string_exprt &s) { - m_axioms.push_back( + // If `s` was already added we do nothing. + if(!created_strings.insert(s).second) + return; + + axioms.push_back( s.axiom_for_length_ge(from_integer(0, s.length().type()))); if(max_string_length!=std::numeric_limits::max()) - m_axioms.push_back(s.axiom_for_length_le(max_string_length)); + axioms.push_back(s.axiom_for_length_le(max_string_length)); - if(m_force_printable_characters) + if(force_printable_characters) { symbol_exprt qvar=fresh_univ_index("printable", s.length().type()); exprt chr=s[qvar]; @@ -196,120 +332,20 @@ void string_constraint_generatort::add_default_axioms( binary_relation_exprt(chr, ID_ge, from_integer(' ', chr.type())), binary_relation_exprt(chr, ID_le, from_integer('~', chr.type()))); string_constraintt sc(qvar, s.length(), printable); - m_axioms.push_back(sc); + axioms.push_back(sc); } } -/// obtain a refined string expression corresponding to a expression of type -/// string -/// \par parameters: an expression of refined string type -/// \return a string expression that is linked to the argument through axioms -/// that are added to the list -string_exprt string_constraint_generatort::add_axioms_for_refined_string( - const exprt &string) +/// Adds creates a new array if it does 
not already exists +/// TODO: This should be replaced by associate_char_array_to_char_pointer +array_string_exprt string_constraint_generatort::char_array_of_pointer( + const exprt &pointer, + const exprt &length) { - PRECONDITION(is_refined_string_type(string.type())); - refined_string_typet type=to_refined_string_type(string.type()); - - // Function applications should have been removed before - PRECONDITION(string.id()!=ID_function_application); - - if(string.id()==ID_symbol) - { - const symbol_exprt &sym=to_symbol_expr(string); - string_exprt s=find_or_add_string_of_symbol(sym, type); - add_default_axioms(s); - return s; - } - else if(string.id()==ID_nondet_symbol) - { - string_exprt s=fresh_string(type); - add_default_axioms(s); - return s; - } - else if(string.id()==ID_if) - { - return add_axioms_for_if(to_if_expr(string)); - } - else if(string.id()==ID_struct) - { - const string_exprt &s=to_string_expr(string); - INVARIANT( - s.length().id()==ID_symbol || s.length().id()==ID_constant, - "string length should be a symbol or a constant"); - irep_idt content_id=s.content().id(); - INVARIANT( - content_id==ID_symbol || content_id==ID_array || content_id==ID_if, - "string content should be a symbol, a constant array, or an if"); - if(content_id==ID_if) - { - // If the string content is an if expression, we add axioms ensuring - // the content is the same as the content in the 'true' branch when the - // condition holds and the 'false' branch otherwise. 
- if_exprt if_expr=to_if_expr(s.content()); - string_exprt str_true=add_axioms_for_refined_string( - string_exprt(s.length(), if_expr.true_case(), type)); - string_exprt str_false=add_axioms_for_refined_string( - string_exprt(s.length(), if_expr.false_case(), type)); - return add_axioms_for_if(if_exprt(if_expr.cond(), str_true, str_false)); - } - add_default_axioms(s); - return s; - } - else - { - INVARIANT( - false, - string_refinement_invariantt("add_axioms_for_refined_string:\n"+ - string.pretty()+"\nwhich is not a function application, a symbol, a "+ - "struct or an if expression")); - // For the compiler - throw 0; - } -} - -/// add axioms for an if expression which should return a string -/// \par parameters: an if expression -/// \return a string expression -string_exprt string_constraint_generatort::add_axioms_for_if( - const if_exprt &expr) -{ - PRECONDITION(is_refined_string_type(expr.true_case().type())); - string_exprt t=get_string_expr(expr.true_case()); - PRECONDITION(is_refined_string_type(expr.false_case().type())); - string_exprt f=get_string_expr(expr.false_case()); - const refined_string_typet &ref_type=to_refined_string_type(t.type()); - const typet &index_type=ref_type.get_index_type(); - string_exprt res=fresh_string(ref_type); - - m_axioms.push_back( - implies_exprt(expr.cond(), res.axiom_for_has_same_length_as(t))); - symbol_exprt qvar=fresh_univ_index("QA_string_if_true", index_type); - equal_exprt qequal(res[qvar], t[qvar]); - string_constraintt sc1(qvar, t.length(), implies_exprt(expr.cond(), qequal)); - m_axioms.push_back(sc1); - m_axioms.push_back( - implies_exprt(not_exprt(expr.cond()), res.axiom_for_has_same_length_as(f))); - symbol_exprt qvar2=fresh_univ_index("QA_string_if_false", index_type); - equal_exprt qequal2(res[qvar2], f[qvar2]); - string_constraintt sc2(qvar2, f.length(), or_exprt(expr.cond(), qequal2)); - m_axioms.push_back(sc2); - return res; -} - -/// if a symbol representing a string is present in the symbol_to_string 
table, -/// returns the corresponding string, if the symbol is not yet present, creates -/// a new string with the correct type depending on whether the mode is java or -/// c, adds it to the table and returns it. -/// \par parameters: a symbol expression -/// \return a string expression -string_exprt string_constraint_generatort::find_or_add_string_of_symbol( - const symbol_exprt &sym, const refined_string_typet &ref_type) -{ - irep_idt id=sym.get_identifier(); - string_exprt str=fresh_string(ref_type); - auto entry=m_unresolved_symbols.insert(std::make_pair(id, str)); - return entry.first->second; + const array_typet array_type(pointer.type().subtype(), length); + const array_string_exprt array = + associate_char_array_to_char_pointer(pointer, array_type); + return array; } /// strings contained in this call are converted to objects of type @@ -326,57 +362,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): to_symbol_expr(name).get_identifier(); - std::string str_id(id.c_str()); - - size_t pos=str_id.find("func_length"); - if(pos!=std::string::npos) - { - function_application_exprt new_expr(expr); - // TODO: This part needs some improvement. - // Stripping the symbol name is not a very robust process. 
- new_expr.function()=symbol_exprt(str_id.substr(0, pos+4)); - new_expr.type()=refined_string_typet(java_int_type(), java_char_type()); - - auto res_it=m_function_application_cache.insert(std::make_pair(new_expr, - nil_exprt())); - if(res_it.second) - { - string_exprt res=to_string_expr( - add_axioms_for_function_application(new_expr)); - res_it.first->second=res; - return res.length(); - } - else - return to_string_expr(res_it.first->second).length(); - } - - pos = str_id.find("func_data"); - if(pos!=std::string::npos) - { - function_application_exprt new_expr(expr); - new_expr.function()=symbol_exprt(str_id.substr(0, pos+4)); - new_expr.type()=refined_string_typet(java_int_type(), java_char_type()); - - auto res_it=m_function_application_cache.insert(std::make_pair(new_expr, - nil_exprt())); - if(res_it.second) - { - string_exprt res=to_string_expr( - add_axioms_for_function_application(new_expr)); - res_it.first->second=res; - return res.content(); - } - else - return to_string_expr(res_it.first->second).content(); - } - - // TODO: improve efficiency of this test by either ordering test by frequency - // or using a map - - auto res_it=m_function_application_cache.find(expr); - if(res_it!=m_function_application_cache.end() && res_it->second!=nil_exprt()) - return res_it->second; - exprt res; if(id==ID_cprover_char_literal_func) @@ -409,8 +394,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_for_last_index_of(expr); else if(id==ID_cprover_string_parse_int_func) res=add_axioms_for_parse_int(expr); - else if(id==ID_cprover_string_to_char_array_func) - res=add_axioms_for_to_char_array(expr); else if(id==ID_cprover_string_code_point_at_func) res=add_axioms_for_code_point_at(expr); else if(id==ID_cprover_string_code_point_before_func) @@ -425,18 +408,8 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_from_literal(expr); else if(id==ID_cprover_string_concat_func) 
res=add_axioms_for_concat(expr); - else if(id==ID_cprover_string_concat_int_func) - res=add_axioms_for_concat_int(expr); - else if(id==ID_cprover_string_concat_long_func) - res=add_axioms_for_concat_long(expr); - else if(id==ID_cprover_string_concat_bool_func) - res=add_axioms_for_concat_bool(expr); else if(id==ID_cprover_string_concat_char_func) res=add_axioms_for_concat_char(expr); - else if(id==ID_cprover_string_concat_double_func) - res=add_axioms_for_concat_double(expr); - else if(id==ID_cprover_string_concat_float_func) - res=add_axioms_for_concat_float(expr); else if(id==ID_cprover_string_concat_code_point_func) res=add_axioms_for_concat_code_point(expr); else if(id==ID_cprover_string_insert_func) @@ -444,7 +417,7 @@ exprt string_constraint_generatort::add_axioms_for_function_application( else if(id==ID_cprover_string_insert_int_func) res=add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_long_func) - res=add_axioms_for_insert_long(expr); + res = add_axioms_for_insert_int(expr); else if(id==ID_cprover_string_insert_bool_func) res=add_axioms_for_insert_bool(expr); else if(id==ID_cprover_string_insert_char_func) @@ -493,10 +466,12 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_for_replace(expr); else if(id==ID_cprover_string_intern_func) res=add_axioms_for_intern(expr); - else if(id==ID_cprover_string_array_of_char_pointer_func) - res=add_axioms_for_char_pointer(expr); else if(id==ID_cprover_string_format_func) res=add_axioms_for_format(expr); + else if(id == ID_cprover_associate_array_to_pointer_func) + res = associate_array_to_pointer(expr); + else if(id == ID_cprover_associate_length_to_array_func) + res = associate_length_to_array(expr); else { std::string msg( @@ -504,7 +479,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( msg+=id2string(id); DATA_INVARIANT(false, string_refinement_invariantt(msg)); } - m_function_application_cache[expr]=res; return res; } @@ 
-513,46 +487,19 @@ exprt string_constraint_generatort::add_axioms_for_function_application( /// \par parameters: function application with one argument, which is a string, /// or three arguments: string, integer offset and count /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_copy( +exprt string_constraint_generatort::add_axioms_for_copy( const function_application_exprt &f) { const auto &args=f.arguments(); - if(args.size()==1) - { - string_exprt s1=get_string_expr(args[0]); - return s1; - } - else - { - INVARIANT( - args.size()==3, - string_refinement_invariantt("f must have 1 or 3 arguments and the case " - "of 3 arguments is already handled")); - string_exprt s1=get_string_expr(args[0]); - exprt offset=args[1]; - exprt count=args[2]; - return add_axioms_for_substring(s1, offset, plus_exprt(offset, count)); - } + PRECONDITION(args.size() == 3 || args.size() == 5); + const array_string_exprt res = char_array_of_pointer(args[1], args[0]); + const array_string_exprt str = get_string_expr(args[2]); + const typet &index_type = str.length().type(); + const exprt offset = args.size() == 3 ? from_integer(0, index_type) : args[3]; + const exprt count = args.size() == 3 ? str.length() : args[4]; + return add_axioms_for_substring(res, str, offset, plus_exprt(offset, count)); } -/// for an expression of the form `array[0]` returns `array` -/// \par parameters: an expression of type char -/// \return an array expression -exprt string_constraint_generatort::add_axioms_for_char_pointer( - const function_application_exprt &fun) -{ - exprt char_pointer=args(fun, 1)[0]; - if(char_pointer.id()==ID_index) - return typecast_exprt(char_pointer.op0(), fun.type()); - // TODO: It seems reasonable that the result of the function application - // should match the return type of the function. However it is not - // clear whether this typecast is properly handled in the string - // refinement. We need regression tests that use that function. 
- - // TODO: we do not know what to do in the other cases - TODO; - return exprt(); -} /// add axioms corresponding to the String.length java function /// \par parameters: function application with one string argument @@ -560,7 +507,8 @@ exprt string_constraint_generatort::add_axioms_for_char_pointer( exprt string_constraint_generatort::add_axioms_for_length( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 1)[0]); + PRECONDITION(f.arguments().size() == 1); + const array_string_exprt str = get_string_expr(f.arguments()[0]); return str.length(); } @@ -569,8 +517,7 @@ exprt string_constraint_generatort::add_axioms_for_length( /// \return a Boolean expression exprt string_constraint_generatort::axiom_for_is_positive_index(const exprt &x) { - return binary_relation_exprt( - x, ID_ge, from_integer(0, x.type())); + return binary_relation_exprt(x, ID_ge, from_integer(0, x.type())); } /// add axioms stating that the returned value is equal to the argument @@ -612,35 +559,9 @@ exprt string_constraint_generatort::add_axioms_for_char_literal( exprt string_constraint_generatort::add_axioms_for_char_at( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 2)[0]); - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - symbol_exprt char_sym=fresh_symbol("char", ref_type.get_char_type()); - m_axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); + PRECONDITION(f.arguments().size() == 2); + array_string_exprt str = get_string_expr(f.arguments()[0]); + symbol_exprt char_sym = fresh_symbol("char", str.type().subtype()); + axioms.push_back(equal_exprt(char_sym, str[f.arguments()[1]])); return char_sym; } - -/// add axioms corresponding to the String.toCharArray java function -/// \par parameters: function application with one string argument -/// \return a char array expression -exprt string_constraint_generatort::add_axioms_for_to_char_array( - const function_application_exprt &f) -{ - 
string_exprt str=get_string_expr(args(f, 1)[0]); - return str.content(); -} - -exprt string_constraint_generatort::substitute_function_applications( - const exprt &expr) -{ - exprt copy=expr; - for(exprt &operand : copy.operands()) - operand=substitute_function_applications(exprt(operand)); - - if(copy.id()==ID_function_application) - { - function_application_exprt f=to_function_application_expr(copy); - return this->add_axioms_for_function_application(f); - } - - return copy; -} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp index c11b8ef4263..2c6b68fdc0b 100644 --- a/src/solvers/refinement/string_constraint_generator_testing.cpp +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -18,7 +18,9 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// \par parameters: a prefix string, a string and an integer offset /// \return a Boolean expression exprt string_constraint_generatort::add_axioms_for_is_prefix( - const string_exprt &prefix, const string_exprt &str, const exprt &offset) + const array_string_exprt &prefix, + const array_string_exprt &str, + const exprt &offset) { symbol_exprt isprefix=fresh_boolean("isprefix"); const typet &index_type=str.length().type(); @@ -34,32 +36,28 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( isprefix, str.axiom_for_length_ge(plus_exprt_with_overflow_check( prefix.length(), offset))); - m_axioms.push_back(a1); + axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type); string_constraintt a2( qvar, prefix.length(), isprefix, - equal_exprt(str[plus_exprt_with_overflow_check(qvar, offset)], - prefix[qvar])); - m_axioms.push_back(a2); + equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type); and_exprt witness_diff( axiom_for_is_positive_index(witness), and_exprt( 
prefix.axiom_for_length_gt(witness), - notequal_exprt(str[plus_exprt_with_overflow_check(witness, offset)], - prefix[witness]))); + notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); or_exprt s0_notpref_s1( - not_exprt( - str.axiom_for_length_ge( - plus_exprt_with_overflow_check(prefix.length(), offset))), + not_exprt(str.axiom_for_length_ge(plus_exprt(prefix.length(), offset))), witness_diff); implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); - m_axioms.push_back(a3); + axioms.push_back(a3); return isprefix; } @@ -74,13 +72,11 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix( { const function_application_exprt::argumentst &args=f.arguments(); PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=get_string_expr(args[swap_arguments?1:0]); - string_exprt s1=get_string_expr(args[swap_arguments?0:1]); - exprt offset; - if(args.size()==2) - offset=from_integer(0, s0.length().type()); - else if(args.size()==3) - offset=args[2]; + PRECONDITION(args.size() == 2 || args.size() == 3); + const array_string_exprt s0 = get_string_expr(args[swap_arguments ? 1 : 0]); + const array_string_exprt s1 = get_string_expr(args[swap_arguments ? 0 : 1]); + const exprt offset = + args.size() == 2 ? 
from_integer(0, s0.length().type()) : args[2]; return typecast_exprt(add_axioms_for_is_prefix(s0, s1, offset), f.type()); } @@ -92,15 +88,15 @@ exprt string_constraint_generatort::add_axioms_for_is_empty( const function_application_exprt &f) { PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool); - + PRECONDITION(f.arguments().size() == 1); // We add axioms: // a1 : is_empty => |s0| = 0 // a2 : s0 => is_empty symbol_exprt is_empty=fresh_boolean("is_empty"); - string_exprt s0=get_string_expr(args(f, 1)[0]); - m_axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); - m_axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); + array_string_exprt s0 = get_string_expr(f.arguments()[0]); + axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); + axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); return typecast_exprt(is_empty, f.type()); } @@ -119,8 +115,8 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( symbol_exprt issuffix=fresh_boolean("issuffix"); typecast_exprt tc_issuffix(issuffix, f.type()); - string_exprt s0=get_string_expr(args[swap_arguments?1:0]); - string_exprt s1=get_string_expr(args[swap_arguments?0:1]); + const array_string_exprt &s0 = get_string_expr(args[swap_arguments ? 1 : 0]); + const array_string_exprt &s1 = get_string_expr(args[swap_arguments ? 
0 : 1]); const typet &index_type=s0.length().type(); // We add axioms: @@ -132,22 +128,23 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( // || (s1.length > witness>=0 // &&s1[witness]!=s0[witness + s0.length-s1.length] - implies_exprt a1(issuffix, s1.axiom_for_length_ge(s0)); - m_axioms.push_back(a1); + implies_exprt a1(issuffix, s1.axiom_for_length_ge(s0.length())); + axioms.push_back(a1); symbol_exprt qvar=fresh_univ_index("QA_suffix", index_type); exprt qvar_shifted=plus_exprt( qvar, minus_exprt(s1.length(), s0.length())); string_constraintt a2( qvar, s0.length(), issuffix, equal_exprt(s0[qvar], s1[qvar_shifted])); - m_axioms.push_back(a2); + axioms.push_back(a2); symbol_exprt witness=fresh_exist_index("witness_not_suffix", index_type); exprt shifted=plus_exprt( witness, minus_exprt(s1.length(), s0.length())); or_exprt constr3( - and_exprt(s0.axiom_for_length_gt(s1), - equal_exprt(witness, from_integer(-1, index_type))), + and_exprt( + s0.axiom_for_length_gt(s1.length()), + equal_exprt(witness, from_integer(-1, index_type))), and_exprt( notequal_exprt(s0[witness], s1[shifted]), and_exprt( @@ -155,42 +152,24 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix( axiom_for_is_positive_index(witness)))); implies_exprt a3(not_exprt(issuffix), constr3); - m_axioms.push_back(a3); + axioms.push_back(a3); return tc_issuffix; } -/// tells whether the given string is a constant -/// \param expr: a string expression -/// \return a Boolean -bool string_constraint_generatort::is_constant_string( - const string_exprt &expr) -{ - if(expr.id()!=ID_struct || - expr.operands().size()!=2 || - expr.length().id()!=ID_constant || - expr.content().id()!=ID_array) - return false; - for(const auto &element : expr.content().operands()) - { - if(element.id()!=ID_constant) - return false; - } - return true; -} - /// add axioms corresponding to the String.contains java function /// \par parameters: function application with two string arguments /// \return a 
Boolean expression exprt string_constraint_generatort::add_axioms_for_contains( const function_application_exprt &f) { + PRECONDITION(f.arguments().size() == 2); PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool); - string_exprt s0=get_string_expr(args(f, 2)[0]); - string_exprt s1=get_string_expr(args(f, 2)[1]); - - symbol_exprt contains=fresh_boolean("contains"); - const refined_string_typet ref_type=to_refined_string_type(s0.type()); - const typet &index_type=ref_type.get_index_type(); + const array_string_exprt s0 = get_string_expr(f.arguments()[0]); + const array_string_exprt s1 = get_string_expr(f.arguments()[1]); + const typet &index_type = s0.length().type(); + const symbol_exprt contains = fresh_boolean("contains"); + const symbol_exprt startpos = + fresh_exist_index("startpos_contains", index_type); // We add axioms: // a1 : contains ==> |s0| >= |s1| @@ -201,27 +180,26 @@ exprt string_constraint_generatort::add_axioms_for_contains( // (forall startpos <= |s0| - |s1|. // exists witness < |s1|. 
s1[witness] != s0[witness + startpos]) - implies_exprt a1(contains, s0.axiom_for_length_ge(s1)); - m_axioms.push_back(a1); + const implies_exprt a1(contains, s0.axiom_for_length_ge(s1.length())); + axioms.push_back(a1); - symbol_exprt startpos=fresh_exist_index("startpos_contains", index_type); minus_exprt length_diff(s0.length(), s1.length()); and_exprt bounds( axiom_for_is_positive_index(startpos), binary_relation_exprt(startpos, ID_le, length_diff)); implies_exprt a2(contains, bounds); - m_axioms.push_back(a2); + axioms.push_back(a2); implies_exprt a3( not_exprt(contains), equal_exprt(startpos, from_integer(-1, index_type))); - m_axioms.push_back(a3); + axioms.push_back(a3); symbol_exprt qvar=fresh_univ_index("QA_contains", index_type); exprt qvar_shifted=plus_exprt(qvar, startpos); string_constraintt a4( qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted])); - m_axioms.push_back(a4); + axioms.push_back(a4); // We rewrite axiom a4 as: // forall startpos <= |s0|-|s1|. (!contains && |s0| >= |s1|) @@ -229,12 +207,12 @@ exprt string_constraint_generatort::add_axioms_for_contains( string_not_contains_constraintt a5( from_integer(0, index_type), plus_exprt(from_integer(1, index_type), length_diff), - and_exprt(not_exprt(contains), s0.axiom_for_length_ge(s1)), + and_exprt(not_exprt(contains), s0.axiom_for_length_ge(s1.length())), from_integer(0, index_type), s1.length(), s0, s1); - m_axioms.push_back(a5); + axioms.push_back(a5); return typecast_exprt(contains, f.type()); } diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp index 7a4c0ba8ff1..0f6d5f624a1 100644 --- a/src/solvers/refinement/string_constraint_generator_transformation.cpp +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -23,43 +23,43 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// is a string `s1` and the second an integer `k` which should have 
/// same type as the string length /// \return a new string expression `res` -string_exprt string_constraint_generatort::add_axioms_for_set_length( +exprt string_constraint_generatort::add_axioms_for_set_length( const function_application_exprt &f) { - string_exprt s1=get_string_expr(args(f, 2)[0]); - exprt k=args(f, 2)[1]; - const refined_string_typet &ref_type=to_refined_string_type(s1.type()); - string_exprt res=fresh_string(ref_type); + PRECONDITION(f.arguments().size() == 4); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt s1 = get_string_expr(f.arguments()[2]); + const exprt &k = f.arguments()[3]; + const typet &index_type = s1.length().type(); + const typet &char_type = s1.content().type().subtype(); // We add axioms: // a1 : |res|=k // a2 : forall i<|res|. i < |s1| ==> res[i] = s1[i] // a3 : forall i<|res|. i >= |s1| ==> res[i] = 0 - m_axioms.push_back(res.axiom_for_has_length(k)); + axioms.push_back(res.axiom_for_has_length(k)); - symbol_exprt idx=fresh_univ_index( - "QA_index_set_length", ref_type.get_index_type()); + symbol_exprt idx = fresh_univ_index("QA_index_set_length", index_type); string_constraintt a2( idx, res.length(), s1.axiom_for_length_gt(idx), equal_exprt(s1[idx], res[idx])); - m_axioms.push_back(a2); + axioms.push_back(a2); - symbol_exprt idx2=fresh_univ_index( - "QA_index_set_length2", ref_type.get_index_type()); + symbol_exprt idx2 = fresh_univ_index("QA_index_set_length2", index_type); string_constraintt a3( idx2, res.length(), s1.axiom_for_length_le(idx2), - equal_exprt(res[idx2], constant_char(0, ref_type.get_char_type()))); - m_axioms.push_back(a3); + equal_exprt(res[idx2], constant_char(0, char_type))); + axioms.push_back(a3); - return res; + return from_integer(0, signedbv_typet(32)); } - /// add axioms corresponding to the String.substring java function Warning: the /// specification may not be correct for the case where the string is shorter /// than the end 
index @@ -67,27 +67,16 @@ string_exprt string_constraint_generatort::add_axioms_for_set_length( /// index /// argument and an optional end index argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_substring( +exprt string_constraint_generatort::add_axioms_for_substring( const function_application_exprt &f) { const function_application_exprt::argumentst &args=f.arguments(); - PRECONDITION(args.size()>=2); - string_exprt str=get_string_expr(args[0]); - exprt i(args[1]); - exprt j; - if(args.size()==3) - { - j=args[2]; - } - else - { - INVARIANT( - args.size()==2, - string_refinement_invariantt("f must have 2 or 3 arguments and the case " - "of 3 arguments is already handled")); - j=str.length(); - } - return add_axioms_for_substring(str, i, j); + PRECONDITION(args.size() == 4 || args.size() == 5); + const array_string_exprt str = get_string_expr(args[2]); + const array_string_exprt res = char_array_of_pointer(args[1], args[0]); + const exprt &i = args[3]; + const exprt j = args.size() == 5 ? 
args[4] : str.length(); + return add_axioms_for_substring(res, str, i, j); } /// add axioms stating that the returned string expression is equal to the input @@ -96,14 +85,15 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( /// an /// expression for the end index /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_substring( - const string_exprt &str, const exprt &start, const exprt &end) +exprt string_constraint_generatort::add_axioms_for_substring( + const array_string_exprt &res, + const array_string_exprt &str, + const exprt &start, + const exprt &end) { - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &index_type=ref_type.get_index_type(); + const typet &index_type = str.length().type(); PRECONDITION(start.type()==index_type); PRECONDITION(end.type()==index_type); - string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : start < end => |res| = end - start @@ -114,36 +104,38 @@ string_exprt string_constraint_generatort::add_axioms_for_substring( implies_exprt a1( binary_relation_exprt(start, ID_lt, end), res.axiom_for_has_length(minus_exprt(end, start))); - m_axioms.push_back(a1); + axioms.push_back(a1); exprt is_empty=res.axiom_for_has_length(from_integer(0, index_type)); implies_exprt a2(binary_relation_exprt(start, ID_ge, end), is_empty); - m_axioms.push_back(a2); + axioms.push_back(a2); // Warning: check what to do if the string is not long enough - m_axioms.push_back(str.axiom_for_length_ge(end)); + axioms.push_back(str.axiom_for_length_ge(end)); symbol_exprt idx=fresh_univ_index("QA_index_substring", index_type); string_constraintt a4(idx, res.length(), equal_exprt(res[idx], str[plus_exprt(start, idx)])); - m_axioms.push_back(a4); - return res; + axioms.push_back(a4); + return from_integer(0, signedbv_typet(32)); } /// add axioms corresponding to the String.trim java function /// \par parameters: function application with one string 
argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_trim( - const function_application_exprt &expr) +exprt string_constraint_generatort::add_axioms_for_trim( + const function_application_exprt &f) { - string_exprt str=get_string_expr(args(expr, 1)[0]); - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &index_type=ref_type.get_index_type(); - string_exprt res=fresh_string(ref_type); - symbol_exprt idx=fresh_exist_index("index_trim", index_type); - exprt space_char=constant_char(' ', ref_type.get_char_type()); + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt &str = get_string_expr(f.arguments()[2]); + const array_string_exprt &res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const typet &index_type = str.length().type(); + const typet &char_type = str.content().type().subtype(); + const symbol_exprt idx = fresh_exist_index("index_trim", index_type); + const exprt space_char = from_integer(' ', char_type); // We add axioms: // a1 : m + |s1| <= |str| @@ -159,25 +151,25 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( exprt a1=str.axiom_for_length_ge( plus_exprt_with_overflow_check(idx, res.length())); - m_axioms.push_back(a1); + axioms.push_back(a1); binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type)); - m_axioms.push_back(a2); + axioms.push_back(a2); exprt a3=str.axiom_for_length_ge(idx); - m_axioms.push_back(a3); + axioms.push_back(a3); exprt a4=res.axiom_for_length_ge( from_integer(0, index_type)); - m_axioms.push_back(a4); + axioms.push_back(a4); - exprt a5=res.axiom_for_length_le(str); - m_axioms.push_back(a5); + exprt a5 = res.axiom_for_length_le(str.length()); + axioms.push_back(a5); symbol_exprt n=fresh_univ_index("QA_index_trim", index_type); binary_relation_exprt non_print(str[n], ID_le, space_char); string_constraintt a6(n, idx, non_print); - m_axioms.push_back(a6); + axioms.push_back(a6); 
symbol_exprt n2=fresh_univ_index("QA_index_trim2", index_type); minus_exprt bound(str.length(), plus_exprt_with_overflow_check(idx, @@ -188,12 +180,12 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( space_char); string_constraintt a7(n2, bound, eqn2); - m_axioms.push_back(a7); + axioms.push_back(a7); symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type); equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]); string_constraintt a8(n3, res.length(), eqn3); - m_axioms.push_back(a8); + axioms.push_back(a8); minus_exprt index_before( plus_exprt_with_overflow_check(idx, res.length()), @@ -204,25 +196,27 @@ string_exprt string_constraint_generatort::add_axioms_for_trim( and_exprt( binary_relation_exprt(str[idx], ID_gt, space_char), no_space_before)); - m_axioms.push_back(a9); - return res; + axioms.push_back(a9); + return from_integer(0, f.type()); } /// add axioms corresponding to the String.toLowerCase java function /// \par parameters: function application with one string argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( - const function_application_exprt &expr) +exprt string_constraint_generatort::add_axioms_for_to_lower_case( + const function_application_exprt &f) { - string_exprt str=get_string_expr(args(expr, 1)[0]); - const refined_string_typet &ref_type=to_refined_string_type(str.type()); + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt str = get_string_expr(f.arguments()[2]); + const refined_string_typet &ref_type = + to_refined_string_type(f.arguments()[2].type()); const typet &char_type=ref_type.get_char_type(); const typet &index_type=ref_type.get_index_type(); - string_exprt res=fresh_string(ref_type); const exprt char_A=constant_char('A', char_type); const exprt char_Z=constant_char('Z', char_type); - // TODO: for now, only characters in Basic Latin 
and Latin-1 supplement // are supported (up to 0x100), we should add others using case mapping // information from the UnicodeData file. @@ -235,8 +229,8 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( // where diff is the difference between lower case and upper case characters: // diff = 'a'-'A' = 0x20 - exprt a1=res.axiom_for_has_same_length_as(str); - m_axioms.push_back(a1); + equal_exprt a1(res.length(), str.length()); + axioms.push_back(a1); symbol_exprt idx=fresh_univ_index("QA_lower_case", index_type); exprt::operandst upper_case; @@ -269,21 +263,20 @@ string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( if_exprt conditional_convert(is_upper_case, converted, non_converted); string_constraintt a2(idx, res.length(), conditional_convert); - m_axioms.push_back(a2); + axioms.push_back(a2); - return res; + return from_integer(0, f.type()); } /// add axioms corresponding to the String.toUpperCase java function /// \par parameters: function application with one string argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( - const string_exprt &str) +exprt string_constraint_generatort::add_axioms_for_to_upper_case( + const array_string_exprt &res, + const array_string_exprt &str) { - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &char_type=ref_type.get_char_type(); - const typet &index_type=ref_type.get_index_type(); - string_exprt res=fresh_string(ref_type); + const typet &char_type = str.content().type().subtype(); + const typet &index_type = str.length().type(); exprt char_a=constant_char('a', char_type); exprt char_A=constant_char('A', char_type); exprt char_z=constant_char('z', char_type); @@ -299,8 +292,8 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( // Note that index expressions are only allowed in the body of universal // axioms, so we use a trivial premise and push our premise 
into the body. - exprt a1=res.axiom_for_has_same_length_as(str); - m_axioms.push_back(a1); + equal_exprt a1(res.length(), str.length()); + axioms.push_back(a1); symbol_exprt idx1=fresh_univ_index("QA_upper_case1", index_type); exprt is_lower_case=and_exprt( @@ -310,7 +303,7 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( equal_exprt convert(res[idx1], plus_exprt(str[idx1], diff)); implies_exprt body1(is_lower_case, convert); string_constraintt a2(idx1, res.length(), body1); - m_axioms.push_back(a2); + axioms.push_back(a2); symbol_exprt idx2=fresh_univ_index("QA_upper_case2", index_type); exprt is_not_lower_case=not_exprt(and_exprt( @@ -319,45 +312,52 @@ string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( equal_exprt eq(res[idx2], str[idx2]); implies_exprt body2(is_not_lower_case, eq); string_constraintt a3(idx2, res.length(), body2); - m_axioms.push_back(a3); - return res; + axioms.push_back(a3); + return from_integer(0, signedbv_typet(32)); } /// add axioms corresponding to the String.toUpperCase java function -/// \param expr: function application with one string argument +/// \param f: function application with one string argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_to_upper_case( - const function_application_exprt &expr) +exprt string_constraint_generatort::add_axioms_for_to_upper_case( + const function_application_exprt &f) { - string_exprt str=get_string_expr(args(expr, 1)[0]); - return add_axioms_for_to_upper_case(str); + PRECONDITION(f.arguments().size() == 3); + array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + array_string_exprt str = get_string_expr(f.arguments()[2]); + return add_axioms_for_to_upper_case(res, str); } /// add axioms corresponding stating that the result is similar to that of the /// StringBuilder.setCharAt java function Warning: this may be underspecified in /// the case wher the index exceed 
the length of the string -/// \par parameters: function application with three arguments, the first is a -/// string -/// the second an index and the third a character +/// \param f: function application with three arguments, the first is a +/// string, the second an index and the third a character /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_char_set( +exprt string_constraint_generatort::add_axioms_for_char_set( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 3)[0]); - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - string_exprt res=fresh_string(ref_type); - with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); - - // We add axiom: - // a1 : arg1 < |str| => res = str with [arg1]=arg2 + PRECONDITION(f.arguments().size() == 5); + const array_string_exprt str = get_string_expr(f.arguments()[2]); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const exprt &position = f.arguments()[3]; + const exprt &character = f.arguments()[4]; - implies_exprt a1( - binary_relation_exprt(args(f, 3)[1], ID_lt, str.length()), - and_exprt( - equal_exprt(res.content(), sarrnew), - res.axiom_for_has_same_length_as(str))); - m_axioms.push_back(a1); - return res; + // We add axioms: + // a1 : |res| = |str| + // a2 : res[pos]=char + // a3 : forall i<|res|. 
i != pos => res[i] = str[i] + + const binary_relation_exprt out_of_bounds(position, ID_ge, str.length()); + axioms.push_back(equal_exprt(res.length(), str.length())); + axioms.push_back(equal_exprt(res[position], character)); + const symbol_exprt q = fresh_univ_index("QA_char_set", position.type()); + or_exprt a3_body(equal_exprt(q, position), equal_exprt(res[q], str[q])); + axioms.push_back(string_constraintt(q, res.length(), a3_body)); + return if_exprt( + out_of_bounds, from_integer(1, f.type()), from_integer(0, f.type())); } /// Convert two expressions to pair of chars @@ -368,15 +368,17 @@ string_exprt string_constraint_generatort::add_axioms_for_char_set( /// \param expr2 Second expression /// \return Optional pair of two expressions static optionalt> to_char_pair( - exprt expr1, exprt expr2) + exprt expr1, + exprt expr2, + std::function get_string_expr) { if((expr1.type().id()==ID_unsignedbv || expr1.type().id()==ID_char) && (expr2.type().id()==ID_char || expr2.type().id()==ID_unsignedbv)) return std::make_pair(expr1, expr2); - const auto expr1_str=to_string_expr(expr1); - const auto expr2_str=to_string_expr(expr2); + const auto expr1_str = get_string_expr(expr1); + const auto expr2_str = get_string_expr(expr2); const auto expr1_length=expr_cast(expr1_str.length()); const auto expr2_length=expr_cast(expr2_str.length()); if(expr1_length && expr2_length && *expr1_length==1 && *expr2_length==1) @@ -393,16 +395,21 @@ static optionalt> to_char_pair( /// string, the second and the third are either pair of characters or /// a pair of strings /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_replace( +exprt string_constraint_generatort::add_axioms_for_replace( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 3)[0]); - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - if(const auto maybe_chars=to_char_pair(args(f, 3)[1], args(f, 3)[2])) + 
PRECONDITION(f.arguments().size() == 5); + array_string_exprt str = get_string_expr(f.arguments()[2]); + array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + if( + const auto maybe_chars = + to_char_pair(f.arguments()[3], f.arguments()[4], [this](const exprt &e) { // NOLINT + return get_string_expr(e); + })) { const auto old_char=maybe_chars->first; const auto new_char=maybe_chars->second; - string_exprt res=fresh_string(ref_type); // We add axioms: // a1 : |res| = |str| @@ -410,9 +417,9 @@ string_exprt string_constraint_generatort::add_axioms_for_replace( // str[qvar]=oldChar => res[qvar]=newChar // !str[qvar]=oldChar => res[qvar]=str[qvar] - m_axioms.push_back(res.axiom_for_has_same_length_as(str)); + axioms.push_back(equal_exprt(res.length(), str.length())); - symbol_exprt qvar=fresh_univ_index("QA_replace", ref_type.get_index_type()); + symbol_exprt qvar = fresh_univ_index("QA_replace", str.length().type()); implies_exprt case1( equal_exprt(str[qvar], old_char), equal_exprt(res[qvar], new_char)); @@ -420,51 +427,70 @@ string_exprt string_constraint_generatort::add_axioms_for_replace( not_exprt(equal_exprt(str[qvar], old_char)), equal_exprt(res[qvar], str[qvar])); string_constraintt a2(qvar, res.length(), and_exprt(case1, case2)); - m_axioms.push_back(a2); - return res; + axioms.push_back(a2); + return from_integer(0, f.type()); } - return str; + return from_integer(1, f.type()); } /// add axioms corresponding to the StringBuilder.deleteCharAt java function -/// \par parameters: function application with two arguments, the first is a -/// string -/// and the second is an index -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( +/// \param f: function application with two arguments, the first is a +/// string and the second is an index +/// \return an expression whose value is non null to signal an exception +exprt 
string_constraint_generatort::add_axioms_for_delete_char_at( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 2)[0]); + PRECONDITION(f.arguments().size() == 4); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt str = get_string_expr(f.arguments()[2]); exprt index_one=from_integer(1, str.length().type()); return add_axioms_for_delete( + res, str, - args(f, 2)[1], - plus_exprt_with_overflow_check(args(f, 2)[1], index_one)); + f.arguments()[3], + plus_exprt_with_overflow_check(f.arguments()[3], index_one)); } -/// add axioms stating that the returned string corresponds to the input one +/// add axioms stating that `res` corresponds to the input `str` /// where we removed characters between the positions start (included) and end /// (not included) -/// \par parameters: a string expression, a start index and an end index -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_delete( - const string_exprt &str, const exprt &start, const exprt &end) +/// \param res: a string expression +/// \param str: a string expression +/// \param start: a start index +/// \param end: an end index +/// \return a expression different from zero to signal an exception +exprt string_constraint_generatort::add_axioms_for_delete( + const array_string_exprt &res, + const array_string_exprt &str, + const exprt &start, + const exprt &end) { PRECONDITION(start.type()==str.length().type()); PRECONDITION(end.type()==str.length().type()); - string_exprt str1=add_axioms_for_substring( - str, from_integer(0, str.length().type()), start); - string_exprt str2=add_axioms_for_substring(str, end, str.length()); - return add_axioms_for_concat(str1, str2); + const typet &index_type = str.length().type(); + const typet &char_type = str.content().type().subtype(); + const array_string_exprt sub1 = fresh_string(index_type, char_type); + const array_string_exprt sub2 = 
fresh_string(index_type, char_type); + const exprt return_code1 = add_axioms_for_substring( + sub1, str, from_integer(0, str.length().type()), start); + const exprt return_code2 = + add_axioms_for_substring(sub2, str, end, str.length()); + const exprt return_code3 = add_axioms_for_concat(res, sub1, sub2); + return bitor_exprt(return_code1, bitor_exprt(return_code2, return_code3)); } /// add axioms corresponding to the StringBuilder.delete java function -/// \par parameters: function application with three arguments: a string -/// expression, a start index and an end index -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_for_delete( +/// \param f: function application with three arguments: a string +/// expression, a start index and an end index +/// \return an integer expression whose value is different from 0 to signal +/// an exception +exprt string_constraint_generatort::add_axioms_for_delete( const function_application_exprt &f) { - string_exprt str=get_string_expr(args(f, 3)[0]); - return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); + PRECONDITION(f.arguments().size() == 5); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + const array_string_exprt arg = get_string_expr(f.arguments()[2]); + return add_axioms_for_delete(res, arg, f.arguments()[3], f.arguments()[4]); } diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp index f54a821f623..dd1c80a7341 100644 --- a/src/solvers/refinement/string_constraint_generator_valueof.cpp +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -18,70 +18,60 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include - /// Add axioms corresponding to the String.valueOf(I) java function. 
-/// \param expr: function application with one integer argument +/// \param f: function application with one integer argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_int( - const function_application_exprt &expr) +exprt string_constraint_generatort::add_axioms_from_int( + const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(expr.type()); - PRECONDITION(expr.arguments().size()>=1); - if(expr.arguments().size()==1) - { - return add_axioms_from_int(expr.arguments()[0], ref_type); - } - else - { + PRECONDITION(f.arguments().size() == 3 || f.arguments().size() == 4); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + if(f.arguments().size() == 4) return add_axioms_from_int_with_radix( - expr.arguments()[0], - expr.arguments()[1], - ref_type); - } + res, f.arguments()[2], f.arguments()[3]); + else // f.arguments.size()==3 + return add_axioms_from_int(res, f.arguments()[2]); } /// Add axioms corresponding to the String.valueOf(J) java function. 
-/// \param expr: function application with one long argument +/// \param f: function application with one long argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_long( - const function_application_exprt &expr) +exprt string_constraint_generatort::add_axioms_from_long( + const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(expr.type()); - PRECONDITION(expr.arguments().size()>=1); - if(expr.arguments().size()==1) - { - return add_axioms_from_int(expr.arguments()[0], ref_type); - } - else - { + PRECONDITION(f.arguments().size() == 3 || f.arguments().size() == 4); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + if(f.arguments().size() == 4) return add_axioms_from_int_with_radix( - expr.arguments()[0], - expr.arguments()[1], - ref_type); - } + res, f.arguments()[2], f.arguments()[3]); + else + return add_axioms_from_int(res, f.arguments()[2]); } /// Add axioms corresponding to the String.valueOf(Z) java function. /// \param f: function application with a Boolean argument /// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_bool( +exprt string_constraint_generatort::add_axioms_from_bool( const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - return add_axioms_from_bool(args(f, 1)[0], ref_type); + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + return add_axioms_from_bool(res, f.arguments()[2]); } /// Add axioms stating that the returned string equals "true" when the Boolean /// expression is true and "false" when it is false. 
+/// \param res: string expression for the result /// \param b: Boolean expression -/// \param ref_type: type of refined string expressions -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_bool( - const exprt &b, const refined_string_typet &ref_type) +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_from_bool( + const array_string_exprt &res, + const exprt &b) { - string_exprt res=fresh_string(ref_type); - const typet &char_type=ref_type.get_char_type(); - + const typet &char_type = res.content().type().subtype(); PRECONDITION(b.type()==bool_typet() || b.type().id()==ID_c_bool); typecast_exprt eq(b, bool_typet()); @@ -94,62 +84,62 @@ string_exprt string_constraint_generatort::add_axioms_from_bool( std::string str_true="true"; implies_exprt a1(eq, res.axiom_for_has_length(str_true.length())); - m_axioms.push_back(a1); + axioms.push_back(a1); for(std::size_t i=0; i::max()); + PRECONDITION(max_size < std::numeric_limits::max()); const typet &type=input_int.type(); PRECONDITION(type.id()==ID_signedbv); @@ -164,25 +154,24 @@ string_exprt string_constraint_generatort::add_axioms_from_int_with_radix( CHECK_RETURN(max_size::max()); } - string_exprt str=fresh_string(ref_type); - const typet &char_type=ref_type.get_char_type(); + const typet &char_type = res.content().type().subtype(); exprt radix_as_char=typecast_exprt(radix, char_type); exprt radix_input_type=typecast_exprt(radix, type); const bool strict_formatting=true; add_axioms_for_correct_number_format( - input_int, str, radix_as_char, radix_ul, max_size, strict_formatting); + input_int, res, radix_as_char, radix_ul, max_size, strict_formatting); add_axioms_for_characters_in_integer_string( input_int, type, strict_formatting, - str, + res, max_size, radix_input_type, radix_ul); - return str; + return from_integer(0, signedbv_typet(32)); } /// Returns the integer value represented by the character. 
@@ -200,19 +189,19 @@ exprt string_constraint_generatort::int_of_hex_char(const exprt &chr) minus_exprt(chr, zero_char)); } -/// Add axioms stating that the returned string corresponds to the integer +/// Add axioms stating that the string `res` corresponds to the integer /// argument written in hexadecimal. +/// \param res: string expression for the result /// \param i: an integer argument -/// \param ref_type: type of refined string expressions -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_int_hex( - const exprt &i, const refined_string_typet &ref_type) +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_from_int_hex( + const array_string_exprt &res, + const exprt &i) { - string_exprt res=fresh_string(ref_type); const typet &type=i.type(); PRECONDITION(type.id()==ID_signedbv); - const typet &index_type=ref_type.get_index_type(); - const typet &char_type=ref_type.get_char_type(); + const typet &index_type = res.length().type(); + const typet &char_type = res.content().type().subtype(); exprt sixteen=from_integer(16, index_type); exprt minus_char=constant_char('-', char_type); exprt zero_char=constant_char('0', char_type); @@ -221,7 +210,7 @@ string_exprt string_constraint_generatort::add_axioms_from_int_hex( exprt f_char=constant_char('f', char_type); size_t max_size=8; - m_axioms.push_back( + axioms.push_back( and_exprt(res.axiom_for_length_gt(0), res.axiom_for_length_le(max_size))); @@ -234,8 +223,8 @@ string_exprt string_constraint_generatort::add_axioms_from_int_hex( for(size_t j=0; j1) - m_axioms.push_back( + axioms.push_back( implies_exprt(premise, not_exprt(equal_exprt(res[0], zero_char)))); } - return res; + return from_integer(0, get_return_code_type()); } /// add axioms corresponding to the Integer.toHexString(I) java function /// \param f: function application with an integer argument -/// \return a new string expression -string_exprt 
string_constraint_generatort::add_axioms_from_int_hex( +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_from_int_hex( const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - return add_axioms_from_int_hex(args(f, 1)[0], ref_type); + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + return add_axioms_from_int_hex(res, f.arguments()[2]); } /// Add axioms corresponding to the String.valueOf(C) java function. /// \param f: function application one char argument -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_char( +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_from_char( const function_application_exprt &f) { - const refined_string_typet &ref_type=to_refined_string_type(f.type()); - return add_axioms_from_char(args(f, 1)[0], ref_type); + PRECONDITION(f.arguments().size() == 3); + const array_string_exprt res = + char_array_of_pointer(f.arguments()[1], f.arguments()[0]); + return add_axioms_from_char(res, f.arguments()[2]); } /// Add axioms stating that the returned string has length 1 and the character /// it contains corresponds to the input expression. 
+/// \param res: string expression for the result /// \param c: one expression of type char -/// \param ref_type: type of refined string expressions -/// \return a new string expression -string_exprt string_constraint_generatort::add_axioms_from_char( - const exprt &c, const refined_string_typet &ref_type) +/// \return code 0 on success +exprt string_constraint_generatort::add_axioms_from_char( + const array_string_exprt &res, + const exprt &c) { - string_exprt res=fresh_string(ref_type); and_exprt lemma(equal_exprt(res[0], c), res.axiom_for_has_length(1)); - m_axioms.push_back(lemma); - return res; + axioms.push_back(lemma); + return from_integer(0, get_return_code_type()); } /// Add axioms making the return value true if the given string is a correct @@ -305,15 +298,14 @@ string_exprt string_constraint_generatort::add_axioms_from_char( /// zeros or upper case letters void string_constraint_generatort::add_axioms_for_correct_number_format( const exprt &input_int, - const string_exprt &str, + const array_string_exprt &str, const exprt &radix_as_char, const unsigned long radix_ul, const std::size_t max_size, const bool strict_formatting) { - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &char_type=ref_type.get_char_type(); - const typet &index_type=ref_type.get_index_type(); + const typet &char_type = str.content().type().subtype(); + const typet &index_type = str.length().type(); const exprt &chr=str[0]; const equal_exprt starts_with_minus(chr, constant_char('-', char_type)); @@ -323,30 +315,30 @@ void string_constraint_generatort::add_axioms_for_correct_number_format( // |str| > 0 const exprt non_empty=str.axiom_for_length_ge(from_integer(1, index_type)); - m_axioms.push_back(non_empty); + axioms.push_back(non_empty); if(strict_formatting) { // str[0] = '-' || is_digit_with_radix(str[0], radix) const or_exprt correct_first(starts_with_minus, starts_with_digit); - m_axioms.push_back(correct_first); + 
axioms.push_back(correct_first); } else { // str[0] = '-' || str[0] = '+' || is_digit_with_radix(str[0], radix) const or_exprt correct_first( starts_with_minus, starts_with_digit, starts_with_plus); - m_axioms.push_back(correct_first); + axioms.push_back(correct_first); } // str[0]='+' or '-' ==> |str| > 1 const implies_exprt contains_digit( or_exprt(starts_with_minus, starts_with_plus), str.axiom_for_length_ge(from_integer(2, index_type))); - m_axioms.push_back(contains_digit); + axioms.push_back(contains_digit); // |str| <= max_size - m_axioms.push_back(str.axiom_for_length_le(max_size)); + axioms.push_back(str.axiom_for_length_le(max_size)); // forall 1 <= i < |str| . is_digit_with_radix(str[i], radix) // We unfold the above because we know that it will be used for all i up to @@ -358,7 +350,7 @@ void string_constraint_generatort::add_axioms_for_correct_number_format( str.axiom_for_length_ge(from_integer(index+1, index_type)), is_digit_with_radix( str[index], strict_formatting, radix_as_char, radix_ul)); - m_axioms.push_back(character_at_index_is_digit); + axioms.push_back(character_at_index_is_digit); } if(strict_formatting) @@ -369,12 +361,12 @@ void string_constraint_generatort::add_axioms_for_correct_number_format( const implies_exprt no_leading_zero( equal_exprt(chr, zero_char), str.axiom_for_has_length(from_integer(1, index_type))); - m_axioms.push_back(no_leading_zero); + axioms.push_back(no_leading_zero); // no_leading_zero_after_minus : str[0]='-' => str[1]!='0' implies_exprt no_leading_zero_after_minus( starts_with_minus, not_exprt(equal_exprt(str[1], zero_char))); - m_axioms.push_back(no_leading_zero_after_minus); + axioms.push_back(no_leading_zero_after_minus); } } @@ -383,24 +375,23 @@ void string_constraint_generatort::add_axioms_for_correct_number_format( /// in terms of the characters in str. 
/// \param input_int: the integer represented by str /// \param type: the type for input_int +/// \param strict_formatting: if true, don't allow a leading plus, redundant +/// zeros or upper case letters /// \param str: input string /// \param max_string_length: the maximum length str can have /// \param radix: the radix, with the same type as input_int /// \param radix_ul: the radix as an unsigned long, or 0 if that can't be /// determined -/// \param strict_formatting: if true, don't allow a leading plus, redundant -/// zeros or upper case letters void string_constraint_generatort::add_axioms_for_characters_in_integer_string( const exprt &input_int, const typet &type, const bool strict_formatting, - const string_exprt &str, + const array_string_exprt &str, const std::size_t max_string_length, const exprt &radix, const unsigned long radix_ul) { - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &char_type=ref_type.get_char_type(); + const typet &char_type = str.content().type().subtype(); const equal_exprt starts_with_minus(str[0], constant_char('-', char_type)); const constant_exprt zero_expr=from_integer(0, type); @@ -412,7 +403,7 @@ void string_constraint_generatort::add_axioms_for_characters_in_integer_string( /// Deal with size==1 case separately. There are axioms from /// add_axioms_for_correct_number_format which say that the string must /// contain at least one digit, so we don't have to worry about "+" or "-". 
- m_axioms.push_back( + axioms.push_back( implies_exprt(str.axiom_for_has_length(1), equal_exprt(input_int, sum))); for(size_t size=2; size<=max_string_length; size++) @@ -456,18 +447,18 @@ void string_constraint_generatort::add_axioms_for_characters_in_integer_string( if(!digit_constraints.empty()) { const implies_exprt a5(premise, conjunction(digit_constraints)); - m_axioms.push_back(a5); + axioms.push_back(a5); } const implies_exprt a6( and_exprt(premise, not_exprt(starts_with_minus)), equal_exprt(input_int, sum)); - m_axioms.push_back(a6); + axioms.push_back(a6); const implies_exprt a7( and_exprt(premise, starts_with_minus), equal_exprt(input_int, unary_minus_exprt(sum))); - m_axioms.push_back(a7); + axioms.push_back(a7); } } @@ -479,7 +470,7 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( const function_application_exprt &f) { PRECONDITION(f.arguments().size()==1 || f.arguments().size()==2); - const string_exprt str=get_string_expr(f.arguments()[0]); + const array_string_exprt str = get_string_expr(f.arguments()[0]); const typet &type=f.type(); PRECONDITION(type.id()==ID_signedbv); const exprt radix=f.arguments().size()==1? 
@@ -491,8 +482,7 @@ exprt string_constraint_generatort::add_axioms_for_parse_int( PRECONDITION((radix_ul>=2 && radix_ul<=36) || radix_ul==0); const symbol_exprt input_int=fresh_symbol("parsed_int", type); - const refined_string_typet &ref_type=to_refined_string_type(str.type()); - const typet &char_type=ref_type.get_char_type(); + const typet &char_type = str.content().type().subtype(); const typecast_exprt radix_as_char(radix, char_type); const bool strict_formatting=false; @@ -530,7 +520,7 @@ unsigned long string_constraint_generatort::to_integer_or_default( const exprt &expr, unsigned long def) { mp_integer mp_radix; - bool to_integer_failed=to_integer(simplify_expr(expr, m_ns), mp_radix); + bool to_integer_failed=to_integer(simplify_expr(expr, ns), mp_radix); return to_integer_failed?def:integer2ulong(mp_radix); } diff --git a/src/solvers/refinement/string_constraint_instantiation.cpp b/src/solvers/refinement/string_constraint_instantiation.cpp index 7a99bb0e969..26ea7ba3515 100644 --- a/src/solvers/refinement/string_constraint_instantiation.cpp +++ b/src/solvers/refinement/string_constraint_instantiation.cpp @@ -15,7 +15,7 @@ Author: Jesse Sigal, jesse.sigal@diffblue.com /// substituting the quantifiers and generating axioms. 
/// \related string_refinementt /// \param [in] axiom: the axiom to instantiate -/// \param [in] index_pairs: the pairs of indices to at which to instantiate +/// \param [in] index_pairs: pair of indexes for `axiom.s0()`and `axiom.s1()` /// \param [in] generator: generator to be used to get `axiom`'s witness /// \return the lemmas produced through instantiation std::vector instantiate_not_contains( @@ -25,8 +25,8 @@ std::vector instantiate_not_contains( { std::vector lemmas; - const string_exprt &s0=to_string_expr(axiom.s0()); - const string_exprt &s1=to_string_expr(axiom.s1()); + const array_string_exprt s0 = axiom.s0(); + const array_string_exprt s1 = axiom.s1(); for(const auto &pair : index_pairs) { diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 9ec575671b5..e70ea613c5c 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -22,6 +22,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include #include #include #include @@ -30,8 +31,6 @@ Author: Alberto Griggio, alberto.griggio@gmail.com static exprt substitute_array_with_expr(const exprt &expr, const exprt &index); -static bool is_char_array(const namespacet &ns, const typet &type); - static bool is_valid_string_constraint( messaget::mstreamt &stream, const namespacet &ns, @@ -52,7 +51,7 @@ static std::pair> check_axioms( std::size_t max_string_length, bool use_counter_example, ui_message_handlert::uit ui, - const replace_mapt &symbol_resolve); + const union_find_replacet &symbol_resolve); static void initial_index_set( index_set_pairt &index_set, @@ -92,9 +91,12 @@ static std::vector instantiate( const index_set_pairt &index_set, const string_constraint_generatort &generator); -static exprt get_array( +static optionalt get_array( const std::function &super_get, - const exprt &arr); + const namespacet &ns, + const std::size_t max_string_length, + 
messaget::mstreamt &stream, + const array_string_exprt &arr); /// Convert index-value map to a vector of values. If a value for an /// index is not defined, set it to the value referenced by the next higher @@ -113,7 +115,7 @@ static std::vector fill_in_map_as_vector( for(auto it=index_value.rbegin(); it!=index_value.rend(); ++it) { const std::size_t index=it->first; - const T& value=it->second; + const T &value = it->second; const auto next=std::next(it); const std::size_t leftmost_index_to_pad= next!=index_value.rend() @@ -199,298 +201,213 @@ static std::vector generate_instantiations( return lemmas; } -/// List the simple expressions on which the expression depends in the -/// `symbol_resolve` map. A simple expression is either a symbol or a -/// constant array -/// \param expr: an expression -static void depends_in_symbol_map(const exprt &expr, std::vector &accu) +/// Remove functions applications and create the necessary axioms. +/// \param expr: an expression possibly containing function applications +/// \param generator: generator for the string constraints +/// \return an expression containing no function application +static exprt substitute_function_applications( + exprt expr, + string_constraint_generatort &generator) { - if(expr.id()==ID_if) - { - if_exprt if_expr=to_if_expr(expr); - depends_in_symbol_map(if_expr.true_case(), accu); - depends_in_symbol_map(if_expr.false_case(), accu); - } - else if(expr.id()==ID_struct) - { - string_exprt str=to_string_expr(expr); - depends_in_symbol_map(str.content(), accu); - } - else - { - INVARIANT( - expr.id()==ID_symbol || expr.id()==ID_array || expr.id()==ID_array_of, - "leaf in symbol resolve should be a symbol or a constant array"); - accu.push_back(expr); - } + for(auto &operand : expr.operands()) + operand = substitute_function_applications(operand, generator); + + if(expr.id() == ID_function_application) + return generator.add_axioms_for_function_application( + to_function_application_expr(expr)); + + return 
expr; } -/// keeps a map of symbols to expressions, such as none of the mapped values -/// exist as a key -/// \param lhs: a symbol expression -/// \param rhs: an expression to map it to, which should be either a symbol -/// a string_exprt, an array_exprt, an array_of_exprt or an -/// if_exprt with branches of the previous kind -void add_symbol_to_symbol_map( - replace_mapt &symbol_resolve, - std::map> &reverse_symbol_resolve, - const exprt &lhs, - const exprt &rhs) +/// Remove functions applications and create the necessary axioms. +/// \param equations: vector of equations +/// \param generator: generator for the string constraints +/// \return vector of equations where function application have been replaced +static void substitute_function_applications_in_equations( + std::vector &equations, + string_constraint_generatort &generator) { - PRECONDITION(lhs.id()==ID_symbol); - PRECONDITION(rhs.id()==ID_symbol || - rhs.id()==ID_array || - rhs.id()==ID_array_of || - rhs.id()==ID_if || - (rhs.id()==ID_struct && - is_refined_string_type(rhs.type()))); - - // We insert the mapped value of the rhs, if it exists. 
- auto it=symbol_resolve.find(rhs); - const exprt &new_rhs=it!=symbol_resolve.end()?it->second:rhs; - symbol_resolve[lhs]=new_rhs; - - // List the leaves of new_rhs - std::vector leaves; - depends_in_symbol_map(new_rhs, leaves); - - const auto &symbols_to_update_with_new_rhs=reverse_symbol_resolve[lhs]; - - // We need to update all the symbols which depend on lhs - for(const exprt &item : symbols_to_update_with_new_rhs) - replace_expr(symbol_resolve, symbol_resolve[item]); - - // Every time a symbol at the leaves is updated we need to update lhs - // and the symbols that depend on it - for(const auto &leaf : leaves) - { - reverse_symbol_resolve[leaf].push_back(lhs); - for(const exprt &item : symbols_to_update_with_new_rhs) - reverse_symbol_resolve[leaf].push_back(item); - } + for(auto &eq : equations) + eq.rhs() = substitute_function_applications(eq.rhs(), generator); } -/// add axioms if the rhs is a character array -/// \par parameters: the rhs and lhs of an equality over character arrays -std::vector set_char_array_equality(const exprt &lhs, const exprt &rhs) +/// For now, any unsigned bitvector type of width smaller or equal to 16 is +/// considered a character. +/// \note types that are not characters may be detected as characters (for +/// instance unsigned char in C); this will make dec_solve do unnecessary +/// steps for these, but should not affect correctness. +/// \param type: a type +/// \return true if the given type represents characters +bool is_char_type(const typet &type) { - PRECONDITION(lhs.id()==ID_symbol); - - if(rhs.id()==ID_array && rhs.type().id()==ID_array) - { - std::vector lemmas; - const typet &index_type=to_array_type(rhs.type()).size().type(); - for(size_t i=0, ilim=rhs.operands().size(); i!=ilim; ++i) - { - // Introduce axioms to map symbolic rhs to its char array.
- index_exprt arraycell(rhs, from_integer(i, index_type)); - equal_exprt arrayeq(arraycell, rhs.operands()[i]); - lemmas.push_back(arrayeq); - } - return lemmas; - } - return { }; - // At least for Java (as it is currently pre-processed), we need not consider - // other cases, because all character arrays find themselves on the rhs of an - // equality. Note that this might not be the case for other languages. + return type.id() == ID_unsignedbv && + to_unsignedbv_type(type).get_width() <= 16; } -/// distinguish char array from other types -/// -/// TODO: this is only for java char array and does not work for other languages +/// Distinguish char array from other types. +/// For now, any unsigned bitvector type is considered a character. /// \param type: a type -/// \return true if the given type is an array of java characters -static bool is_char_array(const namespacet &ns, const typet &type) +/// \param ns: name space +/// \return true if the given type is an array of characters +bool is_char_array_type(const typet &type, const namespacet &ns) { if(type.id()==ID_symbol) - return is_char_array(ns, ns.follow(type)); + return is_char_array_type(ns.follow(type), ns); + return type.id() == ID_array && is_char_type(type.subtype()); +} - return (type.id()==ID_array && type.subtype()==java_char_type()); +/// For now, any unsigned bitvector type is considered a character. 
+/// \param type: a type +/// \return true if the given type represents a pointer to characters +bool is_char_pointer_type(const typet &type) +{ + return type.id() == ID_pointer && is_char_type(type.subtype()); } -/// add lemmas to the solver corresponding to the given equation -/// \param lhs: left hand side of an equality expression -/// \param rhs: right and side of the equality -/// \return true if the assignemnt needs to be handled by the parent class -/// via `set_to` -std::pair> add_axioms_for_string_assigns( - replace_mapt &symbol_resolve, - std::map> &reverse_symbol_resolve, - string_constraint_generatort &generator, - messaget::mstreamt &stream, - const namespacet &ns, - const exprt &lhs, - const exprt &rhs) +/// \param type: a type +/// \param ns: name space +/// \return true if `type` or one of its subtypes is a pointer to characters +static bool has_char_pointer_subtype(const typet &type, const namespacet &ns) { - if(is_char_array(ns, rhs.type())) + if(is_char_pointer_type(type)) + return true; + + if(type.id() == ID_struct || type.id() == ID_union) { - std::vector lemmas=set_char_array_equality(lhs, rhs); - if(rhs.id()==ID_symbol || rhs.id()==ID_array) - { - add_symbol_to_symbol_map( - symbol_resolve, - reverse_symbol_resolve, - lhs, - rhs); - return { false, std::move(lemmas) }; - } - else if(rhs.id()==ID_nondet_symbol) - { - add_symbol_to_symbol_map( - symbol_resolve, - reverse_symbol_resolve, - lhs, - generator.fresh_symbol("nondet_array", lhs.type())); - return { false, std::move(lemmas) }; - } - else if(rhs.id()==ID_if) + const struct_union_typet &struct_type = to_struct_union_type(type); + for(const auto &comp : struct_type.components()) { - add_symbol_to_symbol_map( - symbol_resolve, - reverse_symbol_resolve, - lhs, - rhs); - return { true, std::move(lemmas) }; - } - else - { - stream << "ignoring char array " << from_expr(ns, "", rhs) - << messaget::eom; - return { true, std::move(lemmas) }; + if(has_char_pointer_subtype(comp.type(), ns)) + return true; } } -
if(is_refined_string_type(rhs.type())) + + for(const auto &t : type.subtypes()) { - exprt refined_rhs=generator.add_axioms_for_refined_string(rhs); - add_symbol_to_symbol_map( - symbol_resolve, - reverse_symbol_resolve, - lhs, - refined_rhs); - return { false, std::vector() }; + if(has_char_pointer_subtype(t, ns)) + return true; } - // Other cases are to be handled by supert::set_to. - return { true, std::vector() }; + return false; } -/// For each string whose length has been solved, add constants to the map -/// `found_length` -void concretize_lengths( - std::map &found_length, - const std::function &get, - const replace_mapt &symbol_resolve, - const std::set &created_strings) +/// \param expr: an expression +/// \param ns: name space +/// \return true if a subexpression of `expr` is an array of characters +static bool has_char_array_subexpr(const exprt &expr, const namespacet &ns) { - for(const auto &pair : symbol_resolve) - { - if(const auto str=expr_cast(pair.second)) - { - exprt length=get(str->length()); - exprt content=str->content(); - replace_expr(symbol_resolve, content); - found_length[content]=length; - } - } - for(const auto &it : created_strings) - { - if(const auto str=expr_cast(it)) - { - exprt length=get(str->length()); - exprt content=str->content(); - replace_expr(symbol_resolve, content); - found_length[content]=length; - } - } + for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it) + if(is_char_array_type(it->type(), ns)) + return true; + return false; +} + +void replace_symbols_in_equations( + const union_find_replacet &symbol_resolve, + std::vector &equations) +{ + for(equal_exprt &eq : equations) + symbol_resolve.replace_expr(eq); } -/// add lemmas representing the setting of an expression to a given value -/// \par parameters: an expression and the value to set it to +/// Store true equalities in `equations`, pass the rest to `supert::set_to` +/// \param expr: an expression +/// \param value: the value to set it to void
string_refinementt::set_to(const exprt &expr, bool value) { PRECONDITION(expr.type().id()==ID_bool); PRECONDITION(equality_propagation); - if(expr.id()==ID_equal) + if(expr.id() == ID_equal && value) { - equal_exprt eq_expr=to_equal_expr(expr); - const exprt &lhs=eq_expr.lhs(); - const exprt &rhs=eq_expr.rhs(); - - // The assignment of a string equality to false is not supported. - PRECONDITION(value || !is_char_array(ns, rhs.type())); - PRECONDITION(value || !is_refined_string_type(rhs.type())); - - PRECONDITION(lhs.id()==ID_symbol || !is_char_array(ns, rhs.type())); - PRECONDITION(lhs.id()==ID_symbol || !is_refined_string_type(rhs.type())); + const equal_exprt &eq_expr = to_equal_expr(expr); + equations.push_back(eq_expr); + } + else + { + INVARIANT( + !has_char_array_subexpr(expr, ns), "char array only appear in equations"); + supert::set_to(expr, value); + } +} - // If lhs is not a symbol, let supert::set_to() handle it. +/// Add association for each char pointer in the equation +/// \param equations: vector of equations +/// \param ns: namespace +/// \param stream: output stream +/// \return union_find_replacet where char pointer that have been set equal +/// by an equation are associated to the same element +static union_find_replacet generate_symbol_resolution_from_equations( + const std::vector &equations, + const namespacet &ns, + messaget::mstreamt &stream) +{ + const auto eom = messaget::eom; + const std::string log_message = + "WARNING string_refinement.cpp generate_symbol_resolution_from_equations:"; + union_find_replacet solver; + for(const equal_exprt &eq : equations) + { + const exprt &lhs = eq.lhs(); + const exprt &rhs = eq.rhs(); if(lhs.id()!=ID_symbol) { - non_string_axioms.emplace_back(expr, value); - return; + stream << log_message << "non symbol lhs: " << from_expr(ns, "", lhs) + << " with rhs: " << from_expr(ns, "", rhs) << eom; + continue; } if(lhs.type()!=rhs.type()) { - warning() << "ignoring " << from_expr(ns, "", expr) - << " 
[inconsistent types]" << eom; - debug() << "lhs has type: " << lhs.type().pretty(12) << eom; - debug() << "rhs has type: " << rhs.type().pretty(12) << eom; - return; + stream << log_message << "non equal types lhs: " << from_expr(ns, "", lhs) + << "\n####################### rhs: " << from_expr(ns, "", rhs) + << eom; + continue; } - // Preprocessing to remove function applications. - debug() << "(sr::set_to) " << from_expr(ns, "", lhs) - << "=" << from_expr(ns, "", rhs) << eom; - - const exprt subst_rhs=generator.substitute_function_applications(rhs); - if(lhs.type()!=subst_rhs.type()) + if(is_char_pointer_type(rhs.type())) { - if(lhs.type().id()!=ID_array || - subst_rhs.type().id()!=ID_array || - lhs.type().subtype()!=subst_rhs.type().subtype()) + solver.make_union(lhs, rhs); + } + else if(rhs.id() == ID_function_application) + { + // function applications can be ignored because they will be replaced + // in the convert_function_application step of dec_solve + } + else if(has_char_pointer_subtype(lhs.type(), ns)) + { + if(rhs.type().id() == ID_struct) { - warning() << "ignoring " << from_expr(ns, "", expr) - << " [inconsistent types after substitution]" << eom; - return; + const struct_typet &struct_type = to_struct_type(rhs.type()); + for(const auto &comp : struct_type.components()) + { + if(is_char_pointer_type(comp.type())) + { + const member_exprt lhs_data(lhs, comp.get_name(), comp.type()); + const exprt rhs_data = simplify_expr( + member_exprt(rhs, comp.get_name(), comp.type()), ns); + solver.make_union(lhs_data, rhs_data); + } + } } else { - debug() << "(sr::set_to) accepting arrays with " - << "same subtype but different sizes" << eom; + stream << log_message << "non struct with char pointer subexpr " + << from_expr(ns, "", rhs) << "\n * of type " + << from_type(ns, "", rhs.type()) << eom; } } - - if(value) - { - bool not_handled; - std::vector lemmas; - std::tie(not_handled, lemmas)=add_axioms_for_string_assigns( - symbol_resolve, - reverse_symbol_resolve, 
- generator, - warning(), - ns, - lhs, - subst_rhs); - for(const auto &lemma : lemmas) - add_lemma(lemma, false); - if(!not_handled) - return; - } - - // Push the substituted equality to the list of axioms to be given to - // supert::set_to. - non_string_axioms.emplace_back(equal_exprt(lhs, subst_rhs), value); - } - // Push the unmodified equality to the list of axioms to be given to - // supert::set_to. - else - { - // TODO: Verify that the expression contains no string. - // This will be easy once exprt iterators will have been implemented. - non_string_axioms.emplace_back(expr, value); } + return solver; +} + +void output_equations( + std::ostream &output, + const std::vector &equations, + const namespacet &ns) +{ + for(const auto &eq : equations) + output << " * " << from_expr(ns, "", eq.lhs()) + << " == " << from_expr(ns, "", eq.rhs()) << std::endl; } /// use a refinement loop to instantiate universal axioms, call the sat solver, @@ -498,22 +415,60 @@ void string_refinementt::set_to(const exprt &expr, bool value) /// \return result of the decision procedure decision_proceduret::resultt string_refinementt::dec_solve() { - // Substitute all symbols to char arrays in the axioms to give to - // supert::set_to(). 
- for(std::pair &pair : non_string_axioms) +#ifdef DEBUG + debug() << "dec_solve: Initial set of equations" << eom; + output_equations(debug(), equations, ns); +#endif + + debug() << "dec_solve: Build symbol solver from equations" << eom; + // This is used by get, that's why we use a class member here + symbol_resolve = + generate_symbol_resolution_from_equations(equations, ns, debug()); +#ifdef DEBUG + debug() << "symbol resolve:" << eom; + for(const auto &pair : symbol_resolve.to_vector()) + debug() << from_expr(ns, "", pair.first) << " --> " + << from_expr(ns, "", pair.second) << eom; +#endif + + debug() << "dec_solve: Replacing char pointer symbols" << eom; + replace_symbols_in_equations(symbol_resolve, equations); +#ifdef DEBUG + output_equations(debug(), equations, ns); +#endif + + debug() << "dec_solve: Replace function applications" << eom; + // Generator is also used by get, that's why we use a class member + substitute_function_applications_in_equations(equations, generator); +#ifdef DEBUG + output_equations(debug(), equations, ns); +#endif + +#ifdef DEBUG + debug() << "dec_solve: arrays_of_pointers:" << eom; + for(auto pair : generator.get_arrays_of_pointers()) { - replace_expr(symbol_resolve, pair.first); - debug() << "super::set_to " << from_expr(ns, "", pair.first) << eom; - supert::set_to(pair.first, pair.second); + debug() << " * " << from_expr(ns, "", pair.first) << "\t--> " + << from_expr(ns, "", pair.second) << " : " + << from_type(ns, "", pair.second.type()) << eom; } +#endif + for(const auto &eq : equations) + { +#ifdef DEBUG + debug() << "dec_solve: set_to " << from_expr(ns, "", eq) << eom; +#endif + supert::set_to(eq, true); + } + + const auto get = [this](const exprt &expr) { return this->get(expr); }; for(exprt axiom : generator.get_axioms()) { - replace_expr(symbol_resolve, axiom); + symbol_resolve.replace_expr(axiom); if(axiom.id()==ID_string_constraint) { - string_constraintt univ_axiom= - to_string_constraint(axiom); + 
string_constraintt univ_axiom = to_string_constraint(axiom); DATA_INVARIANT( is_valid_string_constraint(error(), ns, univ_axiom), string_refinement_invariantt( @@ -524,9 +479,8 @@ decision_proceduret::resultt string_refinementt::dec_solve() { string_not_contains_constraintt nc_axiom= to_string_not_contains_constraint(axiom); - const refined_string_typet &rtype= - to_refined_string_type(nc_axiom.s0().type()); - const typet &index_type=rtype.get_index_type(); + array_typet rtype = to_array_type(nc_axiom.s0().type()); + const typet &index_type = rtype.size().type(); array_typet witness_type(index_type, infinity_exprt(index_type)); generator.witness[nc_axiom]= generator.fresh_symbol("not_contains_witness", witness_type); @@ -538,11 +492,6 @@ decision_proceduret::resultt string_refinementt::dec_solve() } } - found_length.clear(); - found_content.clear(); - - const auto get=[this](const exprt &expr) { return this->get(expr); }; - // Initial try without index set const decision_proceduret::resultt res=supert::dec_solve(); if(res==resultt::D_SATISFIABLE) @@ -568,11 +517,6 @@ decision_proceduret::resultt string_refinementt::dec_solve() else { debug() << "check_SAT: the model is correct" << eom; - concretize_lengths( - found_length, - get, - symbol_resolve, - generator.get_created_strings()); return resultt::D_SATISFIABLE; } } @@ -584,7 +528,6 @@ decision_proceduret::resultt string_refinementt::dec_solve() initial_index_set(index_sets, ns, axioms); update_index_set(index_sets, ns, current_constraints); - display_index_set(debug(), ns, index_sets); current_constraints.clear(); for(const auto &instance : generate_instantiations( @@ -622,11 +565,6 @@ decision_proceduret::resultt string_refinementt::dec_solve() else { debug() << "check_SAT: the model is correct" << eom; - concretize_lengths( - found_length, - get, - symbol_resolve, - generator.get_created_strings()); return resultt::D_SATISFIABLE; } @@ -638,27 +576,19 @@ decision_proceduret::resultt 
string_refinementt::dec_solve() index_sets.current.clear(); update_index_set(index_sets, ns, current_constraints); + display_index_set(debug(), ns, index_sets); + if(index_sets.current.empty()) { - debug() << "current index set is empty" << eom; if(axioms.not_contains.empty()) { - debug() << "no not_contains axioms, hence SAT" << eom; - concretize_lengths( - found_length, - get, - symbol_resolve, - generator.get_created_strings()); - return resultt::D_SATISFIABLE; - } - else - { - debug() << "not_contains axioms exist, hence ERROR" << eom; + error() << "dec_solve: current index set is empty, " + << "this should not happen" << eom; return resultt::D_ERROR; } + else + debug() << "dec_solve: current index set is empty" << eom; } - - display_index_set(debug(), ns, index_sets); current_constraints.clear(); for(const auto &instance : generate_instantiations( @@ -680,11 +610,13 @@ decision_proceduret::resultt string_refinementt::dec_solve() return resultt::D_ERROR; } -/// add the given lemma to the solver -/// \par parameters: a lemma and Boolean value stating whether the lemma should -/// be added to the index set. +/// Add the given lemma to the solver. +/// \param lemma: a Boolean expression +/// \param simplify_lemma: whether the lemma should be simplified before being +/// given to the underlying solver. void string_refinementt::add_lemma( - const exprt &lemma, const bool _simplify) + const exprt &lemma, + const bool simplify_lemma) { if(!seen_instances.insert(lemma).second) return; @@ -692,7 +624,7 @@ void string_refinementt::add_lemma( current_constraints.push_back(lemma); exprt simple_lemma=lemma; - if(_simplify) + if(simplify_lemma) simplify(simple_lemma, ns); if(simple_lemma.is_true()) @@ -703,67 +635,75 @@ void string_refinementt::add_lemma( return; } + symbol_resolve.replace_expr(simple_lemma); + + // Replace empty arrays with array_of expression because the solver cannot + // handle empty arrays. 
+ for(auto it = simple_lemma.depth_begin(); it != simple_lemma.depth_end();) + { + if(it->id() == ID_array && it->operands().empty()) + { + it.mutate() = array_of_exprt( + from_integer(CHARACTER_FOR_UNKNOWN, it->type().subtype()), + to_array_type(it->type())); + it.next_sibling_or_parent(); + } + else + ++it; + } + debug() << "adding lemma " << from_expr(ns, "", simple_lemma) << eom; prop.l_set_to_true(convert(simple_lemma)); } -/// get a model of an array and put it in a certain form. If the size cannot be -/// obtained or if it is too big, return an empty array. +/// Get a model of an array and put it in a certain form. +/// If the model is incomplete or if it is too big, return no value. /// \par parameters: an expression representing an array and an expression /// representing an integer -/// \return an array expression or an array_of_exprt -static exprt get_array( +/// \return an optional array expression or array_of_exprt +static optionalt get_array( const std::function &super_get, const namespacet &ns, const std::size_t max_string_length, - const exprt &arr, - const exprt &size) + messaget::mstreamt &stream, + const array_string_exprt &arr) { - exprt arr_val=simplify_expr(get_array(super_get, arr), ns); + const auto eom = messaget::eom; + const exprt &size = arr.length(); + exprt arr_val = simplify_expr(super_get(arr), ns); exprt size_val=super_get(size); size_val=simplify_expr(size_val, ns); - typet char_type=arr.type().subtype(); + const typet char_type = arr.type().subtype(); const typet &index_type=size.type(); - array_typet empty_ret_type(char_type, from_integer(0, index_type)); - array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); + const array_typet empty_ret_type(char_type, from_integer(0, index_type)); + const array_of_exprt empty_ret(from_integer(0, char_type), empty_ret_type); if(size_val.id()!=ID_constant) { -#if 0 - debug() << "(sr::get_array) string of unknown size: " - << from_expr(ns, "", size_val) << eom; -#endif - return 
empty_ret; + stream << "(sr::get_array) string of unknown size: " + << from_expr(ns, "", size_val) << eom; + return {}; } unsigned n; if(to_unsigned_integer(to_constant_expr(size_val), n)) { -#if 0 - debug() << "(sr::get_array) size is not valid" << eom; -#endif - return empty_ret; + stream << "(sr::get_array) size is not valid" << eom; + return {}; } - array_typet ret_type(char_type, from_integer(n, index_type)); + const array_typet ret_type(char_type, from_integer(n, index_type)); array_exprt ret(ret_type); if(n>max_string_length) { -#if 0 - debug() << "(sr::get_array) long string (size=" << n << ")" << eom; -#endif - return empty_ret; + stream << "(sr::get_array) long string (size=" << n << ")" << eom; + return {}; } if(n==0) - { -#if 0 - debug() << "(sr::get_array) empty string" << eom; -#endif return empty_ret; - } if(arr_val.id()=="array-list") { @@ -773,12 +713,12 @@ static exprt get_array( "with, array_of, if, or array, and all " "cases besides array are handled above")); std::map initial_map; - for(size_t i=0; i &super_get, - const exprt &arr) -{ - exprt arr_model=super_get(arr); - if(arr_model.id()==ID_array) - { - array_typet &arr_type=to_array_type(arr_model.type()); - arr_type.size()=from_integer( - arr_model.operands().size(), arr_type.size().type()); - } - return arr_model; + else + return {}; } /// convert the content of a string to a more readable representation. This @@ -830,10 +753,65 @@ static std::string string_of_array(const array_exprt &arr) return utf16_constant_array_to_java(arr, n); } +/// Debugging function which finds the valuation of the given array in +/// `super_get` and concretize unknown characters. 
+/// \param super_get: give a valuation to variables +/// \param ns: namespace +/// \param max_string_length: limit up to which we concretize strings +/// \param stream: output stream +/// \param arr: array expression +/// \return expression corresponding to `arr` in the model +static exprt get_char_array_and_concretize( + const std::function &super_get, + const namespacet &ns, + const std::size_t max_string_length, + messaget::mstreamt &stream, + const array_string_exprt &arr) +{ + const auto &eom = messaget::eom; + static const std::string indent(" "); + stream << "- " << from_expr(ns, "", arr) << ":\n"; + stream << indent << indent << "- type: " << from_type(ns, "", arr.type()) + << eom; + const auto arr_model_opt = + get_array(super_get, ns, max_string_length, stream, arr); + if(arr_model_opt) + { + stream << indent << indent + << "- char_array: " << from_expr(ns, "", *arr_model_opt) << eom; + const exprt simple = simplify_expr(*arr_model_opt, ns); + stream << indent << indent + << "- simplified_char_array: " << from_expr(ns, "", simple) << eom; + const exprt concretized_array = + concretize_arrays_in_expression(simple, max_string_length, ns); + stream << indent << indent << "- concretized_char_array: " + << from_expr(ns, "", concretized_array) << eom; + + if(concretized_array.id() == ID_array) + { + stream << indent << indent << "- as_string: \"" + << string_of_array(to_array_expr(concretized_array)) << "\"\n"; + } + else + { + stream << indent << "- warning: not an array" << eom; + } + + stream << indent << indent + << "- type: " << from_type(ns, "", concretized_array.type()) << eom; + return concretized_array; + } + else + { + stream << indent << indent << "- incomplete model" << eom; + return arr; + } +} + /// Display part of the current model by mapping the variables created by the /// solver to constant expressions given by the current model void debug_model( - const replace_mapt &symbol_resolve, + const string_constraint_generatort &generator, 
messaget::mstreamt &stream, const namespacet &ns, const std::size_t max_string_length, @@ -841,63 +819,32 @@ void debug_model( const std::vector &boolean_symbols, const std::vector &index_symbols) { - const std::string indent(" "); - for(auto it : symbol_resolve) + static const std::string indent(" "); + + stream << "debug_model:" << '\n'; + for(const auto &pointer_array : generator.get_arrays_of_pointers()) { - if(const auto refined=expr_cast(it.second)) - { - stream << "- " << from_expr(ns, "", to_symbol_expr(it.first)) << ":\n" - << indent << indent << "in_map: " - << from_expr(ns, "", *refined) << '\n' - << indent << indent << "resolved: " - << from_expr(ns, "", *refined) << '\n'; - const exprt &econtent=refined->content(); - const exprt &elength=refined->length(); - - exprt len=super_get(elength); - len=simplify_expr(len, ns); - const exprt arr=get_array( - super_get, - ns, - max_string_length, - econtent, len); - if(arr.id()==ID_array) - stream << indent << indent << "as_string: \"" - << string_of_array(to_array_expr(arr)) << "\"\n"; - else - stream << indent << indent << "as_char_array: " - << from_expr(ns, "", arr) << "\n"; + const auto arr = pointer_array.second; + const exprt model = get_char_array_and_concretize( + super_get, ns, max_string_length, stream, arr); - stream << indent << indent << "size: " << from_expr(ns, "", len) << '\n'; - } - else - { - INVARIANT( - is_char_array(ns, it.second.type()), - string_refinement_invariantt("symbol_resolve should only map to " - "refined_strings or to char_arrays, and refined_strings are already " - "handled")); - exprt arr=it.second; - replace_expr(symbol_resolve, arr); - stream << "- " << from_expr(ns, "", to_symbol_expr(it.first)) << ":\n"; - stream << indent << indent << "resolved: " - << from_expr(ns, "", arr) << "\n"; - exprt arr_model=get_array(super_get, arr); - stream << indent << indent << "char_array: " - << from_expr(ns, "", arr_model) << '\n'; - } + stream << "- " << from_expr(ns, "", arr) << ":\n" 
+ << indent << "- pointer: " << from_expr(ns, "", pointer_array.first) + << "\n" + << indent << "- model: " << from_expr(ns, "", model) + << messaget::eom; } - for(const auto &it : boolean_symbols) + for(const auto &symbol : boolean_symbols) { - stream << " - " << it.get_identifier() << ": " - << from_expr(ns, "", super_get(it)) << '\n'; + stream << " - " << symbol.get_identifier() << ": " + << from_expr(ns, "", super_get(symbol)) << '\n'; } - for(const auto &it : index_symbols) + for(const auto &symbol : index_symbols) { - stream << " - " << it.get_identifier() << ": " - << from_expr(ns, "", super_get(it)) << '\n'; + stream << " - " << symbol.get_identifier() << ": " + << from_expr(ns, "", super_get(symbol)) << '\n'; } stream << messaget::eom; } @@ -948,26 +895,63 @@ exprt fill_in_array_with_expr( { PRECONDITION(expr.type().id()==ID_array); PRECONDITION(expr.id()==ID_with || expr.id()==ID_array_of); - - // Nothing to do for empty array - if(expr.id()==ID_array_of) - return expr; + const array_typet &array_type = to_array_type(expr.type()); // Map of the parts of the array that are initialized std::map initial_map; + // Set the last index to be sure the array will have the right length + const auto &array_size_opt = expr_cast(array_type.size()); + if(array_size_opt && *array_size_opt > 0) + initial_map.emplace( + *array_size_opt - 1, + from_integer(CHARACTER_FOR_UNKNOWN, array_type.subtype())); + for(exprt it=expr; it.id()==ID_with; it=to_with_expr(it).old()) { // Add to `initial_map` all the pairs (index,value) contained in `WITH` // statements - const with_exprt with_expr=to_with_expr(it); + const with_exprt &with_expr = to_with_expr(it); const exprt &then_expr=with_expr.new_value(); const auto index=expr_cast_v(with_expr.where()); - if(index initial_map; + const auto &array_size_opt = expr_cast(array_type.size()); + + if(array_size_opt && *array_size_opt > 0) + initial_map.emplace( + *array_size_opt - 1, + from_integer(CHARACTER_FOR_UNKNOWN, 
array_type.subtype())); + + for(std::size_t i = 0; i < expr.operands().size(); ++i) + { + if(i < string_max_length && expr.operands()[i].id() != ID_unknown) + initial_map[i] = expr.operands()[i]; + } + + array_exprt result(array_type); result.operands()=fill_in_map_as_vector(initial_map); return result; } @@ -1162,17 +1146,31 @@ static exprt negation_of_constraint(const string_constraintt &axiom) /// be interpreted as `{ 2, 2, 3, 3, 3}`. /// \param expr: expression to interpret /// \param string_max_length: maximum size of arrays to consider +/// \param ns: namespace, used to determine what is an array of character /// \return the interpreted expression -exprt concretize_arrays_in_expression(exprt expr, std::size_t string_max_length) +exprt concretize_arrays_in_expression( + exprt expr, + std::size_t string_max_length, + const namespacet &ns) { auto it=expr.depth_begin(); const auto end=expr.depth_end(); while(it!=end) { - if(it->id()==ID_with && it->type().id()==ID_array) + if(is_char_array_type(it->type(), ns)) { - it.mutate()=fill_in_array_with_expr(*it, string_max_length); - it.next_sibling_or_parent(); + if(it->id() == ID_with || it->id() == ID_array_of) + { + it.mutate() = fill_in_array_with_expr(*it, string_max_length); + it.next_sibling_or_parent(); + } + else if(it->id() == ID_array) + { + it.mutate() = fill_in_array_expr(to_array_expr(*it), string_max_length); + it.next_sibling_or_parent(); + } + else + ++it; // ignoring other expressions } else ++it; @@ -1180,7 +1178,45 @@ exprt concretize_arrays_in_expression(exprt expr, std::size_t string_max_length) return expr; } -/// return true if the current model satisfies all the axioms +/// Debugging function which outputs the different steps an axiom goes through +/// to be checked in check axioms. 
+static void debug_check_axioms_step( + messaget::mstreamt &stream, + const namespacet &ns, + const exprt &axiom, + const exprt &axiom_in_model, + const exprt &negaxiom, + const exprt &with_concretized_arrays) +{ + static const std::string indent = " "; + static const std::string indent2 = " "; + stream << indent2 << "- axiom:\n" << indent2 << indent; + + if(axiom.id() == ID_string_constraint) + stream << from_expr(ns, "", to_string_constraint(axiom)); + else if(axiom.id() == ID_string_not_contains_constraint) + stream << from_expr(ns, "", to_string_not_contains_constraint(axiom)); + else + stream << from_expr(ns, "", axiom); + stream << '\n' << indent2 << "- axiom_in_model:\n" << indent2 << indent; + + if(axiom_in_model.id() == ID_string_constraint) + stream << from_expr(ns, "", to_string_constraint(axiom_in_model)); + else if(axiom_in_model.id() == ID_string_not_contains_constraint) + stream << from_expr( + ns, "", to_string_not_contains_constraint(axiom_in_model)); + else + stream << from_expr(ns, "", axiom_in_model); + + stream << '\n' + << indent2 << "- negated_axiom:\n" + << indent2 << indent << from_expr(ns, "", negaxiom) << '\n'; + stream << indent2 << "- negated_axiom_with_concretized_arrays:\n" + << indent2 << indent << from_expr(ns, "", with_concretized_arrays) + << '\n'; +} + +/// \return true if the current model satisfies all the axioms /// \return a Boolean static std::pair> check_axioms( const string_axiomst &axioms, @@ -1191,20 +1227,30 @@ static std::pair> check_axioms( std::size_t max_string_length, bool use_counter_example, ui_message_handlert::uit ui, - const replace_mapt &symbol_resolve) + const union_find_replacet &symbol_resolve) { const auto eom=messaget::eom; + static const std::string indent = " "; + static const std::string indent2 = " "; + stream << "string_refinementt::check_axioms:" << eom; - #if 0 - debug_model(symbol_resolve, + stream << "symbol_resolve:" << eom; + auto pairs = symbol_resolve.to_vector(); + for(const auto &pair : 
pairs) + stream << " - " << from_expr(ns, "", pair.first) << " --> " + << from_expr(ns, "", pair.second) << eom; + +#ifdef DEBUG + debug_model( + generator, stream, ns, max_string_length, get, generator.get_boolean_symbols(), generator.get_index_symbols()); - #endif +#endif // Maps from indexes of violated universal axiom to a witness of violation std::map violated; @@ -1224,34 +1270,24 @@ static std::pair> check_axioms( univ_var, get(bound_inf), get(bound_sup), get(prem), get(body)); exprt negaxiom=negation_of_constraint(axiom_in_model); - - stream << " " << i << ".\n" - << " - axiom:\n" - << " " << from_expr(ns, "", axiom) << '\n'; - stream << " - axiom_in_model:\n" - << " " << from_expr(ns, "", axiom_in_model) << '\n'; - stream << " - negated_axiom:\n" - << " " << from_expr(ns, "", negaxiom) << '\n'; - - exprt with_concretized_arrays=concretize_arrays_in_expression( - negaxiom, max_string_length); - stream << " - negated_axiom_with_concretized_array_access:\n" - << " " << from_expr(ns, "", with_concretized_arrays) << '\n'; - + negaxiom = simplify_expr(negaxiom, ns); + exprt with_concretized_arrays = + concretize_arrays_in_expression(negaxiom, max_string_length, ns); substitute_array_access(with_concretized_arrays); - stream << " - negated_axiom_without_array_access:\n" - << " " << from_expr(ns, "", with_concretized_arrays) << '\n'; + + stream << indent << i << ".\n"; + debug_check_axioms_step( + stream, ns, axiom, axiom_in_model, negaxiom, with_concretized_arrays); if(const auto &witness= find_counter_example(ns, ui, with_concretized_arrays, univ_var)) { - stream << " - violated_for: " - << univ_var.get_identifier() - << "=" << from_expr(ns, "", *witness) << '\n'; + stream << indent2 << "- violated_for: " << univ_var.get_identifier() + << "=" << from_expr(ns, "", *witness) << eom; violated[i]=*witness; } else - stream << " - correct" << '\n'; + stream << indent2 << "- correct" << eom; } // Maps from indexes of violated not_contains axiom to a witness of 
violation @@ -1259,7 +1295,7 @@ static std::pair> check_axioms( stream << "there are " << axioms.not_contains.size() << " not_contains axioms" << eom; - for(size_t i=0; i> check_axioms( const exprt &prem=nc_axiom.premise(); const exprt &exists_bound_inf=nc_axiom.exists_lower_bound(); const exprt &exists_bound_sup=nc_axiom.exists_upper_bound(); - const string_exprt &s0=nc_axiom.s0(); - const string_exprt &s1=nc_axiom.s1(); + const array_string_exprt &s0 = nc_axiom.s0(); + const array_string_exprt &s1 = nc_axiom.s1(); symbol_exprt univ_var=generator.fresh_univ_index( "not_contains_univ_var", nc_axiom.s0().length().type()); - const string_not_contains_constraintt nc_axiom_in_model( + string_not_contains_constraintt nc_axiom_in_model( get(univ_bound_inf), get(univ_bound_sup), get(prem), get(exists_bound_inf), get(exists_bound_sup), - to_string_expr(get(s0)), - to_string_expr(get(s1))); + to_array_string_expr(get(s0)), + to_array_string_expr(get(s1))); - exprt negaxiom=negation_of_not_contains_constraint( - nc_axiom_in_model, univ_var); + // necessary so that expressions such as `1 + (3 - (TRUE ? 
0 : 0))` do not + // appear in bounds + nc_axiom_in_model = + to_string_not_contains_constraint(simplify_expr(nc_axiom_in_model, ns)); - stream << " " << i << ".\n" - << " - axiom:\n" - << " " << from_expr(ns, "", nc_axiom) << '\n'; - stream << " - axiom_in_model:\n" - << " " << from_expr(ns, "", nc_axiom_in_model) << '\n'; - stream << " - negated_axiom:\n" - << " " << from_expr(ns, "", negaxiom) << '\n'; + exprt negaxiom = + negation_of_not_contains_constraint(nc_axiom_in_model, univ_var); - exprt with_concretized_arrays=concretize_arrays_in_expression( - negaxiom, max_string_length); - stream << " - negated_axiom_with_concretized_array_access:\n" - << " " << from_expr(ns, "", with_concretized_arrays) << '\n'; + negaxiom = simplify_expr(negaxiom, ns); + exprt with_concrete_arrays = + concretize_arrays_in_expression(negaxiom, max_string_length, ns); - substitute_array_access(with_concretized_arrays); - stream << " - negated_axiom_without_array_access:\n" - << " " << from_expr(ns, "", with_concretized_arrays) << '\n'; + substitute_array_access(with_concrete_arrays); - if(const auto &witness= - find_counter_example(ns, ui, with_concretized_arrays, univ_var)) + stream << indent << i << ".\n"; + debug_check_axioms_step( + stream, ns, nc_axiom, nc_axiom_in_model, negaxiom, with_concrete_arrays); + + if(const auto witness = find_counter_example(ns, ui, negaxiom, univ_var)) { - stream << " - violated_for: " - << univ_var.get_identifier() - << "=" << from_expr(ns, "", *witness) << '\n'; + stream << indent2 << "- violated_for: " << univ_var.get_identifier() + << "=" << from_expr(ns, "", *witness) << eom; violated_not_contains[i]=*witness; } - else - stream << " - correct" << '\n'; } if(violated.empty() && violated_not_contains.empty()) @@ -1364,7 +1394,7 @@ static std::pair> check_axioms( const exprt counter=::instantiate_not_contains( axiom, indices, generator)[0]; - stream << " - " << from_expr(ns, "", counter) << eom; + stream << " - " << from_expr(ns, "", counter) << 
eom; lemmas.push_back(counter); } return { false, lemmas }; @@ -1532,20 +1562,19 @@ static exprt compute_inverse_function( else { INVARIANT( - it->second==0, - string_refinement_invariantt("a proper function must have exactly one " - "occurrences after reduction, or it canceled out, and it does not have " - " one")); + it->second == 0, + string_refinement_invariantt( + "a proper function must have exactly one " + "occurrences after reduction, or it cancelled out, and it does not" + " have one")); stream << "in string_refinementt::compute_inverse_function:" - << " warning: occurrences of qvar canceled out " << messaget::eom; + << " warning: occurrences of qvar cancelled out " << messaget::eom; } elems.erase(it); return sum_over_map(elems, f.type(), neg); } - - class find_qvar_visitort: public const_expr_visitort { private: @@ -1602,23 +1631,27 @@ static void update_index_set( /// if expression for instance `cond?array1:(cond2:array2:array3)`. /// We return all the array expressions contained in `array_expr`. 
/// \param array_expr : an expression representing an array -/// \return a vector containing symbols and constant arrays contained in the -/// expression -static std::vector sub_arrays(const exprt &array_expr) +/// \param accu: a vector to which symbols and constant arrays contained in the +/// expression will be appended +static void get_sub_arrays(const exprt &array_expr, std::vector &accu) { if(array_expr.id()==ID_if) { - std::vector res1=sub_arrays(to_if_expr(array_expr).true_case()); - std::vector res2=sub_arrays(to_if_expr(array_expr).false_case()); - res1.insert(res1.end(), res2.begin(), res2.end()); - return res1; + get_sub_arrays(to_if_expr(array_expr).true_case(), accu); + get_sub_arrays(to_if_expr(array_expr).false_case(), accu); } else { - INVARIANT( - array_expr.id()==ID_symbol || array_expr.id()==ID_array, - "character arrays should be symbol, constant array, or if expression"); - return std::vector(1, array_expr); + if(array_expr.type().id() == ID_array) + { + // TODO: check_that it does not contain any sub_array + accu.push_back(array_expr); + } + else + { + for(const auto &operand : array_expr.operands()) + get_sub_arrays(operand, accu); + } } } @@ -1635,7 +1668,9 @@ static void add_to_index_set( const bool is_size_t=expr_cast(i).has_value(); if(i.id()!=ID_constant || is_size_t) { - for(const auto &sub : sub_arrays(s)) + std::vector sub_arrays; + get_sub_arrays(s, sub_arrays); + for(const auto &sub : sub_arrays) if(index_set.cumulative[sub].insert(i).second) index_set.current[sub].insert(i); } @@ -1652,29 +1687,37 @@ static void initial_index_set( while(!to_process.empty()) { - exprt cur=to_process.back(); + const exprt &cur = to_process.back(); to_process.pop_back(); - if(cur.id()==ID_index) + if(cur.id() == ID_index && is_char_type(cur.type())) { - const exprt &s=cur.op0(); - const exprt &i=cur.op1(); - - bool has_quant_var=find_qvar(i, qvar); + const index_exprt &index_expr = to_index_expr(cur); + const exprt &s = index_expr.array(); + const 
exprt &i = index_expr.index(); - // if cur is of the form s[i] and no quantified variable appears in i - if(!has_quant_var) + if(s.id() == ID_array) { - add_to_index_set(index_set, ns, s, i); + for(std::size_t j = 0; j < s.operands().size(); ++j) + add_to_index_set(index_set, ns, s, from_integer(j, i.type())); } else { - // otherwise we add k-1 - exprt e(i); - const minus_exprt kminus1( - axiom.upper_bound(), - from_integer(1, axiom.upper_bound().type())); - replace_expr(qvar, kminus1, e); - add_to_index_set(index_set, ns, s, e); + const bool has_quant_var = find_qvar(i, qvar); + + // if cur is of the form s[i] and no quantified variable appears in i + if(!has_quant_var) + { + add_to_index_set(index_set, ns, s, i); + } + else + { + // otherwise we add k-1 + exprt copy(i); + const minus_exprt kminus1( + axiom.upper_bound(), from_integer(1, axiom.upper_bound().type())); + replace_expr(qvar, kminus1, copy); + add_to_index_set(index_set, ns, s, copy); + } } } else @@ -1692,7 +1735,7 @@ static void initial_index_set( const auto end=axiom.premise().depth_end(); while(it!=end) { - if(it->id()==ID_index) + if(it->id() == ID_index && is_char_type(it->type())) { const exprt &s=it->op0(); const exprt &i=it->op1(); @@ -1726,7 +1769,7 @@ static void update_index_set( { exprt cur=to_process.back(); to_process.pop_back(); - if(cur.id()==ID_index) + if(cur.id() == ID_index && is_char_type(cur.type())) { const exprt &s=cur.op0(); const exprt &i=cur.op1(); @@ -1734,7 +1777,8 @@ static void update_index_set( s.type().id()==ID_array, string_refinement_invariantt("index expressions must index on arrays")); exprt simplified=simplify_sum(i); - add_to_index_set(index_set, ns, s, simplified); + if(s.id() != ID_array) // do not update index set of constant arrays + add_to_index_set(index_set, ns, s, simplified); } else { @@ -1773,9 +1817,9 @@ static exprt find_index( /// variable `str`, and an index expression `val`. 
/// \return substitute `qvar` the universally quantified variable of `axiom`, by /// an index `val`, in `axiom`, so that the index used for `str` equals `val`. -/// For instance, if `axiom` corresponds to $\forall q. s[q+x]='a' && -/// t[q]='b'$, `instantiate(axiom,s,v)` would return an expression for -/// $s[v]='a' && t[v-x]='b'$. +/// For instance, if `axiom` corresponds to \f$\forall q. s[q+x]='a' && +/// t[q]='b'\f$, `instantiate(axiom,s,v)` would return an expression for +/// \f$s[v]='a' && t[v-x]='b'\f$. static exprt instantiate( messaget::mstreamt &stream, const string_constraintt &axiom, @@ -1801,14 +1845,17 @@ static exprt instantiate( /// Instantiates a quantified formula representing `not_contains` by /// substituting the quantifiers and generating axioms. /// \param [in] axiom: the axiom to instantiate +/// \param index_set: set of indexes +/// \param current_index_set: set of indexes that have been newly added +/// \param generator: constraint generator object /// \return the lemmas produced through instantiation static std::vector instantiate( const string_not_contains_constraintt &axiom, const index_set_pairt &index_set, const string_constraint_generatort &generator) { - const string_exprt &s0=axiom.s0(); - const string_exprt &s1=axiom.s1(); + const array_string_exprt &s0 = axiom.s0(); + const array_string_exprt &s1 = axiom.s1(); const auto &index_set0=index_set.cumulative.find(s0.content()); const auto &index_set1=index_set.cumulative.find(s1.content()); @@ -1845,11 +1892,7 @@ static std::vector instantiate( exprt substitute_array_lists(exprt expr, size_t string_max_length) { for(auto &operand : expr.operands()) - { - // TODO: only copy when necessary - const exprt op(operand); - operand=substitute_array_lists(op, string_max_length); - } + operand = substitute_array_lists(operand, string_max_length); if(expr.id()=="array-list") { @@ -1857,7 +1900,7 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) expr.operands().size()>=2, 
string_refinement_invariantt("array-lists must have at least two " "operands")); - typet &char_type=expr.operands()[1].type(); + const typet &char_type = expr.operands()[1].type(); array_typet arr_type(char_type, infinity_exprt(char_type)); exprt ret_expr=array_of_exprt(from_integer(0, char_type), arr_type); @@ -1881,50 +1924,54 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) /// \return an expression exprt string_refinementt::get(const exprt &expr) const { - const std::function super_get=[this](const exprt &expr) - { return (exprt) supert::get(expr); }; + const auto super_get = [this](const exprt &expr) { // NOLINT + return supert::get(expr); + }; exprt ecopy(expr); - replace_expr(symbol_resolve, ecopy); - if(is_char_array(ns, ecopy.type())) - { - auto it_content=found_content.find(ecopy); - if(it_content!=found_content.end()) - return it_content->second; - - auto it=found_length.find(ecopy); - if(it!=found_length.end()) - return get_array( - super_get, - ns, - generator.max_string_length, - ecopy, - it->second); - } - else if(ecopy.id()==ID_struct) + (void)symbol_resolve.replace_expr(ecopy); + + if(is_char_array_type(ecopy.type(), ns)) { - if(const auto string=expr_cast(ecopy)) + array_string_exprt &arr = to_array_string_expr(ecopy); + arr.length() = generator.get_length_of_string_array(arr); + const auto arr_model_opt = + get_array(super_get, ns, generator.max_string_length, debug(), arr); + // \todo Refactor with get array in model + if(arr_model_opt) { - const exprt &content=string->content(); - const exprt &length=string->length(); - - const exprt arr=get_array( - super_get, - ns, - generator.max_string_length, - content, - length); - ecopy=string_exprt(length, arr, string->type()); + const exprt arr_model = simplify_expr(*arr_model_opt, ns); + const exprt concretized_array = concretize_arrays_in_expression( + arr_model, generator.max_string_length, ns); + return concretized_array; + } + else + { + auto set = 
generator.get_created_strings(); + if(set.find(arr) != set.end()) + { + exprt length = super_get(arr.length()); + if(const auto n = expr_cast(length)) + { + exprt arr_model = + array_exprt(array_typet(arr.type().subtype(), length)); + for(size_t i = 0; i < *n; i++) + arr_model.copy_to_operands(exprt(ID_unknown, arr.type().subtype())); + const exprt concretized_array = concretize_arrays_in_expression( + arr_model, generator.max_string_length, ns); + return concretized_array; + } + } + return arr; } } - - ecopy=supert::get(ecopy); - - return substitute_array_lists(ecopy, generator.max_string_length); + return supert::get(ecopy); } /// Creates a solver with `axiom` as the only formula added and runs it. If it /// is SAT, then true is returned and the given evaluation of `var` is stored /// in `witness`. If UNSAT, then what witness is is undefined. +/// \param ns: namespace +/// \param ui: message handler /// \param [in] axiom: the axiom to be checked /// \param [in] var: the variable whose evaluation will be stored in witness /// \return: the witness of the satisfying assignment if one @@ -1983,32 +2030,29 @@ static array_index_mapt gather_indices(const exprt &expr) return v.indices; } +/// \param expr: an expression +/// \param var: a symbol +/// \return Boolean telling whether `expr` is a linear function of `var`. 
+/// TODO: add unit test /// \related string_constraintt -class is_linear_arithmetic_expr_visitort: public const_expr_visitort +static bool +is_linear_arithmetic_expr(const exprt &expr, const symbol_exprt &var) { -public: - bool correct; - - is_linear_arithmetic_expr_visitort(): correct(true) {} - - void operator()(const exprt &expr) override + for(auto it = expr.depth_begin(); it != expr.depth_end();) { - if(expr.id()!=ID_plus && expr.id()!=ID_minus && expr.id()!=ID_unary_minus) + if( + it->id() != ID_plus && it->id() != ID_minus && + it->id() != ID_unary_minus && *it != var) { - // This represents that the expr is a valid leaf, may not be future proof - // or 100% enforced, but is correct prescriptively. All non-sum exprs must - // be leaves. - correct&=expr.operands().empty(); + if(find_qvar(*it, var)) + return false; + else + it.next_sibling_or_parent(); } + else + ++it; } -}; - -/// \related string_constraintt -static bool is_linear_arithmetic_expr(const exprt &expr) -{ - is_linear_arithmetic_expr_visitort v; - expr.visit(v); - return v.correct; + return true; } /// The universally quantified variable is only allowed to occur in index @@ -2051,8 +2095,10 @@ static bool universal_only_in_index(const string_constraintt &expr) return true; } -/// Checks the data invariant for \link string_constraintt +/// Checks the data invariant for \link string_constraintt \endlink /// \related string_constraintt +/// \param stream: message stream +/// \param ns: namespace /// \param [in] expr: the string constraint to check /// \return whether the constraint satisfies the invariant static bool is_valid_string_constraint( @@ -2096,8 +2142,8 @@ static bool is_valid_string_constraint( } } - // Condition 3: f must be linear - if(!is_linear_arithmetic_expr(rep)) + // Condition 3: f must be linear in the quantified variable + if(!is_linear_arithmetic_expr(rep, expr.univ_var())) { stream << "f is not linear: " << from_expr(ns, "", expr) << ", str: " << from_expr(ns, "", pair.first) 
<< eom; diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index d6c7814367a..b890f9e40d5 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -23,11 +23,13 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include #include #include #include #define MAX_NB_REFINEMENT 100 +#define CHARACTER_FOR_UNKNOWN '?' struct index_set_pairt { @@ -73,9 +75,6 @@ class string_refinementt final: public bv_refinementt // Base class typedef bv_refinementt supert; - typedef std::set expr_sett; - typedef std::list exprt_listt; - string_refinementt(const infot &, bool); const configt config_; @@ -83,7 +82,7 @@ class string_refinementt final: public bv_refinementt string_constraint_generatort generator; // Simple constraints that have been given to the solver - expr_sett seen_instances; + std::set seen_instances; string_axiomst axioms; @@ -94,19 +93,23 @@ class string_refinementt final: public bv_refinementt // Warning: this is indexed by array_expressions and not string expressions index_set_pairt index_sets; - replace_mapt symbol_resolve; - std::map reverse_symbol_resolve; + union_find_replacet symbol_resolve; + + std::vector equations; std::list> non_string_axioms; - // Length of char arrays found during concretization - std::map found_length; - // Content of char arrays found during concretization - std::map found_content; + // Map pointers to array symbols + std::map pointer_map; - void add_lemma(const exprt &lemma, bool simplify=true); + void add_lemma(const exprt &lemma, const bool _simplify = true); }; exprt substitute_array_lists(exprt expr, std::size_t string_max_length); exprt concretize_arrays_in_expression( - exprt expr, std::size_t string_max_length); + exprt expr, + std::size_t string_max_length, + const namespacet &ns); + +bool is_char_array_type(const typet &type, const namespacet &ns); + #endif diff --git a/src/util/Makefile 
b/src/util/Makefile index fbea7fc6a57..c51cd7e752a 100644 --- a/src/util/Makefile +++ b/src/util/Makefile @@ -92,6 +92,7 @@ SRC = arith_tools.cpp \ ui_message.cpp \ unicode.cpp \ union_find.cpp \ + union_find_replace.cpp \ xml.cpp \ xml_expr.cpp \ xml_irep.cpp \ diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 18ba6509988..ac1f75b7f60 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -752,6 +752,8 @@ IREP_ID_ONE(exception_landingpad) IREP_ID_ONE(length_upper_bound) IREP_ID_ONE(string_constraint) IREP_ID_ONE(string_not_contains_constraint) +IREP_ID_ONE(cprover_associate_array_to_pointer_func) +IREP_ID_ONE(cprover_associate_length_to_array_func) IREP_ID_ONE(cprover_char_literal_func) IREP_ID_ONE(cprover_string_literal_func) IREP_ID_ONE(cprover_string_array_of_char_pointer_func) diff --git a/src/util/refined_string_type.cpp b/src/util/refined_string_type.cpp index 91ac2e4661d..818eb9e4391 100644 --- a/src/util/refined_string_type.cpp +++ b/src/util/refined_string_type.cpp @@ -21,8 +21,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com refined_string_typet::refined_string_typet( const typet &index_type, const typet &char_type) { - infinity_exprt infinite_index(index_type); - array_typet char_array(char_type, infinite_index); + array_typet char_array(char_type, infinity_exprt(index_type)); components().emplace_back("length", index_type); components().emplace_back("content", char_array); set_tag(CPROVER_PREFIX"refined_string_type"); diff --git a/src/util/refined_string_type.h b/src/util/refined_string_type.h index f20f0048151..ecef73288ea 100644 --- a/src/util/refined_string_type.h +++ b/src/util/refined_string_type.h @@ -32,10 +32,10 @@ class refined_string_typet: public struct_typet refined_string_typet(const typet &index_type, const typet &char_type); // Type for the content (list of characters) of a string - const array_typet &get_content_type() const + const typet &get_content_type() const { 
PRECONDITION(components().size()==2); - return to_array_type(components()[1].type()); + return components()[1].type(); } const typet &get_char_type() const diff --git a/src/util/string_expr.h b/src/util/string_expr.h index 693df01d87f..f0e080a8ec7 100644 --- a/src/util/string_expr.h +++ b/src/util/string_expr.h @@ -14,37 +14,38 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include +#include -class string_exprt: public struct_exprt +// Given an representation of strings as exprt that implements `length` and +// `content` this provides additional useful methods. +template +class string_exprt { -public: - string_exprt(): struct_exprt() {} - - explicit string_exprt(typet type): struct_exprt(type) +private: + exprt &length() { - operands().resize(2); + return static_cast(this)->length(); } - - string_exprt(const exprt &_length, const exprt &_content, typet type): - struct_exprt(type) + const exprt &length() const { - copy_to_operands(_length, _content); + return static_cast(this)->length(); + } + exprt &content() + { + return static_cast(this)->content(); + } + const exprt &content() const + { + return static_cast(this)->content(); } - // Expression corresponding to the length of the string - const exprt &length() const { return op0(); } - exprt &length() { return op0(); } - - // Expression corresponding to the content (array of characters) of the string - const exprt &content() const { return op1(); } - exprt &content() { return op1(); } - - static exprt within_bounds(const exprt &idx, const exprt &bound); +protected: + string_exprt() = default; - // Expression of the character at position idx in the string - index_exprt operator[] (const exprt &idx) const +public: + exprt operator[](const exprt &i) const { - return index_exprt(content(), idx); + return index_exprt(content(), i); } index_exprt operator[] (int i) const @@ -62,12 +63,14 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_length_ge( const exprt &rhs) const { + 
PRECONDITION(rhs.type() == length().type()); return binary_relation_exprt(length(), ID_ge, rhs); } binary_relation_exprt axiom_for_length_gt( const exprt &rhs) const { + PRECONDITION(rhs.type() == length().type()); return binary_relation_exprt(rhs, ID_lt, length()); } @@ -91,6 +94,7 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_length_le( const exprt &rhs) const { + PRECONDITION(rhs.type() == length().type()); return binary_relation_exprt(length(), ID_le, rhs); } @@ -108,6 +112,7 @@ class string_exprt: public struct_exprt binary_relation_exprt axiom_for_length_lt( const exprt &rhs) const { + PRECONDITION(rhs.type() == length().type()); return binary_relation_exprt(length(), ID_lt, rhs); } @@ -119,6 +124,7 @@ class string_exprt: public struct_exprt equal_exprt axiom_for_has_length(const exprt &rhs) const { + PRECONDITION(rhs.type() == length().type()); return equal_exprt(length(), rhs); } @@ -126,22 +132,113 @@ class string_exprt: public struct_exprt { return axiom_for_has_length(from_integer(i, length().type())); } +}; + +// Representation of strings as arrays +class array_string_exprt : public string_exprt, public exprt +{ +public: + exprt &length() + { + return to_array_type(type()).size(); + } + + const exprt &length() const + { + return to_array_type(type()).size(); + } + + exprt &content() + { + return *this; + } + + const exprt &content() const + { + return *this; + } +}; + +inline array_string_exprt &to_array_string_expr(exprt &expr) +{ + PRECONDITION(expr.type().id() == ID_array); + return static_cast(expr); +} + +inline const array_string_exprt &to_array_string_expr(const exprt &expr) +{ + PRECONDITION(expr.type().id() == ID_array); + return static_cast(expr); +} + +// Represent strings as a struct with a length field and a content field +class refined_string_exprt : public struct_exprt, + public string_exprt +{ +public: + refined_string_exprt() : struct_exprt() + { + } + + explicit refined_string_exprt(const typet &type) : 
struct_exprt(type) + { + operands().resize(2); + } + + refined_string_exprt( + const exprt &_length, + const exprt &_content, + const typet &type) + : struct_exprt(type) + { + copy_to_operands(_length, _content); + } + + refined_string_exprt(const exprt &_length, const exprt &_content) + : refined_string_exprt( + _length, + _content, + refined_string_typet(_length.type(), _content.type())) + { + } + + // Expression corresponding to the length of the string + const exprt &length() const + { + return op0(); + } + exprt &length() + { + return op0(); + } + + // Expression corresponding to the content (array of characters) of the string + const exprt &content() const + { + return op1(); + } + exprt &content() + { + return op1(); + } + + static exprt within_bounds(const exprt &idx, const exprt &bound); - friend inline string_exprt &to_string_expr(exprt &expr); + friend inline refined_string_exprt &to_string_expr(exprt &expr); }; -inline string_exprt &to_string_expr(exprt &expr) +inline refined_string_exprt &to_string_expr(exprt &expr) { PRECONDITION(expr.id()==ID_struct); PRECONDITION(expr.operands().size()==2); - return static_cast(expr); + return static_cast(expr); } -inline const string_exprt &to_string_expr(const exprt &expr) +inline const refined_string_exprt &to_string_expr(const exprt &expr) { PRECONDITION(expr.id()==ID_struct); PRECONDITION(expr.operands().size()==2); - return static_cast(expr); + return static_cast(expr); } #endif diff --git a/src/util/union_find_replace.cpp b/src/util/union_find_replace.cpp new file mode 100644 index 00000000000..d399764f1de --- /dev/null +++ b/src/util/union_find_replace.cpp @@ -0,0 +1,56 @@ +/*******************************************************************\ + +Module: util + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include "union_find_replace.h" + +/// Keeps a map of symbols to expressions, such as none of the mapped values +/// 
exist as a key +/// \param a: an expression of type char array +/// \param b: an expression to map it to, which should be either a symbol, +/// a string_exprt, an array_exprt, an array_of_exprt or an +/// if_exprt with branches of the previous kind +/// \return the new mapped value +exprt union_find_replacet::make_union(const exprt &a, const exprt &b) +{ + const exprt &lhs_root = find(a); + const exprt &rhs_root = find(b); + if(lhs_root != rhs_root) + map[lhs_root] = rhs_root; + return rhs_root; +} + +/// Replace subexpressions of `expr` by a canonical element of the set they +/// belong to. +/// \param expr: an expression, modified in place +/// \return true if expr is left unchanged +bool union_find_replacet::replace_expr(exprt &expr) const +{ + bool unchanged = ::replace_expr(map, expr); + while(!unchanged && !::replace_expr(map, expr)) + continue; + return unchanged; +} + +/// \param expr: an expression +/// \return canonical representation for expressions which belong to the same +/// set +exprt union_find_replacet::find(exprt expr) const +{ + replace_expr(expr); + return expr; +} + +/// \return pairs of expression composed of expressions and a canonical +/// expression for the set they belong to.
+std::vector> union_find_replacet::to_vector() const +{ + std::vector> equations; + for(const auto &pair : map) + equations.emplace_back(pair.first, find(pair.second)); + return equations; +} diff --git a/src/util/union_find_replace.h b/src/util/union_find_replace.h new file mode 100644 index 00000000000..37f86604f1b --- /dev/null +++ b/src/util/union_find_replace.h @@ -0,0 +1,31 @@ +/*******************************************************************\ + +Module: util + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_UTIL_UNION_FIND_REPLACE_H +#define CPROVER_UTIL_UNION_FIND_REPLACE_H + +#include + +/// Similar interface to union-find for expressions, with a function for +/// replacing sub-expressions by their result for find. +class union_find_replacet +{ +public: + bool replace_expr(exprt &expr) const; + + exprt find(exprt expr) const; + + exprt make_union(const exprt &a, const exprt &b); + + std::vector> to_vector() const; + +private: + replace_mapt map; +}; + +#endif // CPROVER_UTIL_UNION_FIND_REPLACE_H diff --git a/unit/Makefile b/unit/Makefile index d667c199036..6c6326bd4d2 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -18,6 +18,7 @@ SRC += unit_tests.cpp \ analyses/does_remove_const/is_type_at_least_as_const_as.cpp \ java_bytecode/java_bytecode_convert_class/convert_abstract_class.cpp \ java_bytecode/java_bytecode_parse_generics/parse_generic_class.cpp \ + java_bytecode/java_object_factory/gen_nondet_string_init.cpp \ miniBDD_new.cpp \ java_bytecode/java_string_library_preprocess/convert_exprt_to_string_exprt.cpp \ java_bytecode/java_utils_test.cpp \ @@ -27,6 +28,7 @@ SRC += unit_tests.cpp \ solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \ solvers/refinement/string_refinement/concretize_array.cpp \ solvers/refinement/string_refinement/substitute_array_list.cpp \ + solvers/refinement/string_refinement/union_find_replace.cpp 
\ util/expr_cast/expr_cast.cpp \ util/expr_iterator.cpp \ util/message.cpp \ diff --git a/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp b/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp new file mode 100644 index 00000000000..1225263b824 --- /dev/null +++ b/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp @@ -0,0 +1,95 @@ +/*******************************************************************\ + + Module: Java string library preprocess. + Test for converting an expression to a string expression. + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +SCENARIO( + "Generate string object", + "[core][java_bytecode][java_object_factor][gen_nondet_string_init]") +{ + GIVEN("an expression, a location, and a symbol table") + { + source_locationt loc; + symbol_tablet symbol_table; + register_language(new_java_bytecode_language); + + // Add java.lang.Object to symbol table + symbolt jlo_sym; + jlo_sym.name = "java::java.lang.Object"; + jlo_sym.type = struct_typet(); + jlo_sym.is_type = true; + java_root_class(jlo_sym); + bool failed = symbol_table.add(jlo_sym); + CHECK_RETURN(!failed); + + // Add java.lang.String to symbol table + java_string_library_preprocesst preprocess; + preprocess.add_string_type("java.lang.String", symbol_table); + namespacet ns(symbol_table); + + // Declare a String named arg + symbol_typet java_string_type("java::java.lang.String"); + symbol_exprt expr("arg", java_string_type); + + WHEN("Initialisation code for a string is generated") + { + const codet code = + initialize_nondet_string_struct(expr, 20, loc, symbol_table); + + THEN("Code is produced") + { + std::vector code_string; + + const std::regex spaces("\\s+"); + const std::regex numbers("\\$[0-9]*"); + for(auto op : code.operands()) + { + const std::string line = 
from_expr(ns, "", op); + code_string.push_back( + std::regex_replace( + std::regex_replace(line, spaces, " "), numbers, "")); + } + + const std::vector reference_code = { // NOLINT + "int tmp_object_factory;", + "tmp_object_factory = NONDET(int);", + "__CPROVER_assume(tmp_object_factory >= 0);", + "__CPROVER_assume(tmp_object_factory <= 20);", + "char nondet_infinite_array[INFINITY()];", + "nondet_infinite_array = NONDET(char [INFINITY()]);", + "int return_array;", + "return_array = cprover_associate_array_to_pointer_func" + "(nondet_infinite_array, nondet_infinite_array);", + "int return_array;", + "return_array = cprover_associate_length_to_array_func" + "(nondet_infinite_array, tmp_object_factory);", + "arg = { .@java.lang.Object={ .@class_identifier" + "=\"java.lang.String\", .@lock=false }," + " .length=tmp_object_factory, " + ".data=nondet_infinite_array };"}; + + for(std::size_t i = 0; + i < code_string.size() && i < reference_code.size(); + ++i) + REQUIRE(code_string[i] == reference_code[i]); + + REQUIRE(code_string.size() == reference_code.size()); + } + } + } +} diff --git a/unit/java_bytecode/java_string_library_preprocess/convert_exprt_to_string_exprt.cpp b/unit/java_bytecode/java_string_library_preprocess/convert_exprt_to_string_exprt.cpp index 4d2a86944d8..78c85045929 100644 --- a/unit/java_bytecode/java_string_library_preprocess/convert_exprt_to_string_exprt.cpp +++ b/unit/java_bytecode/java_string_library_preprocess/convert_exprt_to_string_exprt.cpp @@ -17,7 +17,7 @@ #include #include -exprt convert_exprt_to_string_exprt_unit_test( +refined_string_exprt convert_exprt_to_string_exprt_unit_test( java_string_library_preprocesst &preprocess, const exprt &deref, const source_locationt &loc, @@ -30,30 +30,57 @@ exprt convert_exprt_to_string_exprt_unit_test( TEST_CASE("Convert exprt to string exprt") { - source_locationt loc; - symbol_tablet symbol_table; - namespacet ns(symbol_table); - code_blockt code; - java_string_library_preprocesst preprocess; - 
preprocess.add_string_type("java.lang.String", symbol_table); - symbol_typet java_string_type("java::java.lang.String"); - symbol_exprt expr("a", pointer_type(java_string_type)); - convert_exprt_to_string_exprt_unit_test( - preprocess, expr, loc, symbol_table, code); - register_language(new_java_bytecode_language); - - std::vector code_string; - for(auto op : code.operands()) - code_string.push_back(from_expr(ns, "", op)); - - REQUIRE(code_string.size()==7); - REQUIRE(code_string[0]=="int cprover_string_length$1;"); - REQUIRE(code_string[1]=="char cprover_string_array$2[INFINITY()];"); - REQUIRE(code_string[2]=="cprover_string_length$1 = a->length;"); - REQUIRE(code_string[3]=="__CPROVER_assume(!(a->data == null));"); - REQUIRE(code_string[4]=="cprover_string_array$2 = *a->data;"); - REQUIRE(code_string[5]=="struct __CPROVER_refined_string_type { int length; " - "char content[INFINITY()]; } cprover_string$3;"); - REQUIRE(code_string[6]=="cprover_string$3 = { .=cprover_string_length$1, " - ".=cprover_string_array$2 };"); + GIVEN("A location, a string expression, and a symbol table") + { + source_locationt loc; + symbol_tablet symbol_table; + namespacet ns(symbol_table); + code_blockt code; + java_string_library_preprocesst preprocess; + preprocess.add_string_type("java.lang.String", symbol_table); + symbol_typet java_string_type("java::java.lang.String"); + symbol_exprt expr("a", pointer_type(java_string_type)); + + WHEN("String expression is converted to refined string expression") + { + refined_string_exprt string_expr = + convert_exprt_to_string_exprt_unit_test( + preprocess, expr, loc, symbol_table, code); + + THEN("The type of the returd expression is that of refined strings") + { + REQUIRE(string_expr.id() == ID_struct); + REQUIRE(is_refined_string_type(string_expr.type())); + } + + THEN("Code is produced") + { + register_language(new_java_bytecode_language); + + std::vector code_string; + const std::regex spaces("\\s+"); + const std::regex 
numbers("\\$[0-9]*"); + for(auto op : code.operands()) + { + const std::string line = from_expr(ns, "", op); + code_string.push_back( + std::regex_replace( + std::regex_replace(line, spaces, " "), numbers, "")); + } + + const std::vector reference_code = { // NOLINT + "char *cprover_string_content;", + "int cprover_string_length;", + "cprover_string_length = a->length;", + "cprover_string_content = a->data;"}; + + for(std::size_t i = 0; + i < code_string.size() && i < reference_code.size(); + ++i) + REQUIRE(code_string[i] == reference_code[i]); + + REQUIRE(code_string.size() == reference_code.size()); + } + } + } } diff --git a/unit/solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp b/unit/solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp index c90e0ebd27a..2b33ebaed4c 100644 --- a/unit/solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp +++ b/unit/solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp @@ -53,21 +53,37 @@ constant_exprt from_integer(const mp_integer &i) /// Creates a `string_exprt` of the proper string type. 
/// \param [in] str: string to convert /// \return corresponding `string_exprt` -string_exprt make_string_exprt(const std::string &str) +array_string_exprt make_string_exprt(const std::string &str) { const constant_exprt length=from_integer(str.length(), t.length_type()); - array_exprt content(t.array_type()); + array_exprt content(array_typet(t.char_type(), length)); for(const char c : str) content.copy_to_operands(from_integer(c, t.char_type())); - return string_exprt(length, content, t.string_type()); + return to_array_string_expr(content); +} + +/// Return a pointer to the data array of an array_string_exprt +/// \param arr: char array representing a string +/// \return pointer to the first character of the array +exprt get_data_pointer(const array_string_exprt &arr) +{ + return address_of_exprt(index_exprt(arr, from_integer(0, t.length_type()))); +} + +/// Creates a `refined_string_exprt` from the length and data pointer of `arr`. +/// \param [in] arr: char array representing a string +/// \return corresponding `refined_string_exprt` +refined_string_exprt make_refined_string_exprt(const array_string_exprt &arr) +{ + return refined_string_exprt(arr.length(), get_data_pointer(arr)); } /// For a constant `string_exprt`, creates a full index set.
/// \param [in] s: `string_exprt` to create index set for /// \return the corresponding index set -std::set full_index_set(const string_exprt &s) +std::set full_index_set(const array_string_exprt &s) { PRECONDITION(s.length().is_constant()); mp_integer n; @@ -97,7 +113,6 @@ exprt combine_lemmas(const std::vector &lemmas, const namespacet &ns) { // Conjunction of new lemmas exprt conj=conjunction(lemmas); - // Simplify simplify(conj, ns); @@ -146,12 +161,20 @@ SCENARIO("instantiate_not_contains", symbol_tablet symtbl; const namespacet ns(symtbl); + // Creating strings + const auto ab_array = make_string_exprt("ab"); + const auto b_array = make_string_exprt("b"); + const auto a_array = make_string_exprt("a"); + const auto empty_array = make_string_exprt(""); + const auto cd_array = make_string_exprt("cd"); + const auto ab = make_refined_string_exprt(ab_array); + const auto b = make_refined_string_exprt(b_array); + const auto a = make_refined_string_exprt(a_array); + const auto empty = make_refined_string_exprt(empty_array); + const auto cd = make_refined_string_exprt(cd_array); + GIVEN("The not_contains axioms of String.lastIndexOf(String, Int)") { - // Creating strings - const string_exprt ab=make_string_exprt("ab"); - const string_exprt b=make_string_exprt("b"); - // Creating "ab".lastIndexOf("b", 0) function_application_exprt func( symbol_exprt(ID_cprover_string_last_index_of_func), t.length_type()); @@ -194,8 +217,8 @@ SCENARIO("instantiate_not_contains", WHEN("we instantiate and simplify") { // Making index sets - const std::set index_set_ab=full_index_set(ab); - const std::set index_set_b=full_index_set(b); + const std::set index_set_ab = full_index_set(ab_array); + const std::set index_set_b = full_index_set(b_array); // List of new lemmas to be returned std::vector lemmas; @@ -228,9 +251,6 @@ SCENARIO("instantiate_not_contains", GIVEN("A vacuously true not_contains axioms") { - // Creating strings - const string_exprt a=make_string_exprt("a"); - // Make // 
forall x in [0, 0). true => (exists y in [0, 1). // { .=1, .={ (char)'a' } }[x+y] != { .=1, .={ (char)'b' } }[y] @@ -242,8 +262,8 @@ SCENARIO("instantiate_not_contains", true_exprt(), from_integer(0), from_integer(1), - a, - a); + a_array, + a_array); // Create witness for axiom symbol_tablet symtab; @@ -259,7 +279,7 @@ SCENARIO("instantiate_not_contains", WHEN("we instantiate and simplify") { // Making index sets - const std::set index_set_a=full_index_set(a); + const std::set index_set_a = full_index_set(a_array); // Instantiate the lemmas std::vector lemmas=instantiate_not_contains( @@ -285,10 +305,6 @@ SCENARIO("instantiate_not_contains", GIVEN("A trivially false (via empty existential) not_contains axioms") { - // Creating strings - const string_exprt a=make_string_exprt("a"); - const string_exprt b=make_string_exprt("b"); - // Make // forall x in [0, 1). true => (exists y in [0, 0). // { .=1, .={ (char)'a' } }[x+y] != { .=1, .={ (char)'b' } }[y] @@ -300,8 +316,8 @@ SCENARIO("instantiate_not_contains", true_exprt(), from_integer(0), from_integer(0), - a, - b); + a_array, + b_array); // Create witness for axiom symbol_tablet symtab; @@ -317,8 +333,8 @@ SCENARIO("instantiate_not_contains", WHEN("we instantiate and simplify") { // Making index sets - const std::set index_set_a=full_index_set(a); - const std::set index_set_b=full_index_set(b); + const std::set index_set_a = full_index_set(a_array); + const std::set index_set_b = full_index_set(b_array); // Instantiate the lemmas std::vector lemmas=instantiate_not_contains( @@ -344,10 +360,6 @@ SCENARIO("instantiate_not_contains", GIVEN("A not_contains axioms with an non-empty and empty string") { - // Creating strings - const string_exprt a=make_string_exprt("a"); - const string_exprt empty=make_string_exprt(""); - // Make // forall x in [0, 1). true => (exists y in [0, 0). 
// { .=1, .={ (char)'a' } }[x+y] != { .=0, .={ } }[y] @@ -359,8 +371,8 @@ SCENARIO("instantiate_not_contains", true_exprt(), from_integer(0), from_integer(0), - a, - empty); + a_array, + empty_array); // Create witness for axiom symbol_tablet symtab; @@ -376,7 +388,7 @@ SCENARIO("instantiate_not_contains", WHEN("we instantiate and simplify") { // Making index sets - const std::set index_set_a=full_index_set(a); + const std::set index_set_a = full_index_set(a_array); const std::set index_set_empty= {generator.fresh_exist_index("z", t.length_type())}; @@ -404,9 +416,6 @@ SCENARIO("instantiate_not_contains", GIVEN("A not_contains on the same string twice (hence is false)") { - // Creating strings - const string_exprt ab=make_string_exprt("ab"); - // Make // forall x in [0, 2). true => (exists y in [0, 2). // { .=2, .={ (char)'a', (char)'b'} }[x+y] != @@ -419,8 +428,8 @@ SCENARIO("instantiate_not_contains", true_exprt(), from_integer(0), from_integer(2), - ab, - ab); + ab_array, + ab_array); // Create witness for axiom symbol_tablet symtab; @@ -437,7 +446,7 @@ SCENARIO("instantiate_not_contains", WHEN("we instantiate and simplify") { // Making index sets - const std::set index_set_ab=full_index_set(ab); + const std::set index_set_ab = full_index_set(ab_array); // Instantiate the lemmas std::vector lemmas=instantiate_not_contains( @@ -463,10 +472,6 @@ SCENARIO("instantiate_not_contains", GIVEN("A not_contains on two string with no chars in common (hence is true)") { - // Creating strings - const string_exprt ab=make_string_exprt("ab"); - const string_exprt cd=make_string_exprt("cd"); - // Make // forall x in [0, 2). true => (exists y in [0, 2). 
// { .=2, .={ (char)'a', (char)'b'} }[x+y] != @@ -479,8 +484,8 @@ SCENARIO("instantiate_not_contains", true_exprt(), from_integer(0), from_integer(2), - ab, - cd); + ab_array, + cd_array); // Create witness for axiom symbol_tablet symtab; @@ -496,8 +501,8 @@ SCENARIO("instantiate_not_contains", WHEN("we instantiate and simplify") { // Making index sets - const std::set index_set_ab=full_index_set(ab); - const std::set index_set_cd=full_index_set(cd); + const std::set index_set_ab = full_index_set(ab_array); + const std::set index_set_cd = full_index_set(cd_array); // Instantiate the lemmas std::vector lemmas=instantiate_not_contains( diff --git a/unit/solvers/refinement/string_refinement/concretize_array.cpp b/unit/solvers/refinement/string_refinement/concretize_array.cpp index 8a7ae042c3f..834a97067e7 100644 --- a/unit/solvers/refinement/string_refinement/concretize_array.cpp +++ b/unit/solvers/refinement/string_refinement/concretize_array.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include SCENARIO("concretize_array_expression", @@ -46,7 +47,9 @@ SCENARIO("concretize_array_expression", index2)); // String max length is 50, so index 100 should get ignored. - const exprt concrete=concretize_arrays_in_expression(input_expr, 50); + symbol_tablet symbol_table; + namespacet ns(symbol_table); + const exprt concrete = concretize_arrays_in_expression(input_expr, 50, ns); // The expected result is `'0' + { 'x', 'x', 'y', 'y', 'y' }` array_exprt array(array_type); diff --git a/unit/solvers/refinement/string_refinement/union_find_replace.cpp b/unit/solvers/refinement/string_refinement/union_find_replace.cpp new file mode 100644 index 00000000000..92d700bb6ae --- /dev/null +++ b/unit/solvers/refinement/string_refinement/union_find_replace.cpp @@ -0,0 +1,76 @@ +/*******************************************************************\ + + Module: Unit tests for union_find_replacet in + solvers/refinement/string_refinement.cpp + + Author: DiffBlue Limited. 
All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include +#include +#include +#include + +SCENARIO("union_find_replace", "[core][solvers][refinement][string_refinement]") +{ + GIVEN("An empty dictionary") + { + union_find_replacet dict; + pointer_typet char_pointer_type = pointer_type(unsignedbv_typet(16)); + const symbol_exprt a("a", char_pointer_type); + const symbol_exprt b("b", char_pointer_type); + const symbol_exprt c("c", char_pointer_type); + const symbol_exprt d("d", char_pointer_type); + const symbol_exprt e("e", char_pointer_type); + const symbol_exprt f("f", char_pointer_type); + + WHEN("Relations a=b, a=c, d=b, e=f are added") + { + dict.make_union(a, b); + dict.make_union(a, c); + dict.make_union(d, b); + dict.make_union(e, f); + THEN("find(d)=find(c), but find(e)!=find(a)") + { + REQUIRE(dict.find(d) == dict.find(c)); // transitive equality + REQUIRE(dict.find(a) == dict.find(a)); // trivial equality + REQUIRE(dict.find(b) == dict.find(d)); // rhs only symbol + REQUIRE(dict.find(b) == dict.find(c)); // rhs only transitive + REQUIRE(dict.find(e) != dict.find(a)); // transitive not equal + REQUIRE(dict.find(f) != dict.find(a)); // transitive not equal + } + + GIVEN("Expressions a+e, a+d, c+f, c+d") + { + plus_exprt a_plus_e(a, e); + plus_exprt a_plus_d(a, d); + plus_exprt c_plus_f(c, f); + plus_exprt c_plus_d(c, d); + WHEN("We use the dictionary for replacement") + { + dict.replace_expr(a_plus_e); + dict.replace_expr(a_plus_d); + dict.replace_expr(c_plus_f); + dict.replace_expr(c_plus_d); + THEN("a+e=c+f but a+e!=c+d") + { + REQUIRE(a_plus_e == c_plus_f); + REQUIRE(a_plus_e != c_plus_d); + REQUIRE(a_plus_d == c_plus_d); + } + } + } + + THEN("Introducing cycles does not cause infinite loops or exceptions") + { + dict.make_union(c, d); + REQUIRE(dict.find(d) == dict.find(c)); + REQUIRE(dict.find(e) != dict.find(a)); + } + } + } +}