diff --git a/regression/strings/cprover-string-hack.h b/regression/strings/cprover-string-hack.h index 9160e935529..86da5dd638d 100644 --- a/regression/strings/cprover-string-hack.h +++ b/regression/strings/cprover-string-hack.h @@ -56,7 +56,7 @@ typedef unsigned char __CPROVER_char; ******************************************************************************/ extern __CPROVER_char __CPROVER_uninterpreted_string_char_at_func(__CPROVER_string str, int pos); extern __CPROVER_bool __CPROVER_uninterpreted_string_equal_func(__CPROVER_string str1, __CPROVER_string str2); -extern __CPROVER_string __CPROVER_uninterpreted_string_literal_func(); +extern __CPROVER_string __CPROVER_uninterpreted_string_literal_func(char * str); extern __CPROVER_char __CPROVER_uninterpreted_char_literal_func(); extern __CPROVER_string __CPROVER_uninterpreted_string_concat_func(__CPROVER_string str1, __CPROVER_string str2); extern int __CPROVER_uninterpreted_string_length_func(__CPROVER_string str); @@ -68,5 +68,6 @@ extern int __CPROVER_uninterpreted_string_index_of_func(__CPROVER_string str, __ extern int __CPROVER_uninterpreted_string_last_index_of_func(__CPROVER_string str, __CPROVER_char c); extern __CPROVER_string __CPROVER_uninterpreted_string_char_set_func(__CPROVER_string str, int pos, __CPROVER_char c); extern __CPROVER_string __CPROVER_uninterpreted_string_copy_func(__CPROVER_string str); -extern unsigned __CPROVER_uninterpreted_string_parse_int_func(__CPROVER_string str); -extern __CPROVER_string __CPROVER_uninterpreted_string_of_int_func(unsigned i); +extern int __CPROVER_uninterpreted_string_parse_int_func(__CPROVER_string str); +extern __CPROVER_string __CPROVER_uninterpreted_string_of_int_func(int i); + diff --git a/regression/strings/java_case/test.desc b/regression/strings/java_case/test.desc index 49ea16d56ff..782848fc4bc 100644 --- a/regression/strings/java_case/test.desc +++ b/regression/strings/java_case/test.desc @@ -1,6 +1,6 @@ CORE test_case.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_case.java line 11: SUCCESS$ diff --git a/regression/strings/java_char_array/test.desc b/regression/strings/java_char_array/test.desc new file mode 100644 index 00000000000..68467c16a5d --- /dev/null +++ b/regression/strings/java_char_array/test.desc @@ -0,0 +1,9 @@ +CORE +test_char_array.class +--string-refine +^EXIT=10$ +^SIGNAL=0$ +^\[assertion.1\] assertion at file test_char_array.java line 11: SUCCESS$ +^\[assertion.2\] assertion at file test_char_array.java line 12: SUCCESS$ +^\[assertion.3\] assertion at file test_char_array.java line 13: FAILURE$ +-- \ No newline at end of file diff --git a/regression/strings/java_char_array/test_char_array.class b/regression/strings/java_char_array/test_char_array.class new file mode 100644 index 00000000000..836942da134 Binary files /dev/null and b/regression/strings/java_char_array/test_char_array.class differ diff --git a/regression/strings/java_char_array/test_char_array.java b/regression/strings/java_char_array/test_char_array.java new file mode 100644 index 00000000000..428dc9210cf --- /dev/null +++ b/regression/strings/java_char_array/test_char_array.java @@ -0,0 +1,15 @@ +public class test_char_array { + + public static void main(String[] argv) + { + String s = "abc"; + char [] str = s.toCharArray(); + int[] test = new int[312]; + char c = str[2]; + String t = argv[0]; + char d = t.charAt(0); + assert(str.length == 3); + assert(c == 'c'); + assert(c == d || d < 'a' || d > 'z' ); + } +} diff --git a/regression/strings/java_char_array_init/test.desc b/regression/strings/java_char_array_init/test.desc new file mode 100644 index 00000000000..78583ae254a --- /dev/null +++ b/regression/strings/java_char_array_init/test.desc @@ -0,0 +1,11 @@ +CORE +test_init.class +--string-refine +^EXIT=10$ +^SIGNAL=0$ +^\[assertion.1\] assertion at file test_init.java line 16: SUCCESS$ +^\[assertion.2\] assertion at file test_init.java line 17: SUCCESS$ +^\[assertion.3\] assertion at file test_init.java line 18: SUCCESS$ +^\[assertion.4\] assertion at file test_init.java line 20: SUCCESS$ +^\[assertion.5\] assertion at file test_init.java line 21: FAILURE$ +-- \ No newline at end of file diff --git a/regression/strings/java_char_array_init/test_init.class b/regression/strings/java_char_array_init/test_init.class new file mode 100644 index 00000000000..be3baee56bd Binary files /dev/null and b/regression/strings/java_char_array_init/test_init.class differ diff --git a/regression/strings/java_char_array_init/test_init.java b/regression/strings/java_char_array_init/test_init.java new file mode 100644 index 00000000000..5f4e220844c --- /dev/null +++ b/regression/strings/java_char_array_init/test_init.java @@ -0,0 +1,23 @@ +public class test_init { + + public static void main(String[] argv) + { + char [] str = new char[10]; + str[0] = 'H'; + str[1] = 'e'; + str[2] = 'l'; + str[3] = 'l'; + str[4] = 'o'; + String s = new String(str); + String t = new String(str,1,2); + + System.out.println(s); + System.out.println(s.length()); + assert(s.startsWith("Hello")); + assert(s.length() == 10); + assert(t.equals("el")); + String u = String.valueOf(str,3,2); + assert(u.equals("lo")); + assert(s.equals("Hello") || !t.equals("el") || !u.equals("lo")); + } +} diff --git a/regression/strings/java_char_at/test.desc b/regression/strings/java_char_at/test.desc index babcc395bcf..95f7f047240 100644 --- a/regression/strings/java_char_at/test.desc +++ b/regression/strings/java_char_at/test.desc @@ -1,6 +1,6 @@ CORE test_char_at.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_char_at.java line 11: SUCCESS$ diff --git a/regression/strings/java_code_point/test.desc b/regression/strings/java_code_point/test.desc index 1543a327fa4..75c871d3af3 100644 --- a/regression/strings/java_code_point/test.desc +++ b/regression/strings/java_code_point/test.desc @@ -1,6 +1,6 @@ CORE test_code_point.class ---pass +--string-refine ^EXIT=0$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_code_point.java line 5: SUCCESS$ diff --git a/regression/strings/java_compare/test.desc b/regression/strings/java_compare/test.desc index c500900a21e..98373d1cec6 100644 --- a/regression/strings/java_compare/test.desc +++ b/regression/strings/java_compare/test.desc @@ -1,6 +1,6 @@ CORE test_compare.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_compare.java line 6: SUCCESS$ diff --git a/regression/strings/java_concat/test.desc b/regression/strings/java_concat/test.desc index c6c0b193e5b..8dc65a8539d 100644 --- a/regression/strings/java_concat/test.desc +++ b/regression/strings/java_concat/test.desc @@ -1,6 +1,6 @@ CORE test_concat.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_concat.java line 9: SUCCESS$ diff --git a/regression/strings/java_contains/test.desc b/regression/strings/java_contains/test.desc index ade6b433bf1..4afb02b28ea 100644 --- a/regression/strings/java_contains/test.desc +++ b/regression/strings/java_contains/test.desc @@ -1,6 +1,6 @@ KNOWNBUG test_contains.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_contains.java line 7: SUCCESS$ diff --git a/regression/strings/java_delete/test.desc b/regression/strings/java_delete/test.desc index 377ada44770..4a61fbd4fdc 100644 --- a/regression/strings/java_delete/test.desc +++ b/regression/strings/java_delete/test.desc @@ -1,6 +1,6 @@ KNOWNBUG test_delete.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_delete.java line 11: SUCCESS$ diff --git a/regression/strings/java_easychair/test.desc b/regression/strings/java_easychair/test.desc index bd68fd1f60a..e47cb506ce7 100644 --- a/regression/strings/java_easychair/test.desc +++ b/regression/strings/java_easychair/test.desc @@ -1,6 +1,6 @@ KNOWNBUG easychair.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file easychair.java line 29: FAILURE$ diff --git a/regression/strings/java_empty/test.desc b/regression/strings/java_empty/test.desc index 56b21e2041c..44debdc3753 100644 --- a/regression/strings/java_empty/test.desc +++ b/regression/strings/java_empty/test.desc @@ -1,6 +1,6 @@ CORE test_empty.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_empty.java line 4: SUCCESS$ diff --git a/regression/strings/java_equal/test.desc b/regression/strings/java_equal/test.desc index 6375cfdc3d8..62f259bd750 100644 --- a/regression/strings/java_equal/test.desc +++ b/regression/strings/java_equal/test.desc @@ -1,6 +1,6 @@ CORE test_equal.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_equal.java line 7: FAILURE$ diff --git a/regression/strings/java_float/test.desc b/regression/strings/java_float/test.desc index 5edfd6eea22..dc77baa6a81 100644 --- a/regression/strings/java_float/test.desc +++ b/regression/strings/java_float/test.desc @@ -1,6 +1,6 @@ CORE test_float.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_float.java line 14: SUCCESS$ diff --git a/regression/strings/java_index_of/test.desc b/regression/strings/java_index_of/test.desc index dd5c60464d5..4d537ae191d 100644 --- a/regression/strings/java_index_of/test.desc +++ b/regression/strings/java_index_of/test.desc @@ -1,6 +1,6 @@ CORE test_index_of.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_index_of.java line 13: SUCCESS$ diff --git a/regression/strings/java_int/test.desc b/regression/strings/java_int/test.desc index 5211656c61b..afece95babd 100644 --- a/regression/strings/java_int/test.desc +++ b/regression/strings/java_int/test.desc @@ -1,6 +1,6 @@ CORE test_int.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_int.java line 8: SUCCESS$ diff --git a/regression/strings/java_prefix/test.desc b/regression/strings/java_prefix/test.desc index b234bba1788..a4f044bdcb0 100644 --- a/regression/strings/java_prefix/test.desc +++ b/regression/strings/java_prefix/test.desc @@ -1,6 +1,6 @@ CORE test_prefix.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_prefix.java line 14: SUCCESS$ diff --git a/regression/strings/java_replace/test.desc b/regression/strings/java_replace/test.desc index a5b15efd737..c3c0e2db362 100644 --- a/regression/strings/java_replace/test.desc +++ b/regression/strings/java_replace/test.desc @@ -1,6 +1,6 @@ CORE test_replace.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_replace.java line 6: SUCCESS$ diff --git a/regression/strings/java_set_length/test.desc b/regression/strings/java_set_length/test.desc index 66cf52835ac..6879ae6f470 100644 --- a/regression/strings/java_set_length/test.desc +++ b/regression/strings/java_set_length/test.desc @@ -1,6 +1,6 @@ CORE test_set_length.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_set_length.java line 8: SUCCESS$ diff --git a/regression/strings/java_string_builder/test.desc b/regression/strings/java_string_builder/test.desc index c0b3b6a51ce..de4bdc4c52a 100644 --- a/regression/strings/java_string_builder/test.desc +++ b/regression/strings/java_string_builder/test.desc @@ -1,6 +1,6 @@ CORE test_string_builder.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_string_builder.java line 11: SUCCESS$ diff --git a/regression/strings/java_string_builder_insert/test.desc b/regression/strings/java_string_builder_insert/test.desc new file mode 100644 index 00000000000..54065573b19 --- /dev/null +++ b/regression/strings/java_string_builder_insert/test.desc @@ -0,0 +1,8 @@ +CORE +test_insert.class +--string-refine +^EXIT=10$ +^SIGNAL=0$ +^\[assertion.1\] assertion at file test_insert.java line 17: SUCCESS$ +^\[assertion.2\] assertion at file test_insert.java line 18: FAILURE$ +-- diff --git a/regression/strings/java_string_builder_insert/test_insert.class b/regression/strings/java_string_builder_insert/test_insert.class new file mode 100644 index 00000000000..69a32d7f93f Binary files /dev/null and b/regression/strings/java_string_builder_insert/test_insert.class differ diff --git a/regression/strings/java_string_builder_insert/test_insert.java b/regression/strings/java_string_builder_insert/test_insert.java new file mode 100644 index 00000000000..1fac897c5ed --- /dev/null +++ b/regression/strings/java_string_builder_insert/test_insert.java @@ -0,0 +1,20 @@ +public class test_insert { + + public static void main(String[] argv) + { + char [] str = new char[5]; + str[0] = 'H'; + str[1] = 'e'; + str[2] = 'l'; + str[3] = 'l'; + str[4] = 'o'; + + + StringBuilder sb = new StringBuilder(" world"); + sb.insert(0,str); + String s = sb.toString(); + System.out.println(s); + assert(s.equals("Hello world")); + assert(!s.equals("Hello world")); + } +} diff --git a/regression/strings/java_string_builder_length/test.desc b/regression/strings/java_string_builder_length/test.desc index a15660ee85b..a9721e2b398 100644 --- a/regression/strings/java_string_builder_length/test.desc +++ b/regression/strings/java_string_builder_length/test.desc @@ -1,6 +1,6 @@ CORE test_sb_length.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ \[assertion.1\] assertion at file test_sb_length.java line 6: SUCCESS$ diff --git a/regression/strings/java_strlen/test.desc b/regression/strings/java_strlen/test.desc index 78007186493..ae3f7421ab5 100644 --- a/regression/strings/java_strlen/test.desc +++ b/regression/strings/java_strlen/test.desc @@ -1,6 +1,6 @@ CORE test_length.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_length.java line 10: SUCCESS$ diff --git a/regression/strings/java_substring/test.desc b/regression/strings/java_substring/test.desc index 78a9bcca9cb..b3fa34e1474 100644 --- a/regression/strings/java_substring/test.desc +++ b/regression/strings/java_substring/test.desc @@ -1,6 +1,6 @@ CORE test_substring.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_substring.java line 12: SUCCESS$ diff --git a/regression/strings/java_suffix/test.desc b/regression/strings/java_suffix/test.desc index f9472f03b47..6145a8258e1 100644 --- a/regression/strings/java_suffix/test.desc +++ b/regression/strings/java_suffix/test.desc @@ -1,6 +1,6 @@ CORE test_suffix.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_suffix.java line 12: SUCCESS$ diff --git a/regression/strings/java_trim/test.desc b/regression/strings/java_trim/test.desc index fa0e10a1ca7..5f540c5f2c0 100644 --- a/regression/strings/java_trim/test.desc +++ b/regression/strings/java_trim/test.desc @@ -1,6 +1,6 @@ CORE test_trim.class ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[assertion.1\] assertion at file test_trim.java line 5: SUCCESS$ diff --git a/regression/strings/test1/test.c b/regression/strings/test1/test.c index d3830e38a3f..c5e10eb1fc7 100644 --- a/regression/strings/test1/test.c +++ b/regression/strings/test1/test.c @@ -5,13 +5,13 @@ int main() { __CPROVER_string s; - __CPROVER_char c1, c2; + char c1, c2; int i; int j; i = 2; s = __CPROVER_string_literal("pippo"); c1 = __CPROVER_char_at(s, i); - c2 = __CPROVER_char_literal("p"); + c2 = 'p'; assert (c1 == c2); assert (c1 != c2); return 0; diff --git a/regression/strings/test1/test.desc b/regression/strings/test1/test.desc index f622390356d..9522b31c347 100644 --- a/regression/strings/test1/test.desc +++ b/regression/strings/test1/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion c1 == c2: SUCCESS$ diff --git a/regression/strings/test2/test.desc b/regression/strings/test2/test.desc index d3054f813f1..e6d3d1d5130 100644 --- a/regression/strings/test2/test.desc +++ b/regression/strings/test2/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion n == 5: SUCCESS$ diff --git a/regression/strings/test3.1/test.desc b/regression/strings/test3.1/test.desc index 0f5bd6ccca7..d0214dfb10b 100644 --- a/regression/strings/test3.1/test.desc +++ b/regression/strings/test3.1/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ diff --git a/regression/strings/test3.2/test.desc b/regression/strings/test3.2/test.desc index 0f5bd6ccca7..d0214dfb10b 100644 --- a/regression/strings/test3.2/test.desc +++ b/regression/strings/test3.2/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ diff --git a/regression/strings/test3.3/test.c b/regression/strings/test3.3/test.c index 35e25d82ee5..bef96c6cd4e 100644 --- a/regression/strings/test3.3/test.c +++ b/regression/strings/test3.3/test.c @@ -16,7 +16,7 @@ int main() // proving the assertions individually seems to be much faster //assert(__CPROVER_string_length(s) == i + 5); //assert(__CPROVER_string_issuffix(__CPROVER_string_literal("po"), s)); - assert(__CPROVER_char_at(s, i) == __CPROVER_char_literal("p")); + assert(__CPROVER_char_at(s, i) == 'p'); return 0; } diff --git a/regression/strings/test3.3/test.desc b/regression/strings/test3.3/test.desc index 0f5bd6ccca7..d0214dfb10b 100644 --- a/regression/strings/test3.3/test.desc +++ b/regression/strings/test3.3/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ diff --git a/regression/strings/test3.4/test.desc b/regression/strings/test3.4/test.desc index dbf3c40cfdb..ae761559453 100644 --- a/regression/strings/test3.4/test.desc +++ b/regression/strings/test3.4/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^VERIFICATION FAILED$ diff --git a/regression/strings/test3/test.c b/regression/strings/test3/test.c index 2fa4b22e017..cbfc398099e 100644 --- a/regression/strings/test3/test.c +++ b/regression/strings/test3/test.c @@ -14,7 +14,7 @@ int main() assert(__CPROVER_string_length(s) == i + 5); assert(__CPROVER_string_issuffix(__CPROVER_string_literal("po"),s)); - assert(__CPROVER_char_at(s, i) == __CPROVER_char_literal("p")); + assert(__CPROVER_char_at(s, i) == 'p'); assert(__CPROVER_string_issuffix(__CPROVER_string_literal("p!o"), s)); return 0; diff --git a/regression/strings/test3/test.desc b/regression/strings/test3/test.desc index 6cacec86a19..0df79c99ea1 100644 --- a/regression/strings/test3/test.desc +++ b/regression/strings/test3/test.desc @@ -1,10 +1,10 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_length_func(s) == i + 5: SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_is_suffix_func(__CPROVER_uninterpreted_string_literal_func(\"po\"), s): SUCCESS$ -^\[main.assertion.3\] assertion __CPROVER_uninterpreted_string_char_at_func(s, i) == __CPROVER_uninterpreted_char_literal_func(\"p\"): SUCCESS$ -^\[main.assertion.4\] assertion __CPROVER_uninterpreted_string_is_suffix_func(__CPROVER_uninterpreted_string_literal_func(\"p!o\"), s): FAILURE$ +^\[main.assertion.1\] assertion __CPROVER_string_length(s) == i + 5: SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_string_issuffix(__CPROVER_string_literal(\"po\"),s): SUCCESS$ +^\[main.assertion.3\] assertion __CPROVER_char_at(s, i) == .p.: SUCCESS$ +^\[main.assertion.4\] assertion __CPROVER_string_issuffix(__CPROVER_string_literal(\"p!o\"), s): FAILURE$ -- diff --git a/regression/strings/test4/test.c b/regression/strings/test4/test.c index d73324f8ef4..3c768717823 100644 --- a/regression/strings/test4/test.c +++ b/regression/strings/test4/test.c @@ -9,7 +9,7 @@ int main() int j; i = 2; s = __CPROVER_string_literal("pippo"); - if (__CPROVER_char_at(s, i) == __CPROVER_char_literal("p")) { + if (__CPROVER_char_at(s, i) == 'p') { j = 1; } assert(j == 1); diff --git a/regression/strings/test4/test.desc b/regression/strings/test4/test.desc index 0f5bd6ccca7..d0214dfb10b 100644 --- a/regression/strings/test4/test.desc +++ b/regression/strings/test4/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ diff --git a/regression/strings/test5/test.desc b/regression/strings/test5/test.desc index dbf3c40cfdb..ae761559453 100644 --- a/regression/strings/test5/test.desc +++ b/regression/strings/test5/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^VERIFICATION FAILED$ diff --git a/regression/strings/test_char_set/test.desc b/regression/strings/test_char_set/test.desc index 8cf42dda8f3..12ab1ce28bf 100644 --- a/regression/strings/test_char_set/test.desc +++ b/regression/strings/test_char_set/test.desc @@ -1,8 +1,8 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func("apc")): SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func("abc")): FAILURE$ +^\[main.assertion.1\] assertion __CPROVER_string_equal(t, __CPROVER_string_literal("apc")): SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_string_equal(t, __CPROVER_string_literal("abc")): FAILURE$ -- diff --git a/regression/strings/test_concat/test.c b/regression/strings/test_concat/test.c index 007b9ca1b5c..923bffb46ae 100644 --- a/regression/strings/test_concat/test.c +++ b/regression/strings/test_concat/test.c @@ -10,7 +10,7 @@ int main() u = __CPROVER_string_concat(s, t); __CPROVER_char c = __CPROVER_char_at(u,i); - assert(c == __CPROVER_char_literal("p")); - assert(__CPROVER_char_at(u,2) == __CPROVER_char_literal("p")); + assert(c == 'p'); + assert(__CPROVER_char_at(u,2) == 'p'); return 0; } diff --git a/regression/strings/test_concat/test.desc b/regression/strings/test_concat/test.desc index e5d8b30d6da..a7c68f62e25 100644 --- a/regression/strings/test_concat/test.desc +++ b/regression/strings/test_concat/test.desc @@ -1,8 +1,8 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion c == __CPROVER_uninterpreted_char_literal_func(\"p\"): SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_char_at_func(u, 2) == __CPROVER_uninterpreted_char_literal_func(\"p\"): FAILURE$ +^\[main.assertion.1\] assertion c == .p.: SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_char_at(u,2) == .p.: FAILURE$ -- diff --git a/regression/strings/test_contains/test.desc b/regression/strings/test_contains/test.desc index 8275425c548..cd8d4102e17 100644 --- a/regression/strings/test_contains/test.desc +++ b/regression/strings/test_contains/test.desc @@ -1,6 +1,6 @@ KNOWNBUG test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion !__CPROVER_uninterpreted_string_contains_func(t, __CPROVER_uninterpreted_string_literal_func(\"3\")): SUCCESS$ diff --git a/regression/strings/test_equal/test.desc b/regression/strings/test_equal/test.desc index 81ad6913856..86d583a12b0 100644 --- a/regression/strings/test_equal/test.desc +++ b/regression/strings/test_equal/test.desc @@ -1,8 +1,8 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_equal_func(s, __CPROVER_uninterpreted_string_literal_func(\"pippo\")): SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_equal_func(s, __CPROVER_uninterpreted_string_literal_func(\"mippo\")): FAILURE$ +^\[main.assertion.1\] assertion __CPROVER_string_equal(s, __CPROVER_string_literal("pippo")): SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_string_equal(s, __CPROVER_string_literal("mippo")): FAILURE$ -- diff --git a/regression/strings/test_index_of/test.c b/regression/strings/test_index_of/test.c index d64d3c2d66e..c182e7952ab 100644 --- a/regression/strings/test_index_of/test.c +++ b/regression/strings/test_index_of/test.c @@ -7,7 +7,7 @@ int main(){ __CPROVER_string str; int firstSlash = __CPROVER_string_index_of(str,'/'); //__CPROVER_char_literal("/")); - int lastSlash = __CPROVER_string_last_index_of(str,__CPROVER_char_literal("/")); + int lastSlash = __CPROVER_string_last_index_of(str,'/'); __CPROVER_assume(__CPROVER_string_equal(str, __CPROVER_string_literal("abc/abc/abc"))); diff --git a/regression/strings/test_index_of/test.desc b/regression/strings/test_index_of/test.desc index 6d9ddbc6281..7a8759cf7d1 100644 --- a/regression/strings/test_index_of/test.desc +++ b/regression/strings/test_index_of/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion firstSlash == 3: SUCCESS$ diff --git a/regression/strings/test_int/test.c b/regression/strings/test_int/test.c index 3f8f8651783..28aa9c7e156 100644 --- a/regression/strings/test_int/test.c +++ b/regression/strings/test_int/test.c @@ -5,12 +5,12 @@ int main() { __CPROVER_string s; - unsigned i = 10; + int i = 10; s = __CPROVER_string_of_int(123); assert(__CPROVER_char_at(s,0) == '1'); assert(__CPROVER_char_at(s,1) == '2'); - unsigned j = __CPROVER_parse_int(__CPROVER_string_literal("234")); + int j = __CPROVER_parse_int(__CPROVER_string_literal("234")); assert(j == 234); assert(j < 233 || __CPROVER_char_at(s,2) == '4'); diff --git a/regression/strings/test_int/test.desc b/regression/strings/test_int/test.desc index e46e43ed936..bdcc1519aa1 100644 --- a/regression/strings/test_int/test.desc +++ b/regression/strings/test_int/test.desc @@ -1,10 +1,10 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_char_at_func(s, 0) == .1.: SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_char_at_func(s, 1) == .2.: SUCCESS$ +^\[main.assertion.1\] assertion __CPROVER_char_at(s,0) == .1.: SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_char_at(s,1) == .2.: SUCCESS$ ^\[main.assertion.3\] assertion j == 234: SUCCESS$ -^\[main.assertion.4\] assertion j < 233 || __CPROVER_uninterpreted_string_char_at_func(s, 2) == .4.: FAILURE$ +^\[main.assertion.4\] assertion j < 233 || __CPROVER_char_at(s,2) == .4.: FAILURE$ -- diff --git a/regression/strings/test_pass1/test.desc b/regression/strings/test_pass1/test.desc index 7548b6e91ef..1779aeec664 100644 --- a/regression/strings/test_pass1/test.desc +++ b/regression/strings/test_pass1/test.desc @@ -1,6 +1,6 @@ KNOWNBUG test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func(\"a\")): SUCCESS diff --git a/regression/strings/test_pass_pc3/test.desc b/regression/strings/test_pass_pc3/test.desc index 88f4659c45d..e5acffb2923 100644 --- a/regression/strings/test_pass_pc3/test.desc +++ b/regression/strings/test_pass_pc3/test.desc @@ -1,8 +1,8 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_length_func(s3) == 0: FAILURE$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_length_func(s3) < 2: SUCCESS$ +^\[main.assertion.1\] assertion __CPROVER_string_length(s3) == 0: FAILURE$ +^\[main.assertion.2\] assertion __CPROVER_string_length(s3) < 2: SUCCESS$ ^VERIFICATION FAILED$ diff --git a/regression/strings/test_prefix/test.desc b/regression/strings/test_prefix/test.desc index 187565433e4..4985862fefa 100644 --- a/regression/strings/test_prefix/test.desc +++ b/regression/strings/test_prefix/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion b: SUCCESS$ diff --git a/regression/strings/test_strlen/test.desc b/regression/strings/test_strlen/test.desc index a35e2499c9f..d56adaffe82 100644 --- a/regression/strings/test_strlen/test.desc +++ b/regression/strings/test_strlen/test.desc @@ -1,6 +1,6 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ ^\[main.assertion.1\] assertion len_s == len_t: SUCCESS$ diff --git a/regression/strings/test_substring/test.desc b/regression/strings/test_substring/test.desc index 327d5bbe1f2..a10fee36c8f 100644 --- a/regression/strings/test_substring/test.desc +++ b/regression/strings/test_substring/test.desc @@ -1,10 +1,10 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func(\"cd\")): SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func(\"cc\")): FAILURE$ -^\[main.assertion.3\] assertion !__CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func(\"bc\")): SUCCESS$ -^\[main.assertion.4\] assertion !__CPROVER_uninterpreted_string_equal_func(t, __CPROVER_uninterpreted_string_literal_func(\"cd\")): FAILURE$ +^\[main.assertion.1\] assertion __CPROVER_string_equal(t,__CPROVER_string_literal("cd")): SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_string_equal(t,__CPROVER_string_literal("cc")): FAILURE$ +^\[main.assertion.3\] assertion !__CPROVER_string_equal(t,__CPROVER_string_literal("bc")): SUCCESS$ +^\[main.assertion.4\] assertion !__CPROVER_string_equal(t,__CPROVER_string_literal("cd")): FAILURE$ -- diff --git a/regression/strings/test_suffix/test.desc b/regression/strings/test_suffix/test.desc index e0e8af7704c..505d0071f8a 100644 --- a/regression/strings/test_suffix/test.desc +++ b/regression/strings/test_suffix/test.desc @@ -1,8 +1,8 @@ CORE test.c ---pass +--string-refine ^EXIT=10$ ^SIGNAL=0$ -^\[main.assertion.1\] assertion __CPROVER_uninterpreted_string_is_suffix_func(__CPROVER_uninterpreted_string_literal_func(\"po\"), s): SUCCESS$ -^\[main.assertion.2\] assertion __CPROVER_uninterpreted_string_is_suffix_func(__CPROVER_uninterpreted_string_literal_func(\"pp\"), s): FAILURE$ +^\[main.assertion.1\] assertion __CPROVER_string_issuffix(__CPROVER_string_literal("po"),s): SUCCESS$ +^\[main.assertion.2\] assertion __CPROVER_string_issuffix(__CPROVER_string_literal("pp"),s): FAILURE$ -- diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index de1b7060898..16f3d82f142 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -22,6 +22,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -339,6 +340,11 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option("refine-arithmetic", true); } + if(cmdline.isset("string-refine")) + { + options.set_option("string-refine", true); + } + if(cmdline.isset("max-node-refinement")) options.set_option("max-node-refinement", cmdline.get_value("max-node-refinement")); @@ -913,6 +919,13 @@ bool cbmc_parse_optionst::process_goto_program( status() << "Partial Inlining" << eom; goto_partial_inline(goto_functions, ns, ui_message_handler); + + if(cmdline.isset("string-refine")) + { + status() << "Preprocessing for string refinement" << eom; + string_refine_preprocesst(symbol_table, goto_functions, ui_message_handler); + } + // remove returns, gcc vectors, complex remove_returns(symbol_table, goto_functions); remove_vector(symbol_table, goto_functions); @@ -1188,6 +1201,7 @@ void cbmc_parse_optionst::help() " --yices use Yices\n" " --z3 use Z3\n" " --refine use refinement procedure (experimental)\n" + " --string-refine use string refinement (experimental)\n" " --outfile filename output formula to given file\n" " --arrays-uf-never never turn arrays into uninterpreted functions\n" " --arrays-uf-always always turn arrays into uninterpreted functions\n" diff --git a/src/cbmc/cbmc_parse_options.h b/src/cbmc/cbmc_parse_options.h index 16ea202ba8c..1a687a54c41 100644 --- a/src/cbmc/cbmc_parse_options.h +++ b/src/cbmc/cbmc_parse_options.h @@ -36,7 +36,7 @@ class optionst; "(no-sat-preprocessor)" \ "(no-pretty-names)(beautify)" \ "(fixedbv)" \ - "(dimacs)(refine)(max-node-refinement):(refine-arrays)(refine-arithmetic)(aig)" \ + "(dimacs)(refine)(max-node-refinement):(refine-arrays)(refine-arithmetic)(aig)(string-refine)" \ "(16)(32)(64)(LP64)(ILP64)(LLP64)(ILP32)(LP32)" \ "(little-endian)(big-endian)" \ "(show-goto-functions)(show-loops)" \ diff --git a/src/cbmc/cbmc_solvers.cpp b/src/cbmc/cbmc_solvers.cpp index 95d9b35cf38..8595a26871c 100644 --- a/src/cbmc/cbmc_solvers.cpp +++ b/src/cbmc/cbmc_solvers.cpp @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -324,6 +325,29 @@ cbmc_solverst::solvert* cbmc_solverst::get_bv_refinement() /*******************************************************************\ +Function: cbmc_solverst::get_string_refinement + + Inputs: + + Outputs: + + Purpose: + +\*******************************************************************/ + +cbmc_solverst::solvert* cbmc_solverst::get_string_refinement() +{ + propt *prop; + prop=new satcheck_no_simplifiert(); + prop->set_message_handler(get_message_handler()); + + string_refinementt *string_refinement = new string_refinementt(ns, *prop); + string_refinement->set_ui(ui); + return new cbmc_solver_with_propt(string_refinement, prop); +} + +/*******************************************************************\ + Function: cbmc_solverst::get_smt1 Inputs: diff --git a/src/cbmc/cbmc_solvers.h b/src/cbmc/cbmc_solvers.h index 765b3bbfd13..d4e60bbab4b 100644 --- a/src/cbmc/cbmc_solvers.h +++ b/src/cbmc/cbmc_solvers.h @@ -84,6 +84,8 @@ class cbmc_solverst:public messaget solver = get_dimacs(); else if(options.get_bool_option("refine")) solver = get_bv_refinement(); + else if(options.get_bool_option("string-refine")) + solver = get_string_refinement(); else if(options.get_bool_option("smt1")) solver = get_smt1(get_smt1_solver_type()); else if(options.get_bool_option("smt2")) @@ -111,6 +113,7 @@ class cbmc_solverst:public messaget solvert* get_default(); solvert* get_dimacs(); solvert* get_bv_refinement(); + solvert* get_string_refinement(); solvert* get_smt1(smt1_dect::solvert solver); solvert* get_smt2(smt2_dect::solvert solver); diff --git a/src/goto-programs/Makefile b/src/goto-programs/Makefile index 932508b0edf..575bad87162 100644 --- a/src/goto-programs/Makefile +++ b/src/goto-programs/Makefile @@ -16,7 +16,8 @@ SRC = goto_convert.cpp goto_convert_function_call.cpp \ remove_returns.cpp osx_fat_reader.cpp remove_complex.cpp \ goto_trace.cpp xml_goto_trace.cpp vcd_goto_trace.cpp \ graphml_witness.cpp remove_virtual_functions.cpp \ - class_hierarchy.cpp show_goto_functions.cpp get_goto_model.cpp + class_hierarchy.cpp show_goto_functions.cpp get_goto_model.cpp \ + string_refine_preprocess.cpp INCLUDES= -I .. diff --git a/src/goto-programs/string_refine_preprocess.cpp b/src/goto-programs/string_refine_preprocess.cpp new file mode 100644 index 00000000000..73fd6e622b7 --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.cpp @@ -0,0 +1,707 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the PASS algorithm + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#include "string_refine_preprocess.h" + +#include +#include +#include +#include +#include + +symbol_exprt string_refine_preprocesst::new_tmp_symbol +(const std::string &name, const typet &type) +{ + auxiliary_symbolt tmp_symbol; + tmp_symbol.base_name=name; + tmp_symbol.is_static_lifetime=false; + tmp_symbol.mode=ID_java; + tmp_symbol.name=name; + tmp_symbol.type=type; + symbol_table.add(tmp_symbol); + return symbol_exprt(name, type); +} + +void string_refine_preprocesst::declare_function +(irep_idt function_name, const typet &type) +{ + auxiliary_symbolt func_symbol; + func_symbol.base_name=function_name; + func_symbol.is_static_lifetime=false; + func_symbol.mode=ID_java; + func_symbol.name=function_name; + func_symbol.type=type; + symbol_table.add(func_symbol); + goto_functions.function_map[function_name]; +} + +void string_refine_preprocesst::make_string_function +(goto_programt::instructionst::iterator & i_it, irep_idt function_name) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + code_typet function_type=to_code_type(function_call.function().type()); + declare_function(function_name, function_type); + function_application_exprt rhs; + rhs.type()=function_type.return_type(); + rhs.add_source_location()=function_call.source_location(); + rhs.function()=symbol_exprt(function_name); + for(std::size_t i=0; imake_assignment(); + i_it->code=assignment; +} + +void string_refine_preprocesst::make_string_function_call +(goto_programt::instructionst::iterator & i_it, irep_idt function_name) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + code_typet function_type=to_code_type(function_call.function().type()); + declare_function(function_name, function_type); + function_application_exprt rhs; + rhs.type()=function_call.arguments()[0].type(); + rhs.add_source_location()=function_call.source_location(); + rhs.function()=symbol_exprt(function_name); + for(std::size_t i=1; imake_assignment(); + i_it->code=assignment; +} + +void string_refine_preprocesst::make_string_function_side_effect +(goto_programt::instructionst::iterator & i_it, irep_idt function_name) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + code_typet function_type=to_code_type(function_call.function().type()); + declare_function(function_name, function_type); + function_application_exprt rhs; + typet return_type = function_call.arguments()[0].type(); + rhs.type()=return_type; + rhs.add_source_location()=function_call.source_location(); + rhs.function()=symbol_exprt(function_name); + for(std::size_t i=0; imake_assignment(); + i_it->code=assignment; +} + +void string_refine_preprocesst::make_to_char_array_function +(goto_programt & goto_program, goto_programt::instructionst::iterator & i_it) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + if(function_call.lhs().type().id()!=ID_pointer) + debug() << "string_refine_preprocesst::make_to_char_array_function: " + << "the function call should return a pointer" << eom; + + typet object_type=function_call.lhs().type().subtype(); + exprt object_size=size_of_expr(object_type, ns); + + if(object_size.is_nil()) + debug() << "string_refine_preprocesst::make_to_char_array_function" + << "got nil object_size" << eom; + + auto location = function_call.source_location(); + std::vector new_code; + + side_effect_exprt malloc_expr(ID_malloc); + malloc_expr.copy_to_operands(object_size); + malloc_expr.type()=pointer_typet(object_type); + malloc_expr.add_source_location()=location; + + assert(function_call.arguments().size()>=1); + exprt string_argument = replace_string_literals(function_call.arguments()[0]); + typet string_argument_type = string_argument.type(); + + // tmp_assign = MALLOC(struct java::array[reference],sizeof(s)) + symbol_exprt tmp_assign = + new_tmp_symbol("tmp_assign", pointer_typet(object_type)); + + code_assignt assign_malloc(tmp_assign, malloc_expr); + new_code.push_back(assign_malloc); + + // tmp_assign->length = (int)__CPROVER_uninterpreted_string_length_func(s); + declare_function(ID_cprover_string_length_func, unsignedbv_typet(32)); + + function_application_exprt call_to_length; + call_to_length.type()=unsignedbv_typet(32); + call_to_length.add_source_location()=location; + call_to_length.function()=symbol_exprt(ID_cprover_string_length_func); + call_to_length.arguments().push_back(string_argument); + + const struct_typet &struct_type=to_struct_type(ns.follow(object_type)); + dereference_exprt deref(tmp_assign, object_type); + member_exprt length(deref, struct_type.components()[1].get_name(), + struct_type.components()[1].type()); + typecast_exprt rhs_length(call_to_length, signedbv_typet(32)); + code_assignt assign_length(length, rhs_length); + new_code.push_back(assign_length); + + // tmp_assign->data = new data.type[length]; + assert(ns.follow(object_type).id()==ID_struct); + member_exprt data(deref, struct_type.components()[2].get_name(), + struct_type.components()[2].type()); + side_effect_exprt data_cpp_new_expr(ID_cpp_new_array, data.type()); + data_cpp_new_expr.set(ID_size, length); + symbol_exprt tmp_data= + new_tmp_symbol("tmp_data", struct_type.components()[2].type()); + + new_code.push_back(code_assignt(data, data_cpp_new_expr)); + + // tmp_assign->data = string_data_func(s,tmp_assing->data); + declare_function(ID_cprover_string_data_func, void_typet()); + function_application_exprt call_to_data; + call_to_data.type()=void_typet(); + call_to_data.add_source_location()=location; + call_to_data.function()=symbol_exprt(ID_cprover_string_data_func); + call_to_data.arguments().push_back(string_argument); + call_to_data.arguments().push_back(data); + call_to_data.arguments().push_back(dereference_exprt(data)); + + exprt tmp_nil = new_tmp_symbol("tmp_nil", void_typet()); + new_code.push_back(code_assignt(tmp_nil, call_to_data)); + + // return_tmp0 = tmp_assign + new_code.push_back(code_assignt(function_call.lhs(), tmp_assign)); + + // putting the assignements into the program + for(std::size_t i=0; imake_assignment(); + i_it->code=new_code[i]; + i_it->source_location=location; + if(icode); + exprt arg = function_call.arguments()[0]; + auto location = function_call.source_location(); + typet object_type = arg.type().subtype(); + + dereference_exprt deref(arg, object_type); + exprt array_size=member_exprt(deref, "length", signedbv_typet(32)); + pointer_typet data_type(unsignedbv_typet(16)); + exprt data_pointer= member_exprt(deref, "data", pointer_typet(data_type)); + exprt data = dereference_exprt(data_pointer, data_type); + + std::vector::iterator it = function_call.arguments().begin(); + *it=array_size; + function_call.arguments().insert(++it, data); + make_string_function(i_it, function_name); +} + +void string_refine_preprocesst::make_char_array_function_call +(goto_programt::instructionst::iterator & i_it, irep_idt function_name) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + exprt arg = function_call.arguments()[1]; + auto location = function_call.source_location(); + typet object_type = arg.type().subtype(); + dereference_exprt deref(arg, object_type); + exprt array_size = member_exprt(deref, "length", signedbv_typet(32)); + pointer_typet data_type(unsignedbv_typet(16)); + exprt data_pointer = member_exprt(deref, "data", pointer_typet(data_type)); + exprt data = dereference_exprt(data_pointer, data_type); + + std::vector::iterator it=function_call.arguments().begin(); + *(++it)=array_size; + function_call.arguments().insert(++it, data); + make_string_function_call(i_it, function_name); +} + +void string_refine_preprocesst::make_char_array_side_effect +(goto_programt::instructionst::iterator & i_it, irep_idt function_name) +{ + code_function_callt &function_call=to_code_function_call(i_it->code); + exprt arg=function_call.arguments()[2]; + auto location=function_call.source_location(); + typet object_type=arg.type().subtype(); + dereference_exprt deref(arg, object_type); + exprt array_size=member_exprt(deref, "length", signedbv_typet(32)); + pointer_typet data_type(unsignedbv_typet(16)); + exprt data_pointer=member_exprt(deref, "data", pointer_typet(data_type)); + exprt data=dereference_exprt(data_pointer, data_type); + + std::vector::iterator it= + std::next(std::next(function_call.arguments().begin())); + *it=array_size; + function_call.arguments().insert(++it, data); + make_string_function_side_effect(i_it, function_name); +} + + +void string_refine_preprocesst::replace_string_calls +(goto_functionst::function_mapt::iterator f_it) +{ + goto_programt &goto_program=f_it->second.body; + + Forall_goto_program_instructions(i_it, goto_program) + { + if(i_it->is_function_call()) + { + code_function_callt &function_call=to_code_function_call(i_it->code); + for(std::size_t i=0; isecond; + } + + if(function_call.function().id()==ID_symbol) + { + const irep_idt function_id= + to_symbol_expr(function_call.function()).get_identifier(); + + auto it=string_functions.find(function_id); + if(it!=string_functions.end()) + make_string_function(i_it, it->second); + + it=side_effect_functions.find(function_id); + if(it!=side_effect_functions.end()) + make_string_function_side_effect(i_it, it->second); + + it=string_function_calls.find(function_id); + if(it!=string_function_calls.end()) + make_string_function_call(i_it, it->second); + + it=string_of_char_array_functions.find(function_id); + if(it!=string_of_char_array_functions.end()) + make_char_array_function(i_it, it->second); + + it=string_of_char_array_function_calls.find(function_id); + if(it!=string_of_char_array_function_calls.end()) + make_char_array_function_call(i_it, it->second); + + it=side_effect_char_array_functions.find(function_id); + if(it!=side_effect_char_array_functions.end()) + make_char_array_side_effect(i_it, it->second); + + if(function_id==irep_idt("java::java.lang.String.toCharArray:()[C")) + make_to_char_array_function(goto_program, i_it); + } + } + else + { + if(i_it->is_assign()) + { + code_assignt assignment=to_code_assign(i_it->code); + exprt new_rhs=replace_string_literals(assignment.rhs()); + code_assignt new_assignment(assignment.lhs(), new_rhs); + + if(new_rhs.id()==ID_function_application) + { + function_application_exprt f=to_function_application_expr(new_rhs); + const exprt &name=f.function(); + assert(name.id()==ID_symbol); + const irep_idt &id=to_symbol_expr(name).get_identifier(); + auto it=c_string_functions.find(id); + if(it!=c_string_functions.end()) + { + declare_function(it->second, f.type()); + f.function()=symbol_exprt(it->second); + new_assignment=code_assignt(assignment.lhs(), f); + } + } + + new_assignment.add_source_location()=assignment.source_location(); + i_it->make_assignment(); + i_it->code=new_assignment; + } + } + } + return; +} + +bool string_refine_preprocesst::has_java_string_type(const exprt &expr) +{ + const typet type=expr.type(); + if(type.id()==ID_pointer) + { + pointer_typet pt=to_pointer_type(type); + typet subtype=pt.subtype(); + if(subtype.id()==ID_symbol) + { + irep_idt tag=to_symbol_type(subtype).get_identifier(); + return (tag==irep_idt("java::java.lang.String")); + } + else + return false; + } + else + return false; +} + +exprt string_refine_preprocesst::replace_string_literals(const exprt & expr) +{ + if(has_java_string_type(expr) ) + { + if(expr.operands().size()==1 && expr.op0().id()==ID_symbol) + { + std::string id(to_symbol_expr(expr.op0()).get_identifier().c_str()); + if(has_prefix(id, "java::java.lang.String.Literal.")) + { + function_application_exprt rhs; + rhs.type()=expr.type(); + rhs.add_source_location()=expr.source_location(); + rhs.function()=symbol_exprt(ID_cprover_string_literal_func); + goto_functions.function_map[ID_cprover_string_literal_func]; + rhs.arguments().push_back(address_of_exprt(expr.op0())); + auxiliary_symbolt tmp_symbol; + tmp_symbol.is_static_lifetime=false; + tmp_symbol.mode=ID_java; + tmp_symbol.name=ID_cprover_string_literal_func; + symbol_table.add(tmp_symbol); + return rhs; + } + } + } + return expr; +} + +string_refine_preprocesst::string_refine_preprocesst( + symbol_tablet & _symbol_table, + goto_functionst & _goto_functions, + message_handlert &_message_handler) + :messaget(_message_handler), + symbol_table(_symbol_table), + ns(_symbol_table), + goto_functions(_goto_functions) +{ + // initialiasing the function maps + string_functions + [irep_idt("java::java.lang.String.codePointAt:(I)I")]= + ID_cprover_string_code_point_at_func; + string_functions + [irep_idt("java::java.lang.String.codePointBefore:(I)I")]= + ID_cprover_string_code_point_before_func; + string_functions + [irep_idt("java::java.lang.String.codePointCount:(II)I")]= + ID_cprover_string_code_point_count_func; + string_functions + [irep_idt("java::java.lang.String.offsetByCodePoints:(II)I")]= + ID_cprover_string_offset_by_code_point_func; + string_functions + [irep_idt("java::java.lang.String.hashCode:()I")]= + ID_cprover_string_hash_code_func; + string_functions + [irep_idt("java::java.lang.String.indexOf:(I)I")]= + ID_cprover_string_index_of_func; + string_functions + [irep_idt("java::java.lang.String.indexOf:(II)I")]= + ID_cprover_string_index_of_func; + string_functions + [irep_idt("java::java.lang.String.indexOf:(Ljava/lang/String;)I")]= + ID_cprover_string_index_of_func; + string_functions + [irep_idt("java::java.lang.String.indexOf:(Ljava/lang/String;I)I")]= + ID_cprover_string_index_of_func; + string_functions + [irep_idt("java::java.lang.String.lastIndexOf:(I)I")]= + ID_cprover_string_last_index_of_func; + string_functions + [irep_idt("java::java.lang.String.lastIndexOf:(II)I")]= + ID_cprover_string_last_index_of_func; + string_functions + [irep_idt("java::java.lang.String.lastIndexOf:(Ljava/lang/String;)I")]= + ID_cprover_string_last_index_of_func; + string_functions + [irep_idt("java::java.lang.String.lastIndexOf:(Ljava/lang/String;I)I")]= + ID_cprover_string_last_index_of_func; + string_functions + [irep_idt("java::java.lang.String.concat:(Ljava/lang/String;)" + "Ljava/lang/String;")]=ID_cprover_string_concat_func; + string_functions + [irep_idt("java::java.lang.String.length:()I")]= + ID_cprover_string_length_func; + string_functions + [irep_idt("java::java.lang.StringBuilder.length:()I")]= + ID_cprover_string_length_func; + string_functions + [irep_idt("java::java.lang.String.equals:(Ljava/lang/Object;)Z")]= + ID_cprover_string_equal_func; + string_functions + [irep_idt("java::java.lang.String.equalsIgnoreCase:(Ljava/lang/String;)Z")]= + ID_cprover_string_equals_ignore_case_func; + string_functions + [irep_idt("java::java.lang.String.startsWith:(Ljava/lang/String;)Z")]= + ID_cprover_string_startswith_func; + string_functions + [irep_idt("java::java.lang.String.startsWith:(Ljava/lang/String;I)Z")]= + ID_cprover_string_startswith_func; + string_functions + [irep_idt("java::java.lang.String.endsWith:(Ljava/lang/String;)Z")]= + ID_cprover_string_endswith_func; + string_functions + [irep_idt("java::java.lang.String.substring:(II)Ljava/lang/String;")]= + ID_cprover_string_substring_func; + string_functions + [irep_idt("java::java.lang.String.substring:(II)Ljava/lang/String;")]= + ID_cprover_string_substring_func; + string_functions + [irep_idt("java::java.lang.String.substring:(I)Ljava/lang/String;")]= + ID_cprover_string_substring_func; + string_functions + [irep_idt("java::java.lang.StringBuilder.substring:(II)Ljava/lang/String;") + ]=ID_cprover_string_substring_func; + string_functions + [irep_idt("java::java.lang.StringBuilder.substring:(I)Ljava/lang/String;")]= + ID_cprover_string_substring_func; + string_functions + [irep_idt + ("java::java.lang.String.subSequence:(II)Ljava/lang/CharSequence;")]= + ID_cprover_string_substring_func; + string_functions + [irep_idt("java::java.lang.String.trim:()Ljava/lang/String;")]= + ID_cprover_string_trim_func; + string_functions + [irep_idt("java::java.lang.String.toLowerCase:()Ljava/lang/String;")]= + ID_cprover_string_to_lower_case_func; + string_functions + [irep_idt("java::java.lang.String.toUpperCase:()Ljava/lang/String;")]= + ID_cprover_string_to_upper_case_func; + string_functions + [irep_idt("java::java.lang.String.replace:(CC)Ljava/lang/String;")]= + ID_cprover_string_replace_func; + string_functions + [irep_idt("java::java.lang.String.contains:(Ljava/lang/CharSequence;)Z")]= + ID_cprover_string_contains_func; + string_functions + [irep_idt("java::java.lang.String.compareTo:(Ljava/lang/String;)I")]= + ID_cprover_string_compare_to_func; + string_functions + [irep_idt("java::java.lang.String.intern:()Ljava/lang/String;")]= + ID_cprover_string_intern_func; + string_functions + [irep_idt("java::java.lang.String.isEmpty:()Z")]= + ID_cprover_string_is_empty_func; + string_functions + [irep_idt("java::java.lang.String.charAt:(I)C")]= + ID_cprover_string_char_at_func; + string_functions + [irep_idt("java::java.lang.StringBuilder.charAt:(I)C")]= + ID_cprover_string_char_at_func; + string_functions + [irep_idt("java::java.lang.CharSequence.charAt:(I)C")]= + ID_cprover_string_char_at_func; + string_functions + [irep_idt("java::java.lang.StringBuilder.toString:()Ljava/lang/String;")]= + ID_cprover_string_copy_func; + + string_functions + [irep_idt("java::java.lang.String.valueOf:(F)Ljava/lang/String;")]= + ID_cprover_string_of_float_func; + string_functions + [irep_idt("java::java.lang.Float.toString:(F)Ljava/lang/String;")]= + ID_cprover_string_of_float_func; + string_functions + [irep_idt("java::java.lang.Integer.toString:(I)Ljava/lang/String;")]= + ID_cprover_string_of_int_func; + string_functions + [irep_idt("java::java.lang.String.valueOf:(I)Ljava/lang/String;")]= + ID_cprover_string_of_int_func; + string_functions + [irep_idt("java::java.lang.Integer.toHexString:(I)Ljava/lang/String;")]= + ID_cprover_string_of_int_hex_func; + string_functions + [irep_idt("java::java.lang.String.valueOf:(L)Ljava/lang/String;")]= + ID_cprover_string_of_long_func; + string_functions + [irep_idt("java::java.lang.String.valueOf:(D)Ljava/lang/String;")]= + ID_cprover_string_of_double_func; + string_functions + [irep_idt("java::java.lang.String.valueOf:(Z)Ljava/lang/String;")]= + ID_cprover_string_of_bool_func; + string_functions + [irep_idt("java::java.lang.String.valueOf:(C)Ljava/lang/String;")]= + ID_cprover_string_of_char_func; + string_functions + [irep_idt("java::java.lang.Integer.parseInt:(Ljava/lang/String;)I")]= + ID_cprover_string_parse_int_func; + + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(Ljava/lang/String;)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.setCharAt:(IC)V")]= + ID_cprover_string_char_set_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(I)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_int_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(J)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_long_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(Z)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_bool_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(C)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_char_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(D)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_double_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.append:(F)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_float_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.appendCodePoint:(I)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_concat_code_point_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.delete:(II)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_delete_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.deleteCharAt:(I)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_delete_char_at_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.insert:(ILjava/lang/String;)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_insert_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.insert:(II)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_insert_int_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.insert:(IJ)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_insert_long_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.insert:(IC)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_insert_char_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.insert:(IZ)" + "Ljava/lang/StringBuilder;") ]= + ID_cprover_string_insert_bool_func; + side_effect_functions + [irep_idt("java::java.lang.StringBuilder.setLength:(I)V")]= + ID_cprover_string_set_length_func; + + + side_effect_char_array_functions + [irep_idt("java::java.lang.StringBuilder.insert:(I[CII)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_insert_char_array_func; + side_effect_char_array_functions + [irep_idt("java::java.lang.StringBuilder.insert:(I[C)" + "Ljava/lang/StringBuilder;")]= + ID_cprover_string_insert_char_array_func; + + string_function_calls + [irep_idt("java::java.lang.String.:(Ljava/lang/String;)V")]= + ID_cprover_string_copy_func; + string_function_calls + [irep_idt("java::java.lang.String.:(Ljava/lang/StringBuilder;)V")]= + ID_cprover_string_copy_func; + string_function_calls + [irep_idt("java::java.lang.StringBuilder.:(Ljava/lang/String;)V")]= + ID_cprover_string_copy_func; + string_function_calls + [irep_idt("java::java.lang.String.:()V")]= + ID_cprover_string_empty_string_func; + string_function_calls + [irep_idt("java::java.lang.StringBuilder.:()V")]= + ID_cprover_string_empty_string_func; + + string_of_char_array_function_calls + [irep_idt("java::java.lang.String.:([C)V")]= + ID_cprover_string_of_char_array_func; + string_of_char_array_function_calls + [irep_idt("java::java.lang.String.:([CII)V")]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + [irep_idt("java::java.lang.String.valueOf:([CII)Ljava/lang/String;")]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + [irep_idt("java::java.lang.String.valueOf:([C)Ljava/lang/String;")]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + [irep_idt("java::java.lang.String.copyValueOf:([CII)Ljava/lang/String;")]= + ID_cprover_string_of_char_array_func; + string_of_char_array_functions + [irep_idt("java::java.lang.String.copyValueOf:([C)Ljava/lang/String;")]= + ID_cprover_string_of_char_array_func; + + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_literal_func")]= + ID_cprover_string_literal_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_char_at_func")]= + ID_cprover_string_char_at_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_equal_func")]= + ID_cprover_string_equal_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_concat_func")]= + ID_cprover_string_concat_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_length_func")]= + ID_cprover_string_length_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_substring_func")]= + ID_cprover_string_substring_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_is_prefix_func")]= + ID_cprover_string_is_prefix_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_is_suffix_func")]= + ID_cprover_string_is_suffix_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_contains_func")]= + ID_cprover_string_contains_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_index_of_func")]= + ID_cprover_string_index_of_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_last_index_of_func")]= + ID_cprover_string_last_index_of_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_char_set_func")]= + ID_cprover_string_char_set_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_copy_func")]= + ID_cprover_string_copy_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_parse_int_func")]= + ID_cprover_string_parse_int_func; + c_string_functions + [irep_idt("__CPROVER_uninterpreted_string_of_int_func")]= + ID_cprover_string_of_int_func; + + + Forall_goto_functions(it, goto_functions) + replace_string_calls(it); +} diff --git a/src/goto-programs/string_refine_preprocess.h b/src/goto-programs/string_refine_preprocess.h new file mode 100644 index 00000000000..cea83582c9e --- /dev/null +++ b/src/goto-programs/string_refine_preprocess.h @@ -0,0 +1,88 @@ +/*******************************************************************\ + +Module: Preprocess a goto-programs so that calls to the java String + library are recognized by the PASS algorithm + +Author: Romain Brenguier + +Date: September 2016 + +\*******************************************************************/ + +#ifndef CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H +#define CPROVER_GOTO_PROGRAMS_STRING_REFINE_PREPROCESS_H + +#include +#include + +class string_refine_preprocesst:public messaget +{ + private: + namespacet ns; + symbol_tablet & symbol_table; + goto_functionst & goto_functions; + std::map string_builders; + std::map side_effect_functions; + std::map string_functions; + std::map c_string_functions; + std::map string_function_calls; + std::map string_of_char_array_functions; + std::map string_of_char_array_function_calls; + std::map side_effect_char_array_functions; + + public: + string_refine_preprocesst + (symbol_tablet &, goto_functionst &, message_handlert &); + + private: + // add a temporary symbol to the symbol table + symbol_exprt new_tmp_symbol(const std::string &name, const typet &type); + + void declare_function(irep_idt function_name, const typet &type); + + exprt replace_string_literals(const exprt &); + + // replace "lhs=s.some_function(x,...)" by "lhs=function_name(s,x,...)" + void make_string_function + (goto_programt::instructionst::iterator & i_it, irep_idt function_name); + + // replace "s.some_function(x,...)" by "s=function_name(x,...)" + void make_string_function_call + (goto_programt::instructionst::iterator & i_it, irep_idt function_name); + + // replace "r = s.some_function(x,...)" by "s=function_name(s,x,...)" + // and add a correspondance from r to s in the string_builders map + void make_string_function_side_effect + (goto_programt::instructionst::iterator & i_it, irep_idt function_name); + + // replace "return_tmp0 = s.toCharArray()" with: + // tmp_assign = MALLOC(struct java::array[reference], 17L); + // tmp_assign->length = (int)__CPROVER_uninterpreted_string_length_func(s); + // tmp_assign->data = new (void **)[tmp_assign->length]; + // tmp_nil = __CPROVER_uninterpreted_string_data_func(s, tmp_assign->data); + // return_tmp0 = tmp_assign; + void make_to_char_array_function + (goto_programt & goto_program, goto_programt::instructionst::iterator &); + + // replace "r = some_function(arr,...)" by + // "r = function_name(arr.length,arr.data,...); + void make_char_array_function + (goto_programt::instructionst::iterator & i_it, irep_idt function_name); + + // replace "r.some_function(arr,...)" by + // "r = function_name(arr.length,arr.data,...); + void make_char_array_function_call + (goto_programt::instructionst::iterator & i_it, irep_idt function_name); + + // replace `r = s.some_function(i,arr,...)` by + // `s=function_name(s,i,arr.length,arr.data)` + // and add a correspondance from r to s in the string_builders map + void make_char_array_side_effect + (goto_programt::instructionst::iterator & i_it, irep_idt function_name); + + bool has_java_string_type(const exprt &expr); + + void replace_string_calls(goto_functionst::function_mapt::iterator f_it); +}; + +#endif diff --git a/src/solvers/Makefile b/src/solvers/Makefile index b806aa47886..002902d8a0c 100644 --- a/src/solvers/Makefile +++ b/src/solvers/Makefile @@ -118,6 +118,11 @@ SRC = $(CHAFF_SRC) $(BOOLEFORCE_SRC) $(MINISAT_SRC) $(MINISAT2_SRC) \ floatbv/float_utils.cpp floatbv/float_bv.cpp \ refinement/bv_refinement_loop.cpp refinement/refine_arithmetic.cpp \ refinement/refine_arrays.cpp \ + refinement/string_constraint.cpp \ + refinement/refined_string_type.cpp \ + refinement/string_expr.cpp \ + refinement/string_refinement.cpp \ + refinement/string_constraint_generator.cpp \ miniBDD/miniBDD.cpp INCLUDES= -I .. \ diff --git a/src/solvers/refinement/bv_refinement.h b/src/solvers/refinement/bv_refinement.h index 5474a8f1af3..cc69d541dd2 100644 --- a/src/solvers/refinement/bv_refinement.h +++ b/src/solvers/refinement/bv_refinement.h @@ -83,8 +83,8 @@ class bv_refinementt:public bv_pointerst void get_values(approximationt &approximation); bool is_in_conflict(approximationt &approximation); - void check_SAT(); - void check_UNSAT(); + virtual void check_SAT(); + virtual void check_UNSAT(); bool progress; // we refine the theory of arrays diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp new file mode 100644 index 00000000000..1360fa9d3eb --- /dev/null +++ b/src/solvers/refinement/refined_string_type.cpp @@ -0,0 +1,147 @@ +/** -*- C++ -*- *****************************************************\ + +Module: Type for string expressions used by the string solver. + These string expressions contains a field `length`, of type + `index_type`, a field `content` of type `content_type`. + This module also defines function to recognise the C and java + string types. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include +#include + +/*******************************************************************\ + +Constructor: refined_string_typet::refined_string_typet + + Inputs: type of characters + +\*******************************************************************/ +refined_string_typet::refined_string_typet(unsignedbv_typet char_type) + :struct_typet() +{ + components().resize(2); + components()[0].set_name("length"); + components()[0].set_pretty_name("length"); + components()[0].type()=refined_string_typet::index_type(); + + infinity_exprt infinite_index(refined_string_typet::index_type()); + array_typet char_array(char_type, infinite_index); + components()[1].set_name("content"); + components()[1].set_pretty_name("content"); + components()[1].type()=char_array; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_c_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of C strings + +\*******************************************************************/ +bool refined_string_typet::is_c_string_type(const typet &type) +{ + if(type.id()==ID_struct) + { + irep_idt tag=to_struct_type(type).get_tag(); + return (tag==irep_idt("__CPROVER_string")); + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_string_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string pointers + +\*******************************************************************/ +bool refined_string_typet::is_java_string_pointer_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + pointer_typet pt=to_pointer_type(type); + typet subtype=pt.subtype(); + return is_java_string_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string + +\*******************************************************************/ +bool refined_string_typet::is_java_string_type(const typet &type) +{ + if(type.id()==ID_symbol) + { + irep_idt tag=to_symbol_type(type).get_identifier(); + return (tag==irep_idt("java::java.lang.String")); + } + else if(type.id()==ID_struct) + { + irep_idt tag=to_struct_type(type).get_tag(); + return (tag==irep_idt("java.lang.String")); + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_string_builder_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string builder + +\*******************************************************************/ +bool refined_string_typet::is_java_string_builder_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + pointer_typet pt=to_pointer_type(type); + typet subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + irep_idt tag=to_struct_type(subtype).get_tag(); + return (tag==irep_idt("java.lang.StringBuilder")); + } + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_char_sequence_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char sequence + +\*******************************************************************/ +bool refined_string_typet::is_java_char_sequence_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + pointer_typet pt=to_pointer_type(type); + typet subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + irep_idt tag=to_struct_type(subtype).get_tag(); + return (tag==irep_idt("java.lang.CharSequence")); + } + } + return false; +} diff --git a/src/solvers/refinement/refined_string_type.h b/src/solvers/refinement/refined_string_type.h new file mode 100644 index 00000000000..e963fbc1c73 --- /dev/null +++ b/src/solvers/refinement/refined_string_type.h @@ -0,0 +1,87 @@ +/** -*- C++ -*- *****************************************************\ + +Module: Type for string expressions used by the string solver. + These string expressions contains a field `length`, of type + `index_type`, a field `content` of type `content_type`. + This module also defines function to recognise the C and java + string types. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H +#define CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H + +#include +#include +#include + +#define STRING_SOLVER_INDEX_WIDTH 32 +#define STRING_SOLVER_C_CHAR_WIDTH 8 +#define STRING_SOLVER_JAVA_CHAR_WIDTH 16 + +// Internal type used for string refinement +class refined_string_typet: public struct_typet +{ +public: + explicit refined_string_typet(unsignedbv_typet char_type); + + // Type for the content (list of characters) of a string + inline array_typet get_content_type() + { return to_array_type((to_struct_type(*this)).components()[1].type());} + + // Types used in this refinement + static inline unsignedbv_typet char_type() + { return unsignedbv_typet(STRING_SOLVER_C_CHAR_WIDTH); } + + static inline unsignedbv_typet java_char_type() + { return unsignedbv_typet(STRING_SOLVER_JAVA_CHAR_WIDTH);} + + static inline signedbv_typet index_type() + { return signedbv_typet(STRING_SOLVER_INDEX_WIDTH);} + + static inline exprt index_zero() + { + return constant_exprt(integer2binary(0, STRING_SOLVER_INDEX_WIDTH), + index_type()); + } + + // For C the unrefined string type is __CPROVER_string, for java it is a + // pointer to a strict with tag java.lang.String + + static bool is_c_string_type(const typet & type); + + static bool is_java_string_pointer_type(const typet & type); + + static bool is_java_string_type(const typet & type); + + static bool is_java_string_builder_type(const typet & type); + + static bool is_java_char_sequence_type(const typet & type); + + static inline unsignedbv_typet get_char_type(const exprt & expr) + { + if(is_c_string_type(expr.type())) + return char_type(); + else + return java_char_type(); + } + + static inline bool is_unrefined_string_type(const typet & type) + { + return (is_c_string_type(type) + || is_java_string_pointer_type(type) + || is_java_string_builder_type(type) + || is_java_char_sequence_type(type)); + } + + static inline bool is_unrefined_string(const exprt & expr) + { return (is_unrefined_string_type(expr.type())); } + + static inline constant_exprt index_of_int(int i) + { return from_integer(i, index_type()); } +}; + + +#endif diff --git a/src/solvers/refinement/string_constraint.cpp b/src/solvers/refinement/string_constraint.cpp new file mode 100644 index 00000000000..d9725b2c408 --- /dev/null +++ b/src/solvers/refinement/string_constraint.cpp @@ -0,0 +1,15 @@ +/** -*- C++ -*- *****************************************************\ + +Module: String constraints. + These are formulas talking about strings. We implemented two + forms of constraints: `string_constraintt` implements formulas + of the form $\forall univ_var \in [lb,ub[. premise => body$, + and not_contains_constraintt implements those of the form: + $\forall x in [lb,ub[. p(x) => \exists y in [lb,ub[. + s1[x+y] != s2[y]$. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h new file mode 100644 index 00000000000..5fa578e452e --- /dev/null +++ b/src/solvers/refinement/string_constraint.h @@ -0,0 +1,192 @@ +/** -*- C++ -*- *****************************************************\ + +Module: String constraints. + These are formulas talking about strings. We implemented two + forms of constraints: `string_constraintt` implements formulas + of the form $\forall univ_var \in [lb,ub[. premise => body$, + and not_contains_constraintt implements those of the form: + $\forall x in [lb,ub[. p(x) => \exists y in [lb,ub[. + s1[x+y] != s2[y]$. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_H + +#include +#include +#include + +class string_constraintt: public exprt +{ +public: + // String constraints are of the form + // forall univ_var in [lower_bound,upper_bound[. premise => body + // or premise => body + + inline const exprt &premise() const + { + return op0(); + } + + inline const exprt &body() const + { + return op1(); + } + + inline const exprt &univ_var() const + { + return op2(); + } + + inline const exprt &upper_bound() const + { + return op3(); + } + + inline const exprt &lower_bound() const + { + return operands()[4]; + } + + // Trivial constraint + string_constraintt() : exprt(ID_string_constraint) + { + assert(false); // string constraints should not be initialized directly + copy_to_operands(true_exprt(), true_exprt()); + } + + string_constraintt( + const symbol_exprt &univ, + const exprt &bound_inf, + const exprt &bound_sup, + const exprt &prem, + const exprt &body) + : exprt(ID_string_constraint) + { + copy_to_operands(prem, body); + copy_to_operands(univ, bound_sup, bound_inf); + }; + + // Default bound inferior is 0 + string_constraintt( + const symbol_exprt &univ, + const exprt &bound_sup, + const exprt &prem, + const exprt &body) + : string_constraintt(univ, refined_string_typet::index_zero(), + bound_sup, prem, body) + {}; + + // Default premise is true + string_constraintt + (const symbol_exprt &univ, const exprt &bound_sup, const exprt &body) + : string_constraintt + (univ, refined_string_typet::index_zero(), bound_sup, true_exprt(), body) + {}; + + bool is_simple() const { return (operands().size()==2); } + bool is_univ_quant() const { return (operands().size()==5); } + bool is_not_contains() const { return false; } + + inline symbol_exprt get_univ_var() const + { return to_symbol_expr(univ_var()); } + + inline exprt univ_within_bounds() const + { + return and_exprt + (binary_relation_exprt(lower_bound(), ID_le, get_univ_var()), + binary_relation_exprt(upper_bound(), ID_gt, get_univ_var())); + } +}; + +extern inline const string_constraintt &to_string_constraint(const exprt &expr) +{ + assert(expr.id()==ID_string_constraint && expr.operands().size()==5); + return static_cast(expr); +} + +extern inline string_constraintt &to_string_constraint(exprt &expr) +{ + assert(expr.id()==ID_string_constraint && expr.operands().size()==5); + return static_cast(expr); +} + +class string_not_contains_constraintt: public exprt +{ +public: + // string_not contains_constraintt are formula of the form: + // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y] + + string_not_contains_constraintt( + exprt univ_lower_bound, + exprt univ_bound_sup, + exprt premise, + exprt exists_bound_inf, + exprt exists_bound_sup, + exprt s0, + exprt s1) + :exprt(ID_string_not_contains_constraint) + { + copy_to_operands(univ_lower_bound, univ_bound_sup, premise); + copy_to_operands(exists_bound_inf, exists_bound_sup, s0); + copy_to_operands(s1); + }; + + bool is_not_contains() const { return true; } + + inline const exprt &univ_lower_bound() const + { + return operands()[0]; + } + + inline const exprt &univ_upper_bound() const + { + return operands()[1]; + } + + inline const exprt &premise() const + { + return operands()[2]; + } + + inline const exprt &exists_lower_bound() const + { + return operands()[3]; + } + + inline const exprt &exists_upper_bound() const + { + return operands()[4]; + } + + inline const exprt &s0() const + { + return operands()[5]; + } + + inline const exprt &s1() const + { + return operands()[6]; + } +}; + +extern inline const string_not_contains_constraintt +&to_string_not_contains_constraint(const exprt &expr) +{ + assert(expr.id()==ID_string_not_contains_constraint + && expr.operands().size()==7); + return static_cast(expr); +} + +extern inline string_not_contains_constraintt +&to_string_not_contains_constraint(exprt &expr) +{ + assert(expr.id()==ID_string_not_contains_constraint + && expr.operands().size()==7); + return static_cast(expr); +} + +#endif diff --git a/src/solvers/refinement/string_constraint_generator.cpp b/src/solvers/refinement/string_constraint_generator.cpp new file mode 100644 index 00000000000..b8c6016d64c --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator.cpp @@ -0,0 +1,2145 @@ +/** -*- C++ -*- *****************************************************\ + +Module: Generates string constraints to link results from string functions + with their arguments. This is inspired by the PASS paper at HVC'13 + which gives examples of constraints for several functions. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constant_exprt string_constraint_generatort::constant_char(int i) const +{ + if(mode==ID_C) + return from_integer(i, refined_string_typet::char_type()); + else if(mode==ID_java) + return from_integer(i, refined_string_typet::java_char_type()); + else + assert(false); // only C and java modes supported +} + +unsignedbv_typet string_constraint_generatort::get_char_type() const +{ + if(mode==ID_C) + return refined_string_typet::char_type(); + else if(mode==ID_java) + return refined_string_typet::java_char_type(); + else + assert(false); // only C and java modes supported +} + +size_t string_constraint_generatort::get_char_width() const +{ + if(mode==ID_C) + return STRING_SOLVER_C_CHAR_WIDTH; + else if(mode==ID_java) + return STRING_SOLVER_JAVA_CHAR_WIDTH; + else + assert(false); // only C and java modes supported +} + +symbol_exprt string_constraint_generatort::fresh_univ_index +(const irep_idt &prefix) +{ + return string_exprt::fresh_symbol(prefix, refined_string_typet::index_type()); +} + +symbol_exprt string_constraint_generatort::fresh_exist_index +(const irep_idt &prefix) +{ + symbol_exprt s=string_exprt::fresh_symbol + (prefix, refined_string_typet::index_type()); + index_symbols.push_back(s); + return s; +} + +symbol_exprt string_constraint_generatort::fresh_boolean +(const irep_idt &prefix) +{ + symbol_exprt b=string_exprt::fresh_symbol(prefix, bool_typet()); + boolean_symbols.push_back(b); + return b; +} + + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_string_expr + + Inputs: an expression of type string + + Outputs: a string expression that is link to the argument through + axioms that are added to the list + + Purpose: obtain a refined string expression corresponding to string + variable of string function call + +\*******************************************************************/ +string_exprt string_constraint_generatort::add_axioms_for_string_expr +(const exprt & unrefined_string) +{ + string_exprt s; + + if(unrefined_string.id()==ID_function_application) + { + exprt res=add_axioms_for_function_application + (to_function_application_expr(unrefined_string)); + assert(res.type()==refined_string_typet(get_char_type())); + s=to_string_expr(res); + } + else if(unrefined_string.id()==ID_symbol) + s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); + else if(unrefined_string.id()==ID_address_of) + { + assert(unrefined_string.op0().id()==ID_symbol); + s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + } + else if(unrefined_string.id()==ID_if) + s=add_axioms_for_if(to_if_expr(unrefined_string)); + else if(unrefined_string.id()==ID_nondet_symbol + || unrefined_string.id()==ID_struct) + { + // We ignore non deterministic symbols and struct + } + else + { + throw("add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + "\nwhich is not a function application, "+ + "a symbol or an if expression"); + } + + axioms.push_back(s.longer(refined_string_typet::index_zero())); + return s; +} + +string_exprt string_constraint_generatort::add_axioms_for_if +(const if_exprt &expr) +{ + string_exprt res(get_char_type()); + assert(refined_string_typet::is_unrefined_string_type + (expr.true_case().type())); + string_exprt t=add_axioms_for_string_expr(expr.true_case()); + assert(refined_string_typet::is_unrefined_string_type + (expr.false_case().type())); + string_exprt f=add_axioms_for_string_expr(expr.false_case()); + + axioms.push_back(implies_exprt(expr.cond(), res.same_length(t))); + symbol_exprt qvar=fresh_univ_index("QA_string_if_true"); + equal_exprt qequal(res[qvar], t[qvar]); + axioms.push_back(string_constraintt(qvar, t.length(), expr.cond(), qequal)); + axioms.push_back(implies_exprt(expr.cond(), res.same_length(f))); + symbol_exprt qvar2=fresh_univ_index("QA_string_if_false"); + equal_exprt qequal2(res[qvar2], f[qvar2]); + string_constraintt sc2(qvar2, f.length(), not_exprt(expr.cond()), qequal2); + axioms.push_back(sc2); + return res; +} + + +string_exprt string_constraint_generatort::find_or_add_string_of_symbol +(const symbol_exprt & sym) +{ + irep_idt id=sym.get_identifier(); + std::map::iterator f=symbol_to_string.find(id); + if(f!=symbol_to_string.end()) + return f->second; + + symbol_to_string[id]= string_exprt(get_char_type()); + return symbol_to_string[id]; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_function_application + + Inputs: an expression containing a function application + + Outputs: expression corresponding to the result of the function application + + Purpose: strings contained in this call are converted to objects of type + `string_exprt`, through adding axioms. Axioms are then added to + enforce that the result corresponds to the function application. + +\*******************************************************************/ +exprt string_constraint_generatort::add_axioms_for_function_application +(const function_application_exprt & expr) +{ + const exprt &name=expr.function(); + assert(name.id()==ID_symbol); + + const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): + to_symbol_expr(name).get_identifier(); + + if(id==ID_cprover_char_literal_func) + return add_axioms_for_char_literal(expr); + else if(id==ID_cprover_string_length_func) + return add_axioms_for_length(expr); + else if(id==ID_cprover_string_equal_func) + return add_axioms_for_equals(expr); + else if(id==ID_cprover_string_equals_ignore_case_func) + return add_axioms_for_equals_ignore_case(expr); + else if(id==ID_cprover_string_is_empty_func) + return add_axioms_for_is_empty(expr); + else if(id==ID_cprover_string_char_at_func) + return add_axioms_for_char_at(expr); + else if(id==ID_cprover_string_is_prefix_func) + return add_axioms_for_is_prefix(expr); + else if(id==ID_cprover_string_is_suffix_func) + return add_axioms_for_is_suffix(expr); + else if(id==ID_cprover_string_startswith_func) + return add_axioms_for_is_prefix(expr, true); + else if(id==ID_cprover_string_endswith_func) + return add_axioms_for_is_suffix(expr, true); + else if(id==ID_cprover_string_contains_func) + return add_axioms_for_contains(expr); + else if(id==ID_cprover_string_hash_code_func) + return add_axioms_for_hash_code(expr); + else if(id==ID_cprover_string_index_of_func) + return add_axioms_for_index_of(expr); + else if(id==ID_cprover_string_last_index_of_func) + return add_axioms_for_last_index_of(expr); + else if(id==ID_cprover_string_parse_int_func) + return add_axioms_for_parse_int(expr); + else if(id==ID_cprover_string_to_char_array_func) + return add_axioms_for_to_char_array(expr); + else if(id==ID_cprover_string_code_point_at_func) + return add_axioms_for_code_point_at(expr); + else if(id==ID_cprover_string_code_point_before_func) + return add_axioms_for_code_point_before(expr); + else if(id==ID_cprover_string_code_point_count_func) + return add_axioms_for_code_point_count(expr); + else if(id==ID_cprover_string_offset_by_code_point_func) + return add_axioms_for_offset_by_code_point(expr); + else if(id==ID_cprover_string_compare_to_func) + return add_axioms_for_compare_to(expr); + else if(id==ID_cprover_string_literal_func) + return add_axioms_from_literal(expr); + else if(id==ID_cprover_string_concat_func) + return add_axioms_for_concat(expr); + else if(id==ID_cprover_string_concat_int_func) + return add_axioms_for_concat_int(expr); + else if(id==ID_cprover_string_concat_long_func) + return add_axioms_for_concat_long(expr); + else if(id==ID_cprover_string_concat_bool_func) + return add_axioms_for_concat_bool(expr); + else if(id==ID_cprover_string_concat_char_func) + return add_axioms_for_concat_char(expr); + else if(id==ID_cprover_string_concat_double_func) + return add_axioms_for_concat_double(expr); + else if(id==ID_cprover_string_concat_float_func) + return add_axioms_for_concat_float(expr); + else if(id==ID_cprover_string_concat_code_point_func) + return add_axioms_for_concat_code_point(expr); + else if(id==ID_cprover_string_insert_func) + return add_axioms_for_insert(expr); + else if(id==ID_cprover_string_insert_int_func) + return add_axioms_for_insert_int(expr); + else if(id==ID_cprover_string_insert_long_func) + return add_axioms_for_insert_long(expr); + else if(id==ID_cprover_string_insert_bool_func) + return add_axioms_for_insert_bool(expr); + else if(id==ID_cprover_string_insert_char_func) + return add_axioms_for_insert_char(expr); + else if(id==ID_cprover_string_insert_double_func) + return add_axioms_for_insert_double(expr); + else if(id==ID_cprover_string_insert_float_func) + return add_axioms_for_insert_float(expr); + else if(id==ID_cprover_string_insert_char_array_func) + return add_axioms_for_insert_char_array(expr); + else if(id==ID_cprover_string_substring_func) + return add_axioms_for_substring(expr); + else if(id==ID_cprover_string_trim_func) + return add_axioms_for_trim(expr); + else if(id==ID_cprover_string_to_lower_case_func) + return add_axioms_for_to_lower_case(expr); + else if(id==ID_cprover_string_to_upper_case_func) + return add_axioms_for_to_upper_case(expr); + else if(id==ID_cprover_string_char_set_func) + return add_axioms_for_char_set(expr); + else if(id==ID_cprover_string_value_of_func) + return add_axioms_for_value_of(expr); + else if(id==ID_cprover_string_empty_string_func) + return add_axioms_for_empty_string(expr); + else if(id==ID_cprover_string_copy_func) + return add_axioms_for_copy(expr); + else if(id==ID_cprover_string_of_int_func) + return add_axioms_from_int(expr); + else if(id==ID_cprover_string_of_int_hex_func) + return add_axioms_from_int_hex(expr); + else if(id==ID_cprover_string_of_float_func) + return add_axioms_from_float(expr); + else if(id==ID_cprover_string_of_double_func) + return add_axioms_from_double(expr); + else if(id==ID_cprover_string_of_long_func) + return add_axioms_from_long(expr); + else if(id==ID_cprover_string_of_bool_func) + return add_axioms_from_bool(expr); + else if(id==ID_cprover_string_of_char_func) + return add_axioms_from_char(expr); + else if(id==ID_cprover_string_of_char_array_func) + return add_axioms_from_char_array(expr); + else if(id==ID_cprover_string_set_length_func) + return add_axioms_for_set_length(expr); + else if(id==ID_cprover_string_delete_func) + return add_axioms_for_delete(expr); + else if(id==ID_cprover_string_delete_char_at_func) + return add_axioms_for_delete_char_at(expr); + else if(id==ID_cprover_string_replace_func) + return add_axioms_for_replace(expr); + else if(id==ID_cprover_string_data_func) + return add_axioms_for_data(expr); + else + { + std::string msg("string_exprt::function_application: unknown symbol :"); + msg+=id.c_str(); + throw msg; + } +} + + +irep_idt extract_java_string(const symbol_exprt & s) +{ + std::string tmp(s.get(ID_identifier).c_str()); + std::string prefix("java::java.lang.String.Literal."); + assert(has_prefix(tmp, prefix)); + std::string value=tmp.substr(prefix.length()); + return irep_idt(value); +} + +string_exprt string_constraint_generatort::add_axioms_for_constant +(irep_idt sval, int char_width, unsignedbv_typet char_type) +{ + string_exprt res(char_type); + std::string str=sval.c_str(); + // should only do this for java + std::wstring utf16=utf8_to_utf16le(str); + + for(std::size_t i=0; i s[i]=s1[i]) && (i >= k ==> s[i]=0) + + axioms.push_back(res.has_length(k)); + symbol_exprt idx=fresh_univ_index("QA_index_set_length"); + string_constraintt a1 + (idx, k, and_exprt + (implies_exprt(s1.strictly_longer(idx), equal_exprt(s1[idx], res[idx])), + implies_exprt(s1.shorter(idx), equal_exprt(s1[idx], constant_char(0))))); + axioms.push_back(a1); + + return res; +} + + +string_exprt string_constraint_generatort::add_axioms_for_java_char_array +(const exprt & char_array) +{ + string_exprt res(get_char_type()); + exprt arr=to_address_of_expr(char_array).object(); + exprt len=member_exprt(arr, "length", res.length().type()); + exprt cont=member_exprt(arr, "data", res.content().type()); + res.op0()=len; + res.op1()=cont; + return res; +} + + +string_exprt string_constraint_generatort::add_axioms_for_value_of +(const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + if(args.size()==3) + { + string_exprt res(get_char_type()); + exprt char_array=args[0]; + exprt offset=args[1]; + exprt count=args[2]; + string_exprt str=add_axioms_for_java_char_array(char_array); + axioms.push_back(res.has_length(count)); + symbol_exprt idx=fresh_univ_index("QA_index_value_of"); + equal_exprt eq(str[plus_exprt(idx, offset)], res[idx]); + string_constraintt a1(idx, count, eq); + axioms.push_back(a1); + return res; + } + else + { + assert(args.size()==1); + return add_axioms_for_java_char_array(args[0]); + } +} + +string_exprt string_constraint_generatort::add_axioms_for_substring +(const function_application_exprt &f) +{ + assert(f.arguments().size()>=2); + string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + exprt i(f.arguments()[1]); + exprt j; + if(f.arguments().size()==3) + { + j=f.arguments()[2]; + } + else + { + assert(f.arguments().size()==2); + j=str.length(); + } + return add_axioms_for_substring(str, i, j); +} + +string_exprt string_constraint_generatort::add_axioms_for_substring +(const string_exprt & str, const exprt & start, const exprt & end) +{ + symbol_exprt idx=fresh_exist_index("index_substring"); + assert(start.type()==refined_string_typet::index_type()); + assert(end.type()==refined_string_typet::index_type()); + string_exprt res(get_char_type()); + + exprt is_empty=res.has_length(refined_string_typet::index_zero()); + axioms.push_back(implies_exprt(binary_relation_exprt(start, ID_lt, end), + res.has_length(minus_exprt(end, start)))); + axioms.push_back(implies_exprt(binary_relation_exprt(start, ID_ge, end), + is_empty)); + + // Warning: check what to do if the string is not long enough + axioms.push_back(str.longer(end)); + + // forall idx= 0 + // a3 : str >= idx + // a4 : |res|>= 0 + // a5 : |res|<=|str| + // (this is necessary to prevent exceeding the biggest integer) + // a6 : forall n res[idx]=str[idx]+'a'-'A' + // a3 : forall idx res[idx]=str[idx] + // forall idx res[idx]=str[idx]+'A'-'a' + // a3 : forall idx res[idx]=str[idx] + exprt a1=res.same_length(str); + axioms.push_back(a1); + + symbol_exprt idx=fresh_univ_index("QA_upper_case"); + exprt is_lower_case=and_exprt(binary_relation_exprt(char_a, ID_le, str[idx]), + binary_relation_exprt(str[idx], ID_le, char_z)); + minus_exprt diff(char_A, char_a); + equal_exprt convert(res[idx], plus_exprt(str[idx], diff)); + string_constraintt a2(idx, res.length(), is_lower_case, convert); + axioms.push_back(a2); + + equal_exprt eq(res[idx], str[idx]); + string_constraintt a3(idx, res.length(), not_exprt(is_lower_case), eq); + axioms.push_back(a3); + return res; +} + + +string_exprt string_constraint_generatort::add_axioms_from_int +(const function_application_exprt &expr) +{ return add_axioms_from_int(args(expr, 1)[0], 10); } + +string_exprt string_constraint_generatort::add_axioms_from_long +(const function_application_exprt &expr) +{ return add_axioms_from_int(args(expr, 1)[0], 30); } + +string_exprt string_constraint_generatort::add_axioms_from_float +(const function_application_exprt &f) +{ return add_axioms_from_float(args(f, 1)[0], false); } + +string_exprt string_constraint_generatort::add_axioms_from_double +(const function_application_exprt &f) +{ return add_axioms_from_float(args(f, 1)[0], true); } + +string_exprt string_constraint_generatort::add_axioms_from_float +(const exprt &f, bool double_precision) +{ + // Warning: we currently only have partial specification + unsignedbv_typet char_type=get_char_type(); + string_exprt res(char_type); + axioms.push_back(res.shorter(refined_string_typet::index_of_int(24))); + + string_exprt magnitude(char_type); + string_exprt sign_string(char_type); + string_exprt nan_string=add_axioms_for_constant("NaN"); + + // We add the axioms: + // a1 : If the argument is NaN, the result length is that of "NaN". + // a2 : If the argument is NaN, the result content is the string "NaN". + // a3 : f<0 => |sign_string|=1 + // a4 : f>=0 => |sign_string|=0 + // a5 : f<0 => sign_string[0]='-' + // a6 : f infinite => |magnitude|=|"Infinity"| + // a7 : forall i<|"Infinity"|, f infinite => magnitude[i]="Infinity"[i] + // a8 : f=0 => |magnitude|=|"0.0"| + // a9 : forall i<|"0.0"|, f=0 => f[i]="0.0"[i] + ieee_float_spect fspec= + double_precision?ieee_float_spect::double_precision() + :ieee_float_spect::single_precision(); + + exprt isnan=float_bvt().isnan(f, fspec); + implies_exprt a1(isnan, magnitude.same_length(nan_string)); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_equal_nan"); + string_constraintt a2(qvar, nan_string.length(), + isnan, equal_exprt(magnitude[qvar], nan_string[qvar])); + axioms.push_back(a2); + + // If the argument is not NaN, the result is a string that represents + // the sign and magnitude (absolute value) of the argument. + // If the sign is negative, the first character of the result is '-'; + // if the sign is positive, no sign character appears in the result. + + const bitvector_typet &bv_type=to_bitvector_type(f.type()); + unsigned width=bv_type.get_width(); + exprt isneg=extractbit_exprt(f, width-1); + + implies_exprt a3(isneg, sign_string.has_length(1)); + axioms.push_back(a3); + + implies_exprt a4(not_exprt(isneg), sign_string.has_length(0)); + axioms.push_back(a4); + + implies_exprt a5(isneg, equal_exprt(sign_string[0], constant_char('-'))); + axioms.push_back(a5); + + // If m is infinity, it is represented by the characters "Infinity"; + // thus, positive infinity produces the result "Infinity" and negative + // infinity produces the result "-Infinity". + + string_exprt infinity_string=add_axioms_for_constant("Infinity"); + exprt isinf=float_bvt().isinf(f, fspec); + implies_exprt a6(isinf, magnitude.same_length(infinity_string)); + axioms.push_back(a6); + + symbol_exprt qvar_inf=fresh_univ_index("QA_equal_infinity"); + equal_exprt meq(magnitude[qvar_inf], infinity_string[qvar_inf]); + string_constraintt a7(qvar_inf, infinity_string.length(), isinf, meq); + axioms.push_back(a7); + + // If m is zero, it is represented by the characters "0.0"; thus, negative + // zero produces the result "-0.0" and positive zero produces "0.0". + + string_exprt zero_string=add_axioms_for_constant("0.0"); + exprt iszero=float_bvt().is_zero(f, fspec); + implies_exprt a8(iszero, magnitude.same_length(zero_string)); + axioms.push_back(a8); + + symbol_exprt qvar_zero=fresh_univ_index("QA_equal_zero"); + equal_exprt eq_zero(magnitude[qvar_zero], zero_string[qvar_zero]); + string_constraintt a9(qvar_zero, zero_string.length(), iszero, eq_zero); + axioms.push_back(a9); + + return add_axioms_for_concat(sign_string, magnitude); +} + + +string_exprt string_constraint_generatort::add_axioms_from_bool +(const function_application_exprt &f) +{ return add_axioms_from_bool(args(f, 1)[0]); } + + +string_exprt string_constraint_generatort::add_axioms_from_bool(const exprt &i) +{ + unsignedbv_typet char_type=get_char_type(); + string_exprt res(char_type); + + assert(i.type()==bool_typet() || i.type().id()==ID_c_bool); + + typecast_exprt eq(i, bool_typet()); + + string_exprt true_string=add_axioms_for_constant("true"); + string_exprt false_string=add_axioms_for_constant("false"); + + axioms.push_back(implies_exprt(eq, res.same_length(true_string))); + symbol_exprt qvar=fresh_univ_index("QA_equal_true"); + axioms.push_back + (string_constraintt(qvar, true_string.length(), eq, + equal_exprt(res[qvar], true_string[qvar]))); + + axioms.push_back(implies_exprt(not_exprt(eq), res.same_length(false_string))); + symbol_exprt qvar1=fresh_univ_index("QA_equal_false"); + axioms.push_back + (string_constraintt(qvar, false_string.length(), not_exprt(eq), + equal_exprt(res[qvar1], false_string[qvar1]))); + + return res; +} + +// Gives the smallest integer with `nb` digits +int smallest_by_digit(int nb) +{ + int res=1; + for(int i=1; i + // i=sum+str[0]-'0' && all_numbers + // a4 : |res|=size&&res[0]='-' => i=-sum + // a5 : size>1 => |res|=size&&'0'<=res[0]<='9' => res[0]!='0' + // a6 : size>1 => |res|=size&&res[0]'-' => res[1]!='0' + // a7 : size==max_size => i>1000000000 + exprt sum=from_integer(0, type); + exprt all_numbers=true_exprt(); + chr=res[0]; + exprt first_value=typecast_exprt(minus_exprt(chr, zero_char), type); + + for(size_t j=1; j1) + { + equal_exprt r0_zero(res[zero], zero_char); + implies_exprt a5(and_exprt(premise, starts_with_digit), + not_exprt(r0_zero)); + axioms.push_back(a5); + + exprt one=refined_string_typet::index_of_int(1); + equal_exprt r1_zero(res[one], zero_char); + implies_exprt a6(and_exprt(premise, starts_with_minus), + not_exprt(r1_zero)); + axioms.push_back(a6); + } + + // we have to be careful when exceeding the maximal size of integers + // Warning this should be different depending on max size + if(size==max_size) + { + exprt smallest_with_10_digits= + from_integer(smallest_by_digit(max_size), type); + binary_relation_exprt big(i, ID_ge, smallest_with_10_digits); + implies_exprt a7(premise, big); + axioms.push_back(a7); + } + } + return res; +} + + +exprt string_constraint_generatort::int_of_hex_char +(exprt chr, unsigned char_width, typet char_type) const +{ + exprt zero_char=constant_char('0'); + exprt nine_char=constant_char('9'); + exprt a_char=constant_char('a'); + return if_exprt(binary_relation_exprt(chr, ID_gt, nine_char), + minus_exprt(chr, constant_char(0x61-10)), + minus_exprt(chr, zero_char)); +} + + +string_exprt string_constraint_generatort::add_axioms_from_int_hex +(const exprt &i) +{ + string_exprt res(get_char_type()); + typet type=i.type(); + assert(type.id()==ID_signedbv); + exprt sixteen=from_integer(16, type); + exprt minus_char=constant_char('-'); + exprt zero_char=constant_char('0'); + exprt nine_char=constant_char('9'); + exprt a_char=constant_char('a'); + exprt f_char=constant_char('f'); + + size_t max_size=8; + axioms.push_back(and_exprt(res.strictly_longer(0), + res.shorter(max_size))); + + for(size_t size=1; size<=max_size; size++) + { + exprt sum=from_integer(0, type); + exprt all_numbers=true_exprt(); + exprt chr=res[0]; + + for(size_t j=0; j1) + axioms.push_back + (implies_exprt(premise, not_exprt(equal_exprt(res[0], zero_char)))); + } + return res; +} + +string_exprt string_constraint_generatort::add_axioms_from_int_hex +(const function_application_exprt &f) +{ return add_axioms_from_int_hex(args(f, 1)[0]); } + +string_exprt string_constraint_generatort::add_axioms_from_char +(const function_application_exprt &f) +{ return add_axioms_from_char(args(f, 1)[0]); } + +string_exprt string_constraint_generatort::add_axioms_from_char(const exprt &c) +{ + string_exprt res(get_char_type()); + and_exprt lemma(equal_exprt(res[0], c), res.has_length(1)); + axioms.push_back(lemma); + return res; +} + + +string_exprt string_constraint_generatort::add_axioms_for_code_point +(const exprt &code_point) +{ + string_exprt res(get_char_type()); + typet type=code_point.type(); + assert(type.id()==ID_signedbv); + + // We add axioms: + // a1 : code_point<0x010000 => |res|=1 + // a2 : code_point>=0x010000 => |res|=2 + // a3 : code_point<0x010000 => res[0]=code_point + // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400 + // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400 + // For more explenations about this conversion, see: + // https://en.wikipedia.org/wiki/UTF-16 + + exprt hex010000=from_integer(0x010000, type); + exprt hexD800=from_integer(0xD800, type); + exprt hexDC00=from_integer(0xDC00, type); + exprt hex0400=from_integer(0x0400, type); + + binary_relation_exprt small(code_point, ID_lt, hex010000); + implies_exprt a1(small, res.has_length(1)); + axioms.push_back(a1); + + implies_exprt a2(not_exprt(small), res.has_length(2)); + axioms.push_back(a2); + + typecast_exprt code_point_as_char(code_point, get_char_type()); + implies_exprt a3(small, equal_exprt(res[0], code_point_as_char)); + axioms.push_back(a3); + + plus_exprt first_char + (hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400)); + implies_exprt a4 + (not_exprt(small), + equal_exprt(res[0], typecast_exprt(first_char, get_char_type()))); + axioms.push_back(a4); + + plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400)); + implies_exprt a5 + (not_exprt(small), + equal_exprt(res[1], typecast_exprt(second_char, get_char_type()))); + axioms.push_back(a5); + + return res; +} + + +string_exprt string_constraint_generatort::add_axioms_for_char_set +(const function_application_exprt &f) +{ + string_exprt res(get_char_type()); + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); + implies_exprt lemma(binary_relation_exprt(args(f, 3)[1], ID_lt, str.length()), + and_exprt(equal_exprt(res.content(), sarrnew), + res.same_length(str))); + axioms.push_back(lemma); + return res; +} + +string_exprt string_constraint_generatort::add_axioms_for_replace +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + exprt oldChar=args(f, 3)[1]; + exprt newChar=args(f, 3)[2]; + string_exprt res(get_char_type()); + + // We add axioms: + // a1 : |res| = |str| + // a2 : forall qvar, 0<=qvar<|res|, + // str[qvar]=oldChar => res[qvar]=newChar + // !str[qvar]=oldChar => res[qvar]=str[qvar] + + axioms.push_back(res.same_length(str)); + + symbol_exprt qvar=fresh_univ_index("QA_replace"); + implies_exprt case1(equal_exprt(str[qvar], oldChar), + equal_exprt(res[qvar], newChar)); + implies_exprt case2(not_exprt(equal_exprt(str[qvar], oldChar)), + equal_exprt(res[qvar], str[qvar])); + string_constraintt a1(qvar, res.length(), and_exprt(case1, case2)); + axioms.push_back(a1); + return res; +} + +string_exprt string_constraint_generatort::add_axioms_for_delete_char_at +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + exprt index_one=refined_string_typet::index_of_int(1); + return add_axioms_for_delete(str, args(f, 2)[1], + plus_exprt(args(f, 2)[1], index_one)); +} + +string_exprt string_constraint_generatort::add_axioms_for_delete +(const string_exprt &str, const exprt & start, const exprt & end) +{ + assert(start.type()==refined_string_typet::index_type()); + assert(end.type()==refined_string_typet::index_type()); + string_exprt str1=add_axioms_for_substring + (str, refined_string_typet::index_zero(), start); + string_exprt str2=add_axioms_for_substring(str, end, str.length()); + return add_axioms_for_concat(str1, str2); +} + +string_exprt string_constraint_generatort::add_axioms_for_delete +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); +} + + +string_exprt string_constraint_generatort::add_axioms_for_concat_int +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_int(args(f, 2)[1], 10); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_concat_long +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_int(args(f, 2)[1], 30); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_concat_bool +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_bool(args(f, 2)[1]); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_concat_char +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_char(args(f, 2)[1]); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_concat_double +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], 30); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_concat_float +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], 10); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_concat_code_point +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_for_code_point(args(f, 2)[1]); + return add_axioms_for_concat(s1, s2); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert +(const string_exprt & s1, const string_exprt & s2, const exprt & offset) +{ + assert(offset.type()==refined_string_typet::index_type()); + string_exprt pref=add_axioms_for_substring + (s1, refined_string_typet::index_zero(), offset); + string_exprt suf=add_axioms_for_substring(s1, offset, s1.length()); + string_exprt concat1=add_axioms_for_concat(pref, s2); + return add_axioms_for_concat(concat1, suf); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_int +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_int(args(f, 3)[2], 10); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_long +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_int(args(f, 3)[2], 30); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_bool +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_bool(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_char +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_char(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_double +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_float +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + + +exprt string_constraint_generatort::add_axioms_for_equals +(const function_application_exprt &f) +{ + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + symbol_exprt eq=fresh_boolean("equal"); + typecast_exprt tc_eq(eq, f.type()); + + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + + // We want to write: + // eq <=> (s1.length=s2.length && forall i s1.length=s2.length + // a2 : forall i s1[i]=s2[i] + // a3 : !eq => s1.length!=s2.length + // || (witness |s1|=|s2| + // a2 : forall qvar, 0<=qvar<|s1|, + // eq => char_equal_ignore_case(s1[qvar],s2[qvar]); + // a3 : !eq => |s1|!=s2 || (0 <=witness<|s1| && !char_equal_ignore_case) + + implies_exprt a1(eq, s1.same_length(s2)); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case"); + exprt constr2= + character_equals_ignore_case(s1[qvar], s2[qvar], char_a, char_A, char_Z); + string_constraintt a2(qvar, s1.length(), eq, constr2); + axioms.push_back(a2); + + symbol_exprt witness=fresh_exist_index("witness_unequal_ignore_case"); + exprt zero=refined_string_typet::index_zero(); + and_exprt bound_witness + (binary_relation_exprt(witness, ID_lt, s1.length()), + binary_relation_exprt(witness, ID_ge, zero)); + exprt witness_eq=character_equals_ignore_case + (s1[witness], s2[witness], char_a, char_A, char_Z); + not_exprt witness_diff(witness_eq); + implies_exprt a3(not_exprt(eq), + or_exprt(notequal_exprt(s1.length(), s2.length()), + and_exprt(bound_witness, witness_diff))); + axioms.push_back(a3); + + return tc_eq; +} + + +exprt string_constraint_generatort::add_axioms_for_length +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + return str.length(); +} + +exprt string_constraint_generatort::add_axioms_for_data +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + exprt tab_data=args(f, 3)[1]; + exprt data=args(f, 3)[2]; + symbol_exprt qvar=fresh_univ_index("QA_string_data"); + + // translating data[qvar] to the correct expression + // which is (signed int)byte_extract_little_endian + // (data, (2l*qvar) + POINTER_OFFSET(byte_extract_little_endian + // (tab.data, 0l, unsigned short int *)), unsigned short int) + mult_exprt qvar2(from_integer(2, signedbv_typet(64)), + typecast_exprt(qvar, signedbv_typet(64))); + byte_extract_exprt extract( + ID_byte_extract_little_endian, + tab_data, + from_integer(0, signedbv_typet(64)), + pointer_typet(unsignedbv_typet(16))); + plus_exprt arg2(qvar2, pointer_offset(extract)); + + byte_extract_exprt extract2 + (ID_byte_extract_little_endian, data, arg2, unsignedbv_typet(16)); + exprt char_in_tab= typecast_exprt(extract2, get_char_type()); + + string_constraintt eq + (qvar, str.length(), equal_exprt(str[qvar], char_in_tab)); + axioms.push_back(eq); + + exprt void_expr; + void_expr.type()=void_typet(); + return void_expr; +} + +string_exprt string_constraint_generatort::add_axioms_from_char_array( + const exprt & length, + const exprt & data, + const exprt & offset, + const exprt & count) +{ + string_exprt str(get_char_type()); + symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array"); + exprt char_in_tab=data; + assert(char_in_tab.id()==ID_index); + char_in_tab.op1()=plus_exprt(qvar, offset); + + string_constraintt eq(qvar, count, equal_exprt(str[qvar], char_in_tab)); + axioms.push_back(eq); + axioms.push_back(equal_exprt(str.length(), count)); + + return str; +} + +string_exprt string_constraint_generatort::add_axioms_from_char_array +(const function_application_exprt &f) +{ + exprt offset; + exprt count; + if(f.arguments().size()==4) + { + offset=f.arguments()[2]; + count=f.arguments()[3]; + } + else + { + assert(f.arguments().size()==2); + count=f.arguments()[0]; + offset=from_integer(0, signedbv_typet(32)); + } + exprt tab_length=f.arguments()[0]; + exprt data=f.arguments()[1]; + return add_axioms_from_char_array(tab_length, data, offset, count); +} + +string_exprt string_constraint_generatort::add_axioms_for_insert_char_array +(const function_application_exprt &f) +{ + exprt offset; + exprt count; + if(f.arguments().size()==6) + { + offset=f.arguments()[4]; + count=f.arguments()[5]; + } + else + { + assert(f.arguments().size()==4); + count=f.arguments()[2]; + offset=from_integer(0, signedbv_typet(32)); + } + + string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + exprt length=f.arguments()[2]; + exprt data=f.arguments()[3]; + string_exprt arr=add_axioms_from_char_array(length, data, offset, count); + return add_axioms_for_insert(str, arr, f.arguments()[1]); +} + + + +exprt is_positive(const exprt & x) +{ + return binary_relation_exprt + (x, ID_ge, refined_string_typet::index_of_int(0)); +} + + +exprt string_constraint_generatort::add_axioms_for_is_prefix + (const string_exprt &prefix, const string_exprt &str, const exprt & offset) +{ + symbol_exprt isprefix=fresh_boolean("isprefix"); + + // We add axioms: + // a1 : isprefix => |str| >= |prefix|+offset + // a2 : forall 0<=qvar + // s0[witness+offset]=s2[witness] + // a3 : !isprefix => |str| < |prefix|+offset + // || (|str| >= |prefix|+offset && 0<=witness<|prefix| + // && str[witness+ofsset]!=prefix[witness]) + implies_exprt a1(isprefix, str.longer(plus_exprt(prefix.length(), offset))); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_isprefix"); + string_constraintt a2 + (qvar, prefix.length(), isprefix, + equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + axioms.push_back(a2); + + symbol_exprt witness=fresh_exist_index("witness_not_isprefix"); + and_exprt witness_diff + (is_positive(witness), + and_exprt(prefix.strictly_longer(witness), + notequal_exprt(str[plus_exprt(witness, offset)], + prefix[witness]))); + or_exprt s0_notpref_s1 + (not_exprt(str.longer(plus_exprt(prefix.length(), offset))), + and_exprt(str.longer(plus_exprt(prefix.length(), offset)), witness_diff)); + + implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); + axioms.push_back(a3); + return isprefix; +} + +exprt string_constraint_generatort::add_axioms_for_is_prefix +(const function_application_exprt &f, bool swap_arguments) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); + string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + exprt offset; + if(args.size()==2) + offset=refined_string_typet::index_zero(); + else if(args.size()==3) + offset=args[2]; + return typecast_exprt(add_axioms_for_is_prefix(s0, s1, offset), f.type()); +} + +exprt string_constraint_generatort::add_axioms_for_is_empty +(const function_application_exprt &f) +{ + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + symbol_exprt is_empty=fresh_boolean("is_empty"); + string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + axioms.push_back(implies_exprt(is_empty, s0.has_length(0))); + axioms.push_back(implies_exprt(s0.has_length(0), is_empty)); + return typecast_exprt(is_empty, f.type()); +} + +exprt string_constraint_generatort::add_axioms_for_is_suffix +(const function_application_exprt &f, bool swap_arguments) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==2); // bad args to string issuffix? + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + + symbol_exprt issuffix=fresh_boolean("issuffix"); + typecast_exprt tc_issuffix(issuffix, f.type()); + string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); + string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + + // We add axioms: + // a1 : issufix => s0.length >= s1.length + // a2 : forall witness s1[witness]=s0[witness + s0.length-s1.length] + // a3 : !issuffix => + // s1.length > s0.length + // || (s1.length > witness>=0 + // && s1[witness]!=s0[witness + s0.length-s1.length] + + implies_exprt a1(issuffix, s1.longer(s0)); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_suffix"); + exprt qvar_shifted=plus_exprt(qvar, minus_exprt(s1.length(), s0.length())); + string_constraintt a2(qvar, s0.length(), issuffix, + equal_exprt(s0[qvar], s1[qvar_shifted])); + axioms.push_back(a2); + + symbol_exprt witness=fresh_exist_index("witness_not_suffix"); + exprt shifted=plus_exprt(witness, minus_exprt(s1.length(), s0.length())); + or_exprt constr3(s0.strictly_longer(s1), + and_exprt(notequal_exprt(s0[witness], s1[shifted]), + and_exprt(s0.strictly_longer(witness), + is_positive(witness)))); + implies_exprt a3(not_exprt(issuffix), constr3); + + axioms.push_back(a3); + + return tc_issuffix; +} + + +exprt string_constraint_generatort::add_axioms_for_contains +(const function_application_exprt &f) +{ + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + symbol_exprt contains=fresh_boolean("contains"); + typecast_exprt tc_contains(contains, f.type()); + string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); + + // We add axioms: + // a1 : contains => s0.length >= s1.length + // a2 : 0 <= startpos <= s0.length-s1.length + // a3 : forall qvar s1[qvar]=s0[startpos + qvar] + // a4 : !contains => s1.length > s0.length + // || (forall startpos <= s0.length-s1.length. + // exists witness= |s1| ) + // ==> exists witness<|s1|. s1[witness]!=s0[startpos+witness] + string_not_contains_constraintt a4 + (refined_string_typet::index_zero(), + plus_exprt(refined_string_typet::index_of_int(1), length_diff), + and_exprt(not_exprt(contains), s0.longer(s1)), + refined_string_typet::index_zero(), s1.length(), s0, s1); + axioms.push_back(a4); + + return tc_contains; +} + + +exprt string_constraint_generatort::add_axioms_for_hash_code +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + typet return_type=f.type(); + + // initialisation of the missing pool variable + std::map::iterator it; + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + if(hash.find(it->second)==hash.end()) + hash[it->second]=string_exprt::fresh_symbol("hash", return_type); + + // for each string s. either: + // c1: hash(str)=hash(s) + // c2: |str|!=|s| + // c3: (|str|==|s| && exists i<|s|. s[i]!=str[i]) + + // WARNING: the specification may be incomplete + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + { + symbol_exprt i=fresh_exist_index("index_hash"); + equal_exprt c1(hash[it->second], hash[str]); + not_exprt c2(equal_exprt(it->second.length(), str.length())); + and_exprt c3(equal_exprt(it->second.length(), str.length()), + and_exprt(not_exprt(equal_exprt(str[i], it->second[i])), + and_exprt(str.strictly_longer(i), is_positive(i)))); + axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); + } + return hash[str]; +} + +exprt string_constraint_generatort::add_axioms_for_index_of +(const string_exprt &str, const exprt & c, const exprt & from_index) +{ + symbol_exprt index=fresh_exist_index("index_of"); + symbol_exprt contains=fresh_boolean("contains_in_index_of"); + + // We add axioms: + // a1 : -1 <= index<|str| + // a2 : !contains <=> index=-1 + // a3 : contains => from_index<=index&&str[index]=c + // a4 : forall n, from_index<=n str[n]!=c + // a5 : forall m, from_index<=n<|str|. !contains => str[m]!=c + + exprt minus1=refined_string_typet::index_of_int(-1); + and_exprt a1(binary_relation_exprt(index, ID_ge, minus1), + binary_relation_exprt(index, ID_lt, str.length())); + axioms.push_back(a1); + + equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); + axioms.push_back(a2); + + implies_exprt a3 + (contains, and_exprt(binary_relation_exprt(from_index, ID_le, index), + equal_exprt(str[index], c))); + axioms.push_back(a3); + + symbol_exprt n=fresh_univ_index("QA_index_of"); + string_constraintt a4 + (n, from_index, index, contains, not_exprt(equal_exprt(str[n], c))); + axioms.push_back(a4); + + symbol_exprt m=fresh_univ_index("QA_index_of"); + string_constraintt a5(m, from_index, str.length(), not_exprt(contains), + not_exprt(equal_exprt(str[m], c))); + axioms.push_back(a5); + + return index; +} + +exprt string_constraint_generatort::add_axioms_for_index_of_string( + const string_exprt &str, + const string_exprt & substring, + const exprt & from_index) +{ + symbol_exprt offset=fresh_exist_index("index_of"); + symbol_exprt contains=fresh_boolean("contains_substring"); + + // We add axioms: + // a1 : contains => |substring|>=offset>=from_index + // a2 : !contains => offset=-1 + // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + + implies_exprt a1 + (contains, and_exprt(str.longer(plus_exprt(substring.length(), offset)), + binary_relation_exprt(offset, ID_ge, from_index))); + axioms.push_back(a1); + + implies_exprt a2(not_exprt(contains), + equal_exprt(offset, refined_string_typet::index_of_int(-1))); + axioms.push_back(a2); + + symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + string_constraintt a3(qvar, substring.length(), contains, + equal_exprt(str[plus_exprt(qvar, offset)], + substring[qvar])); + axioms.push_back(a3); + + return offset; +} + +exprt string_constraint_generatort::add_axioms_for_last_index_of_string( + const string_exprt &str, + const string_exprt & substring, + const exprt & from_index) +{ + symbol_exprt offset=fresh_exist_index("index_of"); + symbol_exprt contains=fresh_boolean("contains_substring"); + + // We add axioms: + // a1 : contains => |substring| >= length && offset <= from_index + // a2 : !contains => offset=-1 + // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + + implies_exprt a1(contains, and_exprt + (str.longer(plus_exprt(substring.length(), offset)), + binary_relation_exprt(offset, ID_le, from_index))); + axioms.push_back(a1); + + implies_exprt a2(not_exprt(contains), + equal_exprt(offset, refined_string_typet::index_of_int(-1))); + axioms.push_back(a2); + + symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + equal_exprt constr3(str[plus_exprt(qvar, offset)], substring[qvar]); + string_constraintt a3(qvar, substring.length(), contains, constr3); + axioms.push_back(a3); + + return offset; +} + + +exprt string_constraint_generatort::add_axioms_for_index_of +(const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(f.type()==refined_string_typet::index_type()); + string_exprt str=add_axioms_for_string_expr(args[0]); + exprt c=args[1]; + exprt from_index; + + if(args.size()==2) + from_index=refined_string_typet::index_zero(); + else if(args.size()==3) + from_index=args[2]; + else + assert(false); + + if(refined_string_typet::is_java_string_pointer_type(c.type())) + { + string_exprt sub=add_axioms_for_string_expr(c); + return add_axioms_for_index_of_string(str, sub, from_index); + } + else + return add_axioms_for_index_of(str, typecast_exprt(c, get_char_type()), + from_index); +} + +exprt string_constraint_generatort::add_axioms_for_last_index_of +(const string_exprt &str, const exprt & c, const exprt & from_index) +{ + symbol_exprt index=fresh_exist_index("last_index_of"); + symbol_exprt contains=fresh_boolean("contains_in_last_index_of"); + + // We add axioms: + // a1 : -1 <= i <= from_index + // a2 : (i=-1 <=> !contains) + // a3 : (contains => i <= from_index && s[i]=c) + // a4 : forall n. i+1 <= n < from_index +1 && contains => s[n]!=c + // a5 : forall m. 0 <= m < from_index +1 && !contains => s[m]!=c + + exprt index1=refined_string_typet::index_of_int(1); + exprt minus1=refined_string_typet::index_of_int(-1); + exprt from_index_plus_one=plus_exprt(from_index, index1); + and_exprt a1(binary_relation_exprt(index, ID_ge, minus1), + binary_relation_exprt(index, ID_lt, from_index_plus_one)); + axioms.push_back(a1); + + equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); + axioms.push_back(a2); + + implies_exprt a3 + (contains, and_exprt(binary_relation_exprt(from_index, ID_ge, index), + equal_exprt(str[index], c))); + axioms.push_back(a3); + + symbol_exprt n=fresh_univ_index("QA_last_index_of"); + string_constraintt a4(n, plus_exprt(index, index1), + from_index_plus_one, contains, + not_exprt(equal_exprt(str[n], c))); + axioms.push_back(a4); + + symbol_exprt m=fresh_univ_index("QA_last_index_of"); + string_constraintt a5(m, from_index_plus_one, not_exprt(contains), + not_exprt(equal_exprt(str[m], c))); + axioms.push_back(a5); + + return index; +} + +exprt string_constraint_generatort::add_axioms_for_last_index_of +(const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(f.type()==refined_string_typet::index_type()); + string_exprt str=add_axioms_for_string_expr(args[0]); + exprt c=args[1]; + exprt from_index; + + if(args.size()==2) + from_index=minus_exprt(str.length(), refined_string_typet::index_of_int(1)); + else if(args.size()==3) + from_index=args[2]; + else + assert(false); + + if(refined_string_typet::is_java_string_pointer_type(c.type())) + { + string_exprt sub=add_axioms_for_string_expr(c); + return add_axioms_for_last_index_of_string(str, sub, from_index); + } + else + return add_axioms_for_last_index_of + (str, typecast_exprt(c, get_char_type()), from_index); +} + +exprt string_constraint_generatort::add_axioms_for_char_literal +(const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==1); // there should be exactly 1 argument to char literal + + const exprt &arg=args[0]; + // for C programs argument to char literal should be one string constant + // of size 1. + if(arg.operands().size()==1 && + arg.op0().operands().size()==1 && + arg.op0().op0().operands().size()==2 && + arg.op0().op0().op0().id()==ID_string_constant) + { + const string_constantt s=to_string_constant(arg.op0().op0().op0()); + irep_idt sval=s.get_value(); + assert(sval.size()==1); + return from_integer(unsigned(sval[0]), get_char_type()); + } + else + { + throw "convert_char_literal unimplemented"; + } +} + + +exprt string_constraint_generatort::add_axioms_for_char_at +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + symbol_exprt char_sym=string_exprt::fresh_symbol("char", get_char_type()); + axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); + return char_sym; +} + +exprt string_constraint_generatort::add_axioms_for_parse_int +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + typet type=f.type(); + symbol_exprt i=string_exprt::fresh_symbol("parsed_int", type); + + exprt zero_char=constant_char('0'); + exprt minus_char=constant_char('-'); + exprt plus_char=constant_char('+'); + assert(type.id()==ID_signedbv); + exprt ten=from_integer(10, type); + + exprt chr=str[0]; + exprt starts_with_minus=equal_exprt(chr, minus_char); + exprt starts_with_plus=equal_exprt(chr, plus_char); + exprt starts_with_digit=binary_relation_exprt(chr, ID_ge, zero_char); + + for(unsigned size=1; size<=10; size++) + { + exprt sum=from_integer(0, type); + exprt first_value=typecast_exprt(minus_exprt(chr, zero_char), type); + + for(unsigned j=1; j i=sum+first_value + // a2 : starts_with_plus => i=sum + // a3 : starts_with_minus => i=-sum + + equal_exprt premise=str.has_length(size); + implies_exprt a1(and_exprt(premise, starts_with_digit), + equal_exprt(i, plus_exprt(sum, first_value))); + axioms.push_back(a1); + + implies_exprt a2(and_exprt(premise, starts_with_plus), equal_exprt(i, sum)); + axioms.push_back(a2); + + implies_exprt a3(and_exprt(premise, starts_with_minus), + equal_exprt(i, unary_minus_exprt(sum))); + axioms.push_back(a3); + } + return i; +} + + +exprt string_constraint_generatort::is_high_surrogate(const exprt & chr) const +{ + return and_exprt + (binary_relation_exprt(chr, ID_ge, constant_char(0xD800)), + binary_relation_exprt(chr, ID_le, constant_char(0xDBFF))); +} + +exprt string_constraint_generatort::is_low_surrogate(const exprt & chr) const +{ + return and_exprt + (binary_relation_exprt(chr, ID_ge, constant_char(0xDC00)), + binary_relation_exprt(chr, ID_le, constant_char(0xDFFF))); +} + +// pair_value=0x10000+(((char1%0x0800)*0x0400)+char2%0x0400) +// char1 and char2 should be of type return_type +exprt pair_value(exprt char1, exprt char2, typet return_type) +{ + exprt hex010000=from_integer(0x010000, return_type); + exprt hex0800=from_integer(0x0800, return_type); + exprt hex0400=from_integer(0x0400, return_type); + mult_exprt m1(mod_exprt(char1, hex0800), hex0400); + mod_exprt m2(char2, hex0400); + plus_exprt pair_value(hex010000, plus_exprt(m1, m2)); + return pair_value; +} + +exprt string_constraint_generatort::add_axioms_for_code_point_at +(const function_application_exprt &f) +{ + typet return_type=f.type(); + assert(return_type.id()==ID_signedbv); + string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + exprt pos=args(f, 2)[1]; + + symbol_exprt result=string_exprt::fresh_symbol("char", return_type); + exprt index1=refined_string_typet::index_of_int(1); + exprt char1=str[pos]; + exprt char2=str[plus_exprt(pos, index1)]; + exprt char1_as_int=typecast_exprt(char1, return_type); + exprt char2_as_int=typecast_exprt(char2, return_type); + exprt pair=pair_value(char1_as_int, char2_as_int, return_type); + exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); + + axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); + axioms.push_back(implies_exprt(not_exprt(return_pair), + equal_exprt(result, char1_as_int))); + return result; +} + +exprt string_constraint_generatort::add_axioms_for_code_point_before +(const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==2); + typet return_type=f.type(); + assert(return_type.id()==ID_signedbv); + symbol_exprt result=string_exprt::fresh_symbol("char", return_type); + string_exprt str=add_axioms_for_string_expr(args[0]); + + exprt char1=str[minus_exprt(args[1], refined_string_typet::index_of_int(2))]; + exprt char2=str[minus_exprt(args[1], refined_string_typet::index_of_int(1))]; + exprt char1_as_int=typecast_exprt(char1, return_type); + exprt char2_as_int=typecast_exprt(char2, return_type); + + exprt pair=pair_value(char1_as_int, char2_as_int, return_type); + exprt return_pair= + and_exprt(is_high_surrogate(char1), is_low_surrogate(char2)); + + axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); + axioms.push_back(implies_exprt(not_exprt(return_pair), + equal_exprt(result, char2_as_int))); + return result; +} + +exprt string_constraint_generatort::add_axioms_for_code_point_count +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + exprt begin=args(f, 3)[1]; + exprt end=args(f, 3)[2]; + typet return_type=f.type(); + symbol_exprt result= + string_exprt::fresh_symbol("code_point_count", return_type); + + exprt length=minus_exprt(end, begin); + exprt minimum=div_exprt(length, refined_string_typet::index_of_int(2)); + axioms.push_back(binary_relation_exprt(result, ID_le, length)); + axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); + + return result; +} + +exprt string_constraint_generatort::add_axioms_for_offset_by_code_point +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + exprt index=args(f, 3)[1]; + exprt offset=args(f, 3)[2]; + typet return_type=f.type(); + symbol_exprt result= + string_exprt::fresh_symbol("offset_by_code_point", return_type); + + exprt minimum=plus_exprt(index, offset); + exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); + axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); + + return result; +} + + +exprt string_constraint_generatort::add_axioms_for_to_char_array +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + return str.content(); +} + + +exprt string_constraint_generatort::add_axioms_for_compare_to +(const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + typet return_type=f.type(); + symbol_exprt res=string_exprt::fresh_symbol("compare_to", return_type); + + // In the lexicographic comparison, x is the first point where the two + // strings differ. + // We add axioms: + // a1 : res==0 => |s1|=|s2| + // a2 : forall i<|s1|. s1[i]==s2[i] + // a3 : exists x. + // res!=0 ==> x> 0 && + // ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| && x<|s2|) + // && res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| && x=|s1|) || (|s1| > |s2| && x=|s2|) && res=|s1|-|s2|) + // a4 : forall i s1[i]=s2[i] + + assert(return_type.id()==ID_signedbv); + + equal_exprt res_null=equal_exprt(res, from_integer(0, return_type)); + implies_exprt a1(res_null, s1.same_length(s2)); + axioms.push_back(a1); + + symbol_exprt i=fresh_univ_index("QA_compare_to"); + string_constraintt a2(i, s1.length(), res_null, equal_exprt(s1[i], s2[i])); + axioms.push_back(a2); + + symbol_exprt x=fresh_exist_index("index_compare_to"); + equal_exprt ret_char_diff + (res, typecast_exprt(minus_exprt(s1[x], s2[x]), return_type)); + equal_exprt ret_length_diff + (res, typecast_exprt(minus_exprt(s1.length(), s2.length()), return_type)); + or_exprt guard1(and_exprt(s1.shorter(s2), s1.strictly_longer(x)), + and_exprt(s1.longer(s2), s2.strictly_longer(x))); + and_exprt cond1(ret_char_diff, guard1); + or_exprt guard2(and_exprt(s2.strictly_longer(s1), s1.has_length(x)), + and_exprt(s1.strictly_longer(s2), s2.has_length(x))); + and_exprt cond2(ret_length_diff, guard2); + + implies_exprt a3 + (not_exprt(res_null), + and_exprt(binary_relation_exprt(x, ID_ge, from_integer(0, return_type)), + or_exprt(cond1, cond2))); + axioms.push_back(a3); + + string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + axioms.push_back(a4); + + return res; +} + +symbol_exprt string_constraint_generatort::add_axioms_for_intern +(const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + typet return_type=f.type(); + + // initialisation of the missing pool variable + std::map::iterator it; + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + if(pool.find(it->second)==pool.end()) + pool[it->second]=string_exprt::fresh_symbol("pool", return_type); + + // intern(str)=s_0 || s_1 || ... + // for each string s. + // intern(str)=intern(s) || |str|!=|s| + // || (|str|==|s| && exists i<|s|. s[i]!=str[i]) + + // symbol_exprt intern=string_exprt::fresh_symbol("intern",return_type); + + exprt disj=false_exprt(); + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + disj=or_exprt + (disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + + axioms.push_back(disj); + + + // WARNING: the specification may be incomplete or incorrect + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + if(it->second!=str) + { + symbol_exprt i=fresh_exist_index("index_intern"); + axioms.push_back + (or_exprt + (equal_exprt(pool[it->second], pool[str]), + or_exprt + (not_exprt(str.same_length(it->second)), + and_exprt + (str.same_length(it->second), + and_exprt + (not_exprt(equal_exprt(str[i], it->second[i])), + and_exprt(str.strictly_longer(i), is_positive(i))))))); + } + + + return pool[str]; +} + + +void string_constraint_generatort::set_string_symbol_equal_to_expr +(const symbol_exprt & sym, const exprt & str) +{ + if(str.id()==ID_symbol) + assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); + else + assign_to_symbol(sym, add_axioms_for_string_expr(str)); +} diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h new file mode 100644 index 00000000000..f6e5e21378e --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator.h @@ -0,0 +1,372 @@ +/** -*- C++ -*- *****************************************************\ + +Module: Generates string constraints to link results from string functions + with their arguments. This is inspired by the PASS paper at HVC'13 + which gives examples of constraints for several functions. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H + +#include + +class string_constraint_generatort +{ +public: + // This module keeps a list of axioms. It has methods which generate + // string constraints for different string funcitons and add them + // to the axiom list. + + string_constraint_generatort(): mode(ID_unknown) { } + + void set_mode(irep_idt _mode) + { + // only C and java modes supported + assert((_mode==ID_java) || (_mode==ID_C)); + mode=_mode; + } + + inline irep_idt &get_mode() { return mode; } + + unsignedbv_typet get_char_type() const; + inline signedbv_typet get_index_type() const + { return refined_string_typet::index_type(); } + + // Axioms are of three kinds: universally quantified string constraint, + // not contains string constraints and simple formulas. + std::vector axioms; + + // Boolean symbols for the results of some string functions + std::vector boolean_symbols; + + // Symbols used in existential quantifications + std::vector index_symbols; + + // Used to store information about witnesses for not_contains constraints + std::map witness; + + inline exprt get_witness_of + (const string_not_contains_constraintt & c, const exprt & univ_val) const + { return index_exprt(witness.at(c), univ_val); } + + // Generates fresh indexes + symbol_exprt fresh_exist_index(const irep_idt &prefix); + symbol_exprt fresh_univ_index(const irep_idt &prefix); + + // Generates a fresh Boolean variable + symbol_exprt fresh_boolean(const irep_idt &prefix); + + // We maintain a map from symbols to strings. + std::map symbol_to_string; + + // If a symbol is not yet present we will create a new one with + // the correct type depending on whether the mode is java or c + string_exprt find_or_add_string_of_symbol(const symbol_exprt & sym); + + inline void assign_to_symbol + (const symbol_exprt & sym, const string_exprt & expr) + { symbol_to_string[sym.get_identifier()]= expr; } + + + // Add to the list of axioms, lemmas which should hold for the string to be + // equal to the given expression. + // string_exprt create_string_equal_to_expr(const exprt & unrefined_string); + + string_exprt add_axioms_for_string_expr(const exprt & expr); + void set_string_symbol_equal_to_expr + (const symbol_exprt & sym, const exprt & str); + + // The following functions convert different string functions + // and add the corresponding lemmas to a list of properties to be checked + exprt add_axioms_for_function_application + (const function_application_exprt &expr); + + +private: + // The following functions add axioms for the returned value + // to be equal to the result of the function given as argument. + // They are not accessed directly from other classes: they call + // `add_axioms_for_function_application` which determines which of + // these methodes should be called. + + // Add axioms corresponding to the String.charAt java function + exprt add_axioms_for_char_at(const function_application_exprt &f); + + // Add axioms corresponding to the String.codePointAt java function + exprt add_axioms_for_code_point_at(const function_application_exprt &f); + + // Add axioms corresponding to the String.codePointBefore java function + exprt add_axioms_for_code_point_before(const function_application_exprt &f); + + // Add axioms corresponding to the String.contains java function + exprt add_axioms_for_contains(const function_application_exprt &f); + + // Add axioms corresponding to the String.equals java function + exprt add_axioms_for_equals(const function_application_exprt &f); + + // Add axioms corresponding to the String.equalsIgnoreCase java function + exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f); + + // Add axioms for accessing the data field of java strings + exprt add_axioms_for_data(const function_application_exprt &f); + + // Add axioms corresponding to the String.hashCode java function + // The specification is partial: the actual value is not actualy computed + // but we ensure that hash codes of equal strings are equal. + exprt add_axioms_for_hash_code(const function_application_exprt &f); + + // Add axioms corresponding to the String.isEmpty java function + exprt add_axioms_for_is_empty(const function_application_exprt &f); + + // Add axioms corresponding to the String.isPrefix java function + exprt add_axioms_for_is_prefix + (const string_exprt &prefix, const string_exprt &str, const exprt & offset); + exprt add_axioms_for_is_prefix + (const function_application_exprt &f, bool swap_arguments=false); + + // Add axioms corresponding to the String.isSuffix java function + exprt add_axioms_for_is_suffix + (const function_application_exprt &f, bool swap_arguments=false); + + // Add axioms corresponding to the String.length java function + exprt add_axioms_for_length(const function_application_exprt &f); + + // Add axioms corresponding to the empty string "" + string_exprt add_axioms_for_empty_string(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.setCharAt java function + string_exprt add_axioms_for_char_set(const function_application_exprt &expr); + + // Add axioms for making a copy of a string + string_exprt add_axioms_for_copy(const function_application_exprt &f); + + // Add axioms corresponding to the String.concat(String) java function + string_exprt add_axioms_for_concat + (const string_exprt & s1, const string_exprt & s2); + string_exprt add_axioms_for_concat(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.append(I) java function + string_exprt add_axioms_for_concat_int(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.append(J) java function + string_exprt add_axioms_for_concat_long(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.append(Z) java function + string_exprt add_axioms_for_concat_bool(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.append(C) java function + string_exprt add_axioms_for_concat_char(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.append(D) java function + string_exprt add_axioms_for_concat_double + (const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.append(F) java function + string_exprt add_axioms_for_concat_float(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.appendCodePoint(F) function + string_exprt add_axioms_for_concat_code_point + (const function_application_exprt &f); + + // Add axioms from a string constant + string_exprt add_axioms_for_constant + (irep_idt sval, int char_width, unsignedbv_typet char_type); + string_exprt add_axioms_for_constant(irep_idt sval); + + // Add axioms corresponding to the StringBuilder.delete java function + string_exprt add_axioms_for_delete + (const string_exprt &str, const exprt & start, const exprt & end); + string_exprt add_axioms_for_delete(const function_application_exprt &expr); + + // Add axioms corresponding to the StringBuilder.deleteCharAt java function + string_exprt add_axioms_for_delete_char_at + (const function_application_exprt &expr); + + // Add axioms corresponding to the StringBuilder.insert java functions + string_exprt add_axioms_for_insert + (const string_exprt & s1, const string_exprt & s2, const exprt &offset); + string_exprt add_axioms_for_insert(const function_application_exprt &f); + string_exprt add_axioms_for_insert_int(const function_application_exprt &f); + string_exprt add_axioms_for_insert_long(const function_application_exprt &f); + string_exprt add_axioms_for_insert_bool(const function_application_exprt &f); + string_exprt add_axioms_for_insert_char(const function_application_exprt &f); + string_exprt add_axioms_for_insert_double + (const function_application_exprt &f); + string_exprt add_axioms_for_insert_float(const function_application_exprt &f); + string_exprt add_axioms_for_insert_char_array + (const function_application_exprt &f); + + // Add axioms for a string literal (calls `add_axioms_for_constant` with the + // right parameters) + string_exprt add_axioms_from_literal(const function_application_exprt &f); + + // Add axioms corresponding to the String.valueOf(I) java function + string_exprt add_axioms_from_int(const function_application_exprt &f); + string_exprt add_axioms_from_int(const exprt &i, size_t max_size); + + // Add axioms corresponding to the Integer.toHexString(I) java function + string_exprt add_axioms_from_int_hex(const exprt &i); + string_exprt add_axioms_from_int_hex(const function_application_exprt &f); + + // Add axioms corresponding to the String.valueOf(J) java function + string_exprt add_axioms_from_long(const function_application_exprt &f); + string_exprt add_axioms_from_long(const exprt &i, size_t max_size); + + // Add axioms corresponding to the String.valueOf(Z) java function + string_exprt add_axioms_from_bool(const function_application_exprt &f); + string_exprt add_axioms_from_bool(const exprt &i); + + // Add axioms corresponding to the String.valueOf(C) java function + string_exprt add_axioms_from_char(const function_application_exprt &f); + string_exprt add_axioms_from_char(const exprt &i); + + // Add axioms corresponding to the StringBuilder.insert:(I[CII) java function + string_exprt add_axioms_from_char_array(const function_application_exprt &f); + string_exprt add_axioms_from_char_array( + const exprt & length, + const exprt & data, + const exprt & offset, + const exprt & count); + + // Add axioms corresponding to the String.indexOf:(CI) java function + exprt add_axioms_for_index_of( + const string_exprt &str, + const exprt & c, + const exprt & from_index); + + // Add axioms corresponding to the String.indexOf:(String;I) java function + // Warning: the specifications are only partial + exprt add_axioms_for_index_of_string( + const string_exprt &str, + const string_exprt & substring, + const exprt & from_index); + + // Add axioms corresponding to the String.indexOf java functions + // Warning: the specifications are only partial for some of them + exprt add_axioms_for_index_of(const function_application_exprt &f); + + // Add axioms corresponding to the String.lastIndexOf:(String;I) java function + // Warning: the specifications are only partial + exprt add_axioms_for_last_index_of_string( + const string_exprt &str, + const string_exprt & substring, + const exprt & from_index); + + // Add axioms corresponding to the String.lastIndexOf:(CI) java function + exprt add_axioms_for_last_index_of( + const string_exprt &str, + const exprt & c, + const exprt & from_index); + + // Add axioms corresponding to the String.lastIndexOf java functions + // Warning: the specifications are only partial for some of them + exprt add_axioms_for_last_index_of(const function_application_exprt &f); + + // Add axioms corresponding to the String.valueOf(F) java function + // Warning: the specifications of these functions is only partial + string_exprt add_axioms_from_float(const function_application_exprt &f); + string_exprt add_axioms_from_float( + const exprt &f, + bool double_precision=false); + + // Add axioms corresponding to the String.valueOf(D) java function + // Warning: the specifications is only partial + string_exprt add_axioms_from_double(const function_application_exprt &f); + + // Add axioms corresponding to the String.replace java function + string_exprt add_axioms_for_replace(const function_application_exprt &f); + + // Add axioms corresponding to the StringBuilder.setLength java function + string_exprt add_axioms_for_set_length(const function_application_exprt &f); + + // Add axioms corresponding to the String.substring java function + // Warning: the specification may not be correct for the + // case where the string is not long enough + string_exprt add_axioms_for_substring + (const string_exprt & str, const exprt & start, const exprt & end); + string_exprt add_axioms_for_substring(const function_application_exprt &expr); + + // Add axioms corresponding to the String.toLowerCase java function + string_exprt add_axioms_for_to_lower_case + (const function_application_exprt &expr); + + // Add axioms corresponding to the String.toUpperCase java function + string_exprt add_axioms_for_to_upper_case + (const function_application_exprt &expr); + + // Add axioms corresponding to the String.trim java function + string_exprt add_axioms_for_trim(const function_application_exprt &expr); + + // Add axioms corresponding to the String.valueOf([CII) function + // Warning: not working correctly at the moment + string_exprt add_axioms_for_value_of(const function_application_exprt &f); + + // Add axioms for converting a integer representing a code point to a utf-16 + // string + string_exprt add_axioms_for_code_point(const exprt &code_point); + + // Add axioms corresponding to the String.valueOf([C) java function + string_exprt add_axioms_for_java_char_array(const exprt & char_array); + + // Add axioms for an if expression that should return a string + string_exprt add_axioms_for_if(const if_exprt &expr); + + // Add axioms for a character litteral (of the form 'c') to a string + exprt add_axioms_for_char_literal(const function_application_exprt &f); + + // Add axioms corresponding the String.codePointCount java function + // Warning: this function is underspecified, we do not compute the exact value + // but over approximate it. + exprt add_axioms_for_code_point_count(const function_application_exprt &f); + + // Add axioms corresponding the String.offsetByCodePointCount java function + // Warning: this function is underspecified, it should return the index within + // this String that is offset from the given first argument by second argument + // code points and we approximate this by saying the result is + // between index + offset and index + 2 * offset + exprt add_axioms_for_offset_by_code_point + (const function_application_exprt &f); + + // Add axioms corresponding to the Integer.parseInt java function + exprt add_axioms_for_parse_int(const function_application_exprt &f); + + // Add axioms corresponding to the String.toCharArray java function + exprt add_axioms_for_to_char_array(const function_application_exprt &f); + + // Add axioms corresponding to the String.compareTo java function + exprt add_axioms_for_compare_to(const function_application_exprt &f); + + // Add axioms corresponding to the String.intern java function + // Warning: this does not work at the moment because of the way we treat + // string pointers + symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + + // Tells which language is used. C and Java are supported + irep_idt mode; + + // assert that the number of argument is equal to nb and extract them + inline static function_application_exprt::argumentst args + (const function_application_exprt &expr, size_t nb) + { + function_application_exprt::argumentst args = expr.arguments(); + assert(args.size()==nb); + return args; + } + + constant_exprt constant_char(int i) const; + size_t get_char_width() const; + exprt int_of_hex_char(exprt chr, unsigned char_width, typet char_type) const; + exprt is_high_surrogate(const exprt & chr) const; + exprt is_low_surrogate(const exprt & chr) const; + + // Pool used for the intern method + std::map pool; + + // Used to determine whether hashcode should be equal + std::map hash; +}; + +#endif diff --git a/src/solvers/refinement/string_expr.cpp b/src/solvers/refinement/string_expr.cpp new file mode 100644 index 00000000000..52f6c651222 --- /dev/null +++ b/src/solvers/refinement/string_expr.cpp @@ -0,0 +1,34 @@ +/** -*- C++ -*- *****************************************************\ + +Module: String expressions for PASS algorithm + (see the PASS paper at HVC'13) + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +unsigned string_exprt::next_symbol_id = 1; + +symbol_exprt string_exprt::fresh_symbol +(const irep_idt &prefix, const typet &tp) +{ + std::ostringstream buf; + buf << "string_refinement#" << prefix << "#" << (next_symbol_id++); + std::string s = buf.str(); + irep_idt name(s.c_str()); + return symbol_exprt(name, tp); +} + +string_exprt::string_exprt(unsignedbv_typet char_type) + : struct_exprt(refined_string_typet(char_type)) +{ + refined_string_typet t(char_type); + symbol_exprt length= + fresh_symbol("string_length", refined_string_typet::index_type()); + symbol_exprt content=fresh_symbol("string_content", t.get_content_type()); + move_to_operands(length, content); +} + + diff --git a/src/solvers/refinement/string_expr.h b/src/solvers/refinement/string_expr.h new file mode 100644 index 00000000000..ec63349d5d8 --- /dev/null +++ b/src/solvers/refinement/string_expr.h @@ -0,0 +1,93 @@ +/** -*- C++ -*- *****************************************************\ + +Module: String expressions for PASS algorithm + (see the PASS paper at HVC'13) + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H + +#include + +#include +#include +#include + + +// Expressions that encode strings +class string_exprt: public struct_exprt +{ +public: + // Initialize string from the type of characters + explicit string_exprt(unsignedbv_typet char_type); + + // Default uses C character type + string_exprt() : string_exprt(refined_string_typet::char_type()) {} + + // Generate a new symbol of the given type tp with a prefix + static symbol_exprt fresh_symbol + (const irep_idt &prefix, const typet &tp=bool_typet()); + + // Expression corresponding to the length of the string + inline const exprt & length() const + { return op0(); } + + // Expression corresponding to the content (array of characters) of the string + inline const exprt & content() const + { return op1(); } + + static exprt within_bounds(const exprt & idx, const exprt & bound); + + // Expression of the character at position idx in the string + inline index_exprt operator[] (const exprt & idx) const + { return index_exprt(content(), idx);} + inline index_exprt operator[] (int i) const + { return index_exprt(content(), refined_string_typet::index_of_int(i));} + + // Comparison on the length of the strings + inline binary_relation_exprt longer(const string_exprt & rhs) const + { return binary_relation_exprt(length(), ID_ge, rhs.length()); } + inline binary_relation_exprt longer(const exprt & rhs) const + { return binary_relation_exprt(length(), ID_ge, rhs); } + inline binary_relation_exprt strictly_longer(const exprt & rhs) const + { return binary_relation_exprt(rhs, ID_lt, length()); } + inline binary_relation_exprt strictly_longer(const string_exprt & rhs) const + { return binary_relation_exprt(rhs.length(), ID_lt, length()); } + inline binary_relation_exprt strictly_longer(int i) const + { return strictly_longer(refined_string_typet::index_of_int(i)); } + inline binary_relation_exprt shorter(const string_exprt & rhs) const + { return binary_relation_exprt(length(), ID_le, rhs.length()); } + inline binary_relation_exprt shorter(const exprt & rhs) const + { return binary_relation_exprt(length(), ID_le, rhs); } + inline binary_relation_exprt shorter(int i) const + { return shorter(refined_string_typet::index_of_int(i)); } + inline binary_relation_exprt strictly_shorter(const string_exprt & rhs) const + { return binary_relation_exprt(length(), ID_lt, rhs.length()); } + inline binary_relation_exprt strictly_shorter(const exprt & rhs) const + { return binary_relation_exprt(length(), ID_lt, rhs); } + inline equal_exprt same_length(const string_exprt & rhs) const + { return equal_exprt(length(), rhs.length()); } + inline equal_exprt has_length(const exprt & rhs) const + { return equal_exprt(length(), rhs); } + inline equal_exprt has_length(int i) const + { return has_length(refined_string_typet::index_of_int(i)); } + + static irep_idt extract_java_string(const symbol_exprt & s); + + static unsigned next_symbol_id; + + friend inline string_exprt &to_string_expr(exprt &expr); +}; + + +inline string_exprt &to_string_expr(exprt &expr) +{ + assert(expr.id()==ID_struct); + return static_cast(expr); +} + + +#endif diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp new file mode 100644 index 00000000000..d39622c4d47 --- /dev/null +++ b/src/solvers/refinement/string_refinement.cpp @@ -0,0 +1,845 @@ +/** -*- C++ -*- *****************************************************\ + +Module: String support via creating string constraints and progressively + instantiating the universal constraints as needed. + The procedure is described in the PASS paper at HVC'13. + +Author: Alberto Griggio, alberto.griggio@gmail.com + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +string_refinementt::string_refinementt(const namespacet &_ns, propt &_prop): + supert(_ns, _prop) +{ + use_counter_example=false; + variable_with_multiple_occurence_in_index=false; + initial_loop_bound=100; +} + +void string_refinementt::set_mode() +{ + debug() << "initializing mode" << eom; + symbolt init=ns.lookup(irep_idt("__CPROVER_initialize")); + irep_idt mode=init.mode; + debug() << "mode detected as " << mode << eom; + generator.set_mode(mode); +} + +void string_refinementt::display_index_set() +{ + for(auto i : index_set) + { + const exprt &s=i.first; + debug() << "IS(" << from_expr(s) << ")=={"; + + for(auto j : i.second) + debug() << from_expr(j) << "; "; + debug() << "}" << eom; + } +} + +// We compute the index set for all formulas, instantiate the formulas +// with the found indexes, and add them as lemmas. +void string_refinementt::add_instantiations() +{ + debug() << "string_constraint_generatort::add_instantiations: " + << "going through the current index set:" << eom; + for(std::map::iterator i=current_index_set.begin(), + end=current_index_set.end(); i!=end; i++) + { + const exprt &s=i->first; + debug() << "IS(" << from_expr(s) << ")=={"; + + for(auto j : i->second) + debug() << from_expr(j) << "; "; + debug() << "}" << eom; + + for(auto j : i->second) + { + const exprt &val=j; + + for(std::size_t k=0; k0) + { + decision_proceduret::resultt res=supert::dec_solve(); + + switch(res) + { + case D_SATISFIABLE: + if(!check_axioms()) + { + debug() << "check_SAT: got SAT but the model is not correct" << eom; + } + else + { + debug() << "check_SAT: the model is correct" << eom; + return D_SATISFIABLE; + } + + debug() << "refining.." << eom; + current_index_set.clear(); + update_index_set(cur); + cur.clear(); + add_instantiations(); + + if(variable_with_multiple_occurence_in_index) + debug() << "WARNING: some variable appears multiple times" << eom; + + if(current_index_set.empty()) + { + debug() << "current index set is empty" << eom; + return D_SATISFIABLE; + } + + display_index_set(); + debug()<< "instantiating NOT_CONTAINS constraints" << eom; + for(unsigned i=0; i lemmas; + instantiate_not_contains(not_contains_axioms[i], lemmas); + for(unsigned j=0; j500) return "very long string"; + if(n==0) return "\"\""; + + std::ostringstream buf; + buf << "\""; + exprt val=get(arr); + + if(val.id()=="array-list") + { + for(size_t i=0; i(integer_of_expr(to_constant_expr(value))); + if(31::iterator it; + for(auto it : generator.symbol_to_string) + { + string_exprt refined=it.second; + const exprt &econtent=refined.content(); + const exprt &elength =refined.length(); + + exprt len=get(elength); + exprt arr=get_array(econtent, len); + + fmodel[elength]=len; + fmodel[econtent]=arr; + debug() << it.first << "=" << from_expr(it.second) + << " of length " << from_expr(len) <<" := " << eom + << from_expr(get(econtent)) << eom + << string_of_array(econtent, len) << eom; + } + + for(auto it : generator.boolean_symbols) + { + debug() << "" << it.get_identifier() << " := " + << from_expr(get(it)) << eom; + fmodel[it]=get(it); + } + + for(auto it : generator.index_symbols) + { + debug() << "" << it.get_identifier() << " := " + << from_expr(get(it)) << eom; + fmodel[it]=get(it); + } + + debug() << "in check axiom, the model may be incomplete" << eom; + std::vector< std::pair > violated; + + debug() << "there are " << universal_axioms.size() + << " universal axioms" << eom; + for(size_t i=0; i new_axioms(violated.size()); + + // Checking if the current solution satisfies the constraints + for(size_t i=0; i string_refinementt::map_of_sum(const exprt &f) +{ + // number of time the element should be added (can be negative) + std::map elems; + + std::vector > to_process; + to_process.push_back(std::make_pair(f, true)); + + while(!to_process.empty()) + { + exprt cur=to_process.back().first; + bool positive=to_process.back().second; + to_process.pop_back(); + if(cur.id()==ID_plus) + { + to_process.push_back(std::make_pair(cur.op1(), positive)); + to_process.push_back(std::make_pair(cur.op0(), positive)); + } + else if(cur.id()==ID_minus) + { + to_process.push_back(std::make_pair(cur.op1(), !positive)); + to_process.push_back(std::make_pair(cur.op0(), positive)); + } + else if(cur.id()==ID_unary_minus) + { + to_process.push_back(std::make_pair(cur.op0(), !positive)); + } + else + { + if(positive) + elems[cur]=elems[cur]+1; + else + elems[cur]=elems[cur]-1; + } + } + return elems; +} + + +exprt string_refinementt::sum_of_map(std::map & m, bool negated) +{ + exprt sum=refined_string_typet::index_of_int(0); + mp_integer constants=0; + + for(auto it : m) + { + // We should group constants together... + const exprt &t=it.first; + int second=negated?(-it.second):it.second; + if(t.id()==ID_constant) + { + std::string value(to_constant_expr(t).get_value().c_str()); + constants+=binary2integer(value, true)*second; + } + else + { + if(second!=0) + { + if(second==-1) + { + if(sum==refined_string_typet::index_of_int(0)) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + } + else if(second==1) + { + if(sum==refined_string_typet::index_of_int(0)) + sum=t; + else + sum=plus_exprt(sum, t); + } + } + else + { + debug() << "in string_refinementt::sum_of_map:" + << " warning: several occurences of the same variable: " + << t.pretty() << eom; + variable_with_multiple_occurence_in_index=true; + if(second>1) + { + for(int i=0; isecond; i--) + sum=minus_exprt(sum, t); + } + } + } + } + + exprt index_const=from_integer(constants, refined_string_typet::index_type()); + return plus_exprt(sum, index_const); +} + +exprt string_refinementt::simplify_sum(const exprt &f) +{ + std::map map=map_of_sum(f); + return sum_of_map(map); +} + +exprt string_refinementt::compute_subst +(const exprt &qvar, const exprt &val, const exprt &f) +{ + exprt positive, negative; + // number of time the element should be added (can be negative) + // qvar has to be equal to val - f(0) if it appears positively in f + // (ie if f(qvar)=f(0) + qvar) and f(0) - val if it appears negatively + // in f. So we start by computing val - f(0). + std::map elems=map_of_sum(minus_exprt(val, f)); + + bool found=false; + bool neg=false; // true if qvar appears negatively in f (pos in elems) + + for(std::map::iterator it=elems.begin(), end=elems.end(); + it!=end; it++) + { + const exprt &t=it->first; + if(t==qvar) + { + if(it->second==1 || it->second==-1) + { + found=true; + neg=(it->second==1); + } + else + { + debug() << "in string_refinementt::compute_subst:" + << " warning: occurences of qvar canceled out " << eom; + assert(it->second==0); + } + elems.erase(it); + } + } + + if(!found) + { + debug() << "string_refinementt::compute_subst: qvar not found" << eom; + debug() << "qvar=" << qvar.pretty() << eom + << "val=" << val.pretty() << eom + << "f=" << f.pretty() << eom; + assert(false); + } + + return sum_of_map(elems, neg); +} + + + +class find_qvar_visitort: public const_expr_visitort +{ +private: + const exprt &qvar_; + +public: + explicit find_qvar_visitort(const exprt &qvar): qvar_(qvar) {} + + void operator()(const exprt &expr) + { + if(expr==qvar_) throw true; + } +}; + +// Look for the given symbol in the index expression +static bool find_qvar(const exprt index, const symbol_exprt & qvar) +{ + find_qvar_visitort v2(qvar); + try + { + index.visit(v2); + return false; + } + catch (bool found) {return found;} +} + + +void string_refinementt::initial_index_set +(const std::vector & string_axioms) +{ + for(size_t i=0; i & cur) +{ + for(size_t i=0; i to_process; + to_process.push_back(axiom.body()); + + while(!to_process.empty()) + { + exprt cur=to_process.back(); + to_process.pop_back(); + if(cur.id()==ID_index) + { + const exprt &s=cur.op0(); + const exprt &i=cur.op1(); + + bool has_quant_var=find_qvar(i, qvar); + + // if cur is of the form s[i] and no quantified variable appears in i + if(!has_quant_var) + { + current_index_set[s].insert(i); + index_set[s].insert(i); + } + else + { + // otherwise we add k-1 + exprt e(i); + minus_exprt kminus1(axiom.upper_bound(), + refined_string_typet::index_of_int(1)); + replace_expr(qvar, kminus1, e); + current_index_set[s].insert(e); + index_set[s].insert(e); + } + } + else + forall_operands(it, cur) + to_process.push_back(*it); + } +} + + + +void string_refinementt::update_index_set(const exprt &formula) +{ + std::vector to_process; + to_process.push_back(formula); + + while(!to_process.empty()) + { + exprt cur=to_process.back(); + to_process.pop_back(); + if(cur.id()==ID_index) + { + const exprt &s=cur.op0(); + const exprt &i=cur.op1(); + assert(s.type().id()==ID_array); + const exprt &simplified=simplify_sum(i); + if(index_set[s].insert(simplified).second) + { + debug() << "adding to index set of " << from_expr(s) + << ": " << from_expr(simplified) << eom; + current_index_set[s].insert(simplified); + } + } + else + { + forall_operands(it, cur) + to_process.push_back(*it); + } + } +} + + +// Will be used to visit an expression and return the index used +// with the given char array +class find_index_visitort: public const_expr_visitort +{ +private: + const exprt &str_; + +public: + explicit find_index_visitort(const exprt &str): str_(str) {} + + void operator()(const exprt &expr) + { + if(expr.id()==ID_index) + { + const index_exprt &i=to_index_expr(expr); + if(i.array()==str_) + throw i.index(); + } + } +}; + +// Find an index used in the char array str +exprt find_index(const exprt & expr, const exprt & str) +{ + find_index_visitort v1(str); + try + { + expr.visit(v1); + return nil_exprt(); + } + catch (exprt i) { return i; } +} + + + +exprt string_refinementt::instantiate +(const string_constraintt &axiom, const exprt &str, const exprt &val) +{ + assert(axiom.is_univ_quant()); + exprt idx=find_index(axiom.body(), str); + if(idx.is_nil()) return true_exprt(); + if(!find_qvar(idx, axiom.get_univ_var())) return true_exprt(); + + exprt r=compute_subst(axiom.get_univ_var(), val, idx); + implies_exprt instance(axiom.premise(), axiom.body()); + replace_expr(axiom.get_univ_var(), r, instance); + // We are not sure the index set contains only positive numbers + exprt bounds=and_exprt(axiom.univ_within_bounds(), + binary_relation_exprt + (refined_string_typet::index_zero(), ID_le, val)); + replace_expr(axiom.get_univ_var(), r, bounds); + return implies_exprt(bounds, instance); +} + + +void string_refinementt::instantiate_not_contains +(const string_not_contains_constraintt & axiom, std::vector & new_lemmas) +{ + exprt s0=axiom.s0(); + exprt s1=axiom.s1(); + + debug() << "instantiate not contains " << from_expr(s0) << " : " + << from_expr(s1) << eom; + expr_sett index_set0=index_set[to_string_expr(s0).content()]; + expr_sett index_set1=index_set[to_string_expr(s1).content()]; + + for(auto it0 : index_set0) + for(auto it1 : index_set1) + { + debug() << from_expr(it0) << " : " << from_expr(it1) << eom; + exprt val=minus_exprt(it0, it1); + exprt witness=generator.get_witness_of(axiom, val); + and_exprt prem_and_is_witness(axiom.premise(), + equal_exprt(witness, it1)); + + not_exprt differ(equal_exprt(to_string_expr(s0)[it0], + to_string_expr(s1)[it1])); + exprt lemma=implies_exprt(prem_and_is_witness, differ); + + new_lemmas.push_back(lemma); + // we put bounds on the witnesses: + // 0 <= v <= |s0| - |s1| ==> 0 <= v+w[v] < |s0| && 0 <= w[v] < |s1| + exprt zero=refined_string_typet::index_zero(); + binary_relation_exprt c1(zero, ID_le, plus_exprt(val, witness)); + binary_relation_exprt c2 + (to_string_expr(s0).length(), ID_gt, plus_exprt(val, witness)); + binary_relation_exprt c3(to_string_expr(s1).length(), ID_gt, witness); + binary_relation_exprt c4(zero, ID_le, witness); + + minus_exprt diff(to_string_expr(s0).length(), + to_string_expr(s1).length()); + + and_exprt premise(binary_relation_exprt(zero, ID_le, val), + binary_relation_exprt(diff, ID_ge, val)); + exprt witness_bounds=implies_exprt + (premise, and_exprt(and_exprt(c1, c2), and_exprt(c3, c4))); + new_lemmas.push_back(witness_bounds); + } +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h new file mode 100644 index 00000000000..8f86f6d6b72 --- /dev/null +++ b/src/solvers/refinement/string_refinement.h @@ -0,0 +1,142 @@ +/** -*- C++ -*- *****************************************************\ + +Module: String support via creating string constraints and progressively + instantiating the universal constraints as needed. + The procedure is described in the PASS paper at HVC'13. + +Author: Alberto Griggio, alberto.griggio@gmail.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H + +#include + +#include +#include +#include +#include + +class string_refinementt: public bv_refinementt +{ +public: + string_refinementt(const namespacet &_ns, propt &_prop); + ~string_refinementt() {} + + // Determine which language should be used + void set_mode(); + + // Should we use counter examples at each iteration? + bool use_counter_example; + + // Number of time we refine the index set before actually launching the solver + int initial_loop_bound; + + virtual std::string decision_procedure_text() const + { return "string refinement loop with "+prop.solver_text(); } + + static exprt is_positive(const exprt & x); + +private: + // Base class + typedef bv_refinementt supert; + +protected: + typedef std::set expr_sett; + + virtual bvt convert_symbol(const exprt &expr); + virtual bvt convert_function_application + (const function_application_exprt &expr); + + decision_proceduret::resultt dec_solve(); + + // fills as many 0 as necessary in the bit vectors to have the right width + bvt convert_bool_bv(const exprt &boole, const exprt &orig); + + +private: + string_constraint_generatort generator; + + // Simple constraints that have been given to the solver + expr_sett seen_instances; + + std::vector universal_axioms; + + std::vector not_contains_axioms; + + int nb_sat_iteration; + + // Unquantified lemmas that have newly been added + std::vector cur; + + // See the definition in the PASS article + // Warning: this is indexed by array_expressions and not string expressions + std::map current_index_set; + std::map index_set; + + // for debugging + void display_index_set(); + + // Tells if there is a index in the index set where the same variable occurs + // several times. + bool variable_with_multiple_occurence_in_index; + + // Natural number expression corresponding to a constant integer + constant_exprt constant_of_nat(int i, typet t); + + void add_lemma(const exprt &lemma, bool add_to_index_set=true); + + bool boolbv_set_equality_to_true(const equal_exprt &expr); + + literalt convert_rest(const exprt &expr); + + // Instantiate forall constraints with index from the index set + void add_instantiations(); + + // Return true if the current model satisfies all the axioms + bool check_axioms(); + + // Add to the index set all the indices that appear in the formula + void update_index_set(const exprt &formula); + void update_index_set(const std::vector &cur); + void initial_index_set(const string_constraintt &axiom); + void initial_index_set(const std::vector &string_axioms); + + // Takes an universaly quantified formula [axiom], + // an array of char variable [s], and an index expression [val]. + // Computes one index [v1] in which [axiom.idx] appears, takes the + // corresponding substitition [r] (obtained with [compute_subst]). + // Then substitutes [axiom.idx] with [r] in [axiom]. + // axiom is not constant because we may record some information about + // instantiation of existential variables. + exprt instantiate + (const string_constraintt &axiom, const exprt &str, const exprt &val); + + void instantiate_not_contains( + const string_not_contains_constraintt &axiom, + std::vector & new_lemmas); + + // For expressions f of a certain form, returns an expression corresponding + // to $f^{−1}(val)$. + // i.e. the value that is necessary for qvar for f to be equal to val. + // Takes an expression containing + and − operations in which qvar appears + // exactly once. Rewrites it as a sum of qvar and elements in list elems + // different from qvar. Takes e minus the sum of the element in elems. + exprt compute_subst(const exprt &qvar, const exprt &val, const exprt &f); + + // Rewrite a sum in a simple form: sum m_i * expr_i + std::map map_of_sum(const exprt &f); + exprt sum_of_map(std::map &m, bool negated=false); + + // Simplify a sum (an expression with only plus and minus expr) + exprt simplify_sum(const exprt &f); + + // Gets a model of an array and put it in a certain form + exprt get_array(const exprt &arr, const exprt &size); + + // Convert the content of a string to a more readable representation + std::string string_of_array(const exprt &arr, const exprt &size); +}; + +#endif diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index 82ff934bd2f..a804b8266aa 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -733,3 +733,66 @@ verilog_array low high bswap +string_constraint +string_not_contains_constraint +cprover_char_literal_func +cprover_string_literal_func +cprover_string_char_at_func +cprover_string_char_set_func +cprover_string_code_point_at_func +cprover_string_code_point_before_func +cprover_string_code_point_count_func +cprover_string_offset_by_code_point_func +cprover_string_compare_to_func +cprover_string_concat_func +cprover_string_concat_int_func +cprover_string_concat_long_func +cprover_string_concat_char_func +cprover_string_concat_bool_func +cprover_string_concat_double_func +cprover_string_concat_float_func +cprover_string_concat_code_point_func +cprover_string_contains_func +cprover_string_copy_func +cprover_string_delete_func +cprover_string_delete_char_at_func +cprover_string_equal_func +cprover_string_equals_ignore_case_func +cprover_string_empty_string_func +cprover_string_endswith_func +cprover_string_format_func +cprover_string_hash_code_func +cprover_string_index_of_func +cprover_string_intern_func +cprover_string_insert_func +cprover_string_insert_int_func +cprover_string_insert_long_func +cprover_string_insert_bool_func +cprover_string_insert_char_func +cprover_string_insert_float_func +cprover_string_insert_double_func +cprover_string_insert_char_array_func +cprover_string_is_prefix_func +cprover_string_is_suffix_func +cprover_string_is_empty_func +cprover_string_last_index_of_func +cprover_string_length_func +cprover_string_data_func +cprover_string_of_int_func +cprover_string_of_int_hex_func +cprover_string_of_long_func +cprover_string_of_bool_func +cprover_string_of_float_func +cprover_string_of_double_func +cprover_string_of_char_func +cprover_string_of_char_array_func +cprover_string_parse_int_func +cprover_string_replace_func +cprover_string_set_length_func +cprover_string_startswith_func +cprover_string_substring_func +cprover_string_to_char_array_func +cprover_string_to_lower_case_func +cprover_string_to_upper_case_func +cprover_string_trim_func +cprover_string_value_of_func diff --git a/src/util/unicode.cpp b/src/util/unicode.cpp index 442d87f2f3c..bf5d9c4b675 100644 --- a/src/util/unicode.cpp +++ b/src/util/unicode.cpp @@ -7,6 +7,8 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ #include +#include +#include #include "unicode.h" @@ -253,3 +255,34 @@ const char **narrow_argv(int argc, const wchar_t **argv_wide) return argv_narrow; } + +std::wstring utf8_to_utf16be(const std::string& in) +{ + std::wstring_convert > converter; + return converter.from_bytes(in); +} + +std::wstring utf8_to_utf16le(const std::string& in) +{ + std::wstring_convert > converter; + return converter.from_bytes(in); +} + +std::string utf16le_to_ascii(const std::wstring& in) +{ + std::string result; + std::locale loc; + for(const auto c : in) + { + if(c <= 255 && isprint(c,loc)) + result+=(unsigned char)c; + else + { + result+="\\u"; + char hex[5]; + sprintf(hex,"%04x",(wchar_t)c); + result+=hex; + } + } + return result; +} diff --git a/src/util/unicode.h b/src/util/unicode.h index 44038a26c04..e22bb10574c 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -21,6 +21,10 @@ std::wstring widen(const std::string &s); std::string utf32_to_utf8(const std::basic_string &s); std::string utf16_to_utf8(const std::basic_string &s); +std::string utf16le_to_ascii(const std::wstring&); + +std::wstring utf8_to_utf16be(const std::string&); +std::wstring utf8_to_utf16le(const std::string&); const char **narrow_argv(int argc, const wchar_t **argv_wide);