diff --git a/regression/memory-analyzer/pointer_03/test.desc b/regression/memory-analyzer/pointer_03/test.desc index ccf6e09bf48..7498e1a4131 100644 --- a/regression/memory-analyzer/pointer_03/test.desc +++ b/regression/memory-analyzer/pointer_03/test.desc @@ -2,6 +2,6 @@ CORE main.gb --breakpoint checkpoint --symbols x,p x = 3; -p = &x; +p = \(void \*\)\&x; ^EXIT=0$ ^SIGNAL=0$ diff --git a/regression/memory-analyzer/pointer_to_struct_01/test.desc b/regression/memory-analyzer/pointer_to_struct_01/test.desc index 17dd6abb2d5..fcaf3688d69 100644 --- a/regression/memory-analyzer/pointer_to_struct_01/test.desc +++ b/regression/memory-analyzer/pointer_to_struct_01/test.desc @@ -1,8 +1,7 @@ CORE main.gb --breakpoint checkpoint --symbols p -struct S tmp; -tmp = \{ \.next=\(\(struct S \*\)0\) \}; -p = \&tmp; +st = \{ .next=\&st \}; +p = \&st; ^EXIT=0$ ^SIGNAL=0$ diff --git a/regression/memory-analyzer/structs_02/test.desc b/regression/memory-analyzer/structs_02/test.desc index aeb1a8838e8..a9aed33cb9e 100644 --- a/regression/memory-analyzer/structs_02/test.desc +++ b/regression/memory-analyzer/structs_02/test.desc @@ -1,8 +1,7 @@ CORE main.gb --breakpoint checkpoint --symbols st -signed int tmp; -tmp = 3; -st = \{ .c1=1, .c2=&tmp \}; +i = 3; +st = \{ .c1=1, .c2=\&i \}; ^EXIT=0$ ^SIGNAL=0$ diff --git a/regression/snapshot-harness/arrays_02/main.c b/regression/snapshot-harness/arrays_02/main.c new file mode 100644 index 00000000000..465782f87da --- /dev/null +++ b/regression/snapshot-harness/arrays_02/main.c @@ -0,0 +1,28 @@ +#include + +int array[] = {1, 2, 3}; +int *p; +int *q; + +void initialize() +{ + p = &(array[1]); + q = array + 1; + array[0] = 4; +} + +void checkpoint() +{ +} + +int main() +{ + initialize(); + checkpoint(); + + assert(p == q); + assert(*p == *q); + *p = 4; + q = q - 1; + assert(*q == *p); +} diff --git a/regression/snapshot-harness/arrays_02/test.desc b/regression/snapshot-harness/arrays_02/test.desc new file mode 100644 index 00000000000..137ebcc03fa --- /dev/null +++ b/regression/snapshot-harness/arrays_02/test.desc @@ -0,0 +1,11 @@ +CORE +main.c +p,q --harness-type initialise-with-memory-snapshot --initial-goto-location main:4 +^EXIT=0$ +^SIGNAL=0$ +\[main.assertion.1\] line [0-9]+ assertion p == q: SUCCESS +\[main.assertion.2\] line [0-9]+ assertion \*p == \*q: SUCCESS +\[main.assertion.3\] line [0-9]+ assertion \*q == \*p: SUCCESS +VERIFICATION SUCCESSFUL +-- +^warning: ignoring diff --git a/regression/snapshot-harness/dynamic-array-int/main.c b/regression/snapshot-harness/dynamic-array-int/main.c new file mode 100644 index 00000000000..54dd2265d54 --- /dev/null +++ b/regression/snapshot-harness/dynamic-array-int/main.c @@ -0,0 +1,47 @@ +#include +#include + +int *array; +int *iterator1; +int *iterator2; +int *iterator3; + +void initialize() +{ + array = (int *)malloc(sizeof(int) * 10); + array[0] = 1; + array[1] = 2; + array[2] = 3; + array[3] = 4; + array[4] = 5; + array[5] = 6; + array[6] = 7; + array[7] = 8; + array[8] = 9; + array[9] = 10; + iterator1 = (int *)array; + iterator2 = &array[1]; + iterator3 = array + 1; +} + +void checkpoint() +{ +} + +int main() +{ + initialize(); + checkpoint(); + + assert(*iterator1 == 1); + assert(iterator1 != iterator2); + assert(iterator2 == iterator3); + assert(iterator2 == &array[1]); + assert(*iterator3 == array[1]); + assert(*iterator3 == 2); + iterator3 = &array[9]; + iterator3++; + assert(*iterator3 == 0); + + return 0; +} diff --git a/regression/snapshot-harness/dynamic-array-int/test.desc b/regression/snapshot-harness/dynamic-array-int/test.desc new file mode 100644 index 00000000000..2e02b76fdea --- /dev/null +++ b/regression/snapshot-harness/dynamic-array-int/test.desc @@ -0,0 +1,16 @@ +CORE +main.c +array,iterator1,iterator2,iterator3 --harness-type initialise-with-memory-snapshot --initial-goto-location main:4 +^EXIT=10$ +^SIGNAL=0$ +\[main.assertion.1\] line [0-9]+ assertion \*iterator1 == 1: SUCCESS +\[main.assertion.2\] line [0-9]+ assertion iterator1 != iterator2: SUCCESS +\[main.assertion.3\] line [0-9]+ assertion iterator2 == iterator3: SUCCESS +\[main.assertion.4\] line [0-9]+ assertion iterator2 == \&array\[1\]: SUCCESS +\[main.assertion.5\] line [0-9]+ assertion \*iterator3 == array\[1\]: SUCCESS +\[main.assertion.6\] line [0-9]+ assertion \*iterator3 == 2: SUCCESS +\[main.pointer_dereference.27\] line [0-9]+ dereference failure: pointer outside object bounds in \*iterator3: FAILURE +\[main.assertion.7\] line [0-9]+ assertion \*iterator3 == 0: FAILURE +VERIFICATION FAILED +-- +unwinding assertion loop \d+: FAILURE diff --git a/regression/snapshot-harness/pointer-to-array-int/main.c b/regression/snapshot-harness/pointer-to-array-int/main.c index 622f6f61c9a..919bd3ebb62 100644 --- a/regression/snapshot-harness/pointer-to-array-int/main.c +++ b/regression/snapshot-harness/pointer-to-array-int/main.c @@ -31,14 +31,10 @@ int main() checkpoint(); assert(first == second); - // The following assertions will be check in the following PR once - // dynamically allocated snapshots are properly implemented. - /* assert(array_size >= prefix_size); */ - /* assert(prefix_size >= 0); */ - /* assert(second[prefix_size] != 6); */ - /* assert(second[4] == 4); */ + assert(array_size >= prefix_size); + assert(prefix_size >= 0); + assert(second[prefix_size] != 6); + assert(second[4] == 4); - /* for(int i = 0; i < prefix_size; i++) */ - /* assert(second[i] != prefix[i]); */ return 0; } diff --git a/regression/snapshot-harness/static-array-float/main.c b/regression/snapshot-harness/static-array-float/main.c new file mode 100644 index 00000000000..afbc16dcb15 --- /dev/null +++ b/regression/snapshot-harness/static-array-float/main.c @@ -0,0 +1,32 @@ +#include + +float array[10]; +float *iterator1; +float *iterator2; + +void initialize() +{ + array[0] = 1.11; + array[8] = 9.999; + array[9] = 10.0; + iterator1 = (float *)array; + iterator2 = &array[9]; +} + +void checkpoint() +{ +} + +int main() +{ + initialize(); + checkpoint(); + + assert(*iterator1 >= 1.10 && *iterator1 <= 1.12); + assert(iterator1 != iterator2); + assert(iterator2 == &array[9]); + iterator2++; + assert(*iterator2 == 0.0); + + return 0; +} diff --git a/regression/snapshot-harness/static-array-float/test.desc b/regression/snapshot-harness/static-array-float/test.desc new file mode 100644 index 00000000000..23f45c15428 --- /dev/null +++ b/regression/snapshot-harness/static-array-float/test.desc @@ -0,0 +1,15 @@ +FUTURE +main.c +array,iterator1,iterator2 --harness-type initialise-with-memory-snapshot --initial-goto-location main:4 +^EXIT=10$ +^SIGNAL=0$ +\[main.assertion.1\] line [0-9]+ assertion \*iterator1 \>= 1.10 \&\& \*iterator1 \<= 1.12: SUCCESS +\[main.assertion.2\] line [0-9]+ assertion iterator1 != iterator2: SUCCESS +\[main.assertion.3\] line [0-9]+ assertion iterator2 == \&array\[9\]: SUCCESS +\[main.pointer_dereference.13\] line [0-9]+ dereference failure: pointer outside object bounds in \*iterator2: FAILURE +\[main.assertion.4\] line [0-9]+ assertion \*iterator2 == 0: FAILURE +VERIFICATION FAILED +-- +unwinding assertion loop \d+: FAILURE +-- +memory analyzer does not yet allow extract floating point values diff --git a/regression/snapshot-harness/static-array-int/main.c b/regression/snapshot-harness/static-array-int/main.c new file mode 100644 index 00000000000..9a9d84fab97 --- /dev/null +++ b/regression/snapshot-harness/static-array-int/main.c @@ -0,0 +1,32 @@ +#include + +int array[10]; +int *iterator1; +int *iterator2; + +void initialize() +{ + array[0] = 1; + array[8] = 9; + array[9] = 10; + iterator1 = (int *)array; + iterator2 = &array[9]; +} + +void checkpoint() +{ +} + +int main() +{ + initialize(); + checkpoint(); + + assert(*iterator1 == 1); + assert(iterator1 != iterator2); + assert(iterator2 == &array[9]); + iterator2++; + assert(*iterator2 == 0); + + return 0; +} diff --git a/regression/snapshot-harness/static-array-int/test.desc b/regression/snapshot-harness/static-array-int/test.desc new file mode 100644 index 00000000000..6d4a62f7ca0 --- /dev/null +++ b/regression/snapshot-harness/static-array-int/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +array,iterator1,iterator2 --harness-type initialise-with-memory-snapshot --initial-goto-location main:4 +^EXIT=10$ +^SIGNAL=0$ +\[main.assertion.1\] line [0-9]+ assertion \*iterator1 == 1: SUCCESS +\[main.assertion.2\] line [0-9]+ assertion iterator1 != iterator2: SUCCESS +\[main.assertion.3\] line [0-9]+ assertion iterator2 == \&array\[9\]: SUCCESS +\[main.pointer_dereference.13\] line [0-9]+ dereference failure: pointer outside object bounds in \*iterator2: FAILURE +\[main.assertion.4\] line [0-9]+ assertion \*iterator2 == 0: FAILURE +VERIFICATION FAILED +-- +unwinding assertion loop \d+: FAILURE diff --git a/src/goto-cc/Makefile b/src/goto-cc/Makefile index db278cd7234..8a70a3f6705 100644 --- a/src/goto-cc/Makefile +++ b/src/goto-cc/Makefile @@ -40,15 +40,16 @@ INCLUDES= -I .. LIBS = -CLEANFILES = goto-cc$(EXEEXT) goto-cl$(EXEEXT) +CLEANFILES = goto-cc$(EXEEXT) goto-gcc$(EXEEXT) goto-cl$(EXEEXT) include ../config.inc include ../common ifeq ($(BUILD_ENV_),MSVC) all: goto-cl$(EXEEXT) +else +all: goto-gcc$(EXEEXT) endif -all: goto-cc$(EXEEXT) ifneq ($(wildcard ../jsil/Makefile),) OBJ += ../jsil/jsil$(LIBEXT) @@ -57,6 +58,9 @@ endif ############################################################################### +goto-gcc$(EXEEXT): goto-cc$(EXEEXT) + ln -sf goto-cc$(EXEEXT) goto-gcc$(EXEEXT) + goto-cc$(EXEEXT): $(OBJ) $(LINKBIN) diff --git a/src/goto-harness/memory_snapshot_harness_generator.cpp b/src/goto-harness/memory_snapshot_harness_generator.cpp index 0d59add0caa..55ef862eb01 100644 --- a/src/goto-harness/memory_snapshot_harness_generator.cpp +++ b/src/goto-harness/memory_snapshot_harness_generator.cpp @@ -207,6 +207,18 @@ size_t memory_snapshot_harness_generatort::pointer_depth(const typet &t) const return pointer_depth(t.subtype()) + 1; } +bool memory_snapshot_harness_generatort::refers_to( + const exprt &expr, + const irep_idt &name) const +{ + if(expr.id() == ID_symbol) + return to_symbol_expr(expr).get_identifier() == name; + return std::any_of( + expr.operands().begin(), + expr.operands().end(), + [this, name](const exprt &subexpr) { return refers_to(subexpr, name); }); +} + code_blockt memory_snapshot_harness_generatort::add_assignments_to_globals( const symbol_tablet &snapshot, goto_modelt &goto_model) const @@ -230,8 +242,11 @@ code_blockt memory_snapshot_harness_generatort::add_assignments_to_globals( ordered_snapshot_symbols.begin(), ordered_snapshot_symbols.end(), [this](const snapshot_pairt &left, const snapshot_pairt &right) { - return pointer_depth(left.second.symbol_expr().type()) < - pointer_depth(right.second.symbol_expr().type()); + if(refers_to(right.second.value, left.first)) + return true; + else + return pointer_depth(left.second.symbol_expr().type()) < + pointer_depth(right.second.symbol_expr().type()); }); code_blockt code; diff --git a/src/goto-harness/memory_snapshot_harness_generator.h b/src/goto-harness/memory_snapshot_harness_generator.h index 1b6a98a1196..8b7ffdd3039 100644 --- a/src/goto-harness/memory_snapshot_harness_generator.h +++ b/src/goto-harness/memory_snapshot_harness_generator.h @@ -244,6 +244,12 @@ class memory_snapshot_harness_generatort : public goto_harness_generatort /// \return pointer depth of type \p t size_t pointer_depth(const typet &t) const; + /// Recursively test pointer reference + /// \param expr: expression to be tested + /// \param name: name to be located + /// \return true if \p expr refers to an object named \p name + bool refers_to(const exprt &expr, const irep_idt &name) const; + /// data to store the command-line options std::string memory_snapshot_file; std::string initial_goto_location_line; diff --git a/src/memory-analyzer/analyze_symbol.cpp b/src/memory-analyzer/analyze_symbol.cpp index 0cc6a296265..3e874b28219 100644 --- a/src/memory-analyzer/analyze_symbol.cpp +++ b/src/memory-analyzer/analyze_symbol.cpp @@ -7,6 +7,8 @@ Author: Malte Mues \*******************************************************************/ +#include + #include "analyze_symbol.h" #include @@ -29,19 +31,108 @@ gdb_value_extractort::gdb_value_extractort( { } +gdb_value_extractort::memory_scopet::memory_scopet( + const memory_addresst &begin, + const mp_integer &byte_size, + const irep_idt &name) + : begin_int(safe_string2size_t(begin.address_string, 0)), + byte_size(byte_size), + name(name) +{ +} + +size_t gdb_value_extractort::memory_scopet::address2size_t( + const memory_addresst &point) const +{ + return safe_string2size_t(point.address_string, 0); +} + +mp_integer gdb_value_extractort::memory_scopet::distance( + const memory_addresst &point, + mp_integer member_size) const +{ + auto point_int = address2size_t(point); + CHECK_RETURN(check_containment(point_int)); + return (point_int - begin_int) / member_size; +} + +std::vector::iterator +gdb_value_extractort::find_dynamic_allocation(irep_idt name) +{ + return std::find_if( + dynamically_allocated.begin(), + dynamically_allocated.end(), + [&name](const memory_scopet &scope) { return scope.id() == name; }); +} + +std::vector::iterator +gdb_value_extractort::find_dynamic_allocation(const memory_addresst &point) +{ + return std::find_if( + dynamically_allocated.begin(), + dynamically_allocated.end(), + [&point](const memory_scopet &memory_scope) { + return memory_scope.contains(point); + }); +} + +mp_integer gdb_value_extractort::get_malloc_size(irep_idt name) +{ + const auto scope_it = find_dynamic_allocation(name); + if(scope_it == dynamically_allocated.end()) + return 1; + else + return scope_it->size(); +} + +optionalt gdb_value_extractort::get_malloc_pointee( + const memory_addresst &point, + mp_integer member_size) +{ + const auto scope_it = find_dynamic_allocation(point); + if(scope_it == dynamically_allocated.end()) + return {}; + + const auto pointer_distance = scope_it->distance(point, member_size); + return id2string(scope_it->id()) + + (pointer_distance > 0 ? "+" + integer2string(pointer_distance) : ""); +} + +mp_integer gdb_value_extractort::get_type_size(const typet &type) const +{ + const auto maybe_size = pointer_offset_bits(type, ns); + CHECK_RETURN(maybe_size.has_value()); + return *maybe_size / 8; +} + void gdb_value_extractort::analyze_symbols(const std::vector &symbols) { // record addresses of given symbols for(const auto &id : symbols) { - const symbol_exprt &symbol_expr = ns.lookup(id).symbol_expr(); - const address_of_exprt aoe(symbol_expr); + const symbolt &symbol = ns.lookup(id); + if(symbol.type.id() != ID_pointer || is_c_char_type(symbol.type.subtype())) + { + const symbol_exprt &symbol_expr = ns.lookup(id).symbol_expr(); + const address_of_exprt aoe(symbol_expr); - const std::string c_expr = c_converter.convert(aoe); - const pointer_valuet &value = gdb_api.get_memory(c_expr); - CHECK_RETURN(value.pointee.empty() || (id == value.pointee)); + const std::string c_expr = c_converter.convert(aoe); + const pointer_valuet &value = gdb_api.get_memory(c_expr); + CHECK_RETURN(value.pointee.empty() || (id == value.pointee)); - values.insert({value.address, symbol_expr}); + memory_map[id] = value; + } + else + { + const std::string c_symbol = c_converter.convert(symbol.symbol_expr()); + const pointer_valuet &symbol_value = gdb_api.get_memory(c_symbol); + size_t symbol_size = gdb_api.query_malloc_size(c_symbol); + + if(symbol_size > 1) + dynamically_allocated.emplace_back( + symbol_value.address, symbol_size, id); + memory_map[id] = symbol_value; + } } for(const auto &id : symbols) @@ -126,7 +217,8 @@ symbol_tablet gdb_value_extractort::get_snapshot_as_symbol_table() void gdb_value_extractort::add_assignment(const exprt &lhs, const exprt &value) { - assignments.push_back(std::make_pair(lhs, value)); + if(assignments.count(lhs) == 0) + assignments.emplace(std::make_pair(lhs, value)); } exprt gdb_value_extractort::get_char_pointer_value( @@ -199,73 +291,31 @@ exprt gdb_value_extractort::get_pointer_to_member_value( const symbolt *struct_symbol = symbol_table.lookup(struct_name); DATA_INVARIANT(struct_symbol != nullptr, "unknown struct"); - const auto maybe_struct_size = - pointer_offset_size(struct_symbol->symbol_expr().type(), ns); - bool found = false; - CHECK_RETURN(maybe_struct_size.has_value()); - for(const auto &value_pair : values) + + if(!has_known_memory_location(struct_name)) { - const auto &value_symbol_expr = value_pair.second; - if(to_symbol_expr(value_symbol_expr).get_identifier() == struct_name) - { - found = true; - break; - } + memory_map[struct_name] = gdb_api.get_memory(struct_name); + analyze_symbol(irep_idt{struct_name}); } - if(!found) + const auto &struct_symbol_expr = struct_symbol->symbol_expr(); + if(struct_symbol->type.id() == ID_array) { - const typet target_type = expr.type().subtype(); - - symbol_exprt dummy("tmp", expr.type()); - code_blockt assignments; - - auto emplace_pair = values.emplace( - memory_location, - allocate_objects.allocate_automatic_local_object( - assignments, dummy, target_type)); - const symbol_exprt &new_symbol = to_symbol_expr(emplace_pair.first->second); - - dereference_exprt dereference_expr(expr); - - const auto zero_expr = zero_initializer(target_type, location, ns); - CHECK_RETURN(zero_expr); - - // add assignment of value to newly created symbol - add_assignment(new_symbol, *zero_expr); - - const auto &struct_symbol = values.find(memory_location); - - const auto maybe_member_expr = get_subexpression_at_offset( - struct_symbol->second, member_offset, expr.type().subtype(), ns); - CHECK_RETURN(maybe_member_expr.has_value()); - return *maybe_member_expr; + return index_exprt{ + struct_symbol_expr, + from_integer( + member_offset / get_type_size(expr.type().subtype()), index_type())}; } - - const auto it = values.find(memory_location); - // if the structure we are pointing to does not exists we need to build a - // temporary object for it: get the type from symbol table, query gdb for - // value, allocate new object for it and then store into assignments - if(it == values.end()) + if(struct_symbol->type.id() == ID_pointer) { - const auto symbol_expr = struct_symbol->symbol_expr(); - const auto zero = zero_initializer(symbol_expr.type(), location, ns); - CHECK_RETURN(zero.has_value()); - const auto val = get_expr_value(symbol_expr, *zero, location); - - symbol_exprt dummy("tmp", pointer_type(symbol_expr.type())); - code_blockt assignments; - - const symbol_exprt new_symbol = - to_symbol_expr(allocate_objects.allocate_automatic_local_object( - assignments, dummy, symbol_expr.type())); - - add_assignment(new_symbol, val); - values[memory_location] = val; + return dereference_exprt{ + plus_exprt{struct_symbol_expr, + from_integer(member_offset, size_type()), + expr.type()}}; } const auto maybe_member_expr = get_subexpression_at_offset( - struct_symbol->symbol_expr(), member_offset, expr.type().subtype(), ns); + struct_symbol_expr, member_offset, expr.type().subtype(), ns); DATA_INVARIANT( maybe_member_expr.has_value(), "structure doesn't have member"); @@ -276,17 +326,27 @@ exprt gdb_value_extractort::get_pointer_to_member_value( exprt gdb_value_extractort::get_non_char_pointer_value( const exprt &expr, - const memory_addresst &memory_location, + const pointer_valuet &value, const source_locationt &location) { PRECONDITION(expr.type().id() == ID_pointer); PRECONDITION(!is_c_char_type(expr.type().subtype())); + const auto &memory_location = value.address; PRECONDITION(!memory_location.is_null()); auto it = values.find(memory_location); if(it == values.end()) { + if(!value.pointee.empty() && value.pointee != c_converter.convert(expr)) + { + analyze_symbol(value.pointee); + const auto pointee_symbol = symbol_table.lookup(value.pointee); + CHECK_RETURN(pointee_symbol != nullptr); + const auto pointee_symbol_expr = pointee_symbol->symbol_expr(); + return pointee_symbol_expr; + } + values.insert(std::make_pair(memory_location, nil_exprt())); const typet target_type = expr.type().subtype(); @@ -302,13 +362,10 @@ exprt gdb_value_extractort::get_non_char_pointer_value( // expected positions. Since the allocated size is over-approximation we may // end up querying pass the allocated bounds and building larger array with // meaningless values. - size_t allocated_size = - gdb_api.query_malloc_size(c_converter.convert(expr)); + mp_integer allocated_size = get_malloc_size(c_converter.convert(expr)); // get the sizeof(target_type) and thus the number of elements - const auto target_size_bits = pointer_offset_bits(target_type, ns); - CHECK_RETURN(target_size_bits.has_value()); - const auto number_of_elements = allocated_size / (*target_size_bits / 8); - if(number_of_elements > 1) + const auto number_of_elements = allocated_size / get_type_size(target_type); + if(allocated_size != 1 && number_of_elements > 1) { array_exprt::operandst elements; // build the operands by querying for an index expression @@ -337,6 +394,7 @@ exprt gdb_value_extractort::get_non_char_pointer_value( // add assignment of value to newly created symbol add_assignment(array_symbol, new_array); values[memory_location] = array_symbol; + CHECK_RETURN(array_symbol.type().id() == ID_array); return array_symbol; } @@ -359,6 +417,8 @@ exprt gdb_value_extractort::get_non_char_pointer_value( { const auto &known_value = it->second; const auto &expected_type = expr.type().subtype(); + if(find_dynamic_allocation(memory_location) != dynamically_allocated.end()) + return known_value; if(known_value.is_not_nil() && known_value.type() != expected_type) { return symbol_exprt{to_symbol_expr(known_value).get_identifier(), @@ -369,11 +429,22 @@ exprt gdb_value_extractort::get_non_char_pointer_value( } bool gdb_value_extractort::points_to_member( - const pointer_valuet &pointer_value) const + pointer_valuet &pointer_value, + const typet &expected_type) { if(pointer_value.has_known_offset()) return true; + if(pointer_value.pointee.empty()) + { + const auto maybe_pointee = get_malloc_pointee( + pointer_value.address, get_type_size(expected_type.subtype())); + if(maybe_pointee.has_value()) + pointer_value.pointee = *maybe_pointee; + if(pointer_value.pointee.find("+") != std::string::npos) + return true; + } + const symbolt *pointee_symbol = symbol_table.lookup(pointer_value.pointee); if(pointee_symbol == nullptr) return false; @@ -394,7 +465,12 @@ exprt gdb_value_extractort::get_pointer_value( PRECONDITION(expr.type() == zero_expr.type()); std::string c_expr = c_converter.convert(expr); - const pointer_valuet value = gdb_api.get_memory(c_expr); + const auto known_pointer = memory_map.find(c_expr); + + pointer_valuet value = (known_pointer == memory_map.end() || + known_pointer->second.pointee == c_expr) + ? gdb_api.get_memory(c_expr) + : known_pointer->second; if(!value.valid) return zero_expr; @@ -403,7 +479,7 @@ exprt gdb_value_extractort::get_pointer_value( if(!memory_location.is_null()) { // pointers-to-char can point to members as well, e.g. char[] - if(points_to_member(value)) + if(points_to_member(value, expr.type())) { const auto target_expr = get_pointer_to_member_value(expr, value, location); @@ -417,7 +493,7 @@ exprt gdb_value_extractort::get_pointer_value( const auto target_expr = is_c_char_type(expr.type().subtype()) ? get_char_pointer_value(expr, memory_location, location) - : get_non_char_pointer_value(expr, memory_location, location); + : get_non_char_pointer_value(expr, value, location); // postpone if we cannot resolve now if(target_expr.is_nil()) @@ -435,7 +511,10 @@ exprt gdb_value_extractort::get_pointer_value( const auto result_indexed_expr = get_subexpression_at_offset( target_expr, 0, zero_expr.type().subtype(), ns); CHECK_RETURN(result_indexed_expr.has_value()); + if(result_indexed_expr->type() == zero_expr.type()) + return *result_indexed_expr; const auto result_expr = address_of_exprt{*result_indexed_expr}; + CHECK_RETURN(result_expr.type() == zero_expr.type()); return result_expr; } @@ -445,7 +524,8 @@ exprt gdb_value_extractort::get_pointer_value( // otherwise the address of target should type-match const auto result_expr = address_of_exprt(target_expr); - CHECK_RETURN(result_expr.type() == zero_expr.type()); + if(result_expr.type() != zero_expr.type()) + return typecast_exprt{result_expr, zero_expr.type()}; return result_expr; } diff --git a/src/memory-analyzer/analyze_symbol.h b/src/memory-analyzer/analyze_symbol.h index 1a199b2e5c7..d622d15fc72 100644 --- a/src/memory-analyzer/analyze_symbol.h +++ b/src/memory-analyzer/analyze_symbol.h @@ -81,7 +81,7 @@ class gdb_value_extractort allocate_objectst allocate_objects; /// Sequence of assignments collected during \ref analyze_symbols - std::vector> assignments; + std::map assignments; /// Mapping pointer expression for which \ref get_non_char_pointer_value /// returned nil expression to memory location (from \ref gdb_apit). @@ -91,6 +91,108 @@ class gdb_value_extractort /// value of `symbol`. std::map values; + struct memory_scopet + { + private: + size_t begin_int; + mp_integer byte_size; + irep_idt name; + + /// Convert base-16 memory address to a natural number + /// \param point: the memory address to be converted + /// \return base-10 unsigned integer equal in value to \p point + size_t address2size_t(const memory_addresst &point) const; + + /// Helper function that check if a point in memory points inside this scope + /// \param point_int: memory point as natural number + /// \return true if the point is inside this scope + bool check_containment(const size_t &point_int) const + { + return point_int >= begin_int && (begin_int + byte_size) > point_int; + } + + public: + memory_scopet( + const memory_addresst &begin, + const mp_integer &byte_size, + const irep_idt &name); + + /// Check if \p point points somewhere in this memory scope + /// \param point: memory address to be check for presence + /// \return true if \p point is inside *this + bool contains(const memory_addresst &point) const + { + return check_containment(address2size_t(point)); + } + + /// Compute the distance of \p point from the beginning of this scope + /// \param point: memory address to have the offset computed + /// \param member_size: size of one element of this scope in bytes + /// \return `n' such that \p point is the n-th element of this scope + mp_integer + distance(const memory_addresst &point, mp_integer member_size) const; + + /// Getter for the name of this memory scope + /// \return the name as irep id + irep_idt id() const + { + return name; + } + + /// Getter for the allocation size of this memory scope + /// \return the size in bytes + mp_integer size() const + { + return byte_size; + } + }; + + /// Keep track of the dynamically allocated memory + std::vector dynamically_allocated; + + /// Keep track of the memory location for the analyzed symbols + std::map memory_map; + + bool has_known_memory_location(const irep_idt &id) const + { + return memory_map.count(id) != 0; + } + + /// Search for a memory scope allocated under \p name + /// \param name: name of the pointer used during allocation + /// \return iterator to the right memory scope + std::vector::iterator find_dynamic_allocation(irep_idt name); + + /// Search for a memory scope allocated under \p name + /// \param point: potentially dynamically allocated memory address + /// \return iterator to the right memory scope + std::vector::iterator + find_dynamic_allocation(const memory_addresst &point); + + /// Search for the size of the allocated memory for \p name + /// \param name: name of the pointer used during allocation + /// \return the size if have a record of \p name's allocation (1 otherwise) + mp_integer get_malloc_size(irep_idt name); + + /// Build the pointee string for address \p point assuming it points to a + /// dynamic allocation of `n' elements each of size \p member_size. E.g.: + /// + /// int *p = (int*)malloc(sizeof(int)*4); + /// int *q = &(p[2]); + /// + /// get_malloc_pointee(get_memory(q), sizeof(int)) -> "p+8" + /// + /// \param point: potentially dynamically allocated memory address + /// \param member_size: size of each allocated element + /// \return pointee as a string if we have a record of the allocation + optionalt + get_malloc_pointee(const memory_addresst &point, mp_integer member_size); + + /// Wrapper for call get_offset_pointer_bits + /// \param type: type to get the size of + /// \return the size of the type in bytes + mp_integer get_type_size(const typet &type) const; + /// Assign the gdb-extracted value for \p symbol_name to its symbol /// expression and then process outstanding assignments that this /// extraction introduced. @@ -176,12 +278,12 @@ class gdb_value_extractort /// \ref gdb_apit::get_memory, calls \ref get_expr_value on _dereferenced_ /// \p expr (the result of which is assigned to a new symbol). /// \param expr: the pointer expression to be analysed - /// \param memory_location: pointer value from \ref gdb_apit::get_memory + /// \param value: pointer value from \ref gdb_apit::get_memory /// \param location: the source location /// \return symbol expression associated with \p memory_location exprt get_non_char_pointer_value( const exprt &expr, - const memory_addresst &memory_location, + const pointer_valuet &value, const source_locationt &location); /// If \p memory_location is found among \ref values then return the symbol @@ -210,8 +312,10 @@ class gdb_value_extractort /// Analyzes the \p pointer_value to decide if it point to a struct or a /// union (or array) /// \param pointer_value: pointer value to be analyzed + /// \param expected_type: type of the potential member /// \return true if pointing to a member - bool points_to_member(const pointer_valuet &pointer_value) const; + bool + points_to_member(pointer_valuet &pointer_value, const typet &expected_type); }; #endif // CPROVER_MEMORY_ANALYZER_ANALYZE_SYMBOL_H diff --git a/src/memory-analyzer/gdb_api.cpp b/src/memory-analyzer/gdb_api.cpp index 470ae9c020b..a0a27981eaa 100644 --- a/src/memory-analyzer/gdb_api.cpp +++ b/src/memory-analyzer/gdb_api.cpp @@ -9,7 +9,8 @@ Author: Malte Mues /// \file /// Low-level interface to gdb - +/// +/// Implementation of the GDB/MI API for extracting values of expressions. #include #include @@ -59,43 +60,13 @@ gdb_apit::~gdb_apit() size_t gdb_apit::query_malloc_size(const std::string &pointer_expr) { -#ifdef __linux__ - write_to_gdb("-var-create tmp * malloc_usable_size(" + pointer_expr + ")"); -#elif __APPLE__ - write_to_gdb("-var-create tmp * malloc_size(" + pointer_expr + ")"); -#else - // Under non-linux/OSX system we simple return 1, i.e. as if the \p - // pointer_expr was not dynamically allocated. - return 1; -#endif + const auto maybe_address_string = get_value(pointer_expr); + CHECK_RETURN(maybe_address_string.has_value()); - if(!was_command_accepted()) - { + if(allocated_memory.count(*maybe_address_string) == 0) return 1; - } - - write_to_gdb("-var-evaluate-expression tmp"); - gdb_output_recordt record = get_most_recent_record("^done", true); - - write_to_gdb("-var-delete tmp"); - check_command_accepted(); - - const auto it = record.find("value"); - CHECK_RETURN(it != record.end()); - - const std::string value = it->second; - - INVARIANT( - value.back() != '"' || - (value.length() >= 2 && value[value.length() - 2] == '\\'), - "quotes should have been stripped off from value"); - INVARIANT(value.back() != '\n', "value should not end in a newline"); - - const auto result = string2optional_size_t(value); - if(result.has_value()) - return *result; else - return 1; + return allocated_memory[*maybe_address_string]; } void gdb_apit::create_gdb_process() @@ -299,10 +270,58 @@ void gdb_apit::run_gdb_from_core(const std::string &corefile) gdb_state = gdb_statet::STOPPED; } +void gdb_apit::collect_malloc_calls() +{ + // this is what the registers look like at the function call entry: + // + // reg. name hex. value dec. value + // 0: rax 0xffffffff 4294967295 + // 1: rbx 0x20000000 536870912 + // 2: rcx 0x591 1425 + // 3: rdx 0x591 1425 + // 4: rsi 0x1 1 + // 5: rdi 0x591 1425 + // ... + // rax will eventually contain the return value and + // rdi now stores the first (integer) argument + // in the machine interface they are referred to by numbers, hence: + write_to_gdb("-data-list-register-values d 5"); + auto record = get_most_recent_record("^done", true); + auto allocated_size = safe_string2size_t(get_register_value(record)); + + write_to_gdb("-exec-finish"); + if(!most_recent_line_has_tag("*running")) + { + throw gdb_interaction_exceptiont("could not run program"); + } + record = get_most_recent_record("*stopped"); + auto frame_content = get_value_from_record(record, "frame"); + + // the malloc breakpoint may be inside another malloc function + if(frame_content.find("func=\"malloc\"") != std::string::npos) + { + // so we need to finish the outer malloc as well + write_to_gdb("-exec-finish"); + if(!most_recent_line_has_tag("*running")) + { + throw gdb_interaction_exceptiont("could not run program"); + } + record = get_most_recent_record("*stopped"); + } + + // now we can read the rax register to the the allocated memory address + write_to_gdb("-data-list-register-values x 0"); + record = get_most_recent_record("^done", true); + allocated_memory[get_register_value(record)] = allocated_size; +} + bool gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint) { PRECONDITION(gdb_state == gdb_statet::CREATED); + write_to_gdb("-break-insert " + malloc_name); + bool malloc_is_known = was_command_accepted(); + std::string command("-break-insert"); command += " " + breakpoint; @@ -320,6 +339,30 @@ bool gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint) } gdb_output_recordt record = get_most_recent_record("*stopped"); + + // malloc function is known, i.e. present among the symbols + if(malloc_is_known) + { + // stop at every entry into malloc call + while(hit_malloc_breakpoint(record)) + { + // and store the information about the allocated memory + collect_malloc_calls(); + write_to_gdb("-exec-continue"); + if(!most_recent_line_has_tag("*running")) + { + throw gdb_interaction_exceptiont("could not run program"); + } + record = get_most_recent_record("*stopped"); + } + + write_to_gdb("-break-delete 1"); + if(!was_command_accepted()) + { + throw gdb_interaction_exceptiont("could not delete breakpoint at malloc"); + } + } + const auto it = record.find("reason"); CHECK_RETURN(it != record.end()); @@ -540,3 +583,46 @@ gdb_apit::r_or(const std::string ®ex_left, const std::string ®ex_right) { return R"((?:)" + regex_left + '|' + regex_right + R"())"; } + +std::string gdb_apit::get_value_from_record( + const gdb_output_recordt &record, + const std::string &value_name) +{ + const auto it = record.find(value_name); + CHECK_RETURN(it != record.end()); + const auto value = it->second; + + INVARIANT( + value.back() != '"' || + (value.length() >= 2 && value[value.length() - 2] == '\\'), + "quotes should have been stripped off from value"); + INVARIANT(value.back() != '\n', "value should not end in a newline"); + + return value; +} + +bool gdb_apit::hit_malloc_breakpoint(const gdb_output_recordt &stopped_record) +{ + const auto it = stopped_record.find("reason"); + CHECK_RETURN(it != stopped_record.end()); + + if(it->second != "breakpoint-hit") + return false; + + return safe_string2size_t(get_value_from_record(stopped_record, "bkptno")) == + 1; +} + +std::string gdb_apit::get_register_value(const gdb_output_recordt &record) +{ + // we expect the record of form: + // {[register-values]->[name=name_string, value=\"value_string\"],..} + auto record_value = get_value_from_record(record, "register-values"); + std::string value_eq_quotes = "value=\""; + auto value_eq_quotes_size = value_eq_quotes.size(); + + auto starting_pos = record_value.find(value_eq_quotes) + value_eq_quotes_size; + auto ending_pos = record_value.find('\"', starting_pos); + auto value_length = ending_pos - starting_pos; + return std::string{record_value, starting_pos, value_length}; +} diff --git a/src/memory-analyzer/gdb_api.h b/src/memory-analyzer/gdb_api.h index 89262b88741..886620b5b01 100644 --- a/src/memory-analyzer/gdb_api.h +++ b/src/memory-analyzer/gdb_api.h @@ -31,6 +31,9 @@ class gdb_apit { public: using commandst = std::forward_list; + + /// Memory address imbued with the explicit boolean data indicating if the + /// address is null or not. struct memory_addresst { bool null_address; @@ -67,6 +70,8 @@ class gdb_apit /// writing to gdb) ~gdb_apit(); + /// Data associated with the value of a pointer, i.e. not only the address but + /// also the pointee (if known), string (in the case of char*), etc. struct pointer_valuet { pointer_valuet( @@ -83,10 +88,10 @@ class gdb_apit { } - const memory_addresst address; - const std::string pointee; - const std::string character; - const optionalt string; + memory_addresst address; + std::string pointee; + std::string character; + optionalt string; bool has_known_offset() const { @@ -97,15 +102,10 @@ class gdb_apit bool valid; }; - /// Get the allocated size estimate for a pointer by evaluating - /// `malloc_usable_size'. The function returns the number of usable bytes in - /// the block pointed to by the pointer to a block of memory allocated by - /// `malloc' or a related function. The value may be greater than the - /// requested size of the allocation because of alignment and minimum size - /// constraints. + /// Get the exact allocated size for a pointer \p pointer_expr. /// \param pointer_expr: expression with a pointer name /// \return 1 if the pointer was not allocated with malloc otherwise return - /// the result of calling `malloc_usable_size' + /// the number of allocated bytes size_t query_malloc_size(const std::string &pointer_expr); /// Create a new gdb process for analysing the binary indicated by the member @@ -128,6 +128,9 @@ class gdb_apit /// \return memory address in hex format optionalt get_value(const std::string &expr); + /// Get the value of a pointer associated with \p expr + /// \param expr: the expression to be analyzed + /// \return the \p pointer_valuet filled with data gdb produced for \p expr pointer_valuet get_memory(const std::string &expr); /// Return the vector of commands that have been written to gdb so far @@ -152,6 +155,10 @@ class gdb_apit gdb_statet gdb_state; + /// track the allocated size for each malloc call + /// maps hexadecimal address to the number of bytes + std::map allocated_memory; + typedef std::map gdb_output_recordt; static gdb_output_recordt parse_gdb_output_record(const std::string &s); @@ -169,6 +176,28 @@ class gdb_apit bool was_command_accepted(); void check_command_accepted(); + /// Intercepts the gdb-analysis at the malloc call-site to add the + /// corresponding information into \ref allocated_memory. + void collect_malloc_calls(); + + /// Locate and return the value for a given name + /// \param record: gdb record to search + /// \param value_name: name of the value to be extracted + /// \return the value associated with \p value_name + std::string get_value_from_record( + const gdb_output_recordt &record, + const std::string &value_name); + + /// Check if the breakpoint we hit is inside a malloc + /// \param stopped_record: gdb record pertaining to a breakpoint being hit + /// \return true if the breakpoint the gdb stopped at was malloc + bool hit_malloc_breakpoint(const gdb_output_recordt &stopped_record); + + /// Parse the record produced by listing register value + /// \param record: gdb record for one register value + /// \return get the value associated with some register value + std::string get_register_value(const gdb_output_recordt &record); + static std::string r_opt(const std::string ®ex); static std::string @@ -189,6 +218,9 @@ class gdb_apit // regex group for string (optional part of the output of gdb when printing a // pointer), matches e.g. \"abc\" and extracts \"abc\" const std::string r_string = R"((\\".*\\"))"; + + // name of malloc function + const std::string malloc_name = "malloc"; }; class gdb_interaction_exceptiont : public cprover_exception_baset