diff --git a/src/goto-instrument/dump_c.cpp b/src/goto-instrument/dump_c.cpp index 6013a17ad7e..de05e7288ed 100644 --- a/src/goto-instrument/dump_c.cpp +++ b/src/goto-instrument/dump_c.cpp @@ -663,6 +663,9 @@ void dump_ct::cleanup_decl( tmp.add(goto_programt::make_end_function()); + // goto_program2codet requires valid location numbers: + tmp.update(); + std::unordered_set typedef_names; for(const auto &td : typedef_map) typedef_names.insert(td.first); diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 7ee633d0082..1873c8c27ce 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -692,6 +692,10 @@ int goto_instrument_parse_optionst::doit() // applied restore_returns(goto_model); + // dump_c (actually goto_program2code) requires valid instruction + // location numbers: + goto_model.goto_functions.update(); + if(cmdline.args.size()==2) { #ifdef _MSC_VER @@ -1520,6 +1524,10 @@ void goto_instrument_parse_optionst::instrument_goto_program() do_indirect_call_and_rtti_removal(); log.status() << "Performing a reachability slice" << messaget::eom; + + // reachability_slicer requires that the model has unique location numbers: + goto_model.goto_functions.update(); + if(cmdline.isset("property")) reachability_slicer(goto_model, cmdline.get_values("property")); else @@ -1546,7 +1554,11 @@ void goto_instrument_parse_optionst::instrument_goto_program() if(cmdline.isset("property")) property_slicer(goto_model, cmdline.get_values("property")); else + { + // full_slicer requires that the model has unique location numbers: + goto_model.goto_functions.update(); full_slicer(goto_model); + } } // splice option @@ -1567,6 +1579,9 @@ void goto_instrument_parse_optionst::instrument_goto_program() { do_indirect_call_and_rtti_removal(); + // reachability_slicer requires that the model has unique location numbers: + goto_model.goto_functions.update(); + log.status() << "Slicing away initializations of unused global variables" << messaget::eom; slice_global_inits(goto_model); @@ -1595,6 +1610,8 @@ void goto_instrument_parse_optionst::instrument_goto_program() aggressive_slicer.doit(); + goto_model.goto_functions.update(); + log.status() << "Performing a reachability slice" << messaget::eom; if(cmdline.isset("property")) reachability_slicer(goto_model, cmdline.get_values("property")); diff --git a/src/goto-programs/cfg.h b/src/goto-programs/cfg.h index cfeb28ce892..a3755408fcf 100644 --- a/src/goto-programs/cfg.h +++ b/src/goto-programs/cfg.h @@ -12,8 +12,9 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_GOTO_PROGRAMS_CFG_H #define CPROVER_GOTO_PROGRAMS_CFG_H -#include +#include #include +#include #include "goto_functions.h" @@ -31,6 +32,29 @@ struct cfg_base_nodet:public graph_nodet, public T I PC; }; +/// Functor to convert cfg nodes into dense integers, used by \ref cfg_baset. +/// Default implementation: the identity function. +template +class cfg_instruction_to_dense_integert +{ +public: + std::size_t operator()(T &&t) const + { + return std::forward(identity_functort{}(t)); + } +}; + +/// GOTO-instruction to location number functor. +template <> +class cfg_instruction_to_dense_integert +{ +public: + std::size_t operator()(const goto_programt::const_targett &t) const + { + return t->location_number; + } +}; + /// A multi-procedural control flow graph (CFG) whose nodes store references to /// instructions in a GOTO program. /// @@ -69,7 +93,11 @@ class cfg_baset:public grapht< cfg_base_nodet > class entry_mapt final { - typedef std::map data_typet; + typedef dense_integer_mapt< + goto_programt::const_targett, + entryt, + cfg_instruction_to_dense_integert> + data_typet; data_typet data; public: @@ -114,6 +142,12 @@ class cfg_baset:public grapht< cfg_base_nodet > { return data.at(t); } + + template + void setup_for_keys(Iter begin, Iter end) + { + data.setup_for_keys(begin, end); + } }; entry_mapt entry_map; @@ -173,12 +207,30 @@ class cfg_baset:public grapht< cfg_base_nodet > void operator()( const goto_functionst &goto_functions) { + std::vector possible_keys; + for(const auto &id_and_function : goto_functions.function_map) + { + const auto &instructions = id_and_function.second.body.instructions; + possible_keys.reserve( + possible_keys.size() + + std::distance(instructions.begin(), instructions.end())); + for(auto it = instructions.begin(); it != instructions.end(); ++it) + possible_keys.push_back(it); + } + entry_map.setup_for_keys(possible_keys.begin(), possible_keys.end()); compute_edges(goto_functions); } void operator()(P &goto_program) { goto_functionst goto_functions; + std::vector possible_keys; + const auto &instructions = goto_program.instructions; + possible_keys.reserve( + std::distance(instructions.begin(), instructions.end())); + for(auto it = instructions.begin(); it != instructions.end(); ++it) + possible_keys.push_back(it); + entry_map.setup_for_keys(possible_keys.begin(), possible_keys.end()); compute_edges(goto_functions, goto_program); } @@ -187,19 +239,19 @@ class cfg_baset:public grapht< cfg_base_nodet > /// in that particular case you should just use `cfg.get_node(i)`). Storing /// node indices saves a map lookup, so it can be worthwhile when you expect /// to repeatedly look up the same program point. - entryt get_node_index(const I &program_point) const + entryt get_node_index(const goto_programt::const_targett &program_point) const { return entry_map.at(program_point); } /// Get the CFG graph node relating to \p program_point. - nodet &get_node(const I &program_point) + nodet &get_node(const goto_programt::const_targett &program_point) { return (*this)[get_node_index(program_point)]; } /// Get the CFG graph node relating to \p program_point. - const nodet &get_node(const I &program_point) const + const nodet &get_node(const goto_programt::const_targett &program_point) const { return (*this)[get_node_index(program_point)]; } diff --git a/src/util/dense_integer_map.h b/src/util/dense_integer_map.h new file mode 100644 index 00000000000..c69810c04cd --- /dev/null +++ b/src/util/dense_integer_map.h @@ -0,0 +1,350 @@ +/*******************************************************************\ + +Module: Dense integer map + +Author: Diffblue Ltd + +\*******************************************************************/ + +/// \file +/// Dense integer map + +#ifndef CPROVER_UTIL_DENSE_INTEGER_MAP_H +#define CPROVER_UTIL_DENSE_INTEGER_MAP_H + +#include +#include +#include + +#include +#include + +/// Identity functor. When we use C++20 this can be replaced with std::identity. +class identity_functort +{ +public: + template + constexpr T &&operator()(T &&t) const + { + return std::forward(t); + } +}; + +/// A map type that is backed by a vector, which relies on the ability to (a) +/// see the keys that might be used in advance of their usage, and (b) map those +/// keys onto a dense range of integers. You should specialise +/// key_to_dense_integer for your key type before using. If it maps your keys +/// onto too sparse a range, considerable memory will be wasted, as well as time +/// spent skipping over the unused indices while iterating. +/// +/// Value type V must be default constructible. +/// +/// The type is optimised for fast lookups at the expense of flexibility. +/// It makes one compromise on the iterface of std::map / unordered_map: the +/// iterator refers to a pair>, where the value optional +/// will always be defined. This is because the backing store uses optionalt +/// this way and we don't want to impose the price of copying the key and value +/// each time we move the iterator just so we have a & to give out. +/// +/// Undocumented functions with matching names have the same semantics as +/// std::map equivalents (including perfect iterator stability, with ordering +/// according to the given KeyToDenseInteger function) +template +class dense_integer_mapt +{ +public: + /// Type of the container returned by \ref possible_keys + typedef std::vector possible_keyst; + +private: + // Offset between keys resulting from KeyToDenseInteger{}(key) and indices + // into map. + int64_t offset; + + typedef std::vector> backing_storet; + + // Main store: contains pairs, where entries at positions with + // no corresponding key are default-initialized and entries with a + // corresponding key but no value set yet have the correct key but a default- + // initialized value. Both of these are skipped by this type's iterators. + backing_storet map; + + // Indicates whether a given position in \ref map's value has been set, and + // therefore whether our iterators should stop at a given location. We use + // this auxiliary structure rather than `pair>` in \ref map + // because this way we can more easily return a std::map-like + // std::pair & from the iterator. + std::vector value_set; + + // Population count (number of '1's) in value_set, i.e., number of keys whose + // values have been set. + std::size_t n_values_set; + + // "Allowed" keys passed to \ref setup_for_keys. Attempting to use keys not + // included in this vector may result in an invariant failure, but can + // sometimes silently succeed + possible_keyst possible_keys_vector; + + // Convert a key into an index into \ref map + std::size_t key_to_index(const K &key) const + { + auto key_integer = KeyToDenseInteger{}(key); + INVARIANT(((int64_t)key_integer) >= offset, "index should be in range"); + auto ret = (std::size_t)key_integer - offset; + INVARIANT(ret < map.size(), "index should be in range"); + return ret; + } + + // Note that \ref map entry at offset \ref index has been set. + void mark_index_set(std::size_t index) + { + if(!value_set[index]) + { + ++n_values_set; + value_set[index] = true; + } + } + + // Has the \ref map entry at offset \ref index been set? + bool index_is_set(std::size_t index) const + { + return value_set[index]; + } + + // Support class used to implement both const and non-const iterators + // This is just a std::vector iterator pointing into \ref map, but with an + // operator++ that skips unset values. + template + class iterator_templatet + : public std::iterator + { + // Type of the std::iterator support class we inherit + typedef std::iterator + base_typet; + // Type of this template instantiation + typedef iterator_templatet self_typet; + // Type of our containing \ref dense_integer_mapt + typedef dense_integer_mapt map_typet; + + public: + iterator_templatet(UnderlyingIterator it, const map_typet &underlying_map) + : underlying_iterator(it), underlying_map(underlying_map) + { + it = skip_unset_values(it); + } + + /// Convert iterator to const_iterator + /// (redundant when defined in the const_iterator itself) + operator iterator_templatet< + typename backing_storet::const_iterator, + const typename backing_storet::value_type>() const + { + return {underlying_iterator, underlying_map}; + } + + self_typet operator++() + { + self_typet i = *this; + underlying_iterator = advance(underlying_iterator); + return i; + } + self_typet operator++(int junk) + { + underlying_iterator = advance(underlying_iterator); + return *this; + } + typename base_typet::reference operator*() const + { + return *underlying_iterator; + } + typename base_typet::pointer operator->() const + { + return &*underlying_iterator; + } + bool operator==(const self_typet &rhs) const + { + return underlying_iterator == rhs.underlying_iterator; + } + bool operator!=(const self_typet &rhs) const + { + return underlying_iterator != rhs.underlying_iterator; + } + + private: + // Advance \ref it to the next map entry with an initialized value + UnderlyingIterator advance(UnderlyingIterator it) + { + return skip_unset_values(std::next(it)); + } + + // Return the next iterator >= it with its value set + UnderlyingIterator skip_unset_values(UnderlyingIterator it) + { + auto iterator_pos = std::distance( + underlying_map.map.begin(), + (typename backing_storet::const_iterator)it); + while((std::size_t)iterator_pos < underlying_map.map.size() && + !underlying_map.value_set.at(iterator_pos)) + { + ++iterator_pos; + ++it; + } + return it; + } + + // Wrapped std::vector iterator + UnderlyingIterator underlying_iterator; + const map_typet &underlying_map; + }; + +public: + /// iterator. Stable with respect to all operations on this type once + /// setup_for_keys has been called. + typedef iterator_templatet< + typename backing_storet::iterator, + typename backing_storet::value_type> + iterator; + + /// const iterator. Stable with respect to all operations on this type once + /// setup_for_keys has been called. + typedef iterator_templatet< + typename backing_storet::const_iterator, + const typename backing_storet::value_type> + const_iterator; + + dense_integer_mapt() : offset(0), n_values_set(0) + { + } + + /// Set the keys that are allowed to be used in this container. Checks that + /// the integers produced for each key by `KeyToDenseInteger` are unique + /// and fall within a std::size_t's range (the integers are allowed to be + /// negative so long as max(integers) - min(integers) <= max-size_t). + /// This should be called before the container is used and not called again. + /// Using keys not provided to this function with operator[], insert, at(...) + /// etc may cause an invariant failure or undefined behaviour. + template + void setup_for_keys(Iter first, Iter last) + { + INVARIANT( + size() == 0, + "setup_for_keys must only be called on a newly-constructed container"); + + int64_t highest_key = std::numeric_limits::min(); + int64_t lowest_key = std::numeric_limits::max(); + std::unordered_set seen_keys; + for(Iter it = first; it != last; ++it) + { + int64_t integer_key = KeyToDenseInteger{}(*it); + highest_key = std::max(integer_key, highest_key); + lowest_key = std::min(integer_key, lowest_key); + INVARIANT( + seen_keys.insert(integer_key).second, + "keys for use in dense_integer_mapt must be unique"); + } + + if(highest_key < lowest_key) + { + offset = 0; + } + else + { + auto map_size = (highest_key - lowest_key) + 1; + INVARIANT( + map_size > 0, // overflowed? + "dense_integer_mapt size should fit in std::size_t"); + INVARIANT( + ((std::size_t)map_size) <= std::numeric_limits::max(), + "dense_integer_mapt size should fit in std::size_t"); + offset = lowest_key; + map.resize((highest_key - lowest_key) + 1); + for(Iter it = first; it != last; ++it) + map.at(key_to_index(*it)).first = *it; + value_set.resize((highest_key - lowest_key) + 1); + } + + possible_keys_vector.insert(possible_keys_vector.end(), first, last); + } + + const V &at(const K &key) const + { + std::size_t index = key_to_index(key); + INVARIANT(index_is_set(index), "map value should be set"); + return map.at(index).second; + } + V &at(const K &key) + { + std::size_t index = key_to_index(key); + INVARIANT(index_is_set(index), "map value should be set"); + return map.at(index).second; + } + + V &operator[](const K &key) + { + std::size_t index = key_to_index(key); + mark_index_set(index); + return map.at(index).second; + } + + std::pair insert(const std::pair &pair) + { + std::size_t index = key_to_index(pair.first); + iterator it{std::next(map.begin(), index), *this}; + + if(index_is_set(index)) + return {it, false}; + else + { + mark_index_set(index); + map.at(index).second = pair.second; + return {it, true}; + } + } + + std::size_t count(const K &key) const + { + return index_is_set(key_to_index(key)); + } + + std::size_t size() const + { + return n_values_set; + } + + const possible_keyst &possible_keys() const + { + return possible_keys_vector; + } + + iterator begin() + { + return iterator{map.begin(), *this}; + } + + iterator end() + { + return iterator{map.end(), *this}; + } + + const_iterator begin() const + { + return const_iterator{map.begin(), *this}; + } + + const_iterator end() const + { + return const_iterator{map.end(), *this}; + } + + const_iterator cbegin() const + { + return begin(); + } + + const_iterator cend() const + { + return end(); + } +}; + +#endif // CPROVER_UTIL_DENSE_INTEGER_MAP_H diff --git a/unit/Makefile b/unit/Makefile index f2eecc918b6..3da7f1f18dd 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -63,6 +63,7 @@ SRC += analyses/ai/ai.cpp \ solvers/strings/string_refinement/union_find_replace.cpp \ util/allocate_objects.cpp \ util/cmdline.cpp \ + util/dense_integer_map.cpp \ util/expr_cast/expr_cast.cpp \ util/expr.cpp \ util/expr_iterator.cpp \ diff --git a/unit/util/dense_integer_map.cpp b/unit/util/dense_integer_map.cpp new file mode 100644 index 00000000000..01327947234 --- /dev/null +++ b/unit/util/dense_integer_map.cpp @@ -0,0 +1,220 @@ +/*******************************************************************\ + +Module: Unit tests for dense_integer_map + +Author: Diffblue Ltd + +\*******************************************************************/ + +#include + +#include + +TEST_CASE("no keys", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector empty; + map.setup_for_keys(empty.begin(), empty.end()); + + cbmc_invariants_should_throwt invariants_throw_in_this_scope; + + REQUIRE_THROWS_AS(map.at(0), invariant_failedt); + REQUIRE_THROWS_AS(map[0], invariant_failedt); + REQUIRE_THROWS_AS(map.insert({0, 0}), invariant_failedt); +} + +TEST_CASE("one key", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector allowed_keys = {1}; + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); + + cbmc_invariants_should_throwt invariants_throw_in_this_scope; + + REQUIRE(map.size() == 0); + + REQUIRE_THROWS_AS(map.at(1), invariant_failedt); + REQUIRE(!map.count(1)); + REQUIRE(map[1] == 0); + + REQUIRE(map.size() == 1); + REQUIRE(map.count(1)); + + map[1] = 2; + REQUIRE(map.at(1) == 2); + REQUIRE(map[1] == 2); + + auto insert_result = map.insert({1, 5}); + REQUIRE(!insert_result.second); + REQUIRE(insert_result.first == map.begin()); + + REQUIRE_THROWS_AS(map.at(0), invariant_failedt); + REQUIRE_THROWS_AS(map[0], invariant_failedt); + REQUIRE_THROWS_AS(map.insert({0, 0}), invariant_failedt); +} + +TEST_CASE("insert fresh key", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector allowed_keys = {1}; + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); + + cbmc_invariants_should_throwt invariants_throw_in_this_scope; + + auto insert_result = map.insert({1, 5}); + REQUIRE(insert_result.second); + REQUIRE(insert_result.first == map.begin()); + + REQUIRE(map.at(1) == 5); + REQUIRE(map[1] == 5); +} + +TEST_CASE("multiple, sparse keys", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector allowed_keys = {1, 10}; + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); + + cbmc_invariants_should_throwt invariants_throw_in_this_scope; + + REQUIRE(map.size() == 0); + + map.insert({1, 2}); + REQUIRE(map.size() == 1); + auto second_insert_result = map.insert({10, 11}); + REQUIRE(second_insert_result.second); + REQUIRE(second_insert_result.first == std::next(map.begin())); + + REQUIRE_THROWS_AS(map[0], invariant_failedt); + REQUIRE(map[1] == 2); + // Keys in the gap are silently accepted, though this is bad practice: + // REQUIRE_THROWS_AS(map[2], invariant_failedt); + // REQUIRE_THROWS_AS(map[3], invariant_failedt); + // REQUIRE_THROWS_AS(map[4], invariant_failedt); + // REQUIRE_THROWS_AS(map[5], invariant_failedt); + // REQUIRE_THROWS_AS(map[6], invariant_failedt); + // REQUIRE_THROWS_AS(map[7], invariant_failedt); + // REQUIRE_THROWS_AS(map[8], invariant_failedt); + // REQUIRE_THROWS_AS(map[9], invariant_failedt); + REQUIRE(map[10] == 11); + REQUIRE_THROWS_AS(map[11], invariant_failedt); +} + +TEST_CASE("iterators", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector allowed_keys = {1, 10}; + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); + + map.insert({1, 5}); + map.insert({10, 11}); + + std::vector> iterator_result{map.begin(), map.end()}; + REQUIRE( + iterator_result == std::vector>{{1, 5}, {10, 11}}); + + int acc = 0; + for(auto &key_value : map) + key_value.second = ++acc; + + iterator_result = std::vector>{map.begin(), map.end()}; + REQUIRE(iterator_result == std::vector>{{1, 1}, {10, 2}}); + + REQUIRE(map.begin() != map.end()); + REQUIRE(map.begin() != std::next(map.begin())); + REQUIRE(map.begin() == map.begin()); + REQUIRE(map.size() == 2); + REQUIRE(std::distance(map.begin(), map.end()) == map.size()); + + { + const auto &const_map = map; + + iterator_result = + std::vector>{const_map.begin(), const_map.end()}; + REQUIRE( + iterator_result == std::vector>{{1, 1}, {10, 2}}); + + auto non_const_iterator = map.begin(); + auto converted_non_const_iterator = + (decltype(map)::const_iterator)non_const_iterator; + auto const_iterator = const_map.begin(); + auto other_const_iterator = const_map.begin(); + + REQUIRE(converted_non_const_iterator == const_iterator); + REQUIRE(converted_non_const_iterator == other_const_iterator); + } +} + +TEST_CASE("keys must be unique", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector allowed_keys = {1, 1}; + + cbmc_invariants_should_throwt invariants_throw_in_this_scope; + + REQUIRE_THROWS_AS( + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()), + invariant_failedt); + + allowed_keys = {1, 2, 1}; + REQUIRE_THROWS_AS( + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()), + invariant_failedt); + + allowed_keys = {1, 2}; + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); +} + +class reverse_orderingt +{ +public: + int operator()(int x) + { + return 10 - x; + } +}; + +TEST_CASE( + "ordering by custom key-to-integer function", + "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + std::vector allowed_keys = {-20, 0, 20}; + + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); + map.insert({0, 1}); + map.insert({-20, 2}); + map.insert({20, 3}); + + std::vector> iterator_result{map.begin(), map.end()}; + + REQUIRE( + iterator_result == + std::vector>{{20, 3}, {0, 1}, {-20, 2}}); +} + +class index_is_mod2t +{ +public: + int operator()(int x) + { + return x % 2; + } +}; + +TEST_CASE("indices must be unique", "[core][util][dense_integer_map]") +{ + dense_integer_mapt map; + + cbmc_invariants_should_throwt invariants_throw_in_this_scope; + + // Illegal keys (are equal mod 2) + std::vector allowed_keys = {2, 4}; + REQUIRE_THROWS_AS( + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()), + invariant_failedt); + + // Legal keys (not equal mod 2) + allowed_keys = {2, 3}; + map.setup_for_keys(allowed_keys.begin(), allowed_keys.end()); +}