diff --git a/src/pointer-analysis/Makefile b/src/pointer-analysis/Makefile index 7e9837ac737..6af38c803c3 100644 --- a/src/pointer-analysis/Makefile +++ b/src/pointer-analysis/Makefile @@ -11,7 +11,6 @@ SRC = add_failed_symbols.cpp \ value_set_analysis_fivr.cpp \ value_set_analysis_fivrns.cpp \ value_set_dereference.cpp \ - value_set_domain.cpp \ value_set_domain_fi.cpp \ value_set_domain_fivr.cpp \ value_set_domain_fivrns.cpp \ diff --git a/src/pointer-analysis/show_value_sets.h b/src/pointer-analysis/show_value_sets.h index 3b080f66e62..b704f6a2964 100644 --- a/src/pointer-analysis/show_value_sets.h +++ b/src/pointer-analysis/show_value_sets.h @@ -12,10 +12,10 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_POINTER_ANALYSIS_SHOW_VALUE_SETS_H #define CPROVER_POINTER_ANALYSIS_SHOW_VALUE_SETS_H +#include #include class goto_modelt; -class value_set_analysist; void show_value_sets( ui_message_handlert::uit, diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index d9dc369b373..18b6280eea8 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -33,6 +33,9 @@ Author: Daniel Kroening, kroening@kroening.com #include "add_failed_symbols.h" +// Due to a large number of functions defined inline, `value_sett` and +// associated types are documented in its header file, `value_set.h`. + const value_sett::object_map_dt value_sett::object_map_dt::blank{}; object_numberingt value_sett::object_numbering; @@ -1205,6 +1208,12 @@ void value_sett::assign( object_mapt values_rhs; get_value_set(rhs, values_rhs, ns, is_simplified); + // Permit custom subclass to alter the read values prior to write: + adjust_assign_rhs_values(rhs, ns, values_rhs); + + // Permit custom subclass to perform global side-effects prior to write: + apply_assign_side_effects(lhs, rhs, ns); + assign_rec(lhs, values_rhs, "", ns, add_to_sets); } } @@ -1484,7 +1493,7 @@ void value_sett::do_end_function( assign(lhs, rhs, ns, false, false); } -void value_sett::apply_code( +void value_sett::apply_code_rec( const codet &code, const namespacet &ns) { @@ -1493,7 +1502,7 @@ void value_sett::apply_code( if(statement==ID_block) { forall_operands(it, code) - apply_code(to_code(*it), ns); + apply_code_rec(to_code(*it), ns); } else if(statement==ID_function_call) { @@ -1611,6 +1620,10 @@ void value_sett::apply_code( { // doesn't do anything } + else if(statement==ID_dead) + { + // Ignore by default; could prune the value set. + } else { // std::cerr << code.pretty() << '\n'; diff --git a/src/pointer-analysis/value_set.h b/src/pointer-analysis/value_set.h index aed52e05f2e..4005d9b0da0 100644 --- a/src/pointer-analysis/value_set.h +++ b/src/pointer-analysis/value_set.h @@ -22,6 +22,22 @@ Author: Daniel Kroening, kroening@kroening.com class namespacet; +/// State type in value_set_domaint, used is value-set analysis and goto-symex. +/// Represents a mapping from expressions to the addresses that may be stored +/// there; for example, a global that is either null or points to a +/// heap-allocated object, which itself has two fields, one pointing to another +/// heap object and the other having unknown target, would be represented: +/// +/// global_x -> { null, } +/// dynamic_object1.field1 -> { } +/// dynamic_object1.field2 -> { * (unknown) } +/// +/// LHS expressions can be either symbols or fields thereof, and are stored as +/// strings; RHS expressions may be symbols, dynamic objects, integer addresses +/// (as in `(int*)0x1234`) or unknown (printed as `*`), and are stored as +/// pairs of an `exprt` and an optional offset if known (0 for both dynamic +/// objects in the example given above). RHS expressions are represented using +/// numbering to avoid storing redundant duplicate expressions. class value_sett { public: @@ -34,26 +50,40 @@ class value_sett const typet &type, const namespacet &); + /// Matches the location_number field of the instruction that corresponds + /// to this value_sett instance in value_set_domaint's state map unsigned location_number; + + /// Global shared object numbering, used to abbreviate expressions stored + /// in value sets. static object_numberingt object_numbering; typedef irep_idt idt; + /// Represents the offset into an object: either a unique integer offset, + /// or an unknown value, represented by `!offset_is_set`. class objectt { public: + /// Constructs an unknown offset objectt():offset_is_set(false) { } + /// Constructs a known offset explicit objectt(const mp_integer &_offset): offset(_offset), offset_is_set(true) { } + /// Offset into the target object. Ignored if `offset_is_set` is false. mp_integer offset; + + /// If true, `offset` gives a unique integer offset; if false, represents + /// an unknown offset. bool offset_is_set; + bool offset_is_zero() const { return offset_is_set && offset.is_zero(); } @@ -69,6 +99,13 @@ class value_sett } }; + /// Represents a set of expressions (`exprt` instances) with corresponding + /// offsets (`objectt` instances). This is the RHS set of a single row of + /// the enclosing `value_sett`, such as `{ null, dynamic_object1 }`. + /// The set is represented as a map from numbered `exprt`s to `objectt` + /// instead of a set of pairs to make lookup by `exprt` easier. All + /// methods matching the interface of `std::map` forward those methods + /// to the internal map. class object_map_dt { typedef std::map data_typet; @@ -125,25 +162,63 @@ class value_sett ~object_map_dt()=default; }; + /// Converts an `object_map_dt` entry `object_number -> offset` into an + /// `object_descriptor_offsett` with + /// `.object() == object_numbering.at(object_number)` and + /// `.offset() == offset`. exprt to_expr(const object_map_dt::value_type &it) const; + /// Reference-counts instances of `object_map_dt`, such that multiple + /// instructions' `value_sett` instances can share them. For example, if + /// we have a pair of instructions with value-sets: + /// + /// x = new A; + /// x -> { dynamic_object1 } + /// y = new B; + /// x -> { dynamic_object1 } + /// y -> { dynamic_object2 } + /// + /// Then the set { dynamic_object1 }, represented by an `object_map_dt`, can + /// be shared between the two `value_sett` instances. typedef reference_counting object_mapt; + /// Sets an entry in object map `dest` to match an existing entry `it` + /// from a different map. Any existing entry for the same `exprt` is + /// overwritten. + /// \param dest: object map to update. + /// \param it: iterator pointing to new entry void set(object_mapt &dest, const object_map_dt::value_type &it) const { dest.write()[it.first]=it.second; } + /// Merges an existing entry into an object map. If the destination map + /// has an existing entry for the same expression with a different offset + /// its offset is marked unknown (so e.g. `x -> 0` and `x -> 1` merge into + /// `x -> ?`). + /// \param dest: object map to update. + /// \param it: iterator pointing to new entry bool insert(object_mapt &dest, const object_map_dt::value_type &it) const { return insert(dest, it.first, it.second); } + /// Adds an expression to an object map, with unknown offset. If the + /// destination map has an existing entry for the same expression + /// its offset is marked unknown. + /// \param dest: object map to update + /// \param src: expression to add bool insert(object_mapt &dest, const exprt &src) const { return insert(dest, object_numbering.number(src), objectt()); } + /// Adds an expression to an object map, with known offset. If the + /// destination map has an existing entry for the same expression + /// with a differing offset its offset is marked unknown. + /// \param dest: object map to update + /// \param src: expression to add + /// \param offset: offset into `src` bool insert( object_mapt &dest, const exprt &src, @@ -152,16 +227,33 @@ class value_sett return insert(dest, object_numbering.number(src), objectt(offset)); } + /// Adds a numbered expression and offset to an object map. If the + /// destination map has an existing entry for the same expression + /// with a differing offset its offset is marked unknown. + /// \param dest: object map to update + /// \param n: object number to add; must be mapped to the corresponding + /// expression by `object_numbering`. + /// \param object: offset into object `n` (may be unknown). bool insert( object_mapt &dest, object_numberingt::number_type n, const objectt &object) const; + /// Adds an expression and offset to an object map. If the + /// destination map has an existing entry for the same expression + /// with a differing offset its offset is marked unknown. + /// \param dest: object map to update + /// \param expr: expression to add + /// \param object: offset into `expr` (may be unknown). bool insert(object_mapt &dest, const exprt &expr, const objectt &object) const { return insert(dest, object_numbering.number(expr), object); } + /// Represents a row of a `value_sett`. For example, this might represent + /// `dynamic_object1.field1 -> { }`, with + /// `identifier == dynamic_object1`, `suffix` == `.field1` and + /// a single-entry `object_map` representing `{ }`. struct entryt { object_mapt object_map; @@ -191,25 +283,50 @@ class value_sett } }; + /// Set of expressions; only used for the `get` API, not for internal + /// data representation. typedef std::set expr_sett; + /// Set of dynamic object numbers, equivalent to a set of + /// `dynamic_object_exprt`s with corresponding IDs. Used only in internal + /// implementation details. typedef std::set dynamic_object_id_sett; + /// Map representing the entire value set, mapping from string LHS IDs + /// to RHS expression sets. Note this data structure is somewhat + /// denormalized, for example mapping + /// + /// dynamic_object1.field2 -> + /// entryt { + /// .identifier = dynamic_object1, + /// .suffix = .field2, + /// .object_map = ... + /// } + /// + /// The components of the ID are thus duplicates in the `valuest` key and in + /// `entryt` fields. #ifdef USE_DSTRING typedef std::map valuest; #else typedef std::unordered_map valuest; #endif + /// Gets values pointed to by `expr`, including following dereference + /// operators (i.e. this is not a simple lookup in `valuest`). + /// \param expr: query expression + /// \param [out] dest: assigned a set of expressions that `expr` may point to + /// \param ns: global namespace void get_value_set( const exprt &expr, value_setst::valuest &dest, const namespacet &ns) const; + /// Appears to be unimplemented. expr_sett &get( const idt &identifier, const std::string &suffix); + /// Clears value set (not used in the CBMC repository) void make_any() { values.clear(); @@ -220,36 +337,84 @@ class value_sett values.clear(); } + /// Gets or inserts an entry in this value-set. + /// \param e: entry to find. Its `id` and `suffix` fields will be used + /// to find a corresponding entry; if a fresh entry is created its + /// `object_map` (RHS value set) will be copied; otherwise it will be + /// ignored. Therefore it should probably be left blank and any RHS updates + /// conducted against the returned reference. + /// \param type: type of `e.identifier`, used to determine whether `e`'s + /// suffix should be used to find a field-sensitive value-set or whether + /// a single entry should be shared by all of symbol `e.identifier`. + /// \param ns: global namespace entryt &get_entry( const entryt &e, const typet &type, - const namespacet &); + const namespacet &ns); + /// Pretty-print this value-set + /// \param ns: global namespace + /// \param [out] out: stream to write to void output( const namespacet &ns, std::ostream &out) const; + /// Stores the LHS ID -> RHS expression set map. See `valuest` documentation + /// for more detail. valuest values; - // true = added something new + /// Merges two RHS expression sets + /// \param [in, out] dest: set to merge into + /// \param src: set to merge in + /// \return true if anything changed. bool make_union(object_mapt &dest, const object_mapt &src) const; - // true = added something new + /// Merges an entire existing value_sett's data into this one + /// \param new_values: new values to merge in + /// \return true if anything changed. bool make_union(const valuest &new_values); - // true = added something new + /// Merges an entire existing value_sett's data into this one + /// \return true if anything changed. bool make_union(const value_sett &new_values) { return make_union(new_values.values); } + /// Transforms this value-set by assuming an expression holds. + /// Currently this can only mark dynamic objects valid; all other + /// assumptions are ignored. + /// \param expr: condition to assume + /// \param ns: global namespace void guard( const exprt &expr, const namespacet &ns); + /// Transforms this value-set by executing a given statement against it. + /// For example, assignment statements will update `valuest` by reading + /// the value-set corresponding to their right-hand side and assigning it + /// to their LHS. + /// \param code: instruction to apply + /// \param ns: global namespace void apply_code( const codet &code, - const namespacet &ns); + const namespacet &ns) + { + apply_code_rec(code, ns); + } + /// Transforms this value-set by executing executing the assignment + /// `lhs := rhs` against it. + /// \param lhs: written expression + /// \param rhs: read expression + /// \param ns: global namespace + /// \param is_simplified: if false, `simplify` will be used to simplify + /// RHS and LHS before execution. If you know they are already simplified, + /// set this to save a little time. + /// \param add_to_sets: if true, execute a weak assignment (the LHS possible + /// values are added to but not overwritten). Otherwise execute a strong + /// (overwriting) assignment. Note the nature of `lhs` may internally + /// prevent a strong assignment, as in `x == y ? z : w := a`, where either + /// `y` or `z` MAY, but not MUST, be overwritten. void assign( const exprt &lhs, const exprt &rhs, @@ -257,39 +422,69 @@ class value_sett bool is_simplified, bool add_to_sets); + /// Transforms this value-set by executing the actual -> formal parameter + /// assignments for a particular callee. For example, for function + /// `f(int* x, void* y)` and call `f(&g, &h)` this would execute assignments + /// `x := &g` and `y := &h`. + /// \param function: function being called + /// \param arguments: actual arguments + /// \param ns: global namespace void do_function_call( const irep_idt &function, const exprt::operandst &arguments, const namespacet &ns); - // edge back to call site + /// Transforms this value-set by assigning this function's return value to + /// a given LHS expression. Note this has no effect if `remove_returns` has + /// been run, in which case returns are explicitly passed via global + /// variables named `function_name#return_value` and are handled via the usual + /// `apply_code` path. + /// \param lhs: expression that recieves the return value + /// \param ns: global namespace void do_end_function( const exprt &lhs, const namespacet &ns); + /// Gets the set of expressions that `expr` may refer to, according to this + /// value set. Note the contrast with `get_value_set`: if `x` holds a pointer + /// to `y`, then `get_value_set(x)` includes `y` while `get_reference_set(x)` + /// returns `{ x }`. + /// \param expr: query expression + /// \param [out] dest: overwritten with result expression set + /// \param ns: global namespace void get_reference_set( const exprt &expr, value_setst::valuest &dest, const namespacet &ns) const; + /// Tries to resolve any `pointer_offset_exprt` expressions in `expr` to + /// constant integers using this value-set's information. For example, if + /// `expr` contained `POINTER_OFFSET(y)`, and this value set indicates `y` + /// points to object `z` offset `4`, then the expression will be replaced by + /// constant `4`. If we don't know where `y` points, or it may point to + /// multiple distinct offsets, then the expression will be left alone. + /// \param expr: expression whose pointer offsets should be replaced + /// \param ns: global namespace + /// \return true if any offset expression was successfully replaced. bool eval_pointer_offset( exprt &expr, const namespacet &ns) const; protected: - void get_value_set_rec( - const exprt &expr, - object_mapt &dest, - const std::string &suffix, - const typet &original_type, - const namespacet &ns) const; - + /// Reads the set of objects pointed to by `expr`, including making + /// recursive lookups for dereference operations etc. + /// \param expr: query expression + /// \param dest [out]: overwritten by the set of object numbers pointed to + /// \param ns; global namespace + /// \param is_simplified: if false, simplify `expr` before reading. void get_value_set( const exprt &expr, object_mapt &dest, const namespacet &ns, bool is_simplified) const; + /// See the other overload of `get_reference_set`. This one returns object + /// numbers and offsets instead of expressions. void get_reference_set( const exprt &expr, object_mapt &dest, @@ -298,30 +493,81 @@ class value_sett get_reference_set_rec(expr, dest, ns); } + /// See get_reference_set. void get_reference_set_rec( const exprt &expr, object_mapt &dest, const namespacet &ns) const; + /// Sets `dest` to `src` with any wrapping typecasts removed void dereference_rec( const exprt &src, exprt &dest) const; - void assign_rec( - const exprt &lhs, - const object_mapt &values_rhs, - const std::string &suffix, - const namespacet &ns, - bool add_to_sets); - + /// Marks objects that may be pointed to by `op` as maybe-invalid + /// \param op: pointer to invalidate + /// \param ns: global namespace void do_free( const exprt &op, const namespacet &ns); + /// Extracts a member from a struct- or union-typed expression. + /// Usually that means making a `member_exprt`, but this can shortcut + /// extracting members from constants or with-expressions. + /// \param src: base struct-or-union-typed expression + /// \param component_name: member to extract + /// \param ns: global namespace exprt make_member( const exprt &src, const irep_idt &component_name, const namespacet &ns); + + // Subclass customisation points: + +protected: + /// Subclass customisation point for recursion over RHS expression: + virtual void get_value_set_rec( + const exprt &expr, + object_mapt &dest, + const std::string &suffix, + const typet &original_type, + const namespacet &ns) const; + + /// Subclass customisation point for recursion over LHS expression: + virtual void assign_rec( + const exprt &lhs, + const object_mapt &values_rhs, + const std::string &suffix, + const namespacet &ns, + bool add_to_sets); + + /// Subclass customisation point for applying code to this domain: + virtual void apply_code_rec( + const codet &code, + const namespacet &ns); + + private: + /// Subclass customisation point to filter or otherwise alter the value-set + /// returned from get_value_set before it is passed into assign. For example, + /// this is used in one subclass to tag and thus differentiate values that + /// originated in a particular place, vs. those that have been copied. + virtual void adjust_assign_rhs_values( + const exprt &rhs, + const namespacet &ns, + object_mapt &rhs_values) const + { + } + + /// Subclass customisation point to apply global side-effects to this domain, + /// after RHS values are read but before they are written. For example, this + /// is used in a recency-analysis plugin to demote existing most-recent + /// objects to general case ones. + virtual void apply_assign_side_effects( + const exprt &lhs, + const exprt &rhs, + const namespacet &ns) + { + } }; #endif // CPROVER_POINTER_ANALYSIS_VALUE_SET_H diff --git a/src/pointer-analysis/value_set_analysis.cpp b/src/pointer-analysis/value_set_analysis.cpp index d4a3388142e..6d55ebc1761 100644 --- a/src/pointer-analysis/value_set_analysis.cpp +++ b/src/pointer-analysis/value_set_analysis.cpp @@ -18,22 +18,11 @@ Author: Daniel Kroening, kroening@kroening.com #include -void value_set_analysist::initialize( - const goto_programt &goto_program) -{ - baset::initialize(goto_program); -} - -void value_set_analysist::initialize( - const goto_functionst &goto_functions) -{ - baset::initialize(goto_functions); -} - -void value_set_analysist::convert( +void value_sets_to_xml( + std::function get_value_set, const goto_programt &goto_program, const irep_idt &identifier, - xmlt &dest) const + xmlt &dest) { source_locationt previous_location; @@ -48,7 +37,7 @@ void value_set_analysist::convert( continue; // find value set - const value_sett &value_set=(*this)[i_it].value_set; + const value_sett &value_set=get_value_set(i_it); xmlt &i=dest.new_element("instruction"); i.new_element()=::xml(location); @@ -81,6 +70,7 @@ void value_set_analysist::convert( } } } + void convert( const goto_functionst &goto_functions, const value_set_analysist &value_set_analysis, diff --git a/src/pointer-analysis/value_set_analysis.h b/src/pointer-analysis/value_set_analysis.h index b41355abd73..31dc306026a 100644 --- a/src/pointer-analysis/value_set_analysis.h +++ b/src/pointer-analysis/value_set_analysis.h @@ -20,26 +20,37 @@ Author: Daniel Kroening, kroening@kroening.com class xmlt; -class value_set_analysist: +void value_sets_to_xml( + std::function get_value_set, + const goto_programt &goto_program, + const irep_idt &identifier, + xmlt &dest); + +template +class value_set_analysis_templatet: public value_setst, - public static_analysist + public static_analysist { public: - explicit value_set_analysist(const namespacet &_ns): - static_analysist(_ns) + typedef VSDT domaint; + typedef static_analysist baset; + typedef typename baset::locationt locationt; + + explicit value_set_analysis_templatet(const namespacet &ns):baset(ns) { } - typedef static_analysist baset; - - // overloading - virtual void initialize(const goto_programt &goto_program); - virtual void initialize(const goto_functionst &goto_functions); - void convert( const goto_programt &goto_program, const irep_idt &identifier, - xmlt &dest) const; + xmlt &dest) const + { + value_sets_to_xml( + [this](locationt l) { return (*this)[l].value_set; }, + goto_program, + identifier, + dest); + } public: // interface value_sets @@ -48,10 +59,14 @@ class value_set_analysist: const exprt &expr, value_setst::valuest &dest) { - (*this)[l].value_set.get_value_set(expr, dest, ns); + (*this)[l].value_set.get_value_set(expr, dest, baset::ns); } }; +typedef + value_set_analysis_templatet> + value_set_analysist; + void convert( const goto_functionst &goto_functions, const value_set_analysist &value_set_analysis, diff --git a/src/pointer-analysis/value_set_domain.h b/src/pointer-analysis/value_set_domain.h index e59250a0bc0..27e1201c0c2 100644 --- a/src/pointer-analysis/value_set_domain.h +++ b/src/pointer-analysis/value_set_domain.h @@ -17,14 +17,15 @@ Author: Daniel Kroening, kroening@kroening.com #include "value_set.h" -class value_set_domaint:public domain_baset +template +class value_set_domain_templatet:public domain_baset { public: - value_sett value_set; + VST value_set; // overloading - bool merge(const value_set_domaint &other, locationt to) + bool merge(const value_set_domain_templatet &other, locationt to) { return value_set.make_union(other.value_set); } @@ -58,4 +59,8 @@ class value_set_domaint:public domain_baset } }; +typedef value_set_domain_templatet value_set_domaint; + +#include "value_set_domain_transform.inc" + #endif // CPROVER_POINTER_ANALYSIS_VALUE_SET_DOMAIN_H diff --git a/src/pointer-analysis/value_set_domain.cpp b/src/pointer-analysis/value_set_domain_transform.inc similarity index 63% rename from src/pointer-analysis/value_set_domain.cpp rename to src/pointer-analysis/value_set_domain_transform.inc index 4b296b541df..bd83d8ecb66 100644 --- a/src/pointer-analysis/value_set_domain.cpp +++ b/src/pointer-analysis/value_set_domain_transform.inc @@ -9,11 +9,12 @@ Author: Daniel Kroening, kroening@kroening.com /// \file /// Value Set -#include "value_set_domain.h" +// The body of value_set_domaint::transform, included from +// value_set_domain.h but retained in this out-of-line file to ease +// git merges: -#include - -void value_set_domaint::transform( +template +void value_set_domain_templatet::transform( const namespacet &ns, locationt from_l, locationt to_l) @@ -25,13 +26,18 @@ void value_set_domaint::transform( break; case END_FUNCTION: - value_set.do_end_function(static_analysis_baset::get_return_lhs(to_l), ns); + { + value_set.do_end_function( + static_analysis_baset::get_return_lhs(to_l), ns); break; + } + // Note intentional fall-through here: case RETURN: case OTHER: case ASSIGN: case DECL: + case DEAD: value_set.apply_code(from_l->code, ns); break; @@ -44,7 +50,8 @@ void value_set_domaint::transform( const code_function_callt &code= to_code_function_call(from_l->code); - value_set.do_function_call(to_l->function, code.arguments(), ns); + value_set.do_function_call( + to_l->function, code.arguments(), ns); } break; diff --git a/unit/Makefile b/unit/Makefile index 1eb519b64fe..043d8ef8d37 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -23,6 +23,7 @@ SRC += unit_tests.cpp \ miniBDD_new.cpp \ java_bytecode/java_string_library_preprocess/convert_exprt_to_string_exprt.cpp \ java_bytecode/java_utils_test.cpp \ + pointer-analysis/custom_value_set_analysis.cpp \ sharing_node.cpp \ solvers/refinement/string_constraint_generator_valueof/calculate_max_string_length.cpp \ solvers/refinement/string_constraint_generator_valueof/get_numeric_value_from_character.cpp \ diff --git a/unit/pointer-analysis/CustomVSATest.jar b/unit/pointer-analysis/CustomVSATest.jar new file mode 100644 index 00000000000..8a5f9aaac70 Binary files /dev/null and b/unit/pointer-analysis/CustomVSATest.jar differ diff --git a/unit/pointer-analysis/CustomVSATest.java b/unit/pointer-analysis/CustomVSATest.java new file mode 100644 index 00000000000..7df3c6910a6 --- /dev/null +++ b/unit/pointer-analysis/CustomVSATest.java @@ -0,0 +1,46 @@ + +public class CustomVSATest { + + public Object never_read; + + public static Object cause; + public static Object effect; + public static Object first_effect_read; + public static Object second_effect_read; + public static Object maybe_unknown_read; + + public static void test() { + + Object weak_local = new Object(); + // Under standard VSA the following should be a strong write; + // with our test custom VSA it will be weak. + weak_local = new Object(); + + // Normally this would set the value of `ignored`, but our custom + // VSA discards the instruction completely: + Object ignored = new Object(); + + // Similarly this write should have no effect: + Object no_write = new Object(); + + CustomVSATest vsa = new CustomVSATest(); + vsa.never_read = new Object(); + + // Again this should be disregarded: + Object read = vsa.never_read; + + // This should acquire a "*" unknown-object, even though + // it is obvious what it points to: + Object maybe_unknown = new Object(); + maybe_unknown_read=maybe_unknown; + + effect = new Object(); + first_effect_read = effect; + + // Under our custom VSA, this write should cause effect to become null: + cause = new Object(); + second_effect_read = effect; + + } + +} diff --git a/unit/pointer-analysis/custom_value_set_analysis.cpp b/unit/pointer-analysis/custom_value_set_analysis.cpp new file mode 100644 index 00000000000..b3f64a14476 --- /dev/null +++ b/unit/pointer-analysis/custom_value_set_analysis.cpp @@ -0,0 +1,325 @@ +/*******************************************************************\ + +Module: Value-set analysis tests + +Author: Chris Smowton, chris@smowton.net + +\*******************************************************************/ + +#include + +#include +#include +#include +#include +#include +#include +#include + +/// An example customised value_sett. It makes a series of small changes +/// to the underlying value_sett logic, which can then be verified by the +/// test below: +/// * Writes to variables with 'ignored' in their name are ignored. +/// * Never propagate values via the field "never_read" +/// * Adds an ID_unknown to the value of variable "maybe_unknown" as it is read +/// * When a variable named `cause` is written, one named `effect` is zeroed. +class test_value_sett:public value_sett +{ +public: + static bool assigns_to_ignored_variable(const code_assignt &assign) + { + if(assign.lhs().id()!=ID_symbol) + return false; + const irep_idt &id=to_symbol_expr(assign.lhs()).get_identifier(); + return id2string(id).find("ignored")!=std::string::npos; + } + + void apply_code_rec(const codet &code, const namespacet &ns) override + { + // Ignore assignments to the local "ignored" + if(code.get_statement()==ID_assign && + assigns_to_ignored_variable(to_code_assign(code))) + { + return; + } + else + { + value_sett::apply_code_rec(code, ns); + } + } + + void assign_rec( + const exprt &lhs, + const object_mapt &values_rhs, + const std::string &suffix, + const namespacet &ns, + bool add_to_sets) override + { + // Disregard writes against variables containing 'no_write': + if(lhs.id()==ID_symbol) + { + const irep_idt &id=to_symbol_expr(lhs).get_identifier(); + if(id2string(id).find("no_write")!=std::string::npos) + return; + } + + value_sett::assign_rec(lhs, values_rhs, suffix, ns, add_to_sets); + } + + void get_value_set_rec( + const exprt &expr, + object_mapt &dest, + const std::string &suffix, + const typet &original_type, + const namespacet &ns) const override + { + // Ignore reads from fields named "never_read" + if(expr.id()==ID_member && + to_member_expr(expr).get_component_name()=="never_read") + { + return; + } + else + { + value_sett::get_value_set_rec( + expr, dest, suffix, original_type, ns); + } + } + + void adjust_assign_rhs_values( + const exprt &expr, + const namespacet &ns, + object_mapt &dest) const override + { + // Always add an ID_unknown to reads from variables containing + // "maybe_unknown": + exprt read_sym=expr; + while(read_sym.id()==ID_typecast) + read_sym=read_sym.op0(); + if(read_sym.id()==ID_symbol) + { + const irep_idt &id=to_symbol_expr(read_sym).get_identifier(); + if(id2string(id).find("maybe_unknown")!=std::string::npos) + insert(dest, exprt(ID_unknown, read_sym.type())); + } + } + + void apply_assign_side_effects( + const exprt &lhs, + const exprt &rhs, + const namespacet &ns) override + { + // Whenever someone touches the variable "cause", null the + // variable "effect": + if(lhs.id()==ID_symbol) + { + const irep_idt &id=to_symbol_expr(lhs).get_identifier(); + const auto &id_str=id2string(id); + auto find_idx=id_str.find("cause"); + if(find_idx!=std::string::npos) + { + std::string effect_id=id_str; + effect_id.replace(find_idx, 5, "effect"); + assign( + symbol_exprt(effect_id, lhs.type()), + null_pointer_exprt(to_pointer_type(lhs.type())), + ns, + true, + false); + } + } + } +}; + +typedef + value_set_analysis_templatet> + test_value_set_analysist; + +#define TEST_PREFIX "java::CustomVSATest." +#define TEST_FUNCTION_NAME TEST_PREFIX "test:()V" +#define TEST_LOCAL_PREFIX TEST_FUNCTION_NAME "::" + +template +static value_setst::valuest +get_values(const VST &value_set, const namespacet &ns, const exprt &expr) +{ + value_setst::valuest vals; + value_set.get_value_set(expr, vals, ns); + return vals; +} + +static std::size_t exprs_with_id( + const value_setst::valuest &exprs, const irep_idt &id) +{ + return std::count_if( + exprs.begin(), + exprs.end(), + [&id](const exprt &expr) + { + return expr.id()==id || + (expr.id()==ID_object_descriptor && + to_object_descriptor_expr(expr).object().id()==id); + }); +} + +SCENARIO("test_value_set_analysis", + "[core][pointer-analysis][value_set_analysis]") +{ + GIVEN("Normal and custom value-set analysis of CustomVSATest::test") + { + null_message_handlert null_output; + cmdlinet command_line; + + // This classpath is the default, but the config object + // is global and previous unit tests may have altered it + command_line.set("java-cp-include-files", "."); + config.java.classpath={"."}; + command_line.args.push_back("pointer-analysis/CustomVSATest.jar"); + + register_language(new_java_bytecode_language); + + goto_modelt goto_model= + initialize_goto_model(command_line, null_output); + + namespacet ns(goto_model.symbol_table); + + // Fully inline the test program, to avoid VSA conflating + // constructor callsites confusing the results we're trying to check: + goto_function_inline(goto_model, TEST_FUNCTION_NAME, null_output); + + const goto_programt &test_function= + goto_model.goto_functions.function_map.at(TEST_PREFIX "test:()V").body; + + value_set_analysist::locationt test_function_end= + std::prev(test_function.instructions.end()); + + value_set_analysist normal_analysis(ns); + normal_analysis(goto_model.goto_functions); + const auto &normal_function_end_vs= + normal_analysis[test_function_end].value_set; + + test_value_set_analysist test_analysis(ns); + test_analysis(goto_model.goto_functions); + const auto &test_function_end_vs= + test_analysis[test_function_end].value_set; + + reference_typet jlo_ref_type=java_lang_object_type(); + + WHEN("Writing to a local named 'ignored'") + { + symbol_exprt written_symbol( + TEST_LOCAL_PREFIX "23::ignored", jlo_ref_type); + THEN("The normal analysis should write to it") + { + auto normal_exprs= + get_values(normal_function_end_vs, ns, written_symbol); + REQUIRE(exprs_with_id(normal_exprs, ID_dynamic_object)==1); + REQUIRE(exprs_with_id(normal_exprs, ID_unknown)==0); + } + THEN("The custom analysis should ignore the write to it") + { + auto test_exprs= + get_values(test_function_end_vs, ns, written_symbol); + REQUIRE(exprs_with_id(test_exprs, ID_dynamic_object)==0); + REQUIRE(exprs_with_id(test_exprs, ID_unknown)==1); + } + } + + WHEN("Writing to a local named 'no_write'") + { + symbol_exprt written_symbol( + TEST_LOCAL_PREFIX "31::no_write", jlo_ref_type); + THEN("The normal analysis should write to it") + { + auto normal_exprs= + get_values(normal_function_end_vs, ns, written_symbol); + REQUIRE(exprs_with_id(normal_exprs, ID_dynamic_object)==1); + REQUIRE(exprs_with_id(normal_exprs, ID_unknown)==0); + } + THEN("The custom analysis should ignore the write to it") + { + auto test_exprs= + get_values(test_function_end_vs, ns, written_symbol); + REQUIRE(exprs_with_id(test_exprs, ID_dynamic_object)==0); + REQUIRE(exprs_with_id(test_exprs, ID_unknown)==1); + } + } + + WHEN("Reading from a field named 'never_read'") + { + symbol_exprt written_symbol( + TEST_LOCAL_PREFIX "55::read", jlo_ref_type); + THEN("The normal analysis should find a dynamic object") + { + auto normal_exprs= + get_values(normal_function_end_vs, ns, written_symbol); + REQUIRE(exprs_with_id(normal_exprs, ID_dynamic_object)==1); + REQUIRE(exprs_with_id(normal_exprs, ID_unknown)==0); + } + THEN("The custom analysis should have no information about it") + { + auto test_exprs= + get_values(test_function_end_vs, ns, written_symbol); + REQUIRE(test_exprs.size()==0); + } + } + + WHEN("Reading from a variable named 'maybe_unknown'") + { + symbol_exprt written_symbol( + TEST_PREFIX "maybe_unknown_read", jlo_ref_type); + THEN("The normal analysis should find a dynamic object") + { + auto normal_exprs= + get_values(normal_function_end_vs, ns, written_symbol); + REQUIRE(exprs_with_id(normal_exprs, ID_dynamic_object)==1); + REQUIRE(exprs_with_id(normal_exprs, ID_unknown)==0); + } + THEN("The custom analysis should find a dynamic object " + "*and* an unknown entry") + { + auto test_exprs= + get_values(test_function_end_vs, ns, written_symbol); + REQUIRE(test_exprs.size()==2); + REQUIRE(exprs_with_id(test_exprs, ID_unknown)==1); + REQUIRE(exprs_with_id(test_exprs, ID_dynamic_object)==1); + } + } + + WHEN("Writing to a variable named 'cause'") + { + symbol_exprt read_before_cause_write( + TEST_PREFIX "first_effect_read", jlo_ref_type); + auto normal_exprs_before= + get_values(normal_function_end_vs, ns, read_before_cause_write); + auto test_exprs_before= + get_values(test_function_end_vs, ns, read_before_cause_write); + symbol_exprt read_after_cause_write( + TEST_PREFIX "second_effect_read", jlo_ref_type); + auto normal_exprs_after= + get_values(normal_function_end_vs, ns, read_after_cause_write); + auto test_exprs_after= + get_values(test_function_end_vs, ns, read_after_cause_write); + + THEN("Before writing to 'cause' both analyses should think 'effect' " + "points to some object") + { + REQUIRE(normal_exprs_before.size()==1); + REQUIRE(exprs_with_id(normal_exprs_before, ID_dynamic_object)==1); + + REQUIRE(test_exprs_before.size()==1); + REQUIRE(exprs_with_id(test_exprs_before, ID_dynamic_object)==1); + } + + THEN("After writing to 'cause', the normal analysis should see no change " + "to 'effect', while the custom analysis should see it changed") + { + REQUIRE(normal_exprs_after.size()==1); + REQUIRE(exprs_with_id(normal_exprs_after, ID_dynamic_object)==1); + + REQUIRE(test_exprs_after.size()==1); + REQUIRE(exprs_with_id(test_exprs_after, "NULL-object")==1); + } + } + } +}