diff --git a/.gitignore b/.gitignore
index edac3d3e326..560bd432c77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,9 +28,16 @@ src/ansi-c/gcc_builtin_headers_alpha.inc
src/ansi-c/gcc_builtin_headers_arm.inc
src/ansi-c/gcc_builtin_headers_generic.inc
src/ansi-c/gcc_builtin_headers_ia32-2.inc
+src/ansi-c/gcc_builtin_headers_ia32-3.inc
+src/ansi-c/gcc_builtin_headers_ia32-4.inc
src/ansi-c/gcc_builtin_headers_ia32.inc
+src/ansi-c/gcc_builtin_headers_math.inc
+src/ansi-c/gcc_builtin_headers_mem_string.inc
+src/ansi-c/gcc_builtin_headers_omp.inc
+src/ansi-c/gcc_builtin_headers_tm.inc
src/ansi-c/gcc_builtin_headers_mips.inc
src/ansi-c/gcc_builtin_headers_power.inc
+src/ansi-c/gcc_builtin_headers_ubsan.inc
# regression/test files
*.out
@@ -69,6 +76,10 @@ src/xmllang/xml_lex.yy.cpp
src/xmllang/xml_y.output
src/xmllang/xml_y.tab.cpp
src/xmllang/xml_y.tab.h
+src/memory-models/mm_lex.yy.cpp
+src/memory-models/mm_y.output
+src/memory-models/mm_y.tab.cpp
+src/memory-models/mm_y.tab.h
# binaries
src/cbmc/cbmc
@@ -100,3 +111,8 @@ src/ansi-c/library/converter
src/ansi-c/library/converter.exe
src/util/irep_ids_convert
src/util/irep_ids_convert.exe
+
+*.pyc
+
+# auto generated documentation
+doc/html/
diff --git a/.travis.yml b/.travis.yml
index 7375549537b..4683eeba79d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -134,6 +134,15 @@ matrix:
script: scripts/travis_lint.sh
before_cache:
+ - env: NAME="DOXYGEN-CHECK"
+ addons:
+ apt:
+ packages:
+ - doxygen
+ install:
+ script: scripts/travis_doxygen.sh
+ before_cache:
+
allow_failures:
- env: NAME="CPP-LINT"
install:
@@ -152,9 +161,9 @@ install:
script:
- if [ -e bin/gcc ] ; then export PATH=$PWD/bin:$PATH ; fi ;
- COMMAND="env UBSAN_OPTIONS=print_stacktrace=1 make -C regression test" &&
+ COMMAND="env UBSAN_OPTIONS=print_stacktrace=1 make -C regression test CXX=\"$COMPILER\" CXXFLAGS=\"-Wall -Werror -pedantic -O2 -g $EXTRA_CXXFLAGS\"" &&
eval ${PRE_COMMAND} ${COMMAND}
- - COMMAND="make -C unit CXX=\"$COMPILER\" CXXFLAGS=\"$FLAGS $EXTRA_CXXFLAGS\" -j2" &&
+ - COMMAND="make -C unit CXX=\"$COMPILER\" CXXFLAGS=\"-Wall -Werror -pedantic -O2 -g $EXTRA_CXXFLAGS\" -j2" &&
eval ${PRE_COMMAND} ${COMMAND}
- COMMAND="make -C unit test" && eval ${PRE_COMMAND} ${COMMAND}
diff --git a/CHANGELOG b/CHANGELOG
index 1718d32243d..e497b1cadc2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,9 @@
* GOTO-INSTRUMENT: New option --print-path-lenghts
* GOTO-ANALYZER: New option --unreachable-functions, --reachable-functions
* GOTO-INSTRUMENT: New option --undefined-function-is-assume-false
+* GOTO-INSTRUMENT: New option --remove-function-body
+* GOTO-INSTRUMENT: New option --use-all-headers, changed --use-system-headers to
+ --no-system-headers
5.7
diff --git a/CODING_STANDARD b/CODING_STANDARD
deleted file mode 100644
index 0d8c44da54c..00000000000
--- a/CODING_STANDARD
+++ /dev/null
@@ -1,202 +0,0 @@
-Here a few minimalistic coding rules for the CPROVER source tree.
-
-Whitespaces:
-- Use 2 spaces indent, no tabs.
-- No lines wider than 80 chars.
- - When line is wider, do the following:
- - Subsequent lines should be indented two more than the initial line
- - Break after = if it is part of an assignment
- - For chained calls, prefer immediately before .
- - For other operators (e.g. &&, +) prefer immediately after the operator
- - For brackets, break after the bracket
- - In the case of function calls, put each argument on a separate line if
- they do not fit after one line break
- - Nested function calls do not need to be broken up into separate lines even
- if the outer function call does.
-- If a method is bigger than 50 lines, break it into parts.
-- Put matching { } into the same column.
-- No spaces around operators (=, +, ==, ...)
- Exceptions: Spaces around &&, || and <<
-- Space after comma (parameter lists, argument lists, ...)
-- Space after colon inside 'for'
-- For pointers and references, the */& should be attached to the variable name
- as oppposed to the tyep. E.g. for a pointer to an int the syntax would be:
- `int *x;`
-- No whitespaces at end of line
-- No whitespaces in blank lines
-- Put argument lists on next line (and ident 2 spaces) if too long
-- Put parameters on separate lines (and ident 2 spaces) if too long
-- No whitespaces around colon for inheritance,
- put inherited into separate lines in case of multiple inheritance
-- The initializer list follows the constructor without a whitespace
- around the colon. Break line after the colon if required and indent members.
-- if(...), else, for(...), do, and while(...) are always in a separate line
-- Break expressions in if, for, while if necessary and align them
- with the first character following the opening parenthesis
-- Use {} instead of ; for the empty statement
-- Single line blocks without { } are allowed,
- but put braces around multi-line blocks
-- Use blank lines to visually separate logically cohesive code blocks
- within a function
-- Have a newline at the end of a file
-
-Comments:
-- Do not use /* */ except for file and function comment blocks
-- Each source and header file must start with a comment block
- stating the Module name and Author [will be changed when we roll out doxygen]
-- Each function in the source file (not the header) is preceded
- by a function comment header consisting of a comment block stating
- Name, Inputs, Outputs and Purpose [will be changed when we roll
- out doxygen]
- - It should look like this:
- ```
- /*******************************************************************\
-
- Function: class_namet::function_name
-
- Inputs:
- arg_name - Description of its purpose
- long_arg_name - Descriptions should be indented
- to match the first line of the
- description
-
- Outputs: A description of what the function returns
-
- Purpose: A description of what the function does.
- Again, indentation with the line above
-
- \*******************************************************************/
- ```
-- An empty line should appear between the bottom of the function comment header
- and the function.
-- Put comments on a separate line
-- Use comments to explain the non-obvious
-- Use #if 0 for commenting out source code
-- Use #ifdef DEBUG to guard debug code
-
-Naming:
-- Identifiers may use the characters [a-z0-9_] and should start with a
- lower-case letter (parameters in constructors may start with _).
-- Use american spelling for identifiers.
-- Separate basic words by _
-- Avoid abbreviations (e.g. prefer symbol_table to of st).
-- User defined type identifiers have to be terminated by 't'. Moreover,
- before 't' may not be '_'.
-- Do not use 'm_' prefix nor '_' suffix for names of attributes of structured
- types.
-- Enum values may use the characters [A-Z0-9_]
-
-Header files:
-- Avoid unnecessary #includes, especially in header files
-- Prefer forward declaration to includes, but forward declare at the top
- of the header file rather than in line
-- Guard headers with #ifndef CPROVER_DIRECTORIES_FILE_H, etc
-
-Make files
-- Each source file should appear on a separate line
-- The final source file should still be followed by a trailing slash
-- The last line should be a comment to not be deleted, i.e. should look like:
-```
-SRC = source_file.cpp \
- source_file2.cpp \
- # Empty last line
-```
-- This ensures the Makefiles can be easily merged.
-
-Program Command Line Options
-- Each program contains a program_name_parse_optionst class which should
- contain a define PROGRAM_NAME_PARSE_OPTIONS which is a string of all the
- parse options in brackets (with a colon after the bracket if it takes a
- parameter)
-- Each parameter should be one per line to yield easy merging
-- If parameters are shared between programs, they should be pulled out into
- a common file and then included using a define
-- The defines should be OPT_FLAG_NAMES which should go into the OPTIONS define
-- The defines should include HELP_FLAG_NAMES which should contain the help
- output of the format:
- ```
- " --flag explanations\n" \
- " --another flag more explanation\n" \
- <-------30 chars------>
-- The defines may include PARSE_OPTIONS_FLAG_NAMES which move the options
- from the command line into the options
-
-C++ features:
-- Do not use namespaces.
-- Prefer use of 'typedef' insted of 'using'.
-- Prefer use of 'class' instead of 'struct'.
-- Write type modifiers before the type specifier.
-- Make references const whenever possible
-- Make functions const whenever possible
-- Do not hide base class functions
-- You are encouraged to use override
-- Single argument constructors must be explicit
-- Avoid implicit conversions
-- Avoid friend declarations
-- Avoid iterators, use ranged for instead
-- Avoid allocation with new/delete, use unique_ptr
-- Avoid pointers, use references
-- Avoid char *, use std::string
-- For numbers, use int, unsigned, long, unsigned long, double
-- Use mp_integer, not BigInt
-- Use the functions in util for conversions between numbers and strings
-- Avoid C-style functions. Use classes with an operator() instead.
-- Use irep_idt for identifiers (not std::string)
-- Avoid destructive updates if possible. The irept has constant time copy.
-- Use instances of std::size_t for comparison with return values of .size() of
- STL containers and algorithms, and use them as indices to arrays or vectors.
-- Do not use default values in public functions
-- Use assertions to detect programming errors, e.g. whenever you make
- assumptions on how your code is used
-- Use exceptions only when the execution of the program has to abort
- because of erroneous user input
-- We allow to use 3rd-party libraries directly.
- No wrapper matching the coding rules is required.
- Allowed libraries are: STL.
-- When throwing, omit the brackets, i.e. `throw "error"`.
-- Error messages should start with a lower case letter.
-- Use the auto keyword if and only if one of the following
- - The type is explictly repeated on the RHS (e.g. a constructor call)
- - Adding the type will increase confusion (e.g. iterators, function pointers)
-
-Architecture-specific code:
-- Avoid if possible.
-- Use __LINUX__, __MACH__, and _WIN32 to distinguish the architectures.
-- Don't include architecture-specific header files without #ifdef ...
-
-Output:
-- Do not output to cout or cerr directly (except in temporary debug code,
- and then guard #include by #ifdef DEBUG)
-- Derive from messaget if the class produces output and use the streams provided
- (status(), error(), debug(), etc)
-- Use '\n' instead of std::endl
-
-Unit tests:
- - Unit tests are written using Catch: https://github.com/philsquared/Catch/
- - For large classes:
- - Create a separate file that contains the tests for each method of each
- class
- - The file should be named according to
- `unit/class/path/class_name/function_name.cpp`
- - For small classes:
- - Create a separate file that contains the tests for all methods of each
- class
- - The file should be named according to unit/class/path/class_name.cpp
- - Catch supports tagging, tests should be tagged with all the following tags:
- - [core] should be used for all tests unless the test takes more than 1
- second to run, then it should be tagged with [long]
- - [folder_name] of the file being tested
- - [class_name] of the class being tested
- - [function_name] of the function being tested
-
-You are allowed to break rules if you have a good reason to do so.
-
-Pre-commit hook to run cpplint locally
---------------------------------------
-To install the hook
-cp .githooks/pre-commit .git/hooks/pre-commit
-or use a symbolic link.
-Then, when running git commit, you should get the linter output
-(if any) before being prompted to enter a commit message.
-To bypass the check (e.g. if there was a false positive),
-add the option --no-verify.
diff --git a/CODING_STANDARD.md b/CODING_STANDARD.md
new file mode 100644
index 00000000000..06c6bf53666
--- /dev/null
+++ b/CODING_STANDARD.md
@@ -0,0 +1,231 @@
+Here a few minimalistic coding rules for the CPROVER source tree.
+
+# Whitespaces
+- Use 2 spaces indent, no tabs.
+- No lines wider than 80 chars.
+ - When line is wider, do the following:
+ - Subsequent lines should be indented two more than the initial line
+ - Break after `=` if it is part of an assignment
+ - For chained calls, prefer immediately before `.`
+ - For other operators (e.g. `&&`, `+`) prefer immediately after the
+ operator
+ - For brackets, break after the bracket
+ - In the case of function calls, put each argument on a separate line if
+ they do not fit after one line break
+ - Nested function calls do not need to be broken up into separate lines
+ even if the outer function call does.
+- If a method is bigger than 50 lines, break it into parts.
+- Put matching `{ }` into the same column.
+- No spaces around operators (`=`, `+`, `==` ...) Exceptions: Spaces around
+ `&&`, `||` and `<<`
+- Space after comma (parameter lists, argument lists, ...)
+- Space after colon inside `for`
+- For pointers and references, the `*`/`&` should be attached to the variable
+ name as opposed to the type. E.g. for a pointer to an int the syntax would
+ be: `int *x;`
+- No whitespaces at end of line
+- No whitespaces in blank lines
+- Put argument lists on next line (and indent 2 spaces) if too long
+- Put parameters on separate lines (and indent 2 spaces) if too long
+- No whitespaces around colon for inheritance, put inherited into separate
+ lines in case of multiple inheritance
+- The initializer list follows the constructor without a whitespace around the
+ colon. Break line after the colon if required and indent members.
+- `if(...)`, `else`, `for(...)`, `do`, and `while(...)` are always in a
+ separate line
+- Break expressions in `if`, `for`, `while` if necessary and align them with
+ the first character following the opening parenthesis
+- Use `{}` instead of `;` for the empty statement
+- Single line blocks without `{ }` are allowed, but put braces around
+ multi-line blocks
+- Use blank lines to visually separate logically cohesive code blocks within a
+ function
+- Have a newline at the end of a file
+
+# Comments
+- Do not use `/* */`
+- Each source and header file must start with a comment block stating the
+ author. See existing source for an example of the format of this block. This
+ should be followed by a Doxygen `\file` comment:
+ ```c++
+ /// \file
+ ///
+ ```
+ Note that the `\file` tag must be immediately followed by a newline in order
+ for Doxygen to relate the comment to the current file.
+- Each function should be preceded by a Doxygen comment describing that
+ function. The format should match the [LLVM
+ guidelines](http://llvm.org/docs/CodingStandards.html#doxygen-use-in-documentation-comments),
+ with one extension: `\param` and `\return` comments longer than a single line
+ should have subsequent lines indented by two spaces, so that the tags stand
+ out. An example:
+ ```c++
+ /// This sentence, until the first dot followed by whitespace, becomes
+ /// the brief description. More detailed text follows. Feel free to
+ /// break this into paragraphs to aid readability.
+ /// \param arg_name: This parameter doesn't need much description
+ /// \param [out] long_arg_name: This parameter is mutated by the function.
+ /// Extra info about the parameter gets indented an extra two columns,
+ /// like this.
+ /// \return The return value is literally the value returned by the
+ /// function. For out-parameters, use "\param [out]".
+ return_typet my_function(argt arg_name, argt &long_arg_name)
+ ```
+- The priority of documentation is readability. Therefore, feel free to use
+ Doxygen features, or to add whitespace for multi-paragraph comment blocks if
+ necessary.
+- A comment block should immediately precede the definition of the entity it
+ documents, which will generally mean that it will live in the source file.
+ This allows us to take advantage of the one definition rule. If each entity
+ is defined only once, then it is also documented only once.
+- The documentation block must *immediately* precede the entity it documents.
+ Don't insert empty lines between docs and functions, because this will
+ confuse Doxygen.
+- Put comments on a separate line
+- Use comments to explain the non-obvious
+- Use #if 0 for commenting out source code
+- Use #ifdef DEBUG to guard debug code
+
+# Naming
+- Identifiers may use the characters `[a-z0-9_]` and should start with a
+ lower-case letter (parameters in constructors may start with `_`).
+- Use American spelling for identifiers.
+- Separate basic words by `_`
+- Avoid abbreviations (e.g. prefer `symbol_table` to `st`).
+- User defined type identifiers have to be terminated by `t`. Moreover, before
+ `t` may not be `_`.
+- Do not use `m_` prefix nor `_` suffix for names of attributes of structured
+ types.
+- Enum values may use the characters `[A-Z0-9_]`
+
+# Header files
+- Avoid unnecessary `#include`s, especially in header files
+- Prefer forward declaration to includes, but forward declare at the top of the
+ header file rather than in line
+- Guard headers with `#ifndef CPROVER_DIRECTORIES_FILE_H`, etc
+- The corresponding header for a given source file should always be the *first*
+ include in the source file. For example, given `foo.h` and `foo.cpp`, the
+ line `#include "foo.h"` should precede all other include statements in
+ `foo.cpp`.
+- Use the C++ versions of C headers (e.g. `cmath` instead of `math.h`).
+ Some of the C headers use macros instead of functions which can have
+ unexpected consequences.
+
+# Makefiles
+- Each source file should appear on a separate line
+- The final source file should still be followed by a trailing slash
+- The last line should be a comment to not be deleted, i.e. should look like:
+ ```makefile
+ SRC = source_file.cpp \
+ source_file2.cpp \
+ # Empty last line
+ ```
+ This ensures the Makefiles can be easily merged.
+
+# Program Command Line Options
+- Each program contains a `program_name_parse_optionst` class which should
+ contain a define `PROGRAM_NAME_PARSE_OPTIONS` which is a string of all the
+ parse options in brackets (with a colon after the bracket if it takes a
+ parameter)
+- Each parameter should be one per line to yield easy merging
+- If parameters are shared between programs, they should be pulled out into a
+ common file and then included using a define
+- The defines should be `OPT_FLAG_NAMES` which should go into the `OPTIONS`
+ define
+- The defines should include `HELP_FLAG_NAMES` which should contain the help
+ output in the format:
+ ```
+ " --flag explanations\n" \
+ " --another flag more explanation\n" \
+ <-------30 chars------>
+ ```
+- The defines may include `PARSE_OPTIONS_FLAG_NAMES` which move the options
+ from the command line into the options
+
+# C++ features
+- Do not use namespaces, except for anonymous namespaces.
+- Prefer use of `typedef` instead of `using`.
+- Prefer use of `class` instead of `struct`.
+- Write type modifiers before the type specifier.
+- Make references `const` whenever possible
+- Make member functions `const` whenever possible
+- Do not hide base class functions
+- You are encouraged to use `override`
+- Single argument constructors must be `explicit`
+- Avoid implicit conversions
+- Avoid `friend` declarations
+- Avoid iterators, use ranged `for` instead
+- Avoid allocation with `new`/`delete`, use `unique_ptr`
+- Avoid pointers, use references
+- Avoid `char *`, use `std::string`
+- For numbers, use `int`, `unsigned`, `long`, `unsigned long`, `double`
+- Use `mp_integer`, not `BigInt`
+- Use the functions in util for conversions between numbers and strings
+- Avoid C-style functions. Use classes with an `operator()` instead.
+- Use `irep_idt` for identifiers (not `std::string`)
+- Avoid destructive updates if possible. The `irept` has constant time copy.
+- Use instances of `std::size_t` for comparison with return values of `.size()`
+ of STL containers and algorithms, and use them as indices to arrays or
+ vectors.
+- Do not use default values in public functions
+- Use assertions to detect programming errors, e.g. whenever you make
+ assumptions on how your code is used
+- Use exceptions only when the execution of the program has to abort because of
+ erroneous user input
+- We allow to use 3rd-party libraries directly. No wrapper matching the coding
+ rules is required. Allowed libraries are: STL.
+- When throwing, omit the brackets, i.e. `throw "error"`.
+- Error messages should start with a lower case letter.
+- Use the `auto` keyword if and only if one of the following
+ - The type is explicitly repeated on the RHS (e.g. a constructor call)
+ - Adding the type will increase confusion (e.g. iterators, function pointers)
+- Avoid `assert`. If the condition is an actual invariant, use INVARIANT,
+ PRECONDITION, POSTCONDITION, CHECK_RETURN, UNREACHABLE or DATA_INVARIANT. If
+ there are possible reasons why it might fail, throw an exception.
+
+# Architecture-specific code
+- Avoid if possible.
+- Use `__LINUX__`, `__MACH__`, and `_WIN32` to distinguish the architectures.
+- Don't include architecture-specific header files without `#ifdef` ...
+
+# Output
+- Do not output to `cout` or `cerr` directly (except in temporary debug code,
+ and then guard `#include ` by `#ifdef DEBUG`)
+- Derive from `messaget` if the class produces output and use the streams
+ provided (`status()`, `error()`, `debug()`, etc)
+- Use `\n` instead of `std::endl`
+
+# Unit tests
+- Unit tests are written using [Catch](https://github.com/philsquared/Catch)
+- For large classes:
+ - Create a separate file that contains the tests for each method of each
+ class
+ - The file should be named according to
+ `unit/class/path/class_name/function_name.cpp`
+- For small classes:
+ - Create a separate file that contains the tests for all methods of each
+ class
+ - The file should be named according to `unit/class/path/class_name.cpp`
+- Catch supports tagging, tests should be tagged with all the following tags:
+ - [core] should be used for all tests unless the test takes more than 1
+ second to run, then it should be tagged with [long]
+ - [folder_name] of the file being tested
+ - [class_name] of the class being tested
+ - [function_name] of the function being tested
+
+---
+
+You are allowed to break rules if you have a good reason to do so.
+
+---
+
+# Pre-commit hook to run cpplint locally
+
+To install the hook
+```sh
+cp .githooks/pre-commit .git/hooks/pre-commit
+```
+or use a symbolic link. Then, when running git commit, you should get the
+linter output (if any) before being prompted to enter a commit message. To
+bypass the check (e.g. if there was a false positive), add the option
+`--no-verify`.
diff --git a/COMPILING b/COMPILING
index 4b47b38b790..3df46849022 100644
--- a/COMPILING
+++ b/COMPILING
@@ -32,7 +32,7 @@ We assume that you have a Debian/Ubuntu or Red Hat-like distribution.
The GNU Make needs to be version 3.81 or higher.
On Debian-like distributions, do
- apt-get install g++-6 gcc flex bison make git libwww-perl patch
+ apt-get install g++ gcc flex bison make git libwww-perl patch
On Red Hat/Fedora or derivates, do
@@ -44,13 +44,7 @@ We assume that you have a Debian/Ubuntu or Red Hat-like distribution.
git clone https://github.com/diffblue/cbmc cbmc-git
-2) On Debian, do
-
- cd cbmc-git/src
- make minisat2-download
- make CXX=g++-6
-
- On Ubuntu, or other distributions with recent g++, do
+2) On Debian or Ubuntu, do
cd cbmc-git/src
make minisat2-download
diff --git a/appveyor.yml b/appveyor.yml
index f1f66142620..3a7c2d5c363 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -75,6 +75,10 @@ test_script:
cat goto-instrument-typedef/chain.sh || true
rem HACK disable failing tests
+ rmdir /s /q ansi-c\arch_flags_mcpu_bad
+ rmdir /s /q ansi-c\arch_flags_mcpu_good
+ rmdir /s /q ansi-c\arch_flags_mthumb_bad
+ rmdir /s /q ansi-c\arch_flags_mthumb_good
rmdir /s /q ansi-c\Forward_Declaration2
rmdir /s /q ansi-c\Incomplete_Type1
rmdir /s /q ansi-c\Union_Padding1
diff --git a/doc/CPPLINT.cfg b/doc/CPPLINT.cfg
new file mode 100644
index 00000000000..51ff339c184
--- /dev/null
+++ b/doc/CPPLINT.cfg
@@ -0,0 +1 @@
+exclude_files=.*
diff --git a/doc/architectural/cbmc-guide.md b/doc/architectural/cbmc-guide.md
new file mode 100644
index 00000000000..ccbf0068cea
--- /dev/null
+++ b/doc/architectural/cbmc-guide.md
@@ -0,0 +1,601 @@
+\ingroup module_hidden
+\page cbmc-guide CBMC Guide
+
+\author Martin Brain
+
+Background Information
+======================
+
+First off; read the \ref cprover-manual "CProver Manual". It describes
+how to get, build and use CBMC and SATABS. This document covers the
+internals of the system and how to get started on development.
+
+Documentation
+-------------
+
+Apart from the (user-orientated) CPROVER manual and this document, most
+of the rest of the documentation is inline in the code as `doxygen` and
+some comments. A man page for CBMC, goto-cc and goto-instrument is
+contained in the `doc/` directory and gives some options for these
+tools. All of these could be improved and patches are very welcome. In
+some cases the algorithms used are described in the relevant papers.
+
+Architecture
+------------
+
+CPROVER is structured in a similar fashion to a compiler. It has
+language specific front-ends which perform limited syntactic analysis
+and then convert to an intermediate format. The intermediate format can
+be output to files (this is what `goto-cc` does) and are (informally)
+referred to as “goto binaries” or “goto programs”. The back-end are
+tools process this format, either directly from the front-end or from
+it’s saved output. These include a wide range of analysis and
+transformation tools (see Section \[section:other-apps\]).
+
+Coding Standards
+----------------
+
+CPROVER is written in a fairly minimalist subset of C++; templates and
+meta-programming are avoided except where necessary. The standard
+library is used but in many cases there are alternatives provided in
+`util/` (see Section \[section:util\]) which are preferred. Boost is
+not used.
+
+Patches should be formatted so that code is indented with two space
+characters, not tab and wrapped to 75 or 72 columns. Headers for doxygen
+should be given (and preferably filled!) and the author will be the
+person who first created the file.
+
+Identifiers should be lower case with underscores to separate words.
+Types (classes, structures and typedefs) names must[^1] end with a `t`.
+Types that model types (i.e. C types in the program that is being
+interpreted) are named with `_typet`. For example `ui_message_handlert`
+rather than `UI_message_handlert` or `UIMessageHandler` and
+`union_typet`.
+
+How to Contribute
+-----------------
+
+Fixes, changes and enhancements to the CPROVER code base should be
+developed against the `trunk` version and submitted to Daniel as patches
+produced by `diff -Naur` or `svn diff`. Entire applications are best
+developed independently (`git svn` is a popular choice for tracking the
+main trunk but also having local development) until it is clear what
+their utility, future and maintenance is likely to be.
+
+Other Useful Code {#section:other-apps}
+-----------------
+
+The CPROVER subversion archive contains a number of separate programs.
+Others are developed separately as patches or separate
+branches.Interfaces are have been and are continuing to stablise but
+older code may require work to compile and function correctly.
+
+In the main archive:
+
+* `CBMC`: A bounded model checking tool for C and C++. See Section
+ \[section:CBMC\].
+
+* `goto-cc`: A drop-in, flag compatible replacement for GCC and other
+ compilers that produces goto-programs rather than executable binaries.
+ See Section \[section:goto-cc\].
+
+* `goto-instrument`: A collection of functions for instrumenting and
+ modifying goto-programs. See Section \[section:goto-instrument\].
+
+Model checkers and similar tools:
+
+* `SatABS`: A CEGAR model checker using predicate abstraction. Is
+ roughly 10,000 lines of code (on top of the CPROVER code base) and is
+ developed in its own subversion archive. It uses an external model
+ checker to find potentially feasible paths. Key limitations are
+ related to code with pointers and there is scope for significant
+ improvement.
+
+* `Scratch`: Alistair Donaldson’s k-induction based tool. The
+ front-end is in the old project CVS and some of the functionality is
+ in `goto-instrument`.
+
+* `Wolverine`: An implementation of Ken McMillan’s IMPACT algorithm
+ for sequential programs. In the old project CVS.
+
+* `C-Impact`: An implementation of Ken McMillan’s IMPACT algorithm for
+ parallel programs. In the old project CVS.
+
+* `LoopFrog`: A loop summarisation tool.
+
+* `???`: Christoph’s termination analyser.
+
+Test case generation:
+
+* `cover`: A basic test-input generation tool. In the old
+ project CVS.
+
+* `FShell`: A test-input generation tool that allows the user to
+ specify the desired coverage using a custom language (which includes
+ regular expressions over paths). It uses incremental SAT and is thus
+ faster than the naïve “add assertions one at a time and use the
+ counter-examples” approach. Is developed in its own subversion.
+
+Alternative front-ends and input translators:
+
+* `Scoot`: A System-C to C translator. Probably in the old
+ project CVS.
+
+* `???`: A Simulink to C translator. In the old project CVS.
+
+* `???`: A Verilog front-end. In the old project CVS.
+
+* `???`: A converter from Codewarrior project files to Makefiles. In
+ the old project CVS.
+
+Other tools:
+
+* `ai`: Leo’s hybrid abstract interpretation / CEGAR tool.
+
+* `DeltaCheck?`: Ajitha’s slicing tool, aimed at locating changes and
+ differential verification. In the old project CVS.
+
+There are tools based on the CPROVER framework from other research
+groups which are not listed here.
+
+Source Walkthrough
+==================
+
+This section walks through the code bases in a rough order of interest /
+comprehensibility to the new developer.
+
+`doc`
+-----
+
+At the moment just contains the CBMC man page.
+
+`regression/`
+-------------
+
+The regression tests are currently being moved from CVS. The
+`regression/` directory contains all of those that have
+been moved. They are grouped into directories for each of the tools.
+Each of these contains a directory per test case, containing a C or C++
+file that triggers the bug and a `.dsc` file that describes
+the tests, expected output and so on. There is a Perl script,
+`test.pl` that is used to invoke the tests as:
+
+ ../test.pl -c PATH_TO_CBMC
+
+The `–help` option gives instructions for use and the
+format of the description files.
+
+`src/`
+------
+
+The source code is divided into a number of sub-directories, each
+containing the code for a different part of the system. In the top level
+files there are only a few files:
+
+* `config.inc`: The user-editable configuration parameters for the
+ build process. The main use of this file is setting the paths for the
+ various external SAT solvers that are used. As such, anyone building
+ from source will likely need to edit this.
+
+* `Makefile`: The main systems Make file. Parallel builds are
+ supported and encouraged; please don’t break them!
+
+* `common`: System specific magic required to get the system to build.
+ This should only need to be edited if porting CBMC to a new platform /
+ build environment.
+
+* `doxygen.cfg`: The config file for doxygen.cfg
+
+### `util/` {#section:util}
+
+`util/` contains the low-level data structures and
+manipulation functions that are used through-out the CPROVER code-base.
+For almost any low-level task, the code required is probably in
+`util/`. Key files include:
+
+* `irep.h`: This contains the definition of `irept`, the basis of many
+ of the data structures in the project. They should not be used
+ directly; one of the derived classes should be used. For more
+ information see Section \[section:irept\].
+
+* `expr.h`: The parent class for all of the expressions. Provides a
+ number of generic functions, `exprt` can be used with these but when
+ creating data, subclasses of `exprt` should be used.
+
+* `std_expr.h`: Provides subclasses of `exprt` for common kinds of
+ expression for example `plus_exprt`, `minus_exprt`,
+ `dereference_exprt`. These are the intended interface for creating
+ expressions.
+
+* `std_types.h`: Provides subclasses of `typet` (a subclass of
+ `irept`) to model C and C++ types. This is one of the preferred
+ interfaces to `irept`. The front-ends handle type promotion and most
+ coercision so the type system and checking goto-programs is simpler
+ than C.
+
+* `dstring.h`: The CPROVER string class. This enables sharing between
+ strings which significantly reduces the amount of memory required and
+ speeds comparison. `dstring` should not be used directly, `irep_idt`
+ should be used instead, which (dependent on build options) is an alias
+ for `dstring`.
+
+* `mp_arith.h`: The wrapper class for multi-precision arithmetic
+ within CPROVER. Also see `arith_tools.h`.
+
+* `ieee_float.h`: The arbitrary precision float model used within
+ CPROVER. Based on `mp_integer`s.
+
+* `context.h`: A generic container for symbol table like constructs
+ such as namespaces. Lookup gives type, location of declaration, name,
+ ‘pretty name’, whether it is static or not.
+
+* `namespace.h`: The preferred interface for the context class. The
+ key function is `lookup` which converts a string (`irep_idt`) to a
+ symbol which gives the scope of declaration, type and so on. This
+ works for functions as well as variables.
+
+### `langapi/`
+
+This contains the basic interfaces and support classes for programming
+language front ends. Developers only really need look at this if they
+are adding support for a new language. It’s main users are the two (in
+trunk) language front-ends; `ansi-c/` and
+`cpp/`.
+
+### `ansi-c/`
+
+Contains the front-end for ANSI C, plus a variety of common extensions.
+This parses the file, performs some basic sanity checks (this is one
+area in which the UI could be improved; patches most welcome) and then
+produces a goto-program (see below). The parser is a traditional Flex /
+Bison system.
+
+`internal_addition.c` contains the implementation of various ‘magic’
+functions that are that allow control of the analysis from the source
+code level. These include assertions, assumptions, atomic blocks, memory
+fences and rounding modes.
+
+The `library/` subdirectory contains versions of some of the C standard
+header files that make use of the CPROVER built-in functions. This
+allows CPROVER programs to be ‘aware’ of the functionality and model it
+correctly. Examples include `stdio.c`, `string.c`, `setjmp.c` and
+various threading interfaces.
+
+### `cpp/`
+
+This directory contains the C++ front-end. It supports the subset of C++
+commonly found in embedded and system applications. Consequentially it
+doesn’t have full support for templates and many of the more advanced
+and obscure C++ features. The subset of the language that can be handled
+is being extended over time so bug reports of programs that cannot be
+parsed are useful.
+
+The functionality is very similar to the ANSI C front end; parsing the
+code and converting to goto-programs. It makes use of code from
+`langapi` and `ansi-c`.
+
+### `goto-programs/`
+
+Goto programs are the intermediate representation of the CPROVER tool
+chain. They are language independent and similar to many of the compiler
+intermediate languages. Section \[section:goto-programs\] describes the
+`goto_programt` and `goto_functionst` data structures in detail. However
+it useful to understand some of the basic concepts. Each function is a
+list of instructions, each of which has a type (one of 18 kinds of
+instruction), a code expression, a guard expression and potentially some
+targets for the next instruction. They are not natively in static
+single-assign (SSA) form. Transitions are nondeterministic (although in
+practise the guards on the transitions normally cover form a disjoint
+cover of all possibilities). Local variables have non-deterministic
+values if they are not initialised. Variables and data within the
+program is commonly one of three types (parameterised by width):
+`unsignedbv_typet`, `signedbv_typet` and `floatbv_typet`, see
+`util/std_types.h` for more information. Goto programs can be serialised
+in a binary (wrapped in ELF headers) format or in XML (see the various
+`_serialization` files).
+
+The `cbmc` option `–show-goto-programs` is often a good starting point
+as it outputs goto-programs in a human readable form. However there are
+a few things to be aware of. Functions have an internal name (for
+example `c::f00`) and a ‘pretty name’ (for example `f00`) and which is
+used depends on whether it is internal or being presented to the user.
+The `main` method is the ‘logical’ main which is not necessarily the
+main method from the code. In the output `NONDET` is use to represent a
+nondeterministic assignment to a variable. Likewise `IF` as a beautified
+`GOTO` instruction where the guard expression is used as the condition.
+`RETURN` instructions may be dropped if they precede an `END_FUNCTION`
+instruction. The comment lines are generated from the `locationt` field
+of the `instructiont` structure.
+
+`goto-programs/` is one of the few places in the CPROVER codebase that
+templates are used. The intention is to allow the general architecture
+of program and functions to be used for other formalisms. At the moment
+most of the templates have a single instantiation; for example
+`goto_functionst` and `goto_function_templatet` and `goto_programt` and
+`goto_program_templatet`.
+
+### `goto-symex/`
+
+This directory contains a symbolic evaluation system for goto-programs.
+This takes a goto-program and translates it to an equation system by
+traversing the program, branching and merging and unwinding loops as
+needed. Each reverse goto has a separate counter (the actual counting is
+handled by `cbmc`, see the `–unwind` and `–unwind-set` options). When a
+counter limit is reach, an assertion can be added to explicitly show
+when analysis is incomplete. The symbolic execution includes constant
+folding so loops that have a constant number of iterations will be
+handled completely (assuming the unwinding limit is sufficient).
+
+The output of the symbolic execution is a system of equations; an object
+containing a list of `symex_target_elements`, each of which are
+equalities between `expr` expressions. See `symex_target_equation.h`.
+The output is in static, single assignment (SSA) form, which is *not*
+the case for goto-programs.
+
+### `pointer-analysis/`
+
+To perform symbolic execution on programs with dereferencing of
+arbitrary pointers, some alias analysis is needed. `pointer-analysis`
+contains the three levels of analysis; flow and context insensitive,
+context sensitive and flow and context sensitive. The code needed is
+subtle and sophisticated and thus there may be bugs.
+
+### `solvers/`
+
+The `solvers/` directory contains interfaces to a number of
+different decision procedures, roughly one per directory.
+
+* prop/: The basic and common functionality. The key file is
+ `prop_conv.h` which defines `prop_convt`. This is the base class that
+ is used to interface to the decision procedures. The key functions are
+ `convert` which takes an `exprt` and converts it to the appropriate,
+ solver specific, data structures and `dec_solve` (inherited from
+ `decision_proceduret`) which invokes the actual decision procedures.
+ Individual decision procedures (named `*_dect`) objects can be created
+ but `prop_convt` is the preferred interface for code that uses them.
+
+* flattening/: A library that converts operations to bit-vectors,
+ including calling the conversions in `floatbv` as necessary. Is
+ implemented as a simple conversion (with caching) and then a
+ post-processing function that adds extra constraints. This is not used
+ by the SMT or CVC back-ends.
+
+* dplib/: Provides the `dplib_dect` object which used the decision
+ procedure library from “Decision Procedures : An Algorithmic Point of
+ View”.
+
+* cvc/: Provides the `cvc_dect` type which interfaces to the old (pre
+ SMTLib) input format for the CVC family of solvers. This format is
+ still supported by depreciated in favour of SMTLib 2.
+
+* smt1/: Provides the `smt1_dect` type which converts the formulae to
+ SMTLib version 1 and then invokes one of Boolector, CVC3, OpenSMT,
+ Yices, MathSAT or Z3. Again, note that this format is depreciated.
+
+* smt2/: Provides the `smt2_dect` type which functions in a similar
+ way to `smt1_dect`, calling Boolector, CVC3, MathSAT, Yices or Z3.
+ Note that the interaction with the solver is batched and uses
+ temporary files rather than using the interactive command supported by
+ SMTLib 2. With the `–fpa` option, this output mode will not flatten
+ the floating point arithmetic and instead output the proposed SMTLib
+ floating point standard.
+
+* qbf/: Back-ends for a variety of QBF solvers. Appears to be no
+ longer used or maintained.
+
+* sat/: Back-ends for a variety of SAT solvers and DIMACS output.
+
+### `cbmc/` {#section:CBMC}
+
+This contains the first full application. CBMC is a bounded model
+checker that uses the front ends (`ansi-c`, `cpp`, goto-program or
+others) to create a goto-program, `goto-symex` to unwind the loops the
+given number of times and to produce and equation system and finally
+`solvers` to find a counter-example (technically, `goto-symex` is then
+used to construct the counter-example trace).
+
+### `goto-cc/` {#section:goto-cc}
+
+`goto-cc` is a compiler replacement that just performs the first step of
+the process; converting C or C++ programs to goto-binaries. It is
+intended to be dropped in to an existing build procedure in place of the
+compiler, thus it emulates flags that would affect the semantics of the
+code produced. Which set of flags are emulated depends on the naming of
+the `goto-cc/` binary. If it is called `goto-cc` then it emulates GCC
+flags, `goto-armcc` emulates the ARM compiler, `goto-cl` emulates VCC
+and `goto-cw` emulates the Code Warrior compiler. The output of this
+tool can then be used with `cbmc` or `goto-instrument`.
+
+### `goto-instrument/` {#section:goto-instrument}
+
+The `goto-instrument/` directory contains a number of tools, one per
+file, that are built into the `goto-instrument` program. All of them
+take in a goto-program (produced by `goto-cc`) and either modify it or
+perform some analysis. Examples include `nondet_static.cpp` which
+initialises static variables to a non-deterministic value,
+`nondet_volatile.cpp` which assigns a non-deterministic value to any
+volatile variable before it is read and `weak_memory.h` which performs
+the necessary transformations to reason about weak memory models. The
+exception to the “one file for each piece of functionality” rule are the
+program instrumentation options (mostly those given as “Safety checks”
+in the `goto-instrument` help text) which are included in the
+`goto-program/` directory. An example of this is
+`goto-program/stack_depth.h` and the general rule seems to be that
+transformations and instrumentation that `cbmc` uses should be in
+`goto-program/`, others should be in `goto-instrument`.
+
+`goto-instrument` is a very good template for new analysis tools. New
+developers are advised to copy the directory, remove all files apart
+from `main.*`, `parseoptions.*` and the `Makefile` and use these as the
+skeleton of their application. The `doit()` method in `parseoptions.cpp`
+is the preferred location for the top level control for the program.
+
+### `linking/`
+
+Probably the code to emulate a linker. This allows multiple ‘object
+files’ (goto-programs) to be linked into one ‘executable’ (another
+goto-program), thus allowing existing build systems to be used to build
+complete goto-program binaries.
+
+### `big-int/`
+
+CPROVER is distributed with its own multi-precision arithmetic library;
+mainly for historical and portability reasons. The library is externally
+developed and thus `big-int` contains the source as it is distributed.
+This should not be used directly, see `util/mp_arith.h` for the CPROVER
+interface.
+
+### `xmllang/`
+
+CPROVER has optional XML output for results and there is an XML format
+for goto-programs. It is used to interface to various IDEs. The
+`xmllang/` directory contains the parser and helper functions for
+handling this format.
+
+### `floatbv/`
+
+This library contains the code that is used to convert floating point
+variables (`floatbv`) to bit vectors (`bv`). This is referred to as
+‘bit-blasting’ and is called in the `solver` code during conversion to
+SAT or SMT. It also contains the abstraction code described in the
+FMCAD09 paper.
+
+Data Structures
+===============
+
+This section discusses some of the key data-structures used in the
+CPROVER codebase.
+
+`irept` {#section:irept}
+------------------------
+
+There are a large number of kind of tree structured or tree-like data in
+CPROVER. `irept` provides a single, unified representation for all of
+these, allowing structure sharing and reference counting of data. As
+such `irept` is the basic unit of data in CPROVER. Each `irept`
+contains[^2] a basic unit of data (of type `dt`) which contains four
+things:
+
+* `data`: A string[^3], which is returned when the `id()` function is
+ used.
+
+* `named_sub`: A map from `irep_namet` (a string) to an `irept`. This
+ is used for named children, i.e. subexpressions, parameters, etc.
+
+* `comments`: Another map from `irep_namet` to `irept` which is used
+ for annotations and other ‘non-semantic’ information
+
+* `sub`: A vector of `irept` which is used to store ordered but
+ unnamed children.
+
+The `irept::pretty` function outputs the contents of an `irept` directly
+and can be used to understand an debug problems with `irept`s.
+
+On their own `irept`s do not “mean” anything; they are effectively
+generic tree nodes. Their interpretation depends on the contents of
+result of the `id` function (the `data`) field. `util/irep_ids.txt`
+contains the complete list of `id` values. During the build process it
+is used to generate `util/irep_ids.h` which gives constants for each id
+(named `ID_`). These can then be used to identify what kind of data
+`irept` stores and thus what can be done with it.
+
+To simplify this process, there are a variety of classes that inherit
+from `irept`, roughly corresponding to the ids listed (i.e. `ID_or`
+(the string `"or”`) corresponds to the class `or_exprt`). These give
+semantically relevant accessor functions for the data; effectively
+different APIs for the same underlying data structure. None of these
+classes add fields (only methods) and so static casting can be used. The
+inheritance graph of the subclasses of `irept` is a useful starting
+point for working out how to manipulate data.
+
+There are three main groups of classes (or APIs); those derived from
+`typet`, `codet` and `exprt` respectively. Although all of these inherit
+from `irept`, these are the most abstract level that code should handle
+data. If code is manipulating plain `irept`s then something is wrong
+with the architecture of the code.
+
+Many of the key descendent of `exprt` are declared in `std_expr.h`. All
+expressions have a named subfield / annotation which gives the type of
+the expression (slightly simplified from C/C++ as `unsignedbv_typet`,
+`signedbv_typet`, `floatbv_typet`, etc.). All type conversions are
+explicit with an expression with `id() == ID_typecast` and an ‘interface
+class’ named `typecast_exprt`. One key descendent of `exprt` is
+`symbol_exprt` which creates `irept` instances with the id of “symbol”.
+These are used to represent variables; the name of which can be found
+using the `get_identifier` accessor function.
+
+`codet` inherits from `exprt` and is defined in `std_code.h`. They
+represent executable code; statements in C rather than expressions. In
+the front-end there are versions of these that hold whole code blocks,
+but in goto-programs these have been flattened so that each `irept`
+represents one sequence point (almost one line of code / one
+semi-colon). The most common descendents of `codet` are `code_assignt`
+so a common pattern is to cast the `codet` to an assignment and then
+recurse on the expression on either side.
+
+`goto-programs` {#section:goto-programs}
+----------------------------------------
+
+The common starting point for working with goto-programs is the
+`read_goto_binary` function which populates an object of
+`goto_functionst` type. This is defined in `goto_functions.h` and is an
+instantiation of the template `goto_functions_templatet` which is
+contained in `goto_functions_template.h`. They are wrappers around a map
+from strings to `goto_programt`’s and iteration macros are provided.
+Note that `goto_function_templatet` (no `s`) is defined in the same
+header as `goto_functions_templatet` and is gives the C type for the
+function and Boolean which indicates whether the body is available
+(before linking this might not always be true). Also note the slightly
+counter-intuitive naming; `goto_functionst` instances are the top level
+structure representing the program and contain `goto_programt` instances
+which represent the individual functions. At the time of writing
+`goto_functionst` is the only instantiation of the template
+`goto_functions_templatet` but other could be produced if a different
+data-structures / kinds of models were needed for functions.
+
+`goto_programt` is also an instantiation of a template. In a similar
+fashion it is `goto_program_templatet` and allows the types of the guard
+and expression used in instructions to be parameterised. Again, this is
+currently the only use of the template. As such there are only really
+helper functions in `goto_program.h` and thus `goto_program_template.h`
+is probably the key file that describes the representation of (C)
+functions in the goto-program format. It is reasonably stable and
+reasonably documented and thus is a good place to start looking at the
+code.
+
+An instance of `goto_program_templatet` is effectively a list of
+instructions (and inner template called `instructiont`). It is important
+to use the copy and insertion functions that are provided as iterators
+are used to link instructions to their predecessors and targets and
+careless manipulation of the list could break these. Likewise there are
+helper macros for iterating over the instructions in an instance of
+`goto_program_templatet` and the use of these is good style and strongly
+encouraged.
+
+Individual instructions are instances of type `instructiont`. They
+represent one step in the function. Each has a type, an instance of
+`goto_program_instruction_typet` which denotes what kind of instruction
+it is. They can be computational (such as `ASSIGN` or `FUNCTION_CALL`),
+logical (such as `ASSUME` and `ASSERT`) or informational (such as
+`LOCATION` and `DEAD`). At the time of writing there are 18 possible
+values for `goto_program_instruction_typet` / kinds of instruction.
+Instructions also have a guard field (the condition under which it is
+executed) and a code field (what the instruction does). These may be
+empty depending on the kind of instruction. In the default
+instantiations these are of type `exprt` and `codet` respectively and
+thus covered by the previous discussion of `irept` and its descendents.
+The next instructions (remembering that transitions are guarded by
+non-deterministic) are given by the list `targets` (with the
+corresponding list of labels `labels`) and the corresponding set of
+previous instructions is get by `incoming_edges`. Finally `instructiont`
+have informational `function` and `location` fields that indicate where
+they are in the code.
+
+[^1]: There are a couple of exceptions, including the graph classes
+
+[^2]: Or references, if reference counted data sharing is enabled. It is
+ enabled by default; see the `SHARING` macro.
+
+[^3]: When `USE_DSTRING` is enabled (it is by default), this is actually
+a `dstring` and thus an integer which is a reference into a string table
diff --git a/doc/architectural/front-page.md b/doc/architectural/front-page.md
new file mode 100644
index 00000000000..13e832074a8
--- /dev/null
+++ b/doc/architectural/front-page.md
@@ -0,0 +1,46 @@
+CProver Documentation
+=====================
+
+\author Kareem Khazem
+
+These pages contain user tutorials, automatically-generated API
+documentation, and higher-level architectural overviews for the
+CProver codebase. Users can download CProver tools from the
+CProver website; contributors
+should use the repository
+hosted on GitHub.
+
+### For users:
+
+* The \ref cprover-manual "CProver Manual" details the capabilities of
+ CBMC and SATABS and describes how to install and use these tools. It
+ also covers the underlying theory and prerequisite concepts behind how
+ these tools work.
+
+### For contributors:
+
+* If you already know exactly what you're looking for, the API reference
+ is generated from the codebase. You can search for classes and class
+ members in the search bar at top-right or use one of the links in the
+ sidebar.
+
+* For higher-level architectural information, each of the pages under
+ the "Modules" link in the sidebar gives an overview of a directory in
+ the CProver codebase.
+
+* The \ref module_cbmc "CBMC guided tour" is a good start for new
+ contributors to CBMC. It describes the stages through which CBMC
+ transforms source files into bug reports and counterexamples, linking
+ to the relevant documentation for each stage.
+
+* The \subpage cbmc-hacking "CBMC hacking HOWTO" helps new contributors
+ to CProver to get their feet wet through a series of programming
+ exercises---mostly modifying goto-instrument, and thus learning to
+ manipulate the main data structures used within CBMC.
+
+* The \subpage cbmc-guide "CBMC guide" is a single document describing
+ the layout of the codebase and many of the important data structures.
+ It probably contains more information than the module pages at the
+ moment, but may be somewhat out-of-date.
+
+\defgroup module_hidden _hidden
diff --git a/doc/architectural/howto.md b/doc/architectural/howto.md
new file mode 100644
index 00000000000..5eee0f0058f
--- /dev/null
+++ b/doc/architectural/howto.md
@@ -0,0 +1,243 @@
+\ingroup module_hidden
+\page cbmc-hacking CBMC Hacking HOWTO
+
+\author Kareem Khazem
+
+This is an introduction to hacking on the `cprover` codebase. It is not
+intended as a user guide to `CBMC` or related tools. It is structured
+as a series of programming exercises that aim to acclimatise the reader
+to the basic data structures and workflow needed for contributing to
+`CBMC`.
+
+
+## Initial setup
+
+Clone the [CBMC repository][cbmc-repo] and build it:
+
+ git clone https://github.com/diffblue/cbmc.git
+ cd cbmc/src
+ make minisat2-download
+ make
+
+Ensure that [graphviz][graphviz] is installed on your system (in
+particular, you should be able to run a program called `dot`). Install
+[Doxygen][doxygen] and generate doxygen documentation:
+
+ # In the src directory
+ doxygen doxyfile
+ # View the documentation in a web browser
+ firefox doxy/html/index.html
+
+If you've never used doxygen documentation before, get familiar with the
+layout. Open the generated HTML page in a web browser; search for the
+class `goto_programt` in the search bar, and jump to the documentation
+for that class; and read through the copious documentation.
+
+The build writes executable programs into several of the source
+directories. In this tutorial, we'll be using binaries inside the
+`cbmc`, `goto-instrument`, and `goto-cc` directories. Add these
+directories to your `$PATH`:
+
+ # Assuming you cloned CBMC into ~/code
+ export PATH=~/code/cbmc/src/goto-instrument:~/code/cbmc/src/goto-cc:~/code/cbmc/src/cbmc:$PATH
+ # Add to your shell's startup configuration file so that you don't have to run that command every time.
+ echo 'export PATH=~/code/cbmc/src/goto-instrument:~/code/cbmc/src/goto-cc:~/code/cbmc/src/cbmc:$PATH' >> .bashrc
+
+Optional: install an image viewer that can read images on stdin.
+I use [feh][feh].
+
+[cbmc-repo]: https://github.com/diffblue/cbmc/
+[doxygen]: http://www.stack.nl/~dimitri/doxygen/
+[graphviz]: http://www.graphviz.org/
+[feh]: https://feh.finalrewind.org/
+
+
+
+## Whirlwind tour of the tools
+
+CBMC's code is located under the `cbmc` directory. Even if you plan to
+contribute only to CBMC, it is important to be familiar with several
+other of cprover's auxiliary tools.
+
+
+### Compiling with `goto-cc`
+
+There should be an executable file called `goto-cc` in the `goto-cc`
+directory; make a symbolic link to it called `goto-gcc`:
+
+ cd cbmc/src/goto-cc
+ ln -s "$(pwd)/goto-cc" goto-gcc
+
+Find or write a moderately-interesting C program; we'll call it `main.c`.
+Run the following commands:
+
+ goto-gcc -o main.goto main.c
+ cc -o main.exe main.c
+
+Invoke `./main.goto` and `./main.exe` and observe that they run identically.
+The version that was compiled with `goto-gcc` is larger, though:
+
+ du -hs *.{goto,exe}
+
+Programs compiled with `goto-gcc` are mostly identical to their `clang`-
+or `gcc`-compiled counterparts, but contain additional object code in
+cprover's intermediate representation. The intermediate representation
+is (informally) called a *goto-program*.
+
+
+### Viewing goto-programs
+
+`goto-instrument` is a Swiss army knife for viewing goto-programs and
+performing single program analyses on them. Run the following command:
+
+ goto-instrument --show-goto-functions main.goto
+
+Many of the instructions in the goto-program intermediate representation
+are similar to their C counterparts. `if` and `goto` statements replace
+structured programming constructs.
+
+Find or write a small C program (2 or 3 functions, each containing a few
+varied statements). Compile it using `goto-gcc` as above into an object
+file called `main`. If you installed `feh`, try the following command
+to dump a control-flow graph:
+
+ goto-instrument --dot main | tail -n +2 | dot -Tpng | feh -
+
+If you didn't install `feh`, you can write the diagram to the file and
+then view it:
+
+ goto-instrument --dot main | tail -n +2 | dot -Tpng > main.png
+ Now open main.png with an image viewer
+
+(the invocation of `tail` is used to filter out the first line of
+`goto-instrument` output. If `goto-instrument` writes more or less
+debug output by the time you read this, read the output of
+`goto-instrument --dot main` and change the invocation of `tail`
+accordingly.)
+
+There are a few other views of goto-programs. Run `goto-instrument -h`
+and try the various switches under the "Diagnosis" section.
+
+
+
+## Learning about goto-programs
+
+In this section, you will learn about the basic goto-program data
+structures. Reading from and manipulating these data structures form
+the core of writing an analysis for CBMC.
+
+
+### First steps with `goto-instrument`
+
+
+**Task:** Write a simple C program with a few functions, each containing
+a few statements. Compile the program with `goto-gcc` into a binary
+called `main`.
+
+
+
+The entry point of `goto-instrument` is in `goto_instrument_main.cpp`.
+Follow the control flow into `goto_instrument_parse_optionst::doit()`, located in `goto_instrument_parse_options.cpp`.
+At some point in that function, there will be a long sequence of `if` statements.
+
+
+**Task:** Add a `--greet` switch to `goto-instrument`, taking an optional
+argument, with the following behaviour:
+
+ $ goto-instrument --greet main
+ hello, world!
+ $ goto-instrument --greet Leperina main
+ hello, Leperina!
+
+You will also need to add the `greet` option to the
+`goto_instrument_parse_options.h` file in order for this to work.
+Notice that in the `.h` file, options that take an argument are followed
+by a colon (like `(property):`), while simple switches have no colon.
+Make sure that you `return 0;` after printing the message.
+
+
+The idea behind `goto-instrument` is that it parses a goto-program and
+then performs one single analysis on that goto-program, and then
+returns. Each of the switches in `doit` function of
+`goto_instrument_parse_options` does something different with the
+goto-program that was supplied on the command line.
+
+
+### Goto-program basics
+
+At this point in `goto-instrument_parse_options` (where the `if`
+statements are), the goto-program will have been loaded into the object
+`goto_functions`, of type `goto_functionst`. This has a field called
+`function_map`, a map from function names to functions.
+
+
+
+**Task:** Add a `--print-function-names` switch to `goto-instrument`
+that prints out the name of every function in the goto-binary. Are
+there any functions that you didn't expect to see?
+
+
+The following is quite difficult to follow from doxygen, but: the value
+type of `function_map` is `goto_function_templatet`.
+
+
+
+**Task:** Read the documentation for `goto_function_templatet`
+and `goto_programt`.
+
+
+Each goto_programt object contains a list of
+\ref goto_program_templatet::instructiont called
+`instructions`. Each instruction has a field called `code`, which has
+type \ref codet.
+
+
+**Task:** Add a `--pretty-program` switch to `goto-instrument`. This
+switch should use the `codet::pretty()` function to pretty-print every
+\ref codet in the entire program. The strings that `pretty()` generates
+for a codet look like this:
+
+ index
+ * type: unsignedbv
+ * width: 8
+ * #c_type: char
+ 0: symbol
+ * type: array
+ * size: nil
+ * type:
+ * #source_location:
+ * file: src/main.c
+ * line: 18
+ * function:
+ * working_directory: /some/dir
+ 0: unsignedbv
+ * width: 8
+ * #c_type: char
+ ...
+
+
+The sub-nodes of a particular node in the pretty representation are
+numbered, starting from 0. They can be accessed through the `op0()`,
+`op1()` and `op2()` methods in the `exprt` class.
+
+Every node in the pretty representation has an identifier, accessed
+through the `id()` function. The file `util/irep_ids.def` lists the
+possible values of these identifiers; have a quick scan through that
+file. In the pretty representation above, the following facts are true
+of that particular node:
+
+ - `node.id() == ID_index`
+ - `node.type().id() == ID_unsignedbv`
+ - `node.op0().id() == ID_symbol`
+ - `node.op0().type().id() == ID_array`
+
+The fact that the `op0()` child has a `symbol` ID menas that you could
+cast it to a `symbol_exprt` (which is a subtype of `exprt`) using the
+function `to_symbol_expr`.
+
+
+**Task:** Add flags to `goto-instrument` to print out the following information:
+* the name of every function that is *called* in the program;
+* the value of every constant in the program;
+* the value of every symbol in the program.
+
diff --git a/doc/html-manual/binsearch.c b/doc/assets/binsearch.c
similarity index 100%
rename from doc/html-manual/binsearch.c
rename to doc/assets/binsearch.c
diff --git a/doc/html-manual/c_to_ir.svg b/doc/assets/c_to_ir.svg
similarity index 100%
rename from doc/html-manual/c_to_ir.svg
rename to doc/assets/c_to_ir.svg
diff --git a/doc/html-manual/cegar-1.png b/doc/assets/cegar-1.png
similarity index 100%
rename from doc/html-manual/cegar-1.png
rename to doc/assets/cegar-1.png
diff --git a/doc/html-manual/counter.c b/doc/assets/counter.c
similarity index 100%
rename from doc/html-manual/counter.c
rename to doc/assets/counter.c
diff --git a/doc/html-manual/boop-example/driver.c b/doc/assets/driver.c
similarity index 100%
rename from doc/html-manual/boop-example/driver.c
rename to doc/assets/driver.c
diff --git a/doc/html-manual/boop-example/driver.h b/doc/assets/driver.h
similarity index 100%
rename from doc/html-manual/boop-example/driver.h
rename to doc/assets/driver.h
diff --git a/doc/html-manual/expr.c b/doc/assets/expr.c
similarity index 100%
rename from doc/html-manual/expr.c
rename to doc/assets/expr.c
diff --git a/doc/html-manual/expr.svg b/doc/assets/expr.svg
similarity index 100%
rename from doc/html-manual/expr.svg
rename to doc/assets/expr.svg
diff --git a/doc/html-manual/file1.c b/doc/assets/file1.c
similarity index 100%
rename from doc/html-manual/file1.c
rename to doc/assets/file1.c
diff --git a/doc/html-manual/file2.c b/doc/assets/file2.c
similarity index 100%
rename from doc/html-manual/file2.c
rename to doc/assets/file2.c
diff --git a/doc/html-manual/gcc-wrap.c b/doc/assets/gcc-wrap.c
similarity index 100%
rename from doc/html-manual/gcc-wrap.c
rename to doc/assets/gcc-wrap.c
diff --git a/doc/html-manual/goto_program.svg b/doc/assets/goto_program.svg
similarity index 100%
rename from doc/html-manual/goto_program.svg
rename to doc/assets/goto_program.svg
diff --git a/doc/html-manual/ireptree.svg b/doc/assets/ireptree.svg
similarity index 100%
rename from doc/html-manual/ireptree.svg
rename to doc/assets/ireptree.svg
diff --git a/doc/html-manual/boop-example/kdev_t.h b/doc/assets/kdev_t.h
similarity index 100%
rename from doc/html-manual/boop-example/kdev_t.h
rename to doc/assets/kdev_t.h
diff --git a/doc/html-manual/lock-example-fixed.c b/doc/assets/lock-example-fixed.c
similarity index 100%
rename from doc/html-manual/lock-example-fixed.c
rename to doc/assets/lock-example-fixed.c
diff --git a/doc/html-manual/lock-example.c b/doc/assets/lock-example.c
similarity index 100%
rename from doc/html-manual/lock-example.c
rename to doc/assets/lock-example.c
diff --git a/doc/html-manual/boop-example/modules.h b/doc/assets/modules.h
similarity index 100%
rename from doc/html-manual/boop-example/modules.h
rename to doc/assets/modules.h
diff --git a/doc/html-manual/pid.c b/doc/assets/pid.c
similarity index 100%
rename from doc/html-manual/pid.c
rename to doc/assets/pid.c
diff --git a/doc/html-manual/pid.png b/doc/assets/pid.png
similarity index 100%
rename from doc/html-manual/pid.png
rename to doc/assets/pid.png
diff --git a/doc/html-manual/refinement.png b/doc/assets/refinement.png
similarity index 100%
rename from doc/html-manual/refinement.png
rename to doc/assets/refinement.png
diff --git a/doc/html-manual/ring_buffer1.c b/doc/assets/ring_buffer1.c
similarity index 100%
rename from doc/html-manual/ring_buffer1.c
rename to doc/assets/ring_buffer1.c
diff --git a/doc/html-manual/ring_buffer2.c b/doc/assets/ring_buffer2.c
similarity index 100%
rename from doc/html-manual/ring_buffer2.c
rename to doc/assets/ring_buffer2.c
diff --git a/doc/html-manual/boop-example/spec.c b/doc/assets/spec.c
similarity index 100%
rename from doc/html-manual/boop-example/spec.c
rename to doc/assets/spec.c
diff --git a/doc/html-manual/states.png b/doc/assets/states.png
similarity index 100%
rename from doc/html-manual/states.png
rename to doc/assets/states.png
diff --git a/doc/cprover-manual.md b/doc/cprover-manual.md
new file mode 100644
index 00000000000..90d609193d9
--- /dev/null
+++ b/doc/cprover-manual.md
@@ -0,0 +1,2982 @@
+\ingroup module_hidden
+\page cprover-manual CProver User Manual
+
+\author Daniel Kroening
+
+This tutorial is intended for users of CProver (CBMC, SatAbs, and
+associated tools).
+
+\tableofcontents
+
+\section man_introduction Introduction
+
+## Motivation
+
+Numerous tools to hunt down functional design flaws in silicon have been
+available for many years, mainly due to the enormous cost of hardware
+bugs. The use of such tools is wide-spread. In contrast, the market for
+tools that address the need for quality software is still in its
+infancy.
+
+Research in software quality has an enormous breadth. We focus the
+presentation using two criteria:
+
+1. We believe that any form of quality requires a specific *guarantee*,
+ in theory and practice.
+2. The sheer size of software designs requires techniques that are
+ highly *automated*.
+
+In practice, quality guarantees usually do not refer to "total
+correctness" of a design, as ensuring the absence of all bugs is too
+expensive for most applications. In contrast, a guarantee of the absence
+of specific flaws is achievable, and is a good metric of quality.
+
+We document two programs that try to achieve formal guarantees of the
+absence of specific problems: CBMC and SATABS. The algorithms
+implemented by CBMC and SATABS are complementary, and often, one tool is
+able to solve a problem that the other cannot solve.
+
+Both CBMC and SATABS are verification tools for ANSI-C/C++ programs.
+They verify array bounds (buffer overflows), pointer safety, exceptions
+and user-specified assertions. Both tools model integer arithmetic
+accurately, and are able to reason about machine-level artifacts such as
+integer overflow. CBMC and SATABS are therefore able to detect a class
+of bugs that has so far gone unnoticed by many other verification tools.
+This manual also covers some variants of CBMC, which includes HW-CBMC
+for
+\ref man_hard-soft-introduction "hardware/software co-verification".
+
+## Bounded Model Checking with CBMC
+
+CBMC implements a technique called *Bounded Model Checking* (BMC). In
+BMC, the transition relation for a complex state machine and its
+specification are jointly unwound to obtain a Boolean formula, which is
+then checked for satisfiability by using an efficient SAT procedure. If
+the formula is satisfiable, a counterexample is extracted from the
+output of the SAT procedure. If the formula is not satisfiable, the
+program can be unwound more to determine if a longer counterexample
+exists.
+
+In many engineering domains, real-time guarantees are a strict
+requirement. An example is software embedded in automotive controllers.
+As a consequence, the loop constructs in these types of programs often
+have a strict bound on the number of iterations. CBMC is able to
+formally verify such bounds by means of *unwinding assertions*. Once
+this bound is established, CBMC is able to prove the absence of errors.
+
+A more detailed description of how to apply CBMC to verify programs is
+\ref man_cbmc-tutorial "here".
+
+## Automatic Program Verification with SATABS
+
+In many cases, lightweight properties such as array bounds do not rely
+on the entire program. A large fraction of the program is *irrelevant*
+to the property. SATABS exploits this observation and computes an
+*abstraction* of the program in order to handle large amounts of code.
+
+In order to use SATABS it is not necessary to understand the abstraction
+refinement process. For the interested reader, a high-level introduction
+to abstraction refinement is provided
+\ref man_satabs-overview "here".
+We also provide
+\ref man_satabs-tutorials "tutorials on how to use SATABS".
+
+Just as CBMC, SATABS attempts to build counterexamples that refute the
+property. If such a counterexample is found, it is presented to the
+engineer to facilitate localization and repair of the program.
+
+### Example: Buffer Overflows
+
+In order to give a brief overview of the capabilities of CBMC and SATABS
+we start with a small example. The issue of *[buffer
+overflows](http://en.wikipedia.org/wiki/Buffer_overflow)* has obtained
+wide public attention. A buffer is a contiguously-allocated chunk of
+memory, represented by an array or a pointer in C. Programs written in C
+do not provide automatic bounds checking on the buffer, which means a
+program can – accidentally or maliciously – write past a buffer. The
+following example is a perfectly valid C program (in the sense that a
+compiler compiles it without any errors):
+
+```{.c}
+int main()
+{
+ int buffer[10];
+ buffer[20] = 10;
+}
+```
+
+However, the write access to an address outside the allocated memory
+region can lead to unexpected behavior. In particular, such bugs can be
+exploited to overwrite the return address of a function, thus enabling
+the execution of arbitrary user-induced code. CBMC and SATABS are able
+to detect this problem and reports that the "upper bound property" of
+the buffer is violated. CBMC and SATABS are capable of checking these
+lower and upper bounds, even for arrays with dynamic size. A detailed
+discussion of the properties that CBMC and SATABS can check
+automatically is \ref man_instrumentation-properties "here".
+
+## Hardware/Software Co-Verification
+
+Software programs often interact with hardware in a non-trivial manner,
+and many properties of the overall design only arise from the interplay
+of both components. CBMC and SATABS therefore support *Co-Verification*,
+i.e., are able to reason about a C/C++ program together with a circuit
+description given in Verilog.
+
+These co-verification capabilities can also be applied to perform
+refinement proofs. Software programs are often used as high-level
+descriptions of circuitry. While both describe the same functionality,
+the hardware implementation usually contains more detail. It is highly
+desirable to establish some form for equivalence between the two
+descriptions. Hardware/Software co-verification and equivalence checking
+with CBMC and SATABS are described
+\ref man_hard-soft-introduction "here".
+
+
+\section man_installation Installation
+
+\subsection man_install-cbmc Installing CBMC
+
+### Requirements
+
+CBMC is available for Windows, i86 Linux, and MacOS X. CBMC requires a
+code pre-processing environment comprising of a suitable preprocessor
+and an a set of header files.
+
+1. **Linux:** the preprocessor and the header files typically come with
+ a package called *gcc*, which must be installed prior to the
+ installation of CBMC.
+
+2. **Windows:** The Windows version of CBMC requires the preprocessor
+ `cl.exe`, which is part of Microsoft Visual Studio. We recommend the
+ free [Visual Studio Community
+ 2013](http://www.visualstudio.com/en-us/products/visual-studio-community-vs).
+
+3. **MacOS:** Install the [XCode Command Line
+ Utilities](http://developer.apple.com/technologies/xcode.html) prior
+ to installing CBMC. Just installing XCode alone is not enough.
+
+Important note for Windows users: Visual Studio's `cl.exe` relies on a
+complex set of environment variables to identify the target architecture
+and the directories that contain the header files. You must run CBMC
+from within the *Visual Studio Command Prompt*.
+
+Note that the distribution files for the [Eclipse
+plugin](installation-plugin.shtml) include the CBMC executable.
+Therefore, if you intend to run CBMC exclusively within Eclipse, you can
+skip the installation of the CBMC executable. However, you still have to
+install the compiler environment as described above.
+
+### Installing the CBMC Binaries
+
+1. Download CBMC for your operating system. The binaries are available
+ from http://www.cprover.org/cbmc/.
+2. Unzip/untar the archive into a directory of your choice. We
+ recommend to add this directory to your `PATH` environment variable.
+
+You are now ready to \ref man_cbmc-tutorial "use CBMC"!
+
+### Building CBMC from Source
+
+Alternatively, the CBMC source code is available [via
+SVN](http://www.cprover.org/svn/cbmc/). To compile the source code,
+follow [these
+instructions](http://www.cprover.org/svn/cbmc/trunk/COMPILING).
+
+\subsection man_install-satabs Installing SATABS
+
+### Requirements
+
+SATABS is available for Windows, i86 Linux, and MacOS X. SATABS requires
+a code pre-processing environment comprising of a suitable preprocessor
+and an a set of header files.
+
+1. **Linux:** the preprocessor and the header files typically come with
+ a package called *gcc*, which must be installed prior to the
+ installation of SATABS.
+2. **Windows:** The Windows version of SATABS requires the preprocessor
+ `cl.exe`, which is part of Visual Studio (including the free [Visual
+ Studio
+ Express](http://msdn.microsoft.com/vstudio/express/visualc/)).
+3. **MacOS:** Install
+ [XCode](http://developer.apple.com/technologies/xcode.html) prior to
+ installing SATABS.
+
+Important note for Windows users: Visual Studio's `cl.exe` relies on a
+complex set of environment variables to identify the target architecture
+and the directories that contain the header files. You must run SATABS
+from within the *Visual Studio Command Prompt*.
+
+Note that the distribution files for the [Eclipse
+plugin](installation-plugin.shtml) include the command-line tools.
+Therefore, if you intend to run SATABS exclusively within Eclipse, you
+can skip the installation of the command-line tools. However, you still
+have to install the compiler environment as described above.
+
+### Choosing and Installing a Model Checker
+
+You need to install a Model Checker in order to be able to run SATABS.
+You can choose between following alternatives:
+
+- **Cadence SMV**. Available from http://www.kenmcmil.com/smv.html.
+ Cadence SMV is a commercial model checker. The free version that is
+ available on the homepage above must not be used for commercial
+ purposes (read the license agreement thoroughly before you download
+ the tool). The documentation for SMV can be found in the directory
+ where you unzip/untar SMV under ./smv/doc/smv/. Read the
+ installation instructions carefully. The Linux/MacOS versions
+ require setting environment variables. You must add add the
+ directory containing the `smv` binary (located in ./smv/bin/,
+ relative to the path where you unpacked it) to your `PATH`
+ environment variable. SATABS uses Cadence SMV by default.
+
+- **NuSMV**. Available from http://nusmv.irst.itc.it/. NuSMV is the
+ open source alternative to Cadence SMV. Installation instructions
+ and documentation can be found on the NuSMV homepage. The directory
+ containing the NuSMV binary should be added to your `PATH`
+ environment variable. Use the option
+
+ --modelchecker nusmv
+
+ to instruct SATABS to use NuSMV.
+
+- **BOPPO**. Available from http://www.cprover.org/boppo/. BOPPO is
+ a model checker that uses SAT-solving algorithms. BOPPO relies on a
+ built-in SAT solver and Quantor, a solver for quantified boolean
+ formulas that is currently bundled with BOPPO, but also available
+ separately from . We recommend to add
+ the directories containing both tools to your `PATH` environment
+ variable. Use the option
+
+ --modelchecker boppo
+
+ when you call SATABS and want it to use BOPPO instead of SMV.
+
+- **BOOM**. Available from http://www.cprover.org/boom/. Boom has a
+ number of unique features, including the verification of programs
+ with unbounded thread creation.
+
+### Installing SATABS
+
+1. Download SATABS for your operating system. The binaries are
+ available from .
+2. Unzip/untar the archive into a directory of your choice. We
+ recommend to add this directory to your `PATH` environment variable.
+
+Now you can execute SATABS. Try running SATABS on the small examples
+presented in the \ref man_satabs-tutorials "SATABS tutorial". If you use
+the Cadence SMV model checker, the only command line arguments you have
+to specify are the names of the files that contain your program.
+
+
+\subsection man_install-eclipse Installing the Eclipse Plugin
+
+### Requirements
+
+We provide a graphical user interface to CBMC and SATABS, which is
+realized as a plugin to the Eclipse framework. Eclipse is available at
+http://www.eclipse.org. We do not provide installation instructions for
+Eclipse (basically, you only have to download the current version and
+extract the files to your hard-disk) and assume that you have already
+installed the current version.
+
+CBMC and SATABS have their own requirements. As an example, both CBMC and SATABS require a suitable preprocessor and a set of header files. As
+first step, you should therefore follow the installation instructions
+for \ref man_install-cbmc "CBMC" and \ref man_install-satabs "SATABS".
+
+Important note for Windows users: Visual Studio's `cl.exe` relies on a
+complex set of environment variables to identify the target architecture
+and the directories that contain the header files. You must run Eclipse
+from within the *Visual Studio Command Prompt*.
+
+### Installing the Eclipse Plugin
+
+The installation instructions for the Eclipse Plugin, including the link
+to the download site, are available
+[here](http://www.cprover.org/eclipse-plugin/). This includes a small
+tutorial on how to use the Eclipse plugin.
+
+
+\section man_cbmc CBMC: Bounded Model Checking for C, C++ and Java
+
+\subsection man_cbmc-tutorial A Short Tutorial
+
+### First Steps
+
+We assume you have already installed CBMC and the necessary support
+files on your system. If not so, please follow the instructions
+\ref man_install-cbmc "here".
+
+Like a compiler, CBMC takes the names of .c files as command line
+arguments. CBMC then translates the program and merges the function
+definitions from the various .c files, just like a linker. But instead
+of producing a binary for execution, CBMC performs symbolic simulation
+on the program.
+
+As an example, consider the following simple program, named file1.c:
+
+ int puts(const char *s) { }
+
+ int main(int argc, char **argv) {
+ puts(argv[2]);
+ }
+
+Of course, this program is faulty, as the `argv` array might have fewer
+than three elements, and then the array access `argv[2]` is out of
+bounds. Now, run CBMC as follows:
+
+ cbmc file1.c --show-properties --bounds-check --pointer-check
+
+The two options `--bounds-check` and `--pointer-check` instruct CBMC to
+look for errors related to pointers and array bounds. CBMC will print
+the list of properties it checks. Note that it lists, among others, a
+property labelled with "object bounds in argv" together with the location
+of the faulty array access. As you can see, CBMC largely determines the
+property it needs to check itself. This is realized by means of a
+preliminary static analysis, which relies on computing a fixed point on
+various [abstract
+domains](http://en.wikipedia.org/wiki/Abstract_interpretation). More
+detail on automatically generated properties is provided
+\ref man_instrumentation-properties "here".
+
+Note that these automatically generated properties need not necessarily
+correspond to bugs – these are just *potential* flaws, as abstract
+interpretation might be imprecise. Whether these properties hold or
+correspond to actual bugs needs to be determined by further analysis.
+
+CBMC performs this analysis using *symbolic simulation*, which
+corresponds to a translation of the program into a formula. The formula
+is then combined with the property. Let's look at the formula that is
+generated by CBMC's symbolic simulation:
+
+ cbmc file1.c --show-vcc --bounds-check --pointer-check
+
+With this option, CBMC performs the symbolic simulation and prints the
+verification conditions on the screen. A verification condition needs to
+be proven to be valid by a [decision
+procedure](http://en.wikipedia.org/wiki/Decision_problem) in order to
+assert that the corresponding property holds. Let's run the decision
+procedure:
+
+ cbmc file1.c --bounds-check --pointer-check
+
+CBMC transforms the equation you have seen before into CNF and passes it
+to a SAT solver (more background on this step is in the book on
+[Decision Procedures](http://www.decision-procedures.org/)). It then
+determines which of the properties that it has generated for the program
+hold and which do not. Using the SAT solver, CBMC detects that the
+property for the object bounds of `argv` does not hold, and will thus
+print a line as follows:
+
+ [main.pointer_dereference.6] dereference failure: object bounds in argv[(signed long int)2]: FAILURE
+
+### Counterexample Traces
+
+Let us have a closer look at this property and why it fails. To aid the
+understanding of the problem, CBMC can generate a *counterexample trace*
+for failed properties. To obtain this trace, run
+
+ cbmc file1.c --bounds-check --trace
+
+CBMC then prints a counterexample trace, i.e., a program trace that
+begins with `main` and ends in a state which violates the property. In
+our example, the program trace ends in the faulty array access. It also
+gives the values the input variables must have for the bug to occur. In
+this example, `argc` must be one to trigger the out-of-bounds array
+access. If you add a branch to the example that requires that `argc>=3`,
+the bug is fixed and CBMC will report that the proofs of all properties
+have been successful.
+
+### Verifying Modules
+
+In the example above, we used a program that starts with a `main`
+function. However, CBMC is aimed at embedded software, and these kinds
+of programs usually have different entry points. Furthermore, CBMC is
+also useful for verifying program modules. Consider the following
+example, called file2.c:
+
+ int array[10];
+ int sum() {
+ unsigned i, sum;
+
+ sum=0;
+ for(i=0; i<10; i++)
+ sum+=array[i];
+ }
+
+In order to set the entry point to the `sum` function, run
+
+ cbmc file2.c --function sum --bounds-check
+
+It is often necessary to build a suitable *harness* for the function in
+order to set up the environment appropriately.
+
+### Loop Unwinding
+
+When running the previous example, you will have noted that CBMC unwinds
+the `for` loop in the program. As CBMC performs Bounded Model Checking,
+all loops have to have a finite upper run-time bound in order to
+guarantee that all bugs are found. CBMC can optionally check that enough
+unwinding is performed. As an example, consider the program binsearch.c:
+
+ int binsearch(int x) {
+ int a[16];
+ signed low=0, high=16;
+
+ while(low>1);
+
+ if(a[middle]x)
+ low=middle+1;
+ else // a[middle]==x
+ return middle;
+ }
+
+ return -1;
+ }
+
+If you run CBMC on this function, you will notice that the unwinding
+does not stop on its own. The built-in simplifier is not able to
+determine a run time bound for this loop. The unwinding bound has to be
+given as a command line argument:
+
+ cbmc binsearch.c --function binsearch --unwind 6 --bounds-check --unwinding-assertions
+
+CBMC verifies that verifies the array accesses are within the bounds;
+note that this actually depends on the result of the right shift. In
+addition, as CBMC is given the option `--unwinding-assertions`, it also
+checks that enough unwinding is done, i.e., it proves a run-time bound.
+For any lower unwinding bound, there are traces that require more loop
+iterations. Thus, CBMC will report that the unwinding assertion has
+failed. As usual, a counterexample trace that documents this can be
+obtained with the option `--property`.
+
+### Unbounded Loops
+
+CBMC can also be used for programs with unbounded loops. In this case,
+CBMC is used for bug hunting only; CBMC does not attempt to find all
+bugs. The following program (lock-example.c) is an example of a program
+with a user-specified property:
+
+ _Bool nondet_bool();
+ _Bool LOCK = 0;
+
+ _Bool lock() {
+ if(nondet_bool()) {
+ assert(!LOCK);
+ LOCK=1;
+ return 1; }
+
+ return 0;
+ }
+
+ void unlock() {
+ assert(LOCK);
+ LOCK=0;
+ }
+
+ int main() {
+ unsigned got_lock = 0;
+ int times;
+
+ while(times > 0) {
+ if(lock()) {
+ got_lock++;
+ /* critical section */
+ }
+
+ if(got_lock!=0)
+ unlock();
+
+ got_lock--;
+ times--;
+ }
+ }
+
+The `while` loop in the `main` function has no (useful) run-time bound.
+Thus, a bound has to be set on the amount of unwinding that CBMC
+performs. There are two ways to do so:
+
+1. The `--unwind` command-line parameter can to be used to limit the
+ number of times loops are unwound.
+2. The `--depth` command-line parameter can be used to limit the number
+ of program steps to be processed.
+
+Given the option `--unwinding-assertions`, CBMC checks whether the
+arugment to `--unwind` is large enough to cover all program paths. If
+the argument is too small, CBMC will detect that not enough unwinding is
+done reports that an unwinding assertion has failed.
+
+Reconsider the example. For a loop unwinding bound of one, no bug is
+found. But already for a bound of two, CBMC detects a trace that
+violates an assertion. Without unwinding assertions, or when using the
+`--depth` command line switch, CBMC does not prove the program correct,
+but it can be helpful to find program bugs. The various command line
+options that CBMC offers for loop unwinding are described in the section
+on \ref man_cbmc-loops "understanding loop unwinding".
+
+### A Note About Compilers and the ANSI-C Library
+
+Most C programs make use of functions provided by a library; instances
+are functions from the standard ANSI-C library such as `malloc` or
+`printf`. The verification of programs that use such functions has two
+requirements:
+
+1. Appropriate header files have to be provided. These header files
+ contain *declarations* of the functions that are to be used.
+2. Appropriate *definitions* have to be provided.
+
+Most C compilers come with header files for the ANSI-C library
+functions. We briefly discuss how to obtain/install these library files.
+
+#### Linux
+
+Linux systems that are able to compile software are usually equipped
+with the appropriate header files. Consult the documentation of your
+distribution on how to install the compiler and the header files. First
+try to compile some significant program before attempting to verify it.
+
+#### Windows
+
+On Microsoft Windows, CBMC is pre-configured to use the compiler that is
+part of Microsoft's Visual Studio. Microsoft's [Visual Studio
+Community](http://www.visualstudio.com/en-us/products/visual-studio-community-vs)
+is fully featured and available for download for free from the Microsoft
+webpage. Visual Studio installs the usual set of header files together
+with the compiler. However, the Visual Studio compiler requires a large
+set of environment variables to function correctly. It is therefore
+required to run CBMC from the *Visual Studio Command Prompt*, which can
+be found in the menu *Visual Studio Tools*.
+
+Note that in both cases, only header files are available. CBMC only
+comes with a small set of definitions, which includes functions such as
+`malloc`. Detailed information about the built-in definitions is
+\ref man_instrumentation-libraries "here".
+
+### Command Line Interface
+
+This section describes the command line interface of CBMC. Like a C
+compiler, CBMC takes the names of the .c source files as arguments.
+Additional options allow to customize the behavior of CBMC. Use
+`cbmc --help` to get a full list of the available options.
+
+Structured output can be obtained from CBMC using the option `--xml-ui`.
+Any output from CBMC (e.g., counterexamples) will then use an XML
+representation.
+
+### Further Reading
+
+- \ref man_cbmc-loops "Understanding Loop Unwinding"
+- [Hardware Verification using ANSI-C Programs as a
+ Reference](http://www-2.cs.cmu.edu/~svc/papers/view-publications-ck03.html)
+- [Behavioral Consistency of C and Verilog Programs Using Bounded
+ Model Checking](http://www-2.cs.cmu.edu/~svc/papers/view-publications-cky03.html)
+- [A Tool for Checking ANSI-C
+ Programs](http://www-2.cs.cmu.edu/~svc/papers/view-publications-ckl2004.html)
+
+We also have a [list of interesting applications of
+CBMC](http://www.cprover.org/cbmc/applications/).
+
+
+\subsection man_cbmc-loops Understanding Loop Unwinding
+
+### Iteration-based Unwinding
+
+The basic idea of CBMC is to model the computation of the programs up to
+a particular depth. Technically, this is achieved by a process that
+essentially amounts to *unwinding loops*. This concept is best
+illustrated with a generic example:
+
+ int main(int argc, char **argv) {
+ while(cond) {
+ BODY CODE
+ }
+ }
+
+A BMC instance that will find bugs with up to five iterations of the
+loop would contain five copies of the loop body, and essentially
+corresponds to checking the following loop-free program:
+
+ int main(int argc, char **argv) {
+ if(cond) {
+ BODY CODE COPY 1
+ if(cond) {
+ BODY CODE COPY 2
+ if(cond) {
+ BODY CODE COPY 3
+ if(cond) {
+ BODY CODE COPY 4
+ if(cond) {
+ BODY CODE COPY 5
+ }
+ }
+ }
+ }
+ }
+ }
+
+Note the use of the `if` statement to prevent the execution of the loop
+body in the case that the loop ends before five iterations are executed.
+The construction above is meant to produce a program that is trace
+equivalent with the original programs for those traces that contain up
+to five iterations of the loop.
+
+In many cases, CBMC is able to automatically determine an upper bound on
+the number of loop iterations. This may even work when the number of
+loop unwindings is not constant. Consider the following example:
+
+ _Bool f();
+
+ int main() {
+ for(int i=0; i<100; i++) {
+ if(f()) break;
+ }
+ assert(0);
+ }
+
+The loop in the program above has an obvious upper bound on the number
+of iterations, but note that the loop may abort prematurely depending on
+the value that is returned by `f()`. CBMC is nevertheless able to
+automatically unwind the loop to completion.
+
+This automatic detection of the unwinding bound may fail if the number
+of loop iterations is highly data-dependent. Furthermore, the number of
+iterations that are executed by any given loop may be too large or may
+simply be unbounded. For this case, CBMC offers the command-line option
+`--unwind B`, where `B` denotes a number that corresponds to the maximal
+number of loop unwindings CBMC performs on any loop.
+
+Note that the number of unwindings is measured by counting the number of
+*backjumps*. In the example above, note that the condition `i<100` is in
+fact evaluated 101 times before the loop terminates. Thus, the loop
+requires a limit of 101, and not 100.
+
+### Setting Separate Unwinding Limits
+
+The setting given with `--unwind` is used globally, that is, for all
+loops in the program. In order to set individual limits for the loops,
+first use
+
+ --show-loops
+
+to obtain a list of all loops in the program. Then identify the loops
+you need to set a separate bound for, and note their loop ID. Then use
+
+ --unwindset L:B,L:B,...
+
+where `L` denotes a loop ID and `B` denotes the bound for that loop.
+
+As an example, consider a program with two loops in the function main:
+
+ --unwindset c::main.0:10,c::main.1:20
+
+This sets a bound of 10 for the first loop, and a bound of 20 for the
+second loop.
+
+What if the number of unwindings specified is too small? In this case,
+bugs that require paths that are deeper may be missed. In order to
+address this problem, CBMC can optionally insert checks that the given
+unwinding bound is actually sufficiently large. These checks are called
+*unwinding assertions*, and are enabled with the option
+`--unwinding-assertions`. Continuing the generic example above, this
+unwinding assertion for a bound of five corresponds to checking the
+following loop-free program:
+
+ int main(int argc, char **argv) {
+ if(cond) {
+ BODY CODE COPY 1
+ if(cond) {
+ BODY CODE COPY 2
+ if(cond) {
+ BODY CODE COPY 3
+ if(cond) {
+ BODY CODE COPY 4
+ if(cond) {
+ BODY CODE COPY 5
+ assert(!cond);
+ }
+ }
+ }
+ }
+ }
+ }
+
+The unwinding assertions can be verified just like any other generated
+assertion. If all of them are proven to hold, the given loop bounds are
+sufficient for the program. This establishes a [high-level worst-case
+execution time](http://en.wikipedia.org/wiki/Worst-case_execution_time)
+(WCET).
+
+In some cases, it is desirable to cut off very deep loops in favor of
+code that follows the loop. As an example, consider the following
+program:
+
+ int main() {
+ for(int i=0; i<10000; i++) {
+ BODY CODE
+ }
+ assert(0);
+ }
+
+In the example above, small values of `--unwind` will prevent that the
+assertion is reached. If the code in the loop is considered irrelevant
+to the later assertion, use the option
+
+ --partial-loops
+
+This option will allow paths that execute loops only partially, enabling
+a counterexample for the assertion above even for small unwinding
+bounds. The disadvantage of using this option is that the resulting path
+may be spurious, i.e., may not exist in the original program.
+
+### Depth-based Unwinding
+
+The loop-based unwinding bound is not always appropriate. In particular,
+it is often difficult to control the size of the generated formula when
+using the `--unwind` option. The option
+
+ --depth nr
+
+specifies an unwinding bound in terms of the number of instructions that
+are executed on a given path, irrespectively of the number of loop
+iterations. Note that CBMC uses the number of instructions in the
+control-flow graph as the criterion, not the number of instructions in
+the source code.
+
+\subsection man_cbmc-cover COVER: Test Suite Generation with CBMC
+
+
+### A Small Tutorial with A Case Study
+
+We assume that CBMC is installed on your system. If not so, follow
+\ref man_install-cbmc "these instructions".
+
+CBMC can be used to automatically generate test cases that satisfy a
+certain [code coverage](https://en.wikipedia.org/wiki/Code_coverage)
+criteria. Common coverage criteria include branch coverage, condition
+coverage and [Modified Condition/Decision Coverage
+(MC/DC)](https://en.wikipedia.org/wiki/Modified_condition/decision_coverage).
+Among others, MC/DC is required by several avionics software development
+guidelines to ensure adequate testing of safety critical software.
+Briefly, in order to satisfy MC/DC, for every conditional statement
+containing boolean decisions, each Boolean variable should be evaluated
+one time to "true" and one time to "false", in a way that affects the
+outcome of the decision.
+
+In the following, we are going to demonstrate how to apply the test
+suite generation functionality in CBMC, by means of a case study. The
+following program is an excerpt from a real-time embedded benchmark
+[PapaBench](https://www.irit.fr/recherches/ARCHI/MARCH/rubrique.php3?id_rubrique=97),
+and implements part of a fly-by-wire autopilot for an Unmanned Aerial
+Vehicle (UAV). It is adjusted mildly for our purposes.
+
+The aim of function `climb_pid_run` is to control the vertical climb of
+the UAV. Details on the theory behind this operation are documented in
+the [wiki](https://wiki.paparazziuav.org/wiki/Theory_of_Operation) for
+the Paparazzi UAV project. The behaviour of this simple controller,
+supposing that the desired speed is 0.5 meters per second, is plotted in
+the Figure below.
+
+\image html pid.png "The pid controller"
+
+ 01: // CONSTANTS:
+ 02: #define MAX_CLIMB_SUM_ERR 10
+ 03: #define MAX_CLIMB 1
+ 04:
+ 05: #define CLOCK 16
+ 06: #define MAX_PPRZ (CLOCK*600)
+ 07:
+ 08: #define CLIMB_LEVEL_GAZ 0.31
+ 09: #define CLIMB_GAZ_OF_CLIMB 0.75
+ 10: #define CLIMB_PITCH_OF_VZ_PGAIN 0.05
+ 11: #define CLIMB_PGAIN -0.03
+ 12: #define CLIMB_IGAIN 0.1
+ 13:
+ 14: const float pitch_of_vz_pgain=CLIMB_PITCH_OF_VZ_PGAIN;
+ 15: const float climb_pgain=CLIMB_PGAIN;
+ 16: const float climb_igain=CLIMB_IGAIN;
+ 17: const float nav_pitch=0;
+ 18:
+ 19: /** PID function INPUTS */
+ 20: // The user input: target speed in vertical direction
+ 21: float desired_climb;
+ 22: // Vertical speed of the UAV detected by GPS sensor
+ 23: float estimator_z_dot;
+ 24:
+ 25: /** PID function OUTPUTS */
+ 26: float desired_gaz;
+ 27: float desired_pitch;
+ 28:
+ 29: /** The state variable: accumulated error in the control */
+ 30: float climb_sum_err=0;
+ 31:
+ 32: /** Computes desired_gaz and desired_pitch */
+ 33: void climb_pid_run()
+ 34: {
+ 35:
+ 36: float err=estimator_z_dot-desired_climb;
+ 37:
+ 38: float fgaz=climb_pgain*(err+climb_igain*climb_sum_err)+CLIMB_LEVEL_GAZ+CLIMB_GAZ_OF_CLIMB*desired_climb;
+ 39:
+ 40: float pprz=fgaz*MAX_PPRZ;
+ 41: desired_gaz=((pprz>=0 && pprz<=MAX_PPRZ) ? pprz : (pprz>MAX_PPRZ ? MAX_PPRZ : 0));
+ 42:
+ 43: /** pitch offset for climb */
+ 44: float pitch_of_vz=(desired_climb>0) ? desired_climb*pitch_of_vz_pgain : 0;
+ 45: desired_pitch=nav_pitch+pitch_of_vz;
+ 46:
+ 47: climb_sum_err=err+climb_sum_err;
+ 48: if (climb_sum_err>MAX_CLIMB_SUM_ERR) climb_sum_err=MAX_CLIMB_SUM_ERR;
+ 49: if (climb_sum_err<-MAX_CLIMB_SUM_ERR) climb_sum_err=-MAX_CLIMB_SUM_ERR;
+ 50:
+ 51: }
+ 52:
+ 53: int main()
+ 54: {
+ 55:
+ 56: while(1)
+ 57: {
+ 58: /** Non-deterministic input values */
+ 59: desired_climb=nondet_float();
+ 60: estimator_z_dot=nondet_float();
+ 61:
+ 62: /** Range of input values */
+ 63: __CPROVER_assume(desired_climb>=-MAX_CLIMB && desired_climb<=MAX_CLIMB);
+ 64: __CPROVER_assume(estimator_z_dot>=-MAX_CLIMB && estimator_z_dot<=MAX_CLIMB);
+ 65:
+ 66: __CPROVER_input("desired_climb", desired_climb);
+ 67: __CPROVER_input("estimator_z_dot", estimator_z_dot);
+ 68:
+ 69: climb_pid_run();
+ 70:
+ 71: __CPROVER_output("desired_gaz", desired_gaz);
+ 72: __CPROVER_output("desired_pitch", desired_pitch);
+ 73:
+ 74: }
+ 75:
+ 76: return 0;
+ 77: }
+
+In order to test the PID controller, we construct a main control loop,
+which repeatedly invokes the function `climb_pid_run` (line 69). This
+PID function has two input variables: the desired speed `desired_climb`
+and the estimated speed `estimated_z_dot`. In the beginning of each loop
+iteration, values of the inputs are assigned non-deterministically.
+Subsequently, the `__CPROVER_assume` statement in lines 63 and 64
+guarantees that both values are bounded within a valid range. The
+`__CPROVER_input` and `__CPROVER_output` will help clarify the inputs
+and outputs of interest for generating test suites.
+
+To demonstrate the automatic test suite generation in CBMC, we call the
+following command and we are going to explain the command line options
+one by one.
+
+ cbmc pid.c --cover mcdc --unwind 6 --xml-ui
+
+The option `--cover mcdc` specifies the code coverage criterion. There
+are four conditional statements in the PID function: in line 41, line
+44, line 48 and line 49. To satisfy MC/DC, the test suite has to meet
+multiple requirements. For instance, each conditional statement needs to
+evaluate to *true* and *false*. Consider the condition
+`"pprz>=0 && pprz<=MAX_PPRZ"` in line 41. CBMC instruments three
+coverage goals to control the respective evaluated results of
+`"pprz>=0"` and `"pprz<=MAX_PPRZ"`. We list them in below and they
+satisfy the MC/DC rules. Note that `MAX_PPRZ` is defined as 16 \* 600 in
+line 06 of the program.
+
+ !(pprz >= (float)0) && pprz <= (float)(16 * 600) id="climb_pid_run.coverage.1"
+ pprz >= (float)0 && !(pprz <= (float)(16 * 600)) id="climb_pid_run.coverage.2"
+ pprz >= (float)0 && pprz <= (float)(16 * 600) id="climb_pid_run.coverage.3"
+
+The "id" of each coverage goal is automatically assigned by CBMC. For
+every coverage goal, a test suite (if there exists) that satisfies such
+a goal is printed out in XML format, as the parameter `--xml-ui` is
+given. Multiple coverage goals can share a test suite, when the
+corresponding execution of the program satisfies all these goals at the
+same time.
+
+In the end, the following test suites are automatically generated for
+testing the PID controller. A test suite consists of a sequence of input
+parameters that are passed to the PID function `climb_pid_run` at each
+loop iteration. For example, Test 1 covers the MC/DC goal with
+id="climb\_pid\_run.coverage.1". The complete output from CBMC is in
+[pid\_test\_suites.xml](pid_test_suites.xml), where every test suite and
+the coverage goals it is for are clearly described.
+
+ Test suite:
+ Test 1.
+ (iteration 1) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+
+ Test 2.
+ (iteration 1) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+ (iteration 2) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+
+ Test 3.
+ (iteration 1) desired_climb=0.000000f, estimator_z_dot=-1.000000f
+ (iteration 2) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+
+ Test 4.
+ (iteration 1) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+ (iteration 2) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+ (iteration 3) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+ (iteration 4) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+ (iteration 5) desired_climb=0.000000f, estimator_z_dot=-1.000000f
+ (iteration 6) desired_climb=1.000000f, estimator_z_dot=-1.000000f
+
+ Test 5.
+ (iteration 1) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+ (iteration 2) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+ (iteration 3) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+ (iteration 4) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+ (iteration 5) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+ (iteration 6) desired_climb=-1.000000f, estimator_z_dot=1.000000f
+
+The option `--unwind 6` unwinds the loop inside the main function body
+six times. In order to achieve the complete coverage on all the
+instrumented goals in the PID function `climb_pid_run`, the loop must be
+unwound sufficient enough times. For example, `climb_pid_run` needs to
+be called at least six times for evaluating the condition
+`climb_sum_err>MAX_CLIMB_SUM_ERR` in line 48 to *true*. This corresponds
+to the Test 5. An introduction to the use of loop unwinding can be found
+in [Understanding Loop Unwinding](cbmc-loops.shtml).
+
+In this small tutorial, we present the automatic test suite generation
+functionality of CBMC, by applying the MC/DC code coverage criterion to
+a PID controller case study. In addition to `--cover mcdc`, other
+coverage criteria like `branch`, `decision`, `path` etc. are also
+available when calling CBMC.
+
+### Coverage Criteria
+
+The table below summarizes the coverage criteria that CBMC supports.
+
+Criterion |Definition
+----------|----------
+assertion |For every assertion, generate a test that reaches it
+location |For every location, generate a test that reaches it
+branch |Generate a test for every branch outcome
+decision |Generate a test for both outcomes of every Boolean expression that is not an operand of a propositional connective
+condition |Generate a test for both outcomes of every Boolean expression
+mcdc |Modified Condition/Decision Coverage (MC/DC)
+path |Bounded path coverage
+cover |Generate a test for every `__CPROVER_cover` statement
+
+
+\section man_satabs SATABS---Predicate Abstraction with SAT
+
+\subsection man_satabs-overview Overview
+
+This section describes SATABS from the point of view of the user. To
+learn about the technology implemented in SATABS, read
+\ref man_satabs-background "this".
+
+We assume you have already installed SATABS and the necessary support
+files on your system. If not so, please follow
+\ref man_install-satabs "these instructions".
+
+While users of SATABS almost never have to be concerned about the
+underlying refinement abstraction algorithms, understanding the classes
+of properties that can be verified is crucial. Predicate abstraction is
+most effective when applied to *control-flow dominated properties*. As
+an example, reconsider the following program (lock-example-fixed.c):
+
+ _Bool nondet_bool();
+ _Bool LOCK = 0;
+
+ _Bool lock() {
+ if(nondet_bool()) {
+ assert(!LOCK);
+ LOCK=1;
+ return 1; }
+
+ return 0;
+ }
+
+ void unlock() {
+ assert(LOCK);
+ LOCK=0;
+ }
+
+ int main() {
+ unsigned got_lock = 0;
+ int times;
+
+ while(times > 0) {
+ if(lock()) {
+ got_lock++;
+ /* critical section */
+ }
+
+ if(got_lock!=0) {
+ unlock();
+ got_lock--;
+ }
+
+ times--;
+ }
+ }
+
+The two assertions in the program model that the functions `lock()` and
+`unlock()` are called in the right order. Note that the value of `times`
+is chosen non-deterministically and is not bounded. The program has no
+run-time bound, and thus, unwinding the code with CBMC will never
+terminate.
+
+### Working with Claims
+
+The two assertions will give rise to two *properties*. Each property is
+associated to a specific line of code, i.e., a property is violated when
+some condition can become false at the corresponding program location.
+SATABS will generate a list of all properties for the programs as
+follows:
+
+ satabs lock-example-fixed.c --show-properties
+
+SATABS will list two properties; each property corresponds to one of the
+two assertions. We can use SATABS to verify both properties as follows:
+
+ satabs lock-example-fixed.c
+
+SATABS will conclude the verification successfully, that is, both
+assertions hold for execution traces of any length.
+
+By default, SATABS attempts to verify all properties at once. A single
+property can be verified (or refuted) by using the `--property id`
+option of SATABS, where `id` denotes the identifier of the property in
+the list obtained by calling SATABS with the `--show-properties` flag.
+Whenever a property is violated, SATABS reports a feasible path that
+leads to a state in which the condition that corresponds to the violated
+property evaluates to false.
+
+\subsection man_satabs-libraries Programs that use Libraries
+
+SATABS cannot check programs that use functions that are only available
+in binary (compiled) form (this restriction is not imposed by the
+verification algorithms that are used by SATABS – they also work on
+assembly code). The reason is simply that so far no assembly language
+frontend is available for SATABS. At the moment, (library) functions for
+which no C source code is available have to be replaced by stubs. The
+usage of stubs and harnesses (as known from unit testing) also allows to
+check more complicated properties (like, for example, whether function
+`fopen` is always called before `fclose`). This technique is explained
+in detail in the \ref man_satabs-tutorials "SATABS tutorials".
+
+\subsection man_satabs-unit-test Unit Testing with SATABS
+
+The example presented \ref man_satabs-tutorial-driver "here" is
+obviously a toy example and can hardly be used to convince your project
+manager to use static verification in your next project. Even though we
+recommend to use formal verification and specification already in the
+early phases of your project, the sad truth is that in most projects
+verification (of any kind) is still pushed to the very end of the
+development cycle. Therefore, this section is dedicated to the
+verification of legacy code. However, the techniques presented here can
+also be used for *unit testing*.
+
+Unit testing is used in most software development projects, and static
+verification with SATABS can be very well combined with this technique.
+Unit testing relies on a number test cases that yield the desired code
+coverage. Such test cases are implemented by a software testing engineer
+in terms of a test harness (aka test driver) and a set of function
+stubs. Typically, a slight modification to the test harness allows it to
+be used with SATABS. Replacing the explicit input values with
+non-deterministic inputs (as explained in
+\ref man_satabs-tutorial-aeon "here" and
+\ref man_satabs-tutorial-driver "here")) guarantees that SATABS will try
+to achieve **full** path and state coverage (due to the fact that
+predicate abstraction implicitly detects equivalence classes). However,
+it is not guaranteed that SATABS terminates in all cases. Keep in mind
+that you must not make any assumptions about the validity of the
+properties if SATABS did not run to completion!
+
+### Further Reading
+
+- [Model Checking Concurrent Linux Device
+ Drivers](http://www.kroening.com/publications/view-publications-wbwk2007.html)
+- [SATABS: SAT-based Predicate Abstraction for
+ ANSI-C](http://www-2.cs.cmu.edu/~svc/papers/view-publications-cksy2005.html)
+- [Predicate Abstraction of ANSI-C Programs using
+ SAT](http://www-2.cs.cmu.edu/~svc/papers/view-publications-cksy2004.html)
+
+\subsection man_satabs-background Background
+
+### Sound Abstractions
+
+This section provides background information on how SATABS operates.
+Even for very trivial C programs it is impossible to exhaustively
+examine their state space (which is potentially unbounded). However, not
+all details in a C program necessarily contribute to a bug, so it may be
+sufficient to only examine those parts of the program that are somehow
+related to a bug.
+
+In practice, many static verification tools (such as `lint`) try to
+achieve this goal by applying heuristics. This approach comes at a cost:
+bugs might be overlooked because the heuristics do not cover all
+relevant aspects of the program. Therefore, the conclusion that a
+program is correct whenever such a static verification tool is unable to
+find an error is invalid.
+
+\image html cegar-1.png "CEGAR Loop"
+
+A more sophisticated approach that has been very successful recently is
+to generate a *sound* abstraction of the original program. In this
+context, *soundness* refers to the fact that the abstract program
+contains (at least) all relevant behaviors (i.e., bugs) that are present
+in the original program. In the Figure above, the first component strips
+details from the original program. The number of possible behaviors
+increases as the number of details in the abstract program decreases.
+Intuitively, the reason is that whenever the model checking tool lacks
+the information that is necessary to make an accurate decision on
+whether a branch of an control flow statement can be taken or not, both
+branches have to be considered.
+
+In the resulting *abstract program*, a set of concrete states is
+subsumed by means of a single abstract state. Consider the following
+figure:
+
+
+
+The concrete states *x*~1~ and *x*~2~ are mapped to an abstract state
+*X*, and similarly *Y* subsumes *y*~1~ and *y*~2~. However, all
+transitions that are possible in the concrete program are also possible
+in the abstract model. The abstract transition *X* → *Y* summarizes the
+concrete transitions *x*~1~ → *y*~1~ and *x*~1~ → *x*~1~, and *Y* → *X*
+corresponds to *x*~1~ → *x*~2~. The behavior *X* → *Y* → *X* is feasible
+in the original program, because it maps to *x*~1~ → *x*~1~ → *x*~2~.
+However, *Y* → *X* → *Y* is feasible only in the abstract model.
+
+### Spurious Counterexamples
+
+The consequence is that the model checker (component number two in the
+figure above) possibly reports a *spurious* counterexample. We call a
+counterexample spurious whenever it is feasible in the current abstract
+model but not in the original program. However, whenever the model
+checker is unable to find an execution trace that violates the given
+property, we can conclude that there is no such trace in the original
+program, either.
+
+The feasibility of counterexamples is checked by *symbolic simulation*
+(performed by component three in the figure above). If the
+counterexample is indeed feasible, SATABS found a bug in the original
+program and reports it to the user.
+
+### Automatic Refinement
+
+On the other hand, infeasible counterexamples (that originate from
+abstract behaviors that result from the omission of details and are not
+present in the original program) are never reported to the user.
+Instead, the information is used in order to refine the abstraction such
+that the spurious counterexample is not part of the refined model
+anymore. For instance, the reason for the infeasibility of *Y* → *X* →
+*Y* is that neither *y*~1~ nor *x*~1~ can be reached from *x*~2~.
+Therefore, the abstraction can be refined by partitioning *X*.
+
+The refinement steps can be illustrated as follows:
+
+
+
+The first step (1) is to generate a very coarse abstraction with a very
+small state space. This abstraction is then successively refined (2, 3,
+...) until either a feasible counterexample is found or the abstract
+program is detailed enough to show that there is no path that leads to a
+violation of the given property. The problem is that this point is not
+necessarily reached for every input program, i.e., it is possible that
+the the abstraction refinement loop never terminates. Therefore, SATABS
+allows to specify an upper bound for the number of iterations.
+
+When this upper bound is reached and no counterexample was found, this
+does not necessarily mean that there is none. In this case, you cannot
+make any conclusions at all with respect to the correctness of the input
+program.
+
+\subsection man_satabs-tutorials Tutorials
+
+We provide an introduction to model checking "real" C programs with
+SATABS using two small examples.
+
+\subsubsection man_satabs-tutorial-driver Reference Counting in Linux Device Drivers
+
+Microsoft's [SLAM](http://research.microsoft.com/SLAM) toolkit has been
+successfully used to find bugs in Windows device drivers. SLAM
+automatically verifies device driver whether a device driver adheres to
+a set of specifications. SLAM provides a test harness for device drivers
+that calls the device driver dispatch routines in a non-deterministic
+order. Therefore, the Model Checker examines all combinations of calls.
+Motivated by the success this approach, we provide a toy example based
+on Linux device drivers. For a more complete approach to the
+verification of Linux device drivers, consider
+[DDVerify](http://www.cprover.org/ddverify/).
+
+Dynamically loadable modules enable the Linux Kernel to load device
+drivers on demand and to release them when they are not needed anymore.
+When a device driver is registered, the kernel provides a major number
+that is used to uniquely identify the device driver. The corresponding
+device can be accessed through special files in the filesystem; by
+convention, they are located in the `/dev` directory. If a process
+accesses a device file the kernel calls the corresponding `open`, `read`
+and `write` functions of the device driver. Since a driver must not be
+released by the kernel as long as it is used by at least one process,
+the device driver must maintain a usage counter (in more recent Linux
+kernels, this is done automatically, however, drivers that must maintain
+backward compatibility have to adjust this counter).
+
+We provide a skeleton of such a driver. Download the files
+assets/spec.c, assets/driver.c, assets/driver.h, assets/kdev_t.h, and
+assets/modules.h.
+
+The driver contains following functions:
+
+1. `register_chrdev`: (in assets/spec.c) Registers a character device.
+ In our implementation, the function sets the variable `usecount` to
+ zero and returns a major number for this device (a constant, if the
+ user provides 0 as argument for the major number, and the value
+ specified by the user otherwise).
+
+ int usecount;
+
+ int register_chrdev (unsigned int major, const char* name)
+ {
+ usecount = 0;
+ if (major == 0)
+ return MAJOR_NUMBER;
+ return major;
+ }
+
+2. `unregister_chrdev`: (in assets/spec.c) Unregisters a character
+ device. This function asserts that the device is not used by any
+ process anymore (we use the macro `MOD_IN_USE` to check this).
+
+ int unregister_chrdev (unsigned int major, const char* name)
+ {
+ if (MOD_IN_USE)
+ {
+ ERROR: assert (0);
+ }
+ else
+ return 0;
+ }
+
+3. `dummy_open`: (in assets/driver.c) This function increases the
+ `usecount`. If the device is locked by some other process
+ `dummy_open` returns -1. Otherwise it locks the device for the
+ caller.
+
+4. `dummy_read`: (in assets/driver.c) This function "simulates" a read
+ access to the device. In fact it does nothing, since we are
+ currently not interested in the potential buffer overflow that may
+ result from a call to this function. Note the usage of the function
+ `nondet_int`: This is an internal SATABS-function that
+ nondeterministically returns an arbitrary integer value. The
+ function `__CPROVER_assume` tells SATABS to ignore all traces that
+ do not adhere to the given assumption. Therefore, whenever the lock
+ is held, `dummy_read` will return a value between 0 and `max`. If
+ the lock is not held, then `dummy_read` returns -1.
+
+5. `dummy_release`: (in assets/driver.c) If the lock is held, then
+ `dummy_release` decreases the `usecount`, releases the lock, and
+ returns 0. Otherwise, the function returns -1.
+
+We now want to check if any *valid* sequence of calls of the dispatch
+functions (in driver.c) can lead to the violation of the assertion (in
+assets/spec.c). Obviously, a call to `dummy_open` that is immediately
+followed by a call to `unregister_chrdev` violates the assertion.
+
+The function `main` in spec.c gives an example of how these functions
+are called. First, a character device "`dummy`" is registered. The major
+number is stored in the `inode` structure of the device. The values for
+the file structure are assigned non-deterministically. We rule out
+invalid sequences of calls by ensuring that no device is unregistered
+while it is still locked. We use the following model checking harness
+for calling the dispatching functions:
+
+ random = nondet_uchar ();
+ __CPROVER_assume (0 <= random && random <= 3);
+
+ switch (random)
+ {
+ case 1:
+ rval = dummy_open (&inode, &my_file);
+ if (rval == 0)
+ lock_held = TRUE;
+ break;
+ case 2:
+ __CPROVER_assume (lock_held);
+ count = dummy_read (&my_file, buffer, BUF_SIZE);
+ break;
+ case 3:
+ dummy_release (&inode, &my_file);
+ lock_held = FALSE;
+ break;
+ default:
+ break;
+ }
+
+The variable `random` is assigned non-deterministically. Subsequently,
+the value of `random` is restricted to be 0 &le `random` ≤ 3 by a call
+to `__CPROVER_assume`. Whenever the value of `random` is not in this
+interval, the corresponding execution trace is simply discarded by
+SATABS. Depending on the value of `random`, the harness calls either
+`dummy_open`, `dummy_read` or `dummy_close`. Therefore, if there is a
+sequence of calls to these three functions that leads to a violation of
+the assertion in `unregister_chrdev`, then SATABS will eventually
+consider it.
+
+If we ask SATABS to show us the properties it verifies with
+
+ satabs driver.c spec.c --show-properties
+
+for our example, we obtain
+
+1. Claim unregister\_chrdev.1:\
+ file spec.c line 18 function unregister\_chrdev\
+ MOD\_IN\_USE in unregister\_chrdev\
+ FALSE
+
+2. Claim dummy\_open.1:\
+ file driver.c line 15 function dummy\_open\
+ i\_rdev mismatch\
+ (unsigned int)inode->i\_rdev >> 8 == (unsigned
+ int)dummy\_major
+
+It seems obvious that the property dummy\_open.1 can never be violated.
+SATABS confirms this assumption: We call
+
+ satabs driver.c spec.c --property dummy_open.1
+
+and SATABS reports `VERIFICATION SUCCESSFUL` after a few iterations.
+
+If we try to verify property unregister\_chrdev.1, SATABS reports that
+the property in line 18 in file spec.c is violated (i.e., the assertion
+does not hold, therefore the `VERIFICATION FAILED`). Furthermore, SATABS
+provides a detailed description of the problem in the form of a
+counterexample (i.e., an execution trace that violates the property). On
+this trace, `dummy_open` is called **twice**, leading to a `usecount` of 2.
+The second call of course fails with `rval=-1`, but the counter is
+increased nevertheless:
+
+ int dummy_open (struct inode *inode, struct file *filp)
+ {
+ __CPROVER_assert(MAJOR (inode->i_rdev) == dummy_major,
+ "i_rdev mismatch");
+ MOD_INC_USE_COUNT;
+
+ if (locked)
+ return -1;
+ locked = TRUE;
+
+ return 0; /* success */
+ }
+
+Then, `dummy_release` is called to release the lock on the device.
+Finally, the loop is left and the call to `unregister_chrdev` results in
+a violation of the assertion (since `usecount` is still 1, even though
+`locked=0`).
+
+\subsubsection man_satabs-tutorial-aeon Buffer Overflow in a Mail Transfer Agent
+
+We explain how to model check Aeon version 0.2a, a small mail transfer
+agent written by Piotr Benetkiewicz. The description advertises Aeon as
+a "*good choice for **hardened** or minimalistic boxes*". The sources
+are available
+[here](http://www.cprover.org/satabs/examples/loop_detection/aeon-0.2a.tar.gz).
+
+Our first naive attempt to verify Aeon using
+
+ satabs *.c
+
+produces a positive result, but also warns us that the property holds
+trivially. It also reveals that a large number library functions are
+missing: SATABS is unable to find the source code for library functions
+like `send`, `write` and `close`.
+
+Now, do you have to provide a body for all missing library functions?
+There is no easy answer to this question, but a viable answer would be
+"most likely not". It is necessary to understand how SATABS handles
+functions without bodies: It simply assumes that such a function returns
+an arbitrary value, but that no other locations than the one on the left
+hand side of the assignment are changed. Obviously, there are cases in
+which this assumption is unsound, since the function potentially
+modifies all memory locations that it can somehow address.
+
+We now use static analysis to generate array bounds checks for Aeon:
+
+ satabs *.c --pointer-check --bounds-check --show-properties
+
+SATABS will show about 300 properties in various functions (read
+\ref man_instrumentation-properties "this" for more information on the
+property instrumentation). Now consider the first few lines of the
+`main` function of Aeon:
+
+ int main(int argc, char **argv)
+ {
+ char settings[MAX_SETTINGS][MAX_LEN];
+ ...
+ numSet = getConfig(settings);
+ if (numSet == -1) {
+ logEntry("Missing config file!");
+ exit(1);
+ }
+ ...
+
+and the function `getConfig` in `lib_aeon.c`:
+
+ int getConfig(char settings[MAX_SETTINGS][MAX_LEN])
+ {
+ char home[MAX_LEN];
+ FILE *fp; /* .rc file handler */
+ int numSet = 0; /* number of settings */
+
+ strcpy(home, getenv("HOME")); /* get home path */
+ strcat(home, "/.aeonrc"); /* full path to rc file */
+ fp = fopen(home, "r");
+ if (fp == NULL) return -1; /* no cfg - ERROR */
+ while (fgets(settings[numSet], MAX_LEN-1, fp)
+ && (numSet < MAX_SETTINGS)) numSet++;
+ fclose(fp);
+
+ return numSet;
+ }
+
+The function `getConfig` makes calls to `strcpy`, `strcat`, `getenv`,
+`fopen`, `fgets`, and `fclose`. It is very easy to provide an
+implementation for the functions from the string library (string.h), and
+SATABS comes with meaningful definitions for most of them. The
+definition of `getenv` is not so straight-forward. The man-page of
+`getenv` (which we obtain by entering `man 3 getenv` in a Unix or cygwin
+command prompt) tells us:
+
+ `` `getenv' `` searches the list of environment variable names
+ and values (using the global pointer `char **environ`) for a
+ variable whose name matches the string at `NAME`. If a variable name
+ matches, `getenv` returns a pointer to the associated value.
+
+SATABS has no information whatsoever about the content of `environ`.
+Even if SATABS could access the environment variables on your
+computer, a successful verification of `Aeon` would then only guarantee
+that the properties for this program hold on your computer with a
+specific set of environment variables. We have to assume that
+`environ` contains environment variables that have an arbitrary
+content of arbitrary length. The content of environment variables is
+not only arbitrary but could be malefic, since it can be modified by the
+user. The approximation of the behavior of `getenv` that is shipped with
+SATABS completely ignores the content of the string.
+
+Now let us have another look at the properties that SATABS generates for
+the models of the the string library and for `getenv`. Most of these
+properties require that we verify that the upper and lower bounds of
+buffers or arrays are not violated. Let us look at one of the properties
+that SATABS generates for the code in function `getConfig`:
+
+ Claim getConfig.3: file lib_aeon.c line 19 function getConfig dereference failure: NULL plus offset pointer !(SAME-OBJECT(src, NULL))`
+
+The model of the function `strcpy` dereferences the pointer returned by
+`getenv`, which may return a NULL pointer. This possibility is detected
+by the static analysis, and thus a corresponding property is generated.
+Let us check this specific property:
+
+ satabs *.c --pointer-check --bounds-check --property getConfig.3
+
+SATABS immediately returns a counterexample path that demonstrates how
+`getenv` returns a NULL, which is subsequently dereferenced. We have
+identified the first bug in this program: it requires that the
+environment variable HOME is set, and crashes otherwise.
+
+Let us examine one more property in the same function:
+
+ Claim getConfig.7: file lib_aeon.c line 19 function getConfig dereference failure: array `home' upper bound !(POINTER_OFFSET(dst) + (int)i >= 512) || !(SAME-OBJECT(dst, &home[0]))
+
+This property asserts that the upper bound of the array `home` is not
+violated. The variable `home` looks familiar: We encountered it in the
+function `getConfig` given above. The function `getenv` in combination
+with functions `strcpy`, `strcat` or `sprintf` is indeed often the
+source for buffer overflows. Therefore, we try to use SATABS to check
+the upper bound of the array `home`:
+
+ satabs *.c --pointer-check --bounds-check --property getConfig.7
+
+SATABS runs for quite a while and will eventually give up, telling us
+that its upper bound for abstraction refinement iterations has been
+exceeded. This is not exactly the result we were hoping for, and we
+could now increase the bound for iterations with help of the
+`--iterations` command line switch of SATABS.
+
+Before we do this, let us investigate why SATABS has failed to provide a
+useful result. The function `strcpy` contains a loop that counts from 1
+to the length of the input string. Predicate abstraction, the mechanism
+SATABS is based on, is unable to detect such loops and will therefore
+unroll the loop body as often as necessary. The array `home` has
+`MAX_LEN` elements, and `MAX_LEN` is defined to be 512 in `aeon.h`.
+Therefore, SATABS would have to run through at least 512 iterations,
+only to verify (or reject) one of the more than 300 properties! Does
+this fact defeat the purpose of static verification?
+
+We can make the job easier: after reducing the value of `MAX_LEN` in
+`aeon.h` to a small value, say to 10, SATABS provides a counterexample
+trace that demonstrates how the buffer overflow be reproduced. If you
+use the Eclipse plugin (as described \ref man_install-eclipse "here"),
+you can step through this counterexample. The trace contains the string
+that is returned by `getenv`.
+
+
+\section man_modelling Modelling
+
+\subsection man_modelling-nondet Nondeterminism
+
+### Rationale
+
+Programs typically read inputs from an environment. These inputs can
+take the form of data read from a file, keyboard or network socket, or
+arguments passed on the command line. It is usually desirable to analyze
+the program for any choice of these inputs. In Model Checking, inputs
+are therefore modeled by means of *nondeterminism*, which means that the
+value of the input is not specified. The program may follow any
+computation that results from any choice of inputs.
+
+### Sources of Nondeterminism
+
+The CPROVER tools support the following sources of nondeterminism:
+
+- functions that read inputs from the environments;
+- the thread schedule in concurrent programs;
+- initial values of local-scoped variables and memory allocated with
+ `malloc`;
+- initial values of variables that are `extern` in all compilation
+ units;
+- explicit functions for generating nondeterminism.
+
+The CPROVER tools are shipped with a number of stubs for the most
+commonly used library functions. When executing a statement such as
+`getchar()`, a nondeterministic value is chosen instead of reading a
+character from the keyboard.
+
+When desired, nondeterminism can be introduced explicitly into the
+program by means of functions that begin with the prefix `nondet_`. As
+an example, the following function returns a nondeterministically chosen
+unsigned short int:
+
+ unsigned short int nondet_ushortint();
+
+Note that the body of the function is not defined. The name of the
+function itself is irrelevant (save for the prefix), but must be unique.
+Also note that a nondeterministic choice is not to be confused with a
+probabilistic (or random) choice.
+
+### Uninterpreted Functions
+
+It may be necessary to check parts of a program independently.
+Nondeterminism can be used to over-approximate the behaviour of part of
+the system which is not being checked. Rather than calling a complex or
+unrelated function, a nondeterministic stub is used. However, separate
+calls to the function can return different results, even for the same
+inputs. If the function output only depends on its inputs then this can
+introduce spurious errors. To avoid this problem, functions whose names
+begin with the prefix `__CPROVER_uninterpreted_` are treated as
+uninterpreted functions. Their value is non-deterministic but different
+invocations will return the same value if their inputs are the same.
+Note that uninterpreted functions are not supported by all back-end
+solvers.
+
+\subsection man_modelling-assumptions Modeling with Assertions and Assumptions
+
+### Assertions
+
+[Assertions](http://en.wikipedia.org/wiki/Assertion_%28computing%29) are
+statements within the program that attempt to capture the programmer's
+intent. The ANSI-C standard defines a header file
+[assert.h](http://en.wikipedia.org/wiki/Assert.h), which offers a macro
+`assert(cond)`. When executing a statement such as
+
+ assert(p!=NULL);
+
+the execution is aborted with an error message if the condition
+evaluates to *false*, i.e., if `p` is NULL in the example above. The
+CPROVER tools can check the validity of the programmer-annotated
+assertions statically. Specifically, the CPROVER tools will check that
+the assertions hold for *any* nondeterministic choice that the program
+can make. The static assertion checks can be disabled using the
+`--no-assertions` command line option.
+
+In addition, there is a CPROVER-specific way to specify assertions,
+using the built-in function `__CPROVER_assert`:
+
+ __CPROVER_assert(p!=NULL, "p is not NULL");
+
+The (mandatory) string that is passed as the second argument provides an
+informal description of the assertion. It is shown in the list of
+properties together with the condition.
+
+The assertion language of the CPROVER tools is identical to the language
+used for expressions. Note that \ref man_modelling-nondet
+"nondeterminism"
+can be exploited in order to check a range of choices. As an example,
+the following code fragment asserts that **all** elements of the array
+are zero:
+
+ int a[100], i;
+
+ ...
+
+ i=nondet_uint();
+ if(i>=0 && i<100)
+ assert(a[i]==0);
+
+The nondeterministic choice will guess the element of the array that is
+nonzero. The code fragment above is therefore equivalent to
+
+ int a[100], i;
+
+ ...
+
+ for(i=0; i<100; i++)
+ assert(a[i]==0);
+
+Future CPROVER releases will support explicit quantifiers with a syntax
+that resembles Spec\#:
+
+ __CPROVER_forall { *type identifier* ; *expression* }
+ __CPROVER_exists { *type identifier* ; *expression* }
+
+### Assumptions
+
+Assumptions are used to restrict nondeterministic choices made by the
+program. As an example, suppose we wish to model a nondeterministic
+choice that returns a number from 1 to 100. There is no integer type
+with this range. We therefore use \_\_CPROVER\_assume to restrict the
+range of a nondeterministically chosen integer:
+
+ unsigned int nondet_uint();
+
+ unsigned int one_to_hundred()
+ {
+ unsigned int result=nondet_uint();
+ __CPROVER_assume(result>=1 && result<=100);
+ return result;
+ }
+
+The function above returns the desired integer from 1 to 100. You must
+ensure that the condition given as an assumption is actually satisfiable
+by some nondeterministic choice, or otherwise the model checking step
+will pass vacuously.
+
+Also note that assumptions are never retroactive: They only affect
+assertions (or other properties) that follow them in program order. This
+is best illustrated with an example. In the following fragment, the
+assumption has no effect on the assertion, which means that the
+assertion will fail:
+
+ x=nondet_uint();
+ assert(x==100);
+ __CPROVER_assume(x==100);
+
+Assumptions do restrict the search space, but only for assertions that
+follow. As an example, the following program will pass:
+
+ int main() {
+ int x;
+
+ __CPROVER_assume(x>=1 && x<=100000);
+
+ x*=-1;
+
+ __CPROVER_assert(x<0, "x is negative");
+ }
+
+Beware that nondeterminism cannot be used to obtain the effect of
+universal quantification in assumptions. As an example,
+
+ int main() {
+ int a[10], x, y;
+
+ x=nondet_int();
+ y=nondet_int();
+ __CPROVER_assume(x>=0 && x<10 && y>=0 && y<10);
+
+ __CPROVER_assume(a[x]>=0);
+
+ assert(a[y]>=0);
+ }
+
+fails, as there is a choice of x and y which results in a counterexample
+(any choice in which x and y are different).
+
+\subsection man_modelling-pointers Pointer Model
+
+### Pointers in C
+
+C programs (and sometimes C++ programs as well) make intensive use of
+pointers in order to decouple program code from specific data. A pointer
+variable does not store data such as numbers or letters, but instead
+points to a location in memory that hold the relevant data. This section
+describes the way the CPROVER tools model pointers.
+
+### Objects and Offsets
+
+The CPROVER tools represent pointers as a pair. The first member of the
+pair is the *object* the pointer points to, and the second is the offset
+within the object.
+
+In C, objects are simply continuous fragments of memory (this definition
+of "object" is not to be confused with the use of the term in
+object-oriented programming). Variables of any type are guaranteed to be
+stored as one object, irrespectively of their type. As an example, all
+members of a struct or array belong to the same object. CPROVER simply
+assigns a number to each active object. The object number of a pointer
+`p` can be extracted using the expression `__CPROVER_POINTER_OBJECT(p)`.
+As a consequence, pointers to different objects are always different,
+which is not sound.
+
+The offset (the second member of the pair that forms a pointer) is
+relative to the beginning of the object; it uses byte granularity. As an
+example, the code fragment
+
+ unsigned array[10];
+ char *p;
+
+ p=(char *)(array+1);
+ p++;
+
+will result in a pointer with offset 5. The offset of a pointer `p` can
+be extracted using the expression `__CPROVER_POINTER_OFFSET(p)`.
+
+### Dereferencing Pointers
+
+The CPROVER tools require that pointers that are dereferenced point to a
+valid object. Assertions that check this requirement can be generated
+using the option --pointer-check and, if desired, --bounds-check. These
+options will ensure that NULL pointers are not dereferenced, and that
+dynamically allocated objects have not yet been deallocated.
+
+Furthermore, the CPROVER tools check that dynamically allocated memory
+is not deallocated twice. The goto-instrument tool is also able to add
+checks for memory leaks, i.e., it detects dynamically allocated objects
+that are not deallocated once the program terminates.
+
+The CPROVER tools support pointer typecasts. Most casts are supported,
+with the following exceptions:
+
+1. One notable exception is that pointers can only be accessed using a
+ pointer type. The conversion of a pointer into an integer-type using
+ a pointer typecast is not supported.
+
+2. Casts from integers to pointers yield a pointer that is either NULL
+ (if the integer is zero) or that point into a special array for
+ modeling [memory-mapped
+ I/O](http://en.wikipedia.org/wiki/Memory-mapped_I/O). Such pointers
+ are assumed not to overlap with any other objects. This is, of
+ course, only sound if a corresponding range check is instrumented.
+
+3. Accesses to arrays via pointers that have the array subtype need to
+ be well-aligned.
+
+### Pointers in Open Programs
+
+It is frequently desired to validate an open program, i.e., a fragment
+of a program. Some variables are left undefined. In case an undefined
+pointer is dereferenced, CBMC assumes that the pointer points to a
+separate object of appropriate type with unbounded size. The object is
+assumed not to alias with any other object. This assumption may
+obviously be wrong in specific extensions of the program.
+
+\subsection man_modelling-floating-point Floating Point
+
+The CPROVER tools support bit-accurate reasoning about IEEE-754
+floating-point and fixed-point arithmetic. The C standard contains a
+number of areas of implementation-defined behaviour with regard to
+floating-point arithmetic:
+
+- CPROVER supports C99 Appendix F, and thus, the `__STD_IEC_559__`
+ macro is defined. This means that the C `float` data type maps to
+ IEEE 754 `binary32` and `double` maps to `binary64` and operations
+ on them are as specified in IEEE 754.
+
+- `long double` can be configured to be `binary64`, `binary128`
+ (quad precision) or a 96 bit type with 15 exponent bits and 80
+ significant bits. The last is an approximation of Intel's x87
+ extended precision double data type. As the C standard allows a
+ implementations a fairly wide set of options for `long double`, it
+ is best avoided for both portable code and bit-precise analysis. The
+ default is to match the build architecture as closely as possible.
+
+- In CPROVER, floating-point expressions are evaluated at the 'natural
+ precision' (the greatest of the arguments) and not at a
+ higher precision. This corresponds to `FLT_EVAL_METHOD` set to `0`.
+ Note that this is a different policy to some platforms (see below).
+
+- Expression contraction (for example, converting `x * y + c` to
+ `fma(x,y,c)`) is not performed. In effect, the `FP_CONTRACT` pragma
+ is always off.
+
+- Constant expressions are evaluated at \`run' time wherever possible
+ and so will respect changes in the rounding mode. In effect, the
+ `FENV_ACCESS` pragma is always off. Note that floating point
+ constants are treated as doubles (unless they are followed by `f`
+ when they are float) as specified in the C standard. `goto-cc`
+ supports `-fsingle-precision-constant`, which allows
+ the (non-standard) treatment of constants as floats.
+
+- Casts from int to float and float to float make use of the current
+ rounding mode. Note that the standard requires that casts from float
+ to int use round-to-zero (i.e. truncation).
+
+### x86 and Other Platform-specific Issues
+
+Not all platforms have the same implementation-defined behaviour as
+CPROVER. This can cause mismatches between the verification environment
+and the execution environment. If this occurs, check the compiler manual
+for the choices listed above. There are two common cases that can cause
+these problems: 32-bit x86 code and the use of unsafe optimisations.
+
+Many compilers that target 32-bit x86 platforms employ a different
+evaluation method. The extended precision format of the x87 unit is used
+for all computations regardless of their native precision. Most of the
+time, this results in more accurate results and avoids edge cases.
+However, it can result in some obscure and difficult to debug behaviour.
+Checking if the `FLT_EVAL_METHOD` macro is non-zero (for these platforms
+it will typically be 2), should warn of these problems. Changing the
+compiler flags to use the SSE registers will resolve many of them, give
+a more standards-compliant platform and will likely perform better. Thus
+it is *highly* recommended. Use `-msse2 -mfpmath=sse` to enable this
+option for GCC. Visual C++ does not have an option to force the
+exclusive use of SSE instructions, but `/arch:SSE2` will pick SSE
+instructions "when it \[the compiler\] determines that it is faster to
+use the SSE/SSE2 instructions" and is thus better than `/arch:IA32`,
+which exclusively uses the x87 unit.
+
+The other common cause of discrepancy between CPROVER results and the
+actual platform are the use of unsafe optimisations. Some higher
+optimisation levels enable transformations that are unsound with respect
+to the IEEE-754 standard. Consult the compiler manual and disable any
+optimisations that are described as unsafe (for example, the GCC options
+`-ffast-math`). The options `-ffp-contract=off` (which replaces
+`-mno-fused-madd`), `-frounding-math` and `-fsignaling-nans` are needed
+for GCC to be strictly compliant with IEEE-754.
+
+### Rounding Mode
+
+CPROVER supports the four rounding modes given by IEEE-754 1985; round
+to nearest (ties to even), round up, round down and round towards zero.
+By default, round to nearest is used. However, command line options
+(`--round-to-zero`, etc.) can be used to over-ride this. If more control
+is needed, CPROVER has models of `fesetround` (for POSIX systems) and
+`_controlfp` (for Windows), which can be used to change the rounding
+mode during program execution. Furthermore, the inline assembly commands
+fstcw/fnstcw/fldcw (on x86) can be used.
+
+The rounding mode is stored in the (thread local) variable
+`__CPROVER_rounding_mode`, but users are strongly advised not to use
+this directly.
+
+### Math Library
+
+CPROVER implements some of `math.h`, including `fabs`, `fpclassify` and
+`signbit`. It has very limited support for elementary functions. Care
+must be taken when verifying properties that are dependent on these
+functions as the accuracy of implementations can vary considerably. The
+C compilers can (and many do) say that the accuracy of these functions
+is unknown.
+
+### Fixed-point Arithmetic
+
+CPROVER also has support for fixed-point types. The `--fixedbv` flag
+switches `float`, `double` and `long double` to fixed-point types. The
+length of these types is platform specific. The upper half of each type
+is the integer component and the lower half is the fractional part.
+
+
+\section man_hard-soft Hardware and Software Equivalence and Co-Verification
+
+\subsection man_hard-soft-introduction Introduction
+
+A common hardware design approach employed by many companies is to first
+write a quick prototype that behaves like the planned circuit in a
+language like ANSI-C. This program is then used for extensive testing
+and debugging, in particular of any embedded software that will later on
+be shipped with the circuit. An example is the hardware of a cell phone
+and its software. After testing and debugging of the program, the actual
+hardware design is written using hardware description languages like
+[VHDL](http://en.wikipedia.org/wiki/VHDL) or
+[Verilog](http://en.wikipedia.org/wiki/Verilog).
+
+Thus, there are two implementations of the same design: one written in
+ANSI-C, which is written for simulation, and one written in register
+transfer level HDL, which is the actual product. The ANSI-C
+implementation is usually thoroughly tested and debugged.
+
+Due to market constraints, companies aim to sell the chip as soon as
+possible, i.e., shortly after the HDL implementation is designed. There
+is usually little time for additional debugging and testing of the HDL
+implementation. Thus, an automated, or nearly automated way of
+establishing the consistency of the HDL implementation is highly
+desirable.
+
+This motivates the verification problem: we want to verify the
+consistency of the HDL implementation, i.e., the product, [using the
+ANSI-C implementation as a
+reference](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=936243&userType=inst).
+Establishing the consistency does not require a formal
+specification. However, formal methods to verify either the hardware or
+software design are still desirable.
+
+### Related Work
+
+There have been several attempts in the past to tackle the problem.
+[Semeria et al.](http://portal.acm.org/citation.cfm?id=513951) describe
+a tool for verifying the combinational equivalence of RTL-C and an HDL.
+They translate the C code into HDL and use standard equivalence checkers
+to establish the equivalence. The C code has to be very close to a
+hardware description (RTL level), which implies that the source and
+target have to be implemented in a very similar way. There are also
+variants of C specifically for this purpose. The [SystemC
+standard](http://en.wikipedia.org/wiki/SystemC) defines a subset of C++
+that can be used for synthesis. Further variants of ANSI-C for
+specifying hardware are SpecC and Handel C, among others.
+
+The concept of verifying the equivalence of a software implementation
+and a synchronous transition system was introduced by [Pnueli, Siegel,
+and Shtrichman](http://www.springerlink.com/content/ah5lpxaagrjp8ax2/).
+The C program is required to be in a very specific form, since a
+mechanical translation is assumed.
+
+In 2000, [Currie, Hu, and
+Rajan](http://doi.acm.org/10.1145/337292.337339) transform DSP assembly
+language into an equation for the Stanford Validity Checker. The
+symbolic execution of programs for comparison with RTL is now common
+practice.
+
+The previous work focuses on a small subset of ANSI-C that is
+particularly close to register transfer language. Thus, the designer is
+often required to rewrite the C program manually in order to comply
+with these constraints. We extend the methodology to handle the full set
+of ANSI-C language features. This is a challenge in the presence of
+complex, dynamic data structures and pointers that may dynamically point
+to multiple objects. Furthermore, our methodology allows arbitrary loop
+constructs.
+
+### Further Material
+
+We provide a small \ref man_hard-soft-tutorial "tutorial" and a
+description on
+\ref man_hard-soft-inputs "how to synchronize inputs between the C model and the Verilog model".
+There is also a collection of
+[benchmark problems](http://www.cprover.org/hardware/sequential-equivalence/)
+available.
+
+Further Reading
+
+- [Hardware Verification using ANSI-C Programs as a
+ Reference](http://www-2.cs.cmu.edu/~svc/papers/view-publications-ck03.html)
+- [Behavioral Consistency of C and Verilog Programs Using Bounded
+ Model Checking](http://www-2.cs.cmu.edu/~svc/papers/view-publications-cky03.html)
+- [A Tool for Checking ANSI-C
+ Programs](http://www-2.cs.cmu.edu/~svc/papers/view-publications-ckl2004.html)
+- [Checking Consistency of C and Verilog using Predicate Abstraction
+ and Induction](http://www.kroening.com/publications/view-publications-kc2004.html)
+
+
+\subsection man_hard-soft-tutorial Tutorial
+
+### Verilog vs. ANSI-C
+
+We assume that CBMC is installed on your system. If not so, follow
+\ref man_install-cbmc "these instructions".
+
+The following Verilog module implements a 4-bit counter
+(counter.v):
+
+ module top(input clk);
+
+ reg [3:0] counter;
+
+ initial counter=0;
+
+ always @(posedge clk)
+ counter=counter+1;
+
+ endmodule
+
+HW-CBMC can take Verilog modules as the one above as additional input.
+Similar as in co-simulation, the data in the Verilog modules is
+available to the C program by means of global variables. For the example
+above, the following C fragment shows the definition of the variable
+that holds the value of the `counter` register:
+
+ struct module_top {
+ unsigned int counter;
+ };
+
+ extern struct module_top top;
+
+Using this definition, the value of the `counter` register in the
+Verilog fragment above can be accessed as `top.counter`. Please note
+that the name of the variable **must** match the name of the top module.
+The C program only has a view of one state of the Verilog model. The
+Verilog model makes a transition once the function `next_timeframe()` is
+called.
+
+As CBMC performs Bounded Model Checking, the number of timeframes
+available for analysis must be bounded (SATABS). As it is desirable to
+change the bound to adjust it to the available computing capacity, the
+bound is given on the command line and not as part of the C program.
+This makes it easy to use only one C program for arbitrary bounds. The
+actual bound is available in the C program using the following
+declaration:
+
+ extern const unsigned int bound;
+
+Also note that the fragment above declares a constant variable of struct
+type. Thus, the C program can only read the trace values and is not able
+to modify them. We will later on describe how to drive inputs of the
+Verilog module from within the C program.
+
+As described in previous chapters, assertions can be used to verify
+properties of the Verilog trace. As an example, the following program
+checks two values of the trace of the counter module (counter.c):
+
+ void next_timeframe();
+
+ struct module_top {
+ unsigned int counter;
+ };
+
+ extern struct module_top top;
+
+ int main() {
+ next_timeframe();
+ next_timeframe();
+ assert(top.counter==2);
+ next_timeframe();
+ assert(top.counter==3);
+ }
+
+The following CBMC command line checks these assertions with a bound of
+20:
+
+ hw-cbmc counter.c counter.v --module top --bound 20
+
+Note that a specific version of CBMC is used, called `hw-cbmc`. The
+module name given must match the name of the module in the Verilog file.
+Multiple Verilog files can be given on the command line.
+
+The `--bound` parameter is not to be confused with the `--unwind`
+parameter. While the `--unwind` parameter specifies the maximum
+unwinding depth for loops within the C program, the `--bound` parameter
+specifies the number of times the transition relation of the Verilog
+module is to be unwound.
+
+### Counterexamples
+
+For the given example, the verification is successful. If the first
+assertion is changed to
+
+ assert(top.counter==10);
+
+and the bound on the command line is changed to 6, CBMC will produce a
+counterexample. CBMC produces two traces: One for the C program, which
+matches the traces described earlier, and a separate trace for the
+Verilog module. The values of the registers in the Verilog module are
+also shown in the C trace as part of the initial state.
+
+ Initial State
+ ----------------------------------------------------
+ bound=6 (00000000000000000000000000000110)
+ counter={ 0, 1, 2, 3, 4, 5, 6 }
+
+ Failed assertion: assertion line 6 function main
+
+ Transition system state 0
+ ----------------------------------------------------
+ counter=0 (0000)
+
+ Transition system state 1
+ ----------------------------------------------------
+ counter=1 (0001)
+
+ Transition system state 2
+ ----------------------------------------------------
+ counter=2 (0010)
+
+ Transition system state 3
+ ----------------------------------------------------
+ counter=3 (0011)
+
+ Transition system state 4
+ ----------------------------------------------------
+ counter=4 (0100)
+
+ Transition system state 5
+ ----------------------------------------------------
+ counter=5 (0101)
+
+ Transition system state 6
+ ----------------------------------------------------
+ counter=6 (0110)
+
+### Using the Bound
+
+The following program is using the bound variable to check the counter
+value in all cycles:
+
+ void next_timeframe();
+ extern const unsigned int bound;
+
+ struct module_top {
+ unsigned int counter;
+ };
+
+ extern struct module_top top;
+
+ int main() {
+ unsigned cycle;
+
+ for(cycle=0; cycle
+ #include
+
+ int main() {
+ printf("sizeof(long int): %d\n", (int)sizeof(long int));
+ printf("sizeof(int *): %d\n", (int)sizeof(int *));
+ assert(0);
+ }
+
+The counterexample trace contains the strings printed by the `printf`
+command.
+
+The effects of endianness are more subtle. Try the following program
+with `--big-endian` and `--little-endian`:
+
+ #include
+ #include
+
+ int main() {
+ int i=0x01020304;
+ char *p=(char *)&i;
+ printf("Bytes of i: %d, %d, %d, %d\n", p[0], p[1], p[2], p[3]);
+ assert(0);
+ }
+
+
+\subsection man_instrumentation-properties Property Instrumentation
+
+### Properties
+
+We have mentioned *properties* several times so far, but we never
+explained *what* kind of properties CBMC and SATABS can verify. We cover
+this topic in more detail in this section.
+
+Both CBMC and SATABS use
+[assertions](http://en.wikipedia.org/wiki/Assertion_(computing)) to
+specify program properties. Assertions are properties of the state of
+the program when the program reaches a particular program location.
+Assertions are often written by the programmer by means of the `assert`
+macro.
+
+In addition to the assertions written by the programmer, assertions for
+specific properties can also be generated automatically by CBMC and
+SATABS, often relieving the programmer from writing "obvious"
+assertions.
+
+CBMC and SATABS come with an assertion generator called
+`goto-instrument`, which performs a conservative [static
+analysis](http://en.wikipedia.org/wiki/Static_code_analysis) to
+determine program locations that potentially contain a bug. Due to the
+imprecision of the static analysis, it is important to emphasize that
+these generated assertions are only *potential* bugs, and that the Model
+Checker first needs to confirm that they are indeed genuine bugs.
+
+The assertion generator can generate assertions for the verification of
+the following properties:
+
+- **Buffer overflows.** For each array access, check whether the upper
+ and lower bounds are violated.
+- **Pointer safety.** Search for `NULL`-pointer dereferences or
+ dereferences of other invalid pointers.
+
+- **Division by zero.** Check whether there is a division by zero in
+ the program.
+
+- **Not-a-Number.** Check whether floating-point computation may
+ result in [NaNs](http://en.wikipedia.org/wiki/NaN).
+
+- **Unitialized local.** Check whether the program uses an
+ uninitialized local variable.
+
+- **Data race.** Check whether a concurrent program accesses a shared
+ variable at the same time in two threads.
+
+We refrain from explaining the properties above in detail. Most of them
+relate to behaviors that are left undefined by the respective language
+semantics. For a discussion on why these behaviors are usually very
+undesirable, read [this](http://blog.regehr.org/archives/213) blog post
+by John Regehr.
+
+All the properties described above are *reachability* properties. They
+are always of the form
+
+"*Is there a path through the program such that property ... is
+violated?*"
+
+The counterexamples to such properties are always program paths. Users
+of the Eclipse plugin can step through these counterexamples in a way
+that is similar to debugging programs. The installation of this plugin
+is explained \ref man_install-eclipse "here".
+
+### Using goto-instrument
+
+The goto-instrument static analyzer operates on goto-binaries, which is
+a binary representation of control-flow graphs. The goto-binary is
+extracted from program source code using goto-cc, which is explained
+\ref man_instrumentation-goto-cc "here". Given a goto-program,
+goto-instrument operates as follows:
+
+1. A goto-binary is read in.
+2. The specified static analyses are performed.
+3. Any potential bugs found are transformed into corresponding
+ assertions, and are added into the program.
+4. A new goto-binary (with assertions) is written to disc.
+
+As an example, we begin with small C program we call `expr.c` (taken
+from [here](http://www.spinroot.com/uno/)):
+
+ int *ptr;
+
+ int main(void) {
+ if (ptr)
+ *ptr = 0;
+ if (!ptr)
+ *ptr = 1;
+ }
+
+The program contains an obvious NULL-pointer dereference. We first
+compile the example program with goto-cc and then instrument the
+resulting goto-binary with pointer checks.
+
+ goto-cc expr.c -o in.gb goto-instrument in.gb out.gb --pointer-check
+
+We can now get a list of the assertions that have been generated as
+follows:
+
+ goto-instrument out.gb --show-properties
+
+Using either CBMC or SATABS on `out.gb`, we can obtain a counterexample
+trace for the NULL-pointer dereference:
+
+ cbmc out.gb
+
+The goto-instrument program supports the following checks:
+
+Flag | Check
+-----------------------------|----------------------------------------------
+`--no-assertions` | ignore user assertions
+`--bounds-check` | add array bounds checks
+`--div-by-zero-check` | add division by zero checks
+`--pointer-check` | add pointer checks
+`--signed-overflow-check` | add arithmetic over- and underflow checks
+`--unsigned-overflow-check` | add arithmetic over- and underflow checks
+`--undefined-shift-check` | add range checks for shift distances
+`--nan-check` | add floating-point NaN checks
+`--uninitialized-check` | add checks for uninitialized locals (experimental)
+`--error-label label` | check that given label is unreachable
+
+\subsection man_instrumentation-api The CPROVER API Reference
+
+The following sections summarize the functions available to programs
+that are passed to the CPROVER tools.
+
+### Functions
+
+#### \_\_CPROVER\_assume, \_\_CPROVER\_assert, assert
+
+ void __CPROVER_assume(_Bool assumption);
+ void __CPROVER_assert(_Bool assertion, const char *description);
+ void assert(_Bool assertion);
+
+The function **\_\_CPROVER\_assume** adds an expression as a constraint
+to the program. If the expression evaluates to false, the execution
+aborts without failure. More detail on the use of assumptions is in the
+section on [Assumptions and Assertions](modeling-assertions.shtml).
+
+#### \_\_CPROVER\_same\_object, \_\_CPROVER\_POINTER\_OBJECT, \_\_CPROVER\_POINTER\_OFFSET, \_\_CPROVER\_DYNAMIC\_OBJECT
+
+ _Bool __CPROVER_same_object(const void *, const void *);
+ unsigned __CPROVER_POINTER_OBJECT(const void *p);
+ signed __CPROVER_POINTER_OFFSET(const void *p);
+ _Bool __CPROVER_DYNAMIC_OBJECT(const void *p);
+
+The function **\_\_CPROVER\_same\_object** returns true if the two
+pointers given as arguments point to the same object. The function
+**\_\_CPROVER\_POINTER\_OFFSET** returns the offset of the given pointer
+relative to the base address of the object. The function
+**\_\_CPROVER\_DYNAMIC\_OBJECT** returns true if the pointer passed as
+arguments points to a dynamically allocated object.
+
+#### \_\_CPROVER\_is\_zero\_string, \_\_CPROVER\_zero\_string\_length, \_\_CPROVER\_buffer\_size
+
+ _Bool __CPROVER_is_zero_string(const void *);
+ __CPROVER_size_t __CPROVER_zero_string_length(const void *);
+ __CPROVER_size_t __CPROVER_buffer_size(const void *);
+
+#### \_\_CPROVER\_initialize
+
+ void __CPROVER_initialize(void);
+
+The function **\_\_CPROVER\_initialize** computes the initial state of
+the program. It is called prior to calling the main procedure of the
+program.
+
+#### \_\_CPROVER\_input, \_\_CPROVER\_output
+
+ void __CPROVER_input(const char *id, ...);
+ void __CPROVER_output(const char *id, ...);
+
+The functions **\_\_CPROVER\_input** and **\_\_CPROVER\_output** are
+used to report an input or output value. Note that they do not generate
+input or output values. The first argument is a string constant to
+distinguish multiple inputs and outputs (inputs are typically generated
+using nondeterminism, as described [here](modeling-nondet.shtml)). The
+string constant is followed by an arbitrary number of values of
+arbitrary types.
+
+#### \_\_CPROVER\_cover
+
+ void __CPROVER_cover(_Bool condition);
+
+This statement defines a custom coverage criterion, for usage with the
+[test suite generation feature](cover.shtml).
+
+#### \_\_CPROVER\_isnan, \_\_CPROVER\_isfinite, \_\_CPROVER\_isinf, \_\_CPROVER\_isnormal, \_\_CPROVER\_sign
+
+ _Bool __CPROVER_isnan(double f);
+ _Bool __CPROVER_isfinite(double f);
+ _Bool __CPROVER_isinf(double f);
+ _Bool __CPROVER_isnormal(double f);
+ _Bool __CPROVER_sign(double f);
+
+The function **\_\_CPROVER\_isnan** returns true if the double-precision
+floating-point number passed as argument is a
+[NaN](http://en.wikipedia.org/wiki/NaN).
+
+The function **\_\_CPROVER\_isfinite** returns true if the
+double-precision floating-point number passed as argument is a finite
+number.
+
+This function **\_\_CPROVER\_isinf** returns true if the
+double-precision floating-point number passed as argument is plus or
+minus infinity.
+
+The function **\_\_CPROVER\_isnormal** returns true if the
+double-precision floating-point number passed as argument is a normal
+number.
+
+This function **\_\_CPROVER\_sign** returns true if the double-precision
+floating-point number passed as argument is negative.
+
+#### \_\_CPROVER\_abs, \_\_CPROVER\_labs, \_\_CPROVER\_fabs, \_\_CPROVER\_fabsl, \_\_CPROVER\_fabsf
+
+ int __CPROVER_abs(int x);
+ long int __CPROVER_labs(long int x);
+ double __CPROVER_fabs(double x);
+ long double __CPROVER_fabsl(long double x);
+ float __CPROVER_fabsf(float x);
+
+These functions return the absolute value of the given argument.
+
+#### \_\_CPROVER\_array\_equal, \_\_CPROVER\_array\_copy, \_\_CPROVER\_array\_set
+
+ _Bool __CPROVER_array_equal(const void array1[], const void array2[]);
+ void __CPROVER_array_copy(const void dest[], const void src[]);
+ void __CPROVER_array_set(const void dest[], value);
+
+The function **\_\_CPROVER\_array\_equal** returns true if the values
+stored in the given arrays are equal. The function
+**\_\_CPROVER\_array\_copy** copies the contents of the array **src** to
+the array **dest**. The function **\_\_CPROVER\_array\_set** initializes
+the array **dest** with the given value.
+
+#### Uninterpreted Functions
+
+Uninterpreted functions are documented \ref man_modelling-nondet "here".
+
+### Predefined Types and Symbols
+
+#### \_\_CPROVER\_bitvector
+
+ __CPROVER_bitvector [ expression ]
+
+This type is only available in the C frontend. It is used to specify a
+bit vector with arbitrary but fixed size. The usual integer type
+modifiers **signed** and **unsigned** can be applied. The usual
+arithmetic promotions will be applied to operands of this type.
+
+#### \_\_CPROVER\_floatbv
+
+ __CPROVER_floatbv [ expression ] [ expression ]
+
+This type is only available in the C frontend. It is used to specify an
+IEEE-754 floating point number with arbitrary but fixed size. The first
+parameter is the total size (in bits) of the number, and the second is
+the size (in bits) of the mantissa, or significand (not including the
+hidden bit, thus for single precision this should be 23).
+
+#### \_\_CPROVER\_fixedbv
+
+ __CPROVER_fixedbv [ expression ] [ expression ]
+
+This type is only available in the C frontend. It is used to specify a
+fixed-point bit vector with arbitrary but fixed size. The first
+parameter is the total size (in bits) of the type, and the second is the
+number of bits after the radix point.
+
+#### \_\_CPROVER\_size\_t
+
+The type of sizeof expressions.
+
+#### \_\_CPROVER\_rounding\_mode
+
+ extern int __CPROVER_rounding_mode;
+
+This variable contains the IEEE floating-point arithmetic rounding mode.
+
+#### \_\_CPROVER\_constant\_infinity\_uint
+
+This is a constant that models a large unsigned integer.
+
+#### \_\_CPROVER\_integer, \_\_CPROVER\_rational
+
+**\_\_CPROVER\_integer** is an unbounded, signed integer type.
+**\_\_CPROVER\_rational** is an unbounded, signed rational number type.
+
+#### \_\_CPROVER\_memory
+
+ extern unsigned char __CPROVER_memory[];
+
+This array models the contents of integer-addressed memory.
+
+#### \_\_CPROVER::unsignedbv<N> (C++ only)
+
+This type is the equivalent of **unsigned \_\_CPROVER\_bitvector\[N\]**
+in the C++ front-end.
+
+#### \_\_CPROVER::signedbv<N> (C++ only)
+
+This type is the equivalent of **signed \_\_CPROVER\_bitvector\[N\]** in
+the C++ front-end.
+
+#### \_\_CPROVER::fixedbv<N> (C++ only)
+
+This type is the equivalent of **\_\_CPROVER\_fixedbv\[N,m\]** in the
+C++ front-end.
+
+### Concurrency
+
+Asynchronous threads are created by preceding an instruction with a
+label with the prefix **\_\_CPROVER\_ASYNC\_**.
+
+\subsection man_goto-cc-linux goto-cc: Extracting Models from the Linux Kernel
+
+The Linux kernel code consists of more than 11 million lines of
+low-level C and is frequently used to evaluate static analysis
+techniques. In the following, we show how to extract models from Linux
+2.6.39.
+
+1. First of all, you will need to make sure you have around 100 GB of
+ free disc space available.
+
+2. Download the Kernel sources at
+ .
+
+3. Now do
+
+ `bunzip2 linux-2.6.39.tar.bz2`\
+ `tar xvf linux-2.6.39.tar`\
+ `cd linux-2.6.39`
+
+4. Now ensure that you can actually compile a kernel by doing
+
+ `make defconfig`\
+ `make`
+
+ These steps need to succeed before you can try to extract models
+ from the kernel.
+
+5. Now compile [gcc-wrap.c](gcc-wrap.c) and put the resulting binary
+ into a directory that is in your PATH variable:
+
+ `lwp-download http://www.cprover.org/cprover-manual/gcc-wrap.c`\
+ `gcc gcc-wrap.c -o gcc-wrap`\
+ `cp gcc-wrap ~/bin/`\
+
+ This assumes that the directory `~/bin` exists and is in your
+ PATH variable.
+
+6. Now change the variable CC in the kernel Makefile as follows:
+
+ CC = ~/bin/gcc-wrap
+
+7. Now do
+
+ make clean
+ make
+
+ This will re-compile the kernel, but this time retaining the
+ preprocessed source files.
+
+8. You can now compile the preprocessed source files with goto-cc as
+ follows:
+
+ find ./ -name .tmp_*.i > source-file-list
+ for a in `cat source-file-list` ; do
+ goto-cc -c $a -o $a.gb
+ done
+
+ Note that it is important that the word-size of the kernel
+ configuration matches that of goto-cc. Otherwise, compile-time
+ assertions will fail, generating the error message "bit field size
+ is negative". For a kernel configured for a 64-bit word-width, pass
+ the option --64 to goto-cc.
+
+The resulting `.gb` files can be passed to any of the CPROVER tools.
+
+\subsection man_goto-cc-apache goto-cc: Extracting Models from the Apache HTTPD
+
+The [Apache HTTPD](http://httpd.apache.org/) is still the most
+frequently used web server. Together with the relevant libraries, it
+consists of around 0.4 million lines of C code. In the following, we
+show how to extract models from Apache HTTPD 2.4.2.
+
+1. First of all, we download the sources of Apache HTTPD and two
+ supporting libraries and uncompress them:
+
+ lwp-download http://www.mirrorservice.org/sites/ftp.apache.org/apr/apr-1.4.6.tar.bz2
+ lwp-download http://www.mirrorservice.org/sites/ftp.apache.org/apr/apr-util-1.4.1.tar.bz2
+ lwp-download http://mirror.catn.com/pub/apache/httpd/httpd-2.4.2.tar.bz2
+ bunzip2 < apr-1.4.6.tar.bz2 | tar x
+ bunzip2 < apr-util-1.4.1.tar.bz2 | tar x
+ bunzip2 < httpd-2.4.2.tar.bz2 | tar x
+
+2. Now compile [gcc-wrap.c](gcc-wrap.c) and put the resulting binary
+ into a directory that is in your PATH variable:
+
+ lwp-download http://www.cprover.org/cprover-manual/gcc-wrap.c
+ gcc gcc-wrap.c -o gcc-wrap
+ cp gcc-wrap ~/bin/
+
+ This assumes that the directory `~/bin` exists and is in your
+ PATH variable.
+
+3. We now build the sources with gcc:
+
+ (cd apr-1.4.6; ./configure; make CC=gcc-wrap)
+ (cd apr-util-1.4.1; ./configure --with-apr=../apr-1.4.6 ; make CC=gcc-wrap)
+ (cd httpd-2.4.2; ./configure --with-apr=../apr-1.4.6 --with-apr-util=../apr-util-1.4.1 ; make CC=gcc-wrap)
+
+4. You can now compile the preprocessed source files with goto-cc as
+ follows:
+
+ find ./ -name *.i > source-file-list
+ for a in `cat source-file-list` ; do
+ goto-cc -c $a -o $a.gb
+ done
+
+The resulting `.gb` files can be passed to any of the CPROVER tools.
diff --git a/doc/guide/CBMC-guide.tex b/doc/guide/CBMC-guide.tex
deleted file mode 100644
index 0285bf82c17..00000000000
--- a/doc/guide/CBMC-guide.tex
+++ /dev/null
@@ -1,714 +0,0 @@
-\documentclass{article}
-
-\newcommand{\dir}[1]{\texttt{#1}}
-\newcommand{\file}[1]{\texttt{#1}}
-\newcommand{\code}[1]{\texttt{#1}}
-\newcommand{\prog}[1]{\texttt{#1}}
-
-\title{Beginner's Guide to CPROVER}
-\author{Martin Brain\thanks{But most of the content is from Michael Tautschnig}}
-
-\begin{document}
-
-\maketitle
-
-\section{Background Information}
-
-First off; read the CPROVER manual. It describes how to get, build
-and use CBMC and SATABS. This document covers the internals of the
-system and how to get started on development.
-
-
-\subsection{Documentation}
-
-Apart from the (user-orientated) CPROVER manual and this document,
-most of the rest of the documentation is inline in the code
-as \texttt{doxygen} and some comments. A man page for CBMC, goto-cc
-and goto-instrument is contained in the \dir{doc/} directory and gives
-some options for these tools. All of these could be improved
-and patches are very welcome. In some cases the algorithms used are
-described in the relevant papers.
-
-\subsection{Architecture}
-
-CPROVER is structured in a similar fashion to a compiler. It has
-language specific front-ends which perform limited syntactic analysis
-and then convert to an intermediate format. The intermediate format
-can be output to files (this is what \texttt{goto-cc} does) and are
-(informally) referred to as ``goto binaries'' or ``goto programs''.
-The back-end are tools process this format, either directly from the
-front-end or from it's saved output. These include a wide range of
-analysis and transformation tools (see Section \ref{section:other-apps}).
-
-\subsection{Coding Standards}
-
-CPROVER is written in a fairly minimalist subset of C++; templates and
-meta-programming are avoided except where necessary. The standard
-library is used but in many cases there are alternatives provided in
-\dir{util/} (see Section \ref{section:util}) which are preferred.
-Boost is not used.
-
-Patches should be formatted so that code is indented with two space
-characters, not tab and wrapped to 75 or 72 columns. Headers for
-doxygen should be given (and preferably filled!) and the author will
-be the person who first created the file.
-
-Identifiers should be lower case with underscores to separate words.
-Types (classes, structures and typedefs) names must\footnote{There are
-a couple of exceptions, including the graph classes} end with a
-\code{t}. Types that model types (i.e. C types in the program that is
-being interpreted) are named with \code{\_typet}.
-For example \code{ui\_message\_handlert} rather than
-\code{UI\_message\_handlert} or \code{UIMessageHandler} and
-\code{union\_typet}.
-
-
-
-\subsection{How to Contribute}
-
-Fixes, changes and enhancements to the CPROVER code base should be
-developed against the \texttt{trunk} version and submitted to Daniel
-as patches produced by \texttt{diff -Naur} or \texttt{svn diff}.
-Entire applications are best developed independently (\texttt{git svn}
-is a popular choice for tracking the main trunk but also having local
-development) until it is clear what their utility, future and
-maintenance is likely to be.
-
-
-\subsection{Other Useful Code}
-\label{section:other-apps}
-
-The CPROVER subversion archive contains a number of separate
-programs. Others are developed separately as patches or separate
-branches.% New applications are initially developed in their version
-%control system and may be merged into the main subversion system
-%depending on their utility, popularity and maintenance.
-Interfaces are have been and are continuing to stablise but older code
-may require work to compile and function correctly.
-
-In the main archive:
-
-\begin{description}
- \item[\prog{CBMC}]{A bounded model checking tool for C and C++. See
- Section \ref{section:CBMC}.}
- \item[\prog{goto-cc}]{A drop-in, flag compatible replacement for GCC
- and other compilers that produces goto-programs rather than
- executable binaries. See Section \ref{section:goto-cc}.}
- \item[\prog{goto-instrument}]{A collection of functions for
- instrumenting and modifying goto-programs. See Section
- \ref{section:goto-instrument}.}
-\end{description}
-
-Model checkers and similar tools:
-
-\begin{description}
- \item[\prog{SatABS}]{A CEGAR model checker using predicate
- abstraction. Is roughly 10,000 lines of code (on top of the CPROVER
- code base) and is developed in its own subversion archive. It
- uses an external model checker to find potentially feasible paths.
- Key limitations are related to code with pointers and there is
- scope for significant improvement.}
-
- \item[\prog{Scratch}]{Alistair Donaldson's k-induction based tool.
- The front-end is in the old project CVS and some of the
- functionality is in \prog{goto-instrument}.}
-
- \item[\prog{Wolverine}]{An implementation of Ken McMillan's IMPACT
- algorithm for sequential programs. In the old project CVS.}
-
- \item[\prog{C-Impact}]{An implementation of Ken McMillan's IMPACT
- algorithm for parallel programs. In the old project CVS.}
-
- \item[\prog{LoopFrog}]{A loop summarisation tool.}
-
- \item[\prog{???}]{Christoph's termination analyser.}
-
-\end{description}
-
-
-Test case generation:
-
-\begin{description}
- \item[\prog{cover}]{A basic test-input generation tool. In the old
- project CVS.}
-
- \item[\prog{FShell}]{A test-input generation tool that allows the
- user to specify the desired coverage using a custom language
- (which includes regular expressions over paths). It uses
- incremental SAT and is thus faster than the na\"ive ``add
- assertions one at a time and use the counter-examples''
- approach. Is developed in its own subversion.}
-\end{description}
-
-
-
-Alternative front-ends and input translators:
-
-\begin{description}
- \item[\prog{Scoot}]{A System-C to C translator. Probably in the old
- project CVS.}
-
- \item[\prog{???}]{A Simulink to C translator. In the old project CVS.}
-
- \item[\prog{???}]{A Verilog front-end. In the old project CVS.}
-
- \item[\prog{???}]{A converter from Codewarrior project files to
- Makefiles. In the old project CVS.}
-\end{description}
-
-
-Other tools:
-
-\begin{description}
- \item[\prog{ai}]{Leo's hybrid abstract interpretation / CEGAR tool.}
-
- \item[\prog{DeltaCheck?}]{Ajitha's slicing tool, aimed at locating
- changes and differential verification. In the old project CVS.}
-\end{description}
-
-
-There are tools based on the CPROVER framework from other research
-groups which are not listed here.
-
-
-
-
-
-
-\section{Source Walkthrough}
-
-This section walks through the code bases in a rough order of interest
-/ comprehensibility to the new developer.
-
-
-
-\subsection{\dir{doc}}
-
-At the moment just contains the CBMC man page.
-
-\subsection{\dir{regression/}}
-
-The regression tests are currently being moved from CVS. The
-\dir{regression/} directory contains all of those that have been
-moved. They are grouped into directories for each of the tools. Each
-of these contains a directory per test case, containing a C or C++
-file that triggers the bug and a \file{.dsc} file that describes the
-tests, expected output and so on. There is a Perl script,
-\file{test.pl} that is used to invoke the tests as:
-
-\begin{center}
- \code{../test.pl -c PATH\_TO\_CBMC}
-\end{center}
-
-The \code{--help} option gives instructions for use and the format of
-the description files.
-
-
-
-\subsection{\dir{src/}}
-
-The source code is divided into a number of sub-directories, each
-containing the code for a different part of the system. In the top
-level files there are only a few files:
-
-\begin{description}
- \item[\file{config.inc}]{The user-editable configuration parameters
- for the build process. The main use of this file is setting the
- paths for the various external SAT solvers that are used. As
- such, anyone building from source will likely need to edit this.}
- \item[\file{Makefile}]{The main systems Make file. Parallel builds
- are supported and encouraged; please don't break them!}
- \item[\file{common}]{System specific magic required to get the
- system to build. This should only need to be edited if porting
- CBMC to a new platform / build environment.}
- \item[\file{doxygen.cfg}]{The config file for doxygen.cfg}
-\end{description}
-
-
-
-\subsubsection{\dir{util/}}
-\label{section:util}
-
-\dir{util/} contains the low-level data structures and manipulation
-functions that are used through-out the CPROVER code-base. For almost
-any low-level task, the code required is probably in \dir{util/}. Key
-files include:
-
-\begin{description}
- \item[\file{irep.h}]{This contains the definition of \code{irept},
- the basis of many of the data structures in the project. They
- should not be used directly; one of the derived classes should be
- used. For more information see Section \ref{section:irept}.}
- \item[\file{expr.h}]{The parent class for all of the expressions.
- Provides a number of generic functions, \code{exprt} can be used
- with these but when creating data, subclasses of \code{exprt}
- should be used.}
- \item[\file{std\_expr.h}]{Provides subclasses of \code{exprt} for
- common kinds of expression for example \code{plus\_exprt},
- \code{minus\_exprt}, \code{dereference\_exprt}. These are the
- intended interface for creating expressions.}
- \item[\file{std\_types.h}]{Provides subclasses of \code{typet} (a
- subclass of \code{irept}) to model C and C++ types. This is one
- of the preferred interfaces to \code{irept}. The front-ends handle
- type promotion and most coercision so the type system and checking
- goto-programs is simpler than C.}
- \item[\file{dstring.h}]{The CPROVER string class. This enables
- sharing between strings which significantly reduces the amount of
- memory required and speeds comparison. \code{dstring} should not
- be used directly, \code{irep\_idt} should be used instead, which
- (dependent on build options) is an alias for \code{dstring}.}
- \item[\file{mp\_arith.h}]{The wrapper class for multi-precision
- arithmetic within CPROVER. Also see \file{arith\_tools.h}.}
- \item[\file{ieee\_float.h}]{The arbitrary precision float model used
- within CPROVER. Based on \code{mp\_integer}s.}
- \item[\file{context.h}]{A generic container for symbol table like
- constructs such as namespaces. Lookup gives type, location of
- declaration, name, `pretty name', whether it is static or not.}
- \item[\file{namespace.h}]{The preferred interface for the context
- class. The key function is \code{lookup} which converts a string
- (\code{irep\_idt}) to a symbol which gives the scope of
- declaration, type and so on. This works for functions as well as variables.}
-\end{description}
-
-
-
-
-\subsubsection{\dir{langapi/}}
-
-This contains the basic interfaces and support classes for programming
-language front ends. Developers only really need look at this if they
-are adding support for a new language. It's main users are the two
-(in trunk) language front-ends; \dir{ansi-c/} and \dir{cpp/}.
-
-
-\subsubsection{\dir{ansi-c/}}
-
-Contains the front-end for ANSI C, plus a variety of common
-extensions. This parses the file, performs some basic sanity checks
-(this is one area in which the UI could be improved; patches most
-welcome) and then produces a goto-program (see below). The parser is
-a traditional Flex / Bison system.
-
-\file{internal\_addition.c} contains the implementation of various
-`magic' functions that are that allow control of the analysis from the
-source code level. These include assertions, assumptions, atomic
-blocks, memory fences and rounding modes.
-
-The \dir{library/} subdirectory contains versions of some of the C
-standard header files that make use of the CPROVER built-in
-functions. This allows CPROVER programs to be `aware' of the
-functionality and model it correctly. Examples include
-\file{stdio.c}, \file{string.c}, \file{setjmp.c} and various threading
-interfaces.
-
-
-\subsubsection{\dir{cpp/}}
-
-This directory contains the C++ front-end. It supports the subset of
-C++ commonly found in embedded and system applications.
-Consequentially it doesn't have full support for templates and many of
-the more advanced and obscure C++ features. The subset of the
-language that can be handled is being extended over time so bug
-reports of programs that cannot be parsed are useful.
-
-The functionality is very similar to the ANSI C front end; parsing the
-code and converting to goto-programs. It makes use of code from
-\dir{langapi} and \dir{ansi-c}.
-
-
-
-
-\subsubsection{\dir{goto-programs/}}
-
-Goto programs are the intermediate representation of the CPROVER tool
-chain. They are language independent and similar to many of the
-compiler intermediate languages. Section \ref{section:goto-programs}
-describes the \code{goto\_programt} and \code{goto\_functionst} data
-structures in detail. However it useful to understand some of the
-basic concepts. Each function is a list of instructions, each of
-which has a type (one of 18 kinds of instruction), a code expression,
-a guard expression and potentially some targets for the next
-instruction. They are not natively in static single-assign (SSA)
-form. Transitions are nondeterministic (although in practise the
-guards on the transitions normally cover form a disjoint cover of all
-possibilities). Local variables have non-deterministic values if they
-are not initialised. Variables and data within the program is
-commonly one of three types (parameterised by width):
-\code{unsignedbv\_typet}, \code{signedbv\_typet} and
-\code{floatbv\_typet}, see \file{util/std\_types.h} for more
-information. Goto programs can be serialised in a binary (wrapped in
-ELF headers) format or in XML (see the various \code{\_serialization}
-files).
-
-
-The \prog{cbmc} option \code{--show-goto-programs} is often a
-good starting point as it outputs goto-programs in a human
-readable form. However there are a few things to be aware of.
-Functions have an internal name (for example \code{c::f00}) and a
-`pretty name' (for example \code{f00}) and which is used depends on
-whether it is internal or being presented to the user. The
-\code{main} method is the `logical' main which is not necessarily the
-main method from the code. In the output \code{NONDET} is use to
-represent a nondeterministic assignment to a variable. Likewise
-\code{IF} as a beautified \code{GOTO} instruction where the guard
-expression is used as the condition. \code{RETURN} instructions may
-be dropped if they precede an \code{END\_FUNCTION} instruction. The
-comment lines are generated from the \code{locationt} field of the
-\code{instructiont} structure.
-
-\dir{goto-programs/} is one of the few places in the CPROVER codebase
-that templates are used. The intention is to allow the general
-architecture of program and functions to be used for other
-formalisms. At the moment most of the templates have a single
-instantiation; for example \code{goto\_functionst} and
-\code{goto\_function\_templatet} and \code{goto\_programt} and \code{goto\_program\_templatet}.
-
-
-
-\subsubsection{\dir{goto-symex/}}
-
-This directory contains a symbolic evaluation system for
-goto-programs. This takes a goto-program and translates it to an
-equation system by traversing the program, branching and merging and
-unwinding loops as needed. Each reverse goto has a separate counter
-(the actual counting is handled by \prog{cbmc}, see the \code{--unwind}
-and \code{--unwind-set} options). When a counter limit
-is reach, an assertion can be added to explicitly show when analysis
-is incomplete. The symbolic execution includes constant folding so
-loops that have a constant number of iterations will be handled
-completely (assuming the unwinding limit is sufficient).
-
-The output of the symbolic execution is a system of equations; an
-object containing a list of \code{symex\_target\_elements}, each of
-which are equalities between \prog{expr} expressions. See
-\file{symex\_target\_equation.h}. The output is in static, single
-assignment (SSA) form, which is \emph{not} the case for goto-programs.
-
-
-
-\subsubsection{\dir{pointer-analysis/}}
-
-To perform symbolic execution on programs with dereferencing of
-arbitrary pointers, some alias analysis is needed.
-\dir{pointer-analysis} contains the three levels of analysis; flow and
-context insensitive, context sensitive and flow and context
-sensitive. The code needed is subtle and sophisticated and thus there
-may be bugs.
-
-
-
-
-\subsubsection{\dir{solvers/}}
-
-The \dir{solvers/} directory contains interfaces to a number of
-different decision procedures, roughly one per directory.
-
-\begin{description}
-
- \item[prop/]{The basic and common functionality. The key file is
- \file{prop\_conv.h} which defines \code{prop\_convt}. This is the
- base class that is used to interface to the decision procedures.
- The key functions are \code{convert} which takes an \code{exprt}
- and converts it to the appropriate, solver specific, data
- structures and \code{dec\_solve} (inherited from
- \code{decision\_proceduret}) which invokes the actual decision
- procedures. Individual decision procedures (named
- \code{*\_dect}) objects can be created but \code{prop\_convt} is
- the preferred interface for code that uses them.}
-
- \item[flattening/]{A library that converts operations to
- bit-vectors, including calling the conversions in \dir{floatbv} as
- necessary. Is implemented as a simple conversion (with caching)
- and then a post-processing function that adds extra constraints.
- This is not used by the SMT or CVC back-ends.}
-
- %%%%
-
- \item[dplib/]{Provides the \code{dplib\_dect} object which used the
- decision procedure library from ``Decision Procedures : An
- Algorithmic Point of View''.}
-
- \item[cvc/]{Provides the \code{cvc\_dect} type which interfaces to
- the old (pre SMTLib) input format for the CVC family of solvers.
- This format is still supported by depreciated in favour of SMTLib 2.}
-
- \item[smt1/]{Provides the \code{smt1\_dect} type which converts the
- formulae to SMTLib version 1 and then invokes one of Boolector,
- CVC3, OpenSMT, Yices, MathSAT or Z3. Again, note that this format
- is depreciated.}
-
- \item[smt2/]{Provides the \code{smt2\_dect} type which functions in
- a similar way to \code{smt1\_dect}, calling Boolector, CVC3,
- MathSAT, Yices or Z3. Note that the interaction with the solver
- is batched and uses temporary files rather than using the
- interactive command supported by SMTLib 2. With the \code{--fpa}
- option, this output mode will not flatten the floating point
- arithmetic and instead output the proposed SMTLib floating point
- standard.}
-
- \item[qbf/]{Back-ends for a variety of QBF solvers. Appears to be
- no longer used or maintained.}
-
- \item[sat/]{Back-ends for a variety of SAT solvers and DIMACS
- output.}
-\end{description}
-
-
-
-
-
-\subsubsection{\dir{cbmc/}}
-\label{section:CBMC}
-
-This contains the first full application. CBMC is a bounded model
-checker that uses the front ends (\dir{ansi-c}, \dir{cpp}, goto-program or others)
-to create a goto-program, \dir{goto-symex} to unwind the loops the given
-number of times and to produce and equation system and finally
-\dir{solvers} to find a counter-example (technically, \dir{goto-symex}
-is then used to construct the counter-example trace).
-
-
-
-\subsubsection{\dir{goto-cc/}}
-\label{section:goto-cc}
-
-\dir{goto-cc} is a compiler replacement that just performs the first
-step of the process; converting C or C++ programs to goto-binaries.
-It is intended to be dropped in to an existing build procedure in
-place of the compiler, thus it emulates flags that would affect the
-semantics of the code produced. Which set of flags are emulated
-depends on the naming of the \dir{goto-cc/} binary. If it is called
-\prog{goto-cc} then it emulates GCC flags, \prog{goto-armcc} emulates
-the ARM compiler, \prog{goto-cl} emulates VCC and \prog{goto-cw}
-emulates the Code Warrior compiler. The output of this tool can then
-be used with \prog{cbmc} or \prog{goto-instrument}.
-
-
-
-
-\subsubsection{\dir{goto-instrument/}}
-\label{section:goto-instrument}
-
-The \dir{goto-instrument/} directory contains a number of tools, one
-per file, that are built into the \prog{goto-instrument} program. All
-of them take in a goto-program (produced by \prog{goto-cc}) and either
-modify it or perform some analysis. Examples include
-\file{nondet\_static.cpp} which initialises static variables to a
-non-deterministic value, \file{nondet\_volatile.cpp} which assigns
-a non-deterministic value to any volatile variable before it is read
-and \file{weak\_memory.h} which performs the necessary transformations
-to reason about weak memory models. The exception to the ``one file
-for each piece of functionality'' rule are the program instrumentation
-options (mostly those given as ``Safety checks'' in the
-\prog{goto-instrument} help text) which are included in the
-\prog{goto-program/} directory. An example of this is
-\file{goto-program/stack\_depth.h} and the general rule seems to be
-that transformations and instrumentation that \prog{cbmc} uses should
-be in \dir{goto-program/}, others should be in \dir{goto-instrument}.
-
-\prog{goto-instrument} is a very good template for new analysis
-tools. New developers are advised to copy the directory, remove all
-files apart from \file{main.*}, \file{parseoptions.*} and the
-\file{Makefile} and use these as the skeleton of their application.
-The \code{doit()} method in \file{parseoptions.cpp} is the preferred
-location for the top level control for the program.
-
-
-
-\subsubsection{\dir{linking/}}
-
-Probably the code to emulate a linker. This allows multiple `object
-files' (goto-programs) to be linked into one `executable' (another
-goto-program), thus allowing existing build systems to be used to
-build complete goto-program binaries.
-
-
-\subsubsection{\dir{big-int/}}
-
-CPROVER is distributed with its own multi-precision arithmetic
-library; mainly for historical and portability reasons. The library is externally
-developed and thus \dir{big-int} contains the source as it is
-distributed. This should not be used directly, see
-\file{util/mp\_arith.h} for the CPROVER interface.
-
-
-
-\subsubsection{\dir{xmllang/}}
-
-CPROVER has optional XML output for results and there is an XML format
-for goto-programs. It is used to interface to various IDEs. The
-\dir{xmllang/} directory contains the parser and helper functions for
-handling this format.
-
-
-
-\subsubsection{\dir{floatbv/}}
-
-This library contains the code that is used to convert floating point
-variables (\code{floatbv}) to bit vectors (\code{bv}). This is
-referred to as `bit-blasting' and is called in the \dir{solver} code
-during conversion to SAT or SMT. It also contains the abstraction
-code described in the FMCAD09 paper.
-
-
-
-
-
-
-
-
-\section{Data Structures}
-
-This section discusses some of the key data-structures used in the
-CPROVER codebase.
-
-\subsection{\code{irept}}
-\label{section:irept}
-
-There are a large number of kind of tree structured or tree-like data
-in CPROVER. \code{irept} provides a single, unified representation for
-all of these, allowing structure sharing and reference counting of
-data. As such \code{irept} is the basic unit of data in CPROVER.
-Each \code{irept} contains\footnote{Or references, if reference
- counted data sharing is enabled. It is enabled by default; see the
- \code{SHARING} macro.} a basic unit of data (of type \code{dt})
-which contains four things:
-
-\begin{description}
-\item[\code{data}]{A string\footnote{When \code{USE\_DSTRING} is enabled (it
- is by default), this is actually a \code{dstring} and thus an
- integer which is a reference into a string table}, which is
- returned when the \code{id()} function is used.}
-\item[\code{named\_sub}]{A map from \code{irep\_namet} (a string) to
- an \code{irept}. This is used for named children,
- i.e. subexpressions, parameters, etc.}
-\item[\code{comments}]{Another map from \code{irep\_namet} to
- \code{irept} which is used for annotations and other `non-semantic' information}
-\item[\code{sub}]{A vector of \code{irept} which is used to store
- ordered but unnamed children.}
-\end{description}
-
-The \code{irept::pretty} function outputs the contents of an
-\code{irept} directly and can be used to understand an debug problems
-with \code{irept}s.
-
-
-On their own \code{irept}s do not ``mean'' anything; they are
-effectively generic tree nodes. Their interpretation depends on the
-contents of result of the \code{id} function (the \code{data}) field.
-\file{util/irep\_ids.txt} contains the complete list of \code{id}
-values. During the build process it is used to generate
-\file{util/irep\_ids.h} which gives constants for each id (named
-\code{ID\_*}). These can then be used to identify what kind of data
-\code{irept} stores and thus what can be done with it.
-
-To simplify this process, there are a variety of classes that inherit
-from \code{irept}, roughly corresponding to the ids listed
-(i.e. \code{ID\_or} (the string \code{"or''}) corresponds to the class
-\code{or\_exprt}). These give semantically relevant accessor
-functions for the data; effectively different APIs for the same
-underlying data structure. None of these classes add fields (only
-methods) and so static casting can be used. The inheritance graph of
-the subclasses of \code{irept} is a useful starting point for working
-out how to manipulate data.
-
-There are three main groups of classes (or APIs); those derived from
-\code{typet}, \code{codet} and \code{exprt} respectively. Although
-all of these inherit from \code{irept}, these are the most abstract
-level that code should handle data. If code is manipulating plain
-\code{irept}s then something is wrong with the architecture of the
-code.
-
-Many of the key descendent of \code{exprt} are declared in
-\file{std\_expr.h}. All expressions have a named subfield /
-annotation which gives the type of the expression (slightly
-simplified from C/C++ as \code{unsignedbv\_typet},
-\code{signedbv\_typet}, \code{floatbv\_typet}, etc.). All type
-conversions are explicit with an expression with \code{id() ==
- ID\_typecast} and an `interface class' named
-\code{typecast\_exprt}. One key descendent of \code{exprt} is
-\code{symbol\_exprt} which creates \code{irept} instances with the id
-of ``symbol''. These are used to represent variables; the name of
-which can be found using the \code{get\_identifier} accessor function.
-
-
-\code{codet} inherits from \code{exprt} and is defined in
-\file{std\_code.h}. They represent executable code; statements in C
-rather than expressions. In the front-end there are versions of these
-that hold whole code blocks, but in goto-programs these have been
-flattened so that each \code{irept} represents one sequence point
-(almost one line of code / one semi-colon). The most common
-descendents of \code{codet} are \code{code\_assignt} so a common
-pattern is to cast the \code{codet} to an assignment and then recurse
-on the expression on either side.
-
-
-
-
-
-
-\subsection{\code{goto-programs}}
-\label{section:goto-programs}
-
-The common starting point for working with goto-programs is the
-\code{read\_goto\_binary} function which populates an object of
-\code{goto\_functionst} type. This is defined in
-\file{goto\_functions.h} and is an instantiation of the template
-\code{goto\_functions\_templatet} which is contained in
-\file{goto\_functions\_template.h}. They are wrappers around a map from
-strings to \code{goto\_programt}'s and iteration macros are provided.
-Note that \code{goto\_function\_templatet} (no \code{s}) is defined in
-the same header as \code{goto\_functions\_templatet} and is gives the
-C type for the function and Boolean which indicates whether the body
-is available (before linking this might not always be true). Also
-note the slightly counter-intuitive naming; \code{goto\_functionst}
-instances are the top level structure representing the program and
-contain \code{goto\_programt} instances which represent the individual
-functions. At the time of writing \code{goto\_functionst} is the only
-instantiation of the template \code{goto\_functions\_templatet} but
-other could be produced if a different data-structures / kinds of models
-were needed for functions.
-
-\code{goto\_programt} is also an instantiation of a template. In a
-similar fashion it is \code{goto\_program\_templatet} and allows the
-types of the guard and expression used in instructions to be
-parameterised. Again, this is currently the only use of the template.
-As such there are only really helper functions in
-\file{goto\_program.h} and thus \code{goto\_program\_template.h} is
-probably the key file that describes the representation of (C)
-functions in the goto-program format. It is reasonably stable and
-reasonably documented and thus is a good place to start looking at the code.
-
-An instance of \code{goto\_program\_templatet} is effectively a list
-of instructions (and inner template called \code{instructiont}). It
-is important to use the copy and insertion functions that are provided
-as iterators are used to link instructions to their predecessors and
-targets and careless manipulation of the list could break these.
-Likewise there are helper macros for iterating over the instructions
-in an instance of \code{goto\_program\_templatet} and the use of these
-is good style and strongly encouraged.
-
-Individual instructions are instances of type \code{instructiont}.
-They represent one step in the function. Each has a type, an instance
-of \code{goto\_program\_instruction\_typet} which denotes what kind of
-instruction it is. They can be computational (such as \code{ASSIGN}
-or \code{FUNCTION\_CALL}), logical (such as \code{ASSUME} and
-\code{ASSERT}) or informational (such as \code{LOCATION} and
-\code{DEAD}). At the time of writing there are 18 possible values for
-\code{goto\_program\_instruction\_typet} / kinds of instruction.
-Instructions also have a guard field (the condition under which it is
-executed) and a code field (what the instruction does). These may be
-empty depending on the kind of instruction. In the default
-instantiations these are of type \code{exprt} and \code{codet}
-respectively and thus covered by the previous discussion of
-\code{irept} and its descendents. The next instructions (remembering
-that transitions are guarded by non-deterministic) are given by the
-list \code{targets} (with the corresponding list of labels
-\code{labels}) and the corresponding set of previous instructions is
-get by \code{incoming\_edges}. Finally \code{instructiont} have
-informational \code{function} and \code{location} fields that indicate
-where they are in the code.
-
-
-
-\end{document}
diff --git a/doc/html-manual/api.shtml b/doc/html-manual/api.shtml
deleted file mode 100644
index 141ae88b80a..00000000000
--- a/doc/html-manual/api.shtml
+++ /dev/null
@@ -1,323 +0,0 @@
-
-
-
-The function __CPROVER_assume adds an expression as a constraint
-to the program. If the expression evaluates to false, the execution
-aborts without failure. More detail on the use of assumptions is
-in the section on Assumptions
-and Assertions.
-
-The function __CPROVER_same_object returns true
-if the two pointers given as arguments point to the same
-object.
-The function __CPROVER_POINTER_OFFSET returns
-the offset of the given pointer relative to the base
-address of the object.
-The function __CPROVER_DYNAMIC_OBJECT
-returns true if the pointer passed
-as arguments points to a dynamically allocated object.
-
-The functions __CPROVER_input and __CPROVER_output
-are used to report an input or output value. Note that they do not generate
-input or output values. The first argument is a string constant
-to distinguish multiple inputs and outputs (inputs are typically
-generated using nondeterminism, as described
-here).
-The string constant is followed by an arbitrary number of values of
-arbitrary types.
-
-The function __CPROVER_array_equal returns true if the values
-stored in the given arrays are equal.
-The function __CPROVER_array_copy copies the contents of
-the array src to the array dest.
-The function __CPROVER_array_set initializes the array dest with
-the given value.
-
-This type is only available in the C frontend. It is used
-to specify a bit vector with arbitrary but fixed size. The
-usual integer type modifiers signed and unsigned
-can be applied. The usual arithmetic promotions will be
-applied to operands of this type.
-
-This type is only available in the C frontend. It is used
-to specify an IEEE-754 floating point number with arbitrary
-but fixed size. The first parameter is the total size (in bits)
-of the number, and the second is the size (in bits) of the
-mantissa, or significand (not including the hidden bit, thus for
-single precision this should be 23).
-
-This type is only available in the C frontend. It is used
-to specify a fixed-point bit vector with arbitrary
-but fixed size. The first parameter is the total size (in bits)
-of the type, and the second is the number of bits after the radix
-point.
-
-
-
__CPROVER_size_t
-
-
-The type of sizeof expressions.
-
-
-
__CPROVER_rounding_mode
-
-
-
-extern int __CPROVER_rounding_mode;
-
-
-
-
-This variable contains the IEEE floating-point
-arithmetic rounding mode.
-
-
-
__CPROVER_constant_infinity_uint
-
-
-This is a constant that models a large unsigned
-integer.
-
-
-
__CPROVER_integer, __CPROVER_rational
-
-
-__CPROVER_integer is an unbounded, signed integer type.
-__CPROVER_rational is an unbounded, signed rational
-number type.
-
The behavior of a C/C++ program depends on a number of
-parameters that are specific to the architecture the program was compiled
-for. The three most important architectural parameters are:
-
-
-
The width of the various scalar types; e.g., compare the value
-of sizeof(long int) on various machines.
-
-
The width of pointers; e.g., compare the value
-of sizeof(int *) on various machines.
-In general, the CPROVER tools attempt to adopt the settings of the
-particular architecture the tool itself was compiled for. For example,
-when running a 64 bit binary of CBMC on Linux, the program will be processed
-assuming that sizeof(long int)==8.
-
-
-As a consequence of these architectural parameters,
-you may observe different verification results for an identical
-program when running CBMC on different machines. In order to get
-consistent results, or when aiming at validating a program written
-for a different platform, the following command-line arguments can
-be passed to the CPROVER tools:
-
-
-
The word-width can be set with --16,
---32, --64.
-
The endianness can be defined with
---little-endian and --big-endian.
-
-
-
-When using a goto binary, CBMC and the other tools read the configuration
-from the binary, i.e., the setting when running goto-cc is the one that
-matters; the option given to the model checker is ignored in this case.
-
-
-
-In order to see the effect of the options --16,
---32 and --64, pass
-the following program to CBMC:
-The basic idea of CBMC is to model the computation of the programs up to a
-particular depth. Technically, this is achieved by a process that
-essentially amounts to unwinding loops. This concept is best
-illustrated with a generic example:
-
-
-
int main(int argc, char **argv) {
- while(cond) {
- BODY CODE
- }
-}
-
-
-
-A BMC instance that will find bugs with up to five iterations of the loop would
-contain five copies of the loop body, and essentially corresponds to checking
-the following loop-free program:
-
-
-
int main(int argc, char **argv) {
- if(cond) {
- BODY CODE COPY 1
- if(cond) {
- BODY CODE COPY 2
- if(cond) {
- BODY CODE COPY 3
- if(cond) {
- BODY CODE COPY 4
- if(cond) {
- BODY CODE COPY 5
- }
- }
- }
- }
- }
-}
-
-
-
-Note the use of the if statement to prevent the execution of
-the loop body in the case that the loop ends before five iterations are executed.
-The construction above is meant to produce a program that is trace equivalent
-with the original programs for those traces that contain up to five iterations
-of the loop.
-
-
-
-In many cases, CBMC is able to automatically determine an upper bound on the
-
-number of loop iterations. This may even work when the number of loop
-unwindings is not constant. Consider the following example:
-
-The loop in the program above has an obvious upper bound on the number of
-iterations, but note that the loop may abort prematurely depending on the
-value that is returned by f(). CBMC is nevertheless able to
-automatically unwind the loop to completion.
-
-
-This automatic detection of the unwinding
-bound may fail if the number of loop iterations is highly data-dependent.
-Furthermore, the number of iterations that are executed by any given
-loop may be too large or may simply be unbounded. For this case,
-CBMC offers the command-line option --unwind B, where
-B denotes a number that corresponds to the maximal number
-of loop unwindings CBMC performs on any loop.
-
-
-
-Note that the number of unwindings is measured by counting the number of
-backjumps. In the example above, note that the condition
-i<100 is in fact evaluated 101 times before the loop
-terminates. Thus, the loop requires a limit of 101, and not 100.
-
-
Setting Separate Unwinding Limits
-
-
-The setting given with --unwind is used globally,
-that is, for all loops in the program. In order to set individual
-limits for the loops, first use
-
-
-
- --show-loops
-
-
-
-to obtain a list of all loops in the program. Then identify the loops
-you need to set a separate bound for, and note their loop ID. Then
-use
-
-
-
- --unwindset L:B,L:B,...
-
-
-
-where L denotes a loop ID and B denotes
-the bound for that loop.
-
-
-As an example, consider a program with two loops in the function
-main:
-
-
-
- --unwindset c::main.0:10,c::main.1:20
-
-
-
-This sets a bound of 10 for the first loop, and a bound of 20 for
-the second loop.
-
-
-
-What if the number of unwindings specified is too small? In this case, bugs
-that require paths that are deeper may be missed. In order to address this
-problem, CBMC can optionally insert checks that the given unwinding bound is
-actually sufficiently large. These checks are called unwinding
-assertions, and are enabled with the option
---unwinding-assertions. Continuing the generic example above,
-this unwinding assertion for a bound of five corresponds to checking the
-following loop-free program:
-
-
-
int main(int argc, char **argv) {
- if(cond) {
- BODY CODE COPY 1
- if(cond) {
- BODY CODE COPY 2
- if(cond) {
- BODY CODE COPY 3
- if(cond) {
- BODY CODE COPY 4
- if(cond) {
- BODY CODE COPY 5
- assert(!cond);
- }
- }
- }
- }
- }
-}
-
-
-
-The unwinding assertions can be verified just like any other generated
-assertion. If all of them are proven to hold, the given loop bounds are
-sufficient for the program. This establishes a high-level
-worst-case execution time (WCET).
-
-
-
-In some cases, it is desirable to cut off very deep loops in favor
-of code that follows the loop. As an example, consider the
-following program:
-
-
-
int main() {
- for(int i=0; i<10000; i++) {
- BODY CODE
- }
-
- assert(0);
-}
-
-
-
-In the example above, small values of --unwind will
-prevent that the assertion is reached. If the code in the loop
-is considered irrelevant to the later assertion, use the option
-
-
-
- --partial-loops
-
-
-
-This option will allow paths that execute loops only partially,
-enabling a counterexample for the assertion above even for
-small unwinding bounds. The disadvantage of using this option
-is that the resulting path may be spurious, i.e., may not
-exist in the original program.
-
-
-
Depth-based Unwinding
-
-
-The loop-based unwinding bound is not always appropriate. In particular,
-it is often difficult to control the size of the generated formula
-when using the --unwind option. The option
-
-
-
- --depth nr
-
-
-
-specifies an unwinding bound in terms of the number of instructions that are
-executed on a given path, irrespectively of the number of loop iterations.
-Note that CBMC uses the number of instructions in the control-flow graph
-as the criterion, not the number of instructions in the source code.
-
-We assume you have already installed CBMC and the necessary support files
-on your system. If not so, please follow
-these instructions.
-
-
-
-Like a compiler, CBMC takes the names of .c files as command line
-arguments. CBMC then translates the program and merges the function
-definitions from the various .c files, just like a linker. But instead
-of producing a binary for execution, CBMC performs symbolic simulation on
-the program.
-
-
-
-As an example, consider the following simple program, named
-file1.c:
-
-Of course, this program is faulty, as the argv array might have fewer
-than three elements, and then the array access argv[2] is out of bounds.
-Now, run CBMC as follows:
-
The two options --bounds-check and --pointer-check
-instruct CBMC to look for errors related to pointers and array bounds.
-CBMC will print the list of properties it checks. Note that it lists,
-among others, a property labeled with "object bounds in argv" together with
-the location of the faulty array access. As you can see, CBMC largely
-determines the property it needs to check itself. This is realized by means
-of a preliminary static analysis, which relies on computing a fixed point on
-various abstract
-domains. More detail on automatically generated properties is provided
-here.
-
-
-Note that these automatically generated properties need not necessarily
-correspond to bugs – these are just potential flaws, as
-abstract interpretation might be imprecise. Whether these properties
-hold or correspond to actual bugs needs to be determined by further analysis.
-
-
-
-CBMC performs this analysis using symbolic simulation, which
-corresponds to a translation of the program into a formula. The formula is
-then combined with the property. Let's look at the formula that is
-generated by CBMC's symbolic simulation:
-With this option, CBMC performs the symbolic simulation and prints the
-verification conditions on the screen. A verification condition needs
-to be proven to be valid by a
-decision procedure in order to assert that the corresponding property
-holds. Let's run the decision procedure:
-CBMC transforms the equation you have seen before into CNF and passes it to
-a SAT solver (more background on this step is in the book on Decision Procedures). It
-then determines which of the properties that it has generated for the
-program hold and which do not. Using the SAT solver, CBMC detects that the
-property for the object bounds of argv does not hold, and will
-thus print a line as follows:
-
-
-
-[main.pointer_dereference.6] dereference failure: object bounds in argv[(signed long int)2]: FAILURE
-
-
-
Counterexample Traces
-
-
-Let us have a closer look at this property and why it fails. To aid the
-understanding of the problem, CBMC can generate a counterexample
-trace for failed properties. To obtain this trace, run
-
-
-
- cbmc file1.c --bounds-check --trace
-
-
-
-CBMC then prints a counterexample trace, i.e., a program trace that begins
-with main and ends in a state which violates the property. In
-our example, the program trace ends in the faulty array access. It also
-gives the values the input variables must have for the bug to occur. In
-this example, argc must be one to trigger the out-of-bounds
-array access. If you add a branch to the example that requires that
-argc>=3, the bug is fixed and CBMC will report that the
-proofs of all properties have been successful.
-
-
Verifying Modules
-
-
-In the example above, we used a program that starts with a main
-function. However, CBMC is aimed at embedded software, and these
-kinds of programs usually have different entry points. Furthermore, CBMC
-is also useful for verifying program modules. Consider the following example,
-called file2.c:
-
-
-
int array[10];
-int sum() {
- unsigned i, sum;
-
- sum=0;
- for(i=0; i<10; i++)
- sum+=array[i];
-}
-
-
-
-In order to set the entry point to the sum function, run
-
-It is often necessary to build a suitable harness for the function
-in order to set up the environment appropriately.
-
-
-
Loop Unwinding
-
-
-When running the previous example, you will have noted that CBMC unwinds the
-for loop in the program. As CBMC performs Bounded Model
-Checking, all loops have to have a finite upper run-time bound in order to
-guarantee that all bugs are found. CBMC can optionally check that enough
-unwinding is performed. As an example, consider the program binsearch.c:
-
-If you run CBMC on this function, you will notice that the unwinding
-does not stop on its own. The built-in simplifier is not able to determine
-a run time bound for this loop. The unwinding bound has to be given as a
-command line argument:
-CBMC verifies that verifies the array accesses are within the bounds; note
-that this actually depends on the result of the right shift. In addition,
-as CBMC is given the option
---unwinding-assertions, it also checks that enough
-unwinding is done, i.e., it proves a run-time bound. For any lower
-unwinding bound, there are traces that require more loop iterations. Thus,
-CBMC will report that the unwinding assertion has failed. As usual, a counterexample
-trace that documents this can be obtained with the option
---property.
-
-
-
Unbounded Loops
-
-
-CBMC can also be used for programs with unbounded loops. In this
-case, CBMC is used for bug hunting only; CBMC does not attempt to find
-all bugs. The following program
-(lock-example.c) is an example
-of a program with a user-specified property:
-The while loop in the main function has no
-(useful) run-time bound. Thus, a bound has to be set on the amount of
-unwinding that CBMC performs. There are two ways to do so:
-
-
-
-
-
The --unwind command-line parameter can to be used to limit
-the number of times loops are unwound.
-
-
The --depth command-line parameter can be used to limit
-the number of program steps to be processed.
-
-
-
-
-Given the option --unwinding-assertions, CBMC checks whether
-the arugment to --unwind is large enough to cover all program
-paths. If the argument is too small, CBMC will detect that not enough
-unwinding is done reports that an unwinding assertion has failed.
-
-
-
-Reconsider the example. For a loop unwinding bound of one, no bug is found.
-But already for a bound of two, CBMC detects a trace that violates an
-assertion. Without unwinding assertions, or when using the --depth
-command line switch, CBMC does not prove the program correct, but it can be
-helpful to find program bugs. The various command line options that CBMC
-offers for loop unwinding are described in the section on
-understanding loop unwinding.
-
-
A Note About Compilers and the ANSI-C Library
-
-
-Most C programs make use of functions provided by a library; instances are
-functions from the standard ANSI-C library such as malloc or
-printf. The verification of programs that use such functions
-has two requirements:
-
-
-
-
Appropriate header files have to be provided. These header files
-contain declarations of the functions that are to be used.
-
-
-
Appropriate definitions have to be provided.
-
-
-
-
-Most C compilers come with header files for the ANSI-C library functions.
-We briefly discuss how to obtain/install these library files.
-
-
-
Linux
-
-
-Linux systems that are able to compile software are usually equipped with
-the appropriate header files. Consult the documentation of your distribution
-on how to install the compiler and the header files. First try to compile
-some significant program before attempting to verify it.
-
-
-
Windows
-
-
-On Microsoft Windows, CBMC is pre-configured to use the compiler that is
-part of Microsoft's Visual Studio. Microsoft's
-Visual Studio Community is fully featured and available for download for
-free from the Microsoft webpage. Visual Studio installs the usual set of
-header files together with the compiler. However, the Visual Studio
-compiler requires a large set of environment variables to function
-correctly. It is therefore required to run CBMC from the Visual Studio
-Command Prompt, which can be found in the menu Visual Studio
-Tools.
-
-
-
-Note that in both cases, only header files are available. CBMC only
-comes with a small set of definitions, which includes functions such as
-malloc. Detailed information about the built-in definitions is
-here.
-
-
Command Line Interface
-
-
-This section describes the command line interface of CBMC. Like a C
-compiler, CBMC takes the names of the .c source files as arguments.
-Additional options allow to customize the behavior of CBMC. Use
-cbmc --help to get a full list of the available options.
-
-
-
-Structured output can be obtained from CBMC using the option --xml-ui.
-Any output from CBMC (e.g., counterexamples) will then use an XML
-representation.
-
-We assume that CBMC is installed on your system. If not so, follow
-these instructions.
-
-
-CBMC can be used to automatically generate test cases that satisfy a certain code coverage
-criterion. Common coverage criteria include branch coverage, condition
-coverage and Modified
-Condition/Decision Coverage (MC/DC). Among others, MC/DC is required
-by several avionics software development guidelines to ensure adequate testing
-of safety critical software. Briefly, in order to satisfy MC/DC,
-for every conditional statement containing boolean decisions, each Boolean
-variable should be evaluated one time to "true" and one time to "false",
-in a way that affects the outcome of the decision.
-
-
-
-In the following, we are going to demonstrate how to apply the test suite
-generation functionality in CBMC, by means of a case study. The program
-pid.c is an excerpt from a real-time embedded benchmark PapaBench,
-and implements part of a fly-by-wire autopilot for an Unmanned Aerial Vehicle (UAV).
-It is adjusted mildly for our purposes.
-
-
-
-The aim of function climb_pid_run is to control the vertical climb of the UAV.
-Details on the theory behind this operation are documented in the wiki for the Paparazzi UAV project.
-The behaviour of this simple controller, supposing that the desired speed is 0.5 meters per second,
-is plotted in the Figure below.
-
-In order to test the PID controller, we construct a main control loop,
-which repeatedly invokes the function climb_pid_run (line 69).
-This PID function has two input variables: the desired speed desired_climb
-and the estimated speed estimated_z_dot.
-In the beginning of each loop iteration, values of the inputs are assigned non-deterministically.
-Subsequently, the __CPROVER_assume statement in lines 63 and 64 guarantees that
-both values are bounded within a valid range.
-The __CPROVER_input and __CPROVER_output will help clarify the inputs
-and outputs of interest for generating test suites.
-
-
-
-To demonstrate the automatic test suite generation in CBMC,
-we call the following command and we are going to explain the command line options one by one.
-
-
-
cbmc pid.c --cover mcdc --unwind 6 --xml-ui
-
-
-
-The option --cover mcdc specifies the code coverage criterion.
-There are four conditional statements in the PID function: in line 41, line 44,
-line 48 and line 49.
-To satisfy MC/DC, the test suite has to meet multiple requirements.
-For instance, each conditional statement needs to evaluate to true and false.
-Consider the condition "pprz>=0 && pprz<=MAX_PPRZ" in line 41. CBMC
-instruments three coverage goals to control the respective evaluated results of "pprz>=0" and
-"pprz<=MAX_PPRZ".
-We list them in below and they satisfy the MC/DC rules.
-Note that MAX_PPRZ is defined as 16 * 600 in line 06 of the program.
-
-The "id" of each coverage goal is automatically assigned by CBMC. For every
-coverage goal, a test suite (if there exists) that
-satisfies such a goal is printed out in XML format, as the parameter
---xml-ui is given. Multiple coverage goals can share a
-test suite, when the corresponding execution of the program satisfies all these
-goals at the same time.
-
-
-
-In the end, the following test suites are automatically generated for testing the PID controller.
-A test suite consists of a sequence of input parameters that are
-passed to the PID function climb_pid_run at each loop iteration.
-For example, Test 1 covers the MC/DC goal with id="climb_pid_run.coverage.1".
-The complete output from CBMC is in
-pid_test_suites.xml, where every test suite and the coverage goals it is for
-are clearly described.
-
-
-The option --unwind 6 unwinds the loop inside the main
-function body six times. In order to achieve the complete coverage on all the instrumented goals
-in the PID function climb_pid_run, the loop must be unwound sufficient enough times.
-For example, climb_pid_run needs to be called at least six times for evaluating the
-condition climb_sum_err>MAX_CLIMB_SUM_ERR in line 48 to true.
-This corresponds to the Test 5.
-An introduction to the use of loop unwinding can be found
-in Understanding Loop Unwinding.
-
-
-
-In this small tutorial, we present the automatic test suite generation
-functionality of CBMC, by applying the MC/DC code coverage criterion to a
-PID controller case study. In addition to --cover mcdc, other
-coverage criteria like branch, decision,
-path etc. are also available when calling CBMC.
-
-
-
Coverage Criteria
-
-
-The table below summarizes the coverage criteria that CBMC supports.
-
-
-
-
-
Criterion
Definition
-
-
assertion
-
For every assertion, generate a test that reaches it
-
-
location
-
For every location, generate a test that reaches it
-
-
branch
-
Generate a test for every branch outcome
-
-
decision
-
Generate a test for both outcomes of every Boolean expression
-that is not an operand of a propositional connective
-
-
condition
-
Generate a test for both outcomes of every Boolean expression
-
-
mcdc
-
Modified Condition/Decision Coverage (MC/DC)
-
-
path
-
Bounded path coverage
-
-
cover
-
Generate a test for every __CPROVER_cover statement
-
-The following sections provide an introduction for anybody who
-wishes to modify CBMC or build new tools on top of the APIs used
-by CBMC. They summarize key components, data structures and APIs
-that are used to build the CPROVER tools.
-
-Detailed instructions on how to build CBMC from source are given
-in the file COMPILING.
-
-
-
Components
-
-
-
From C source code file to CPROVER's IR
-
-
-The sources of the C frontend are located in the "src/ansi-c" directory. It
-uses a standard Flex/Bison setup for scanning and parsing the files. The
-Flex scanner produces a token sequence, which is turned into a tree
-representation of the input program using the Bison grammar. The
-typechecker subsequently annotates this parse tree with types and generates
-a symbol table. The symbol table is a map from identifiers (functions,
-variables and types) to their definitions.
-
-
-
-The following example illustrates how to use the frontend for parsing files and
-for translating them into a symbol table. A call to parse generates
-the parse tree of the program. The conversion into the symbol table is
-performed during type checking, which is done by a call to the
-typecheck method. The symbol table is a map from identifiers to the
-symbolt data structure.
-
-The parse trees are implemented using a class called irept. Its
-declaration and definiton can be found in the files "src/util/irep.h" and
-"src/util/irep.cpp", respectively.
-
-
-
-The excerpt below gives some details of the class irept:
-
-Every node of any tree is an object of class irept. Each node has a
-pointer to an object of class dt. The dt objects are used
-for storing the actual content of nodes. Objects of class dt are
-dynamically allocated and can be shared between nodes. A reference-counter
-mechanism is implemented to automatically free unreachable dt
-objects. A shallow copy of a tree is an O(1) operation.
-
-
-
-The field data of class dt is a (hashed) string
-representing the label of the nodes. The fields named_sub,
-comments and sub are links to childs. Edges are either
-labeled with a string or ordered. The string-labeled edges are stored in the
-map comments if their first character is '#'. Otherwise, they are
-stored in the map named_sub. The labels of edges are unique for a
-given node; however, their ordering is not preserved. The field sub
-is a vector of nodes that is used for storing the ordered children. The order
-of edges of this kind is preserved during copy.
-
-The first method returns true if the label of the node is equal to "nil".
-The second method returns false if the label of the node is equal to "nil".
-
The first method looks for an edge with label name
-and returns the corresponding child. If no edge with label name
-is found, then nil_rep is returned.
-
-
The second method does the same as the first except that if
-no edge with label name if found, then a new child is created
-and returned.
-
-
-
The third method does the same as the first except that the label
-of the child is returned (instead of a reference).
-If no edge with label name is found, then an empty
-string is returned.
-
-The first method creates a new ordered edge with a child equal to
-irep. Then it sets irep to nil. The index of the
-edge is equal to the size of vector sub before the call.
-
-
-
-The second method does the same but for labeled edges.
-
-
-
swap
-
-
void swap(irept &irep);
-
-
-
-Exchange the content of the invoked node with the one of irep.
-
-
-
make_nil
-
-
void make_nil();
-
-
-
-Set the label of the node to "nil" and remove all outgoing edges.
-
-Return a constant reference to
-sub, named_sub, and comments, respectively.
-
-
-
Types
-
-
-The class typet inherits from irept. Types may have
-subtypes. This is modeled with two edges named "subtype" and "subtypes". The
-class typet only add specialized methods for accessing the subtype
-information to the interface of irept.
-
-The first method returns true if the a subtype node exists. is not
-nil. The second method returns true is a subtypes node exists.
-
-
-
subtype and subtypes
-
-
typet &subtype();
-typest &subtypes();
-
-
-
-The first method returns a reference to the 'subtype' node.
-The second method returns a reference to the vector of subtypes.
-
-
-
Subtypes of typet
-
-
-A number of subtypes of typet exist which allow convenient
-creation and manipulation of typet objects for special types.
-
-
-
-
Class
Description
-
-
bool_typet
-
Boolean type
-
-
symbol_typet
-
Symbol type. Has edge "identifier" to a string value, which can be accessed with get_identifier and set_identifier.
-
-
struct_typet, union_typet
-
Represent a struct, resp. union types. Convenience functions to access components components().
-
-
code_typet
-
The type of a function/procedure. Convenience functions to access arguments() and return_type().
-
-
array_typet
-
Convenience function size() to access size of the array.
-
-
pointer_typet
-
Pointer type, subtype stores the type of the object pointed to.
-
-
reference_typet
-
Represents a reference type, subtype stores the type of the object referenced to.
-
-
bv_typet
-
Represents a bit vector type with variable width.
-
-
fixed_bv_typet
-
Represents a bit vector that encodes a fixed-point number.
-
-
floatbv_typet
-
Represents a bit vector that encodes a floating-point number.
-
-
string_typet
-
Represents a string type.
-
-
-
-
Source Locations
-
-
-The class source_locationt inherits from the class irept. It
-is used to store locations in text files. It adds specialized methods to
-manipulate the edges named "file", "line", "column", "function".
-
-
-
Expressions
-
-
-The class exprt inherits from class irept. Expressions
-have operands and a type. This is modeled with ordered edges for the
-operands and an edge labeled"type", respectively. The class exprt
-only adds specialized methods for accessing operands and type information
-to the interface of irept.
-
-
-
-
Representation of a binary expression
-
-
Interface of class exprt
-
-
constructors
-
-
explicit exprt(const irep_idt &id);
-
-
-
-Creates an exprt object with a given label and no type.
-
-
-
exprt(const irep_idt &id, const typet &type);
-
-
-
-Creates an exprt object with a given label and type.
-
-
-
type
-
-
const typet &type() const;
-typet &type();
-
-
-
-Return a reference to the 'type' node
-
-
-
has_operands
-
-
bool has_operands() const;
-
-
-
-Return true if the expression has at least one operand.
-
-Turn the current exprt instance into a expression of type "bool"
-with label "constant" and a single edge labeled "value", which points to
-a new node with label either "true" or "false".
-
-
-
void make_typecast(const typet &_type);
-
-
-
-Turns the current exprt instance into a typecast. The old value of
-the instance is appended as the single operand of the typecast, i.e., the
-result is a typecast-expression of the old expression to the indicated type.
-
-
-
void make_not();
-
-
-
-Turns the current exprt instance into an expression with label
-"not" of the same type as the original expression. The old value of the
-instance is appended as the operand of the "not"-node. If the original
-expression is of type "bool", the result represents the negation of the
-original expression with the following simplifications possibly applied:
-
-
-
-
¬ ¬ f = f
-
¬ true = false
-
¬ false = true
-
-
-
-void negate();
-
-
-
-Turns the current exprt instance into a negation of itself, depending
-on its type:
-
-
-
-
-
For boolean expressions, make_not is called.
-
-
For integers, the current instance is turned into a numeric negation
-expression "unary-" of its old value. Chains of "unary-" nodes and
-negations of integer constants are simplified.
-Expect the "this" object and the function argument to be constants of the
-same numeric type. Turn the current exprt instance into a
-constant expression of the same type, whose "value" edge points to the
-result of the sum, product, or difference of the two expressions. If the
-operation fails for some reason (e.g., the types are different),
-true is returned.
-
-
-
Testing common expressions
-
-
bool is_constant() const;
-
-
-
-Returns true if the expression label is "constant".
-
-
-
bool is_boolean() const;
-
-
-
-Returns true if the label of the type is "bool".
-
-
bool is_false() const;
-bool is_true() const;
-
-
-
-The first function returns true if the expression is a boolean constant with
-value "false". The second function returns true for any boolean constant
-that is not of value "false".
-
-
-
bool is_zero() const;
-bool is_one() const;
-
-
-
-The first function returns true if the expression represents a zero numeric
-constant, or if the expression represents a null pointer. The second
-function returns true if the expression represents a numeric constant with
-value "1".
-
-
-
Subtypes of exprt
-
-
-A number of subtypes of exprt provide further convenience functions
-for edge access or other specialized behaviour:
-
-
-
-
-
Class
Description
-
-
transt
-
Represents an SMV-style transition system with invariants
-invar(), initial state init() and transition
-function trans().
-
-
-
true_exprt
-
Boolean constant true expression.
-
-
-
false_exprt
-
Boolean constant false expression.
-
-
-
symbol_exprt
-
Represents a symbol (e.g., a variable occurrence), convenience function for manipulating "identifier"-edge set_identifier and get_identifier
-
-
-
predicate_exprt
-
Convenience constructors to create expressions of type "bool".
-
-
-
binary_relation_exprt : predicate_exprt
-
Convenience functions to create and manipulate binary expressions of type "bool".
-
-
-
equality_exprt : binary_relation_exprt
-
Convenience functions to create and manipulate equality expressions such as "a == b".
-
-
-
ieee_float_equal_exprt : binary_relation_exprt
-
Convenience functions to create and manipulate equality expressions between floating-point numbers.
-
-
-
-
index_exprt
-
Represents an array access expression such as "a[i]". Convenience functions array() and index() for accessing the array expressions and indexing expression.
-
-
-
typecast_exprt
-
Represents a cast to the type of the expression.
-
-
-
-and_exprt,
-implies_exprt,
-or_exprt,
-not_exprt
-
Representations of logical operators with convenience constructors.
-
-
-
address_of_exprt
-
Representation of a C-style &a address-of operation. Convenience function object() for accessing operand.
-
-
-
dereference_exprt
-
Representation of a C-style *a pointer-dereference operation. Convenience function object() for accessing operand.
-
-
-
if_exprt
-
Representation of a conditional expresion, with convenience functions cond(), true_case() and false_case() for accessing operands.
-
-
-
member_exprt
-
Represents a some_struct.some_field member access.
-
-
-
codet
-
Represents a segment of code.
-
-
-
-
-
Symbols and the Symbol Table
-
-
Symbol
-
-
-A symbol is an object of class symbolt. This class
-is declared in "util/symbol.h". The code below shows a partial
-declaration of the interface:
-
-Symbol names are unique. Scopes are handled by adding prefixes
-to symbols:
-
-
-
int main(int argc, char* argv[]) {
-
- // Symbol name: c::main::0::alice
- char alice = 0; // Symbol base: alice
-
- {
- // Symbol name: c::main::1::alice
- int alice = 0; // Symbol base: alice
- }
-}
-
-
Symbol Table
-
-
-A symbol table is an object of class contextt. This class
-is declared in "util/context.h". The code below shows a partial
-declaration of the interface:
-
-
-
class symbol_tablett
-{
-public:
- // Insert the symbol
- bool add(const symbolt &symb);
- // Insert symb into the
- // table and erase it.
- // New_symbol points to the
- // newly inserted element.
- bool move(symbolt &symbol, symbolt *&new_symbol);
-
- // Insert symb into the
- // table. Then symb is erased.
- bool move(symbolt &symb);
-
- // Return the entry of the
- // symbol with given name.
- const irept &value(const std::string &name) const;
-};
-
-
-
Goto Programs
-
-
-Goto programs are a representation of the control flow graph of a program
-that uses only guarded goto and assume statements to model non-sequential
-flow. The main definition can be found in
-"src/goto-programs/goto_program_template.h", which is a template class. The
-concrete instantiation of the template that is used in the framework can be
-found in "src/goto-programs/goto_program.h". A single instruction in a goto
-program is represented by the class goto_programt::instructiont
-whose definition can be found again in
-"goto-programs/goto_program_template.h".
-
-
-
-In the class goto_programt, the control flow graph is represented
-as a mixture of sequential transitions between nodes, and non-sequential
-transitions at goto-nodes. The sequential flow of the program is captured
-by the list instructions that is a field of the class
-goto_programt. Transitions via goto statements are represented in
-the list targets, which is a field of the class
-goto_programt::instructiont, i.e., each goto-instruction carries a
-list of possible jump destinations. The latter list targets is a
-list of iterators which point to elements of the list instructions.
-An illustration is given in the figure below.
-
-
-
-
Representation of program flow in goto_programt
-
-
-Instructions can have a number of different types as represented by
-enum goto_program_instruction_typet and can be accessed via the
-field type in instructiont. These include:
-
-
-
-
GOTO
-
Represents a non-deterministic branch to the instructions given in the
-list targets. Goto statements are guarded, i.e., the
-non-deterministic branch is only taken if the expression in
-guard evaluates to true, otherwise the program continues
-sequentially. Guarded gotos can be used, for example, to model if
-statements. The guard is then set to the negated condition of the
-statement, and goto target is set to bypass the conditionally executed code
-if this guard evaluates to true.
-
-
-
-
ASSUME
-
An assumption statement that restricts viable paths reaching the
-instruction location to the ones that make the expression guard
-evaluate to true.
-
-
-
ASSERT
-
An assertion whose guard is checked for validity when the instruction is
-reached.
-
-
-
RETURN
-
A return statement in a function.
-
-
-
END_FUNCTION
-
Denotes the end of a function.
-
-
-
ASSIGN
-
A variable assignment.
-
-
-
SKIP
-
No operation.
-
-
-
OTHER
-
Any operation not covered by enum
-goto_program_instruction_typet.
-
-
-
-
-
-A number of convenience functions in instructiont, such as
-is_goto(), is_assume(), etc., simplify type queries.
-The following code segment gives a partial interface declaration of
-goto_program_template and instructiont.
-
-
-
template <class codeT, class guardT>
-class goto_program_templatet
-{
-public:
- //list of instruction type
- typedef std::list<class instructiont> instructionst;
-
- //a reference to an instruction in the list
- typedef typename
- std::list::iterator targett;
-
- //Sequential list of instructions,
- //representing sequential program flow
- instructionst instructions;
-
- typedef typename
- std::map target_numberst;
-
- //A map containing the unique number of each target
- target_numberst target_numbers;
-
- //Get the successors of a given instruction
- void get_successors(targett target, targetst &successors);
-
- ...
-
-
- class instructiont
- {
- public:
- codeT code;
-
- //identifier of enclosing function
- irep_idt function;
-
- //location in the source file
- locationt location;
-
- //type of instruction?
- goto_program_instruction_typet type;
-
- //Guard statement for gotos, assume, assert
- guardT guard;
-
- //targets for gotos
- targetst targets;
-
- //set of all predecessors (sequential, and gotos)
- std::set<targett> incoming_edges;
-
- // a globally unique number to identify a
- // program location. It is guaranteed to be
- // ordered in program order within one
- // goto_program
- unsigned location_number;
-
- // a globally unique number to identify loops
- unsigned loop_number;
-
- // true if this is a goto jumping back to an
- // earlier instruction in the sequential program
- // flow
- bool is_backwards_goto() const;
- };
-
-}
-