Skip to content
Merged
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ bitcoin = { version = "0.29.2", optional = true }
bitcoin_hashes = "0.11"
byteorder = "1.3"
elements = { version = "0.21.1", optional = true }
elements-miniscript = { git = "https://github.com/ElementsProject/elements-miniscript", rev = "955f380" }
elements-miniscript = { git = "https://github.com/apoelstra/elements-miniscript", tag = "2023-07--rust-simplicity-patch" }
simplicity-sys = { version = "0.1.0", path = "./simplicity-sys" }
actual-serde = { package = "serde", version = "1.0.103", features = ["derive"], optional = true }

Expand Down
6 changes: 3 additions & 3 deletions jets-bench/benches/elements/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ fn bench(c: &mut Criterion) {
let (src_ty, tgt_ty) = jet_arrow(jet);
let env = env_sampler.env();

let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
let mut group = c.benchmark_group(&jet.to_string());
for i in 0..NUM_RANDOM_SAMPLES {
let params = JetParams::with_rand_aligns(InputSampling::Random);
let name = format!("{}", i);
Expand Down Expand Up @@ -531,7 +531,7 @@ fn bench(c: &mut Criterion) {
let (src_ty, tgt_ty) = jet_arrow(jet);
let env = EnvSampling::Null.env();

let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
let mut group = c.benchmark_group(&jet.to_string());
for i in 0..NUM_RANDOM_SAMPLES {
let params = JetParams::with_rand_aligns(InputSampling::Custom(inp_fn.clone()));
let name = format!("{}", i);
Expand Down Expand Up @@ -612,7 +612,7 @@ fn bench(c: &mut Criterion) {
for (jet, index, env_type) in arr {
let (src_ty, tgt_ty) = jet_arrow(jet);
let env = env_type.env();
let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
let mut group = c.benchmark_group(&jet.to_string());

for i in 0..NUM_RANDOM_SAMPLES {
// We always select the current input because this is where we
Expand Down
5 changes: 4 additions & 1 deletion src/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

use crate::jet::Jet;
use crate::Value;
use std::{cmp, fmt, io};
use std::{cmp, fmt};

#[cfg(feature = "elements")]
use std::io;

#[cfg(feature = "elements")]
use elements::encode::Encodable;
Expand Down
2 changes: 1 addition & 1 deletion src/bit_encoding/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ mod tests {
#[test]
fn root_unit_to_unit() {
// main = jet_eq_32 :: 2^64 -> 2 # 7387d279
let justjet = vec![0x6d, 0xb8, 0x80];
let justjet = [0x6d, 0xb8, 0x80];
// Should be able to decode this as an expression...
let mut iter = BitIter::from(&justjet[..]);
decode_expression::<_, Core>(&mut iter).unwrap();
Expand Down
6 changes: 3 additions & 3 deletions src/bit_machine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,23 +528,23 @@ mod tests {
prog.cmr().to_string(),
cmr_str,
"CMR mismatch (got {} expected {}) for program {}",
prog.cmr().to_string(),
prog.cmr(),
cmr_str,
prog_hex,
);
assert_eq!(
prog.imr().to_string(),
imr_str,
"IMR mismatch (got {} expected {}) for program {}",
prog.imr().to_string(),
prog.imr(),
imr_str,
prog_hex,
);
assert_eq!(
prog.amr().to_string(),
amr_str,
"AMR mismatch (got {} expected {}) for program {}",
prog.amr().to_string(),
prog.amr(),
amr_str,
prog_hex,
);
Expand Down
14 changes: 11 additions & 3 deletions src/human_encoding/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ and EXPRESSION is
* `unit`, `iden`, or `witness`;
* `injl`, `injr`, `take`, or `drop` followed by another EXPRESSION;
* `case`, `comp`, or `pair` followed by two EXPRESSIONs;
* `assertl` followed by an EXPRESSION, a literal `#`, and another EXPRESSION;
* `assertr` followed by a literal `#` and two EXPRESSIONs;
* `assertl` followed by an EXPRESSION and a CMR (defined below);
* `assertr` followed by a CMR and an EXPRESSION;
* a jet, which begins with `jet_` and must belong to the list of jets (FIXME define this list);
* `const` followed by a VALUE (defined below);
* `fail` followed by an ENTROPY (defined below); or
Expand All @@ -63,6 +63,14 @@ Note that while we allow parentheses to help group parts of expressions for human
understanding, they are never needed for disambiguation and are essentially
ignored by the parser.

A CMR is

* `#{` followed by an expression followed by `}`; or
* `#` followed by 64 hex digits (32 bytes).

The first case indicates that an expression should be replaced by its commitment
Merkle root; the second case just directly encodes the Merkle root.

A HOLE is the literal `?` followed by a NAME. It indicates an expression that has
yet to be defined. Holes have a different namespace than other names.

Expand Down Expand Up @@ -151,7 +159,7 @@ Expressions may be
* one of the core combinators `unit`, `iden`, `comp`, `injl`, `injr`, `case`, `take`, `drop`, `pair`, followed by subexpression(s) as needed;
* the `disconnect` combinator followed by an expression and a hole;
* the `witness` combinator which currently allows no subexpressions;
* the assertions, `assertl` or `assertr`, which take two subexpressions, one of which will be hidden in the decoded program. The hidden subexpression should be prefixed by `#` which indicates to the parser to take the CMR of that expression, not the expression itself.
* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR. The CMR can be written as a full expression prefixed by `#{` and suffixed by `}`, in which case the expression itself does not appear in the bit-encoding, only its CMR;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

64c6ea4: Typo, and the definition of CMR was extended. I would leave out the latter part because it is already explained in an earlier section.

Suggested change
* the assertions, `assertl` or `assertr`, which take a subexpressions and a CMR. The CMR is encoded as a full expression prefixed by `#{` and suffixed by `}`; but in the bit-encoding the expression does not appear, only its CMR;
the assertions, `assertl` or `assertr`, which take a subexpression and a CMR;

* `fail` followed by a 128-to-512-bit entropy value, which should occur only in the pruned branch of an assertion, though this is not enforced;
* `const` followed by a value, which is a "constant-word jet" and is equivalent to constructing the given value by a tree of `pair`s whose leaves are `injl unit` (0) or `injr unit` (1);

Expand Down
217 changes: 217 additions & 0 deletions src/human_encoding/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
// Simplicity "Human-Readable" Language
//
// To the extent possible under law, the author(s) have dedicated all
// copyright and related and neighboring rights to this software to
// the public domain worldwide. This software is distributed without
// any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication
// along with this software.
// If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
//

//! Parsing Errors

use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};
use std::{error, fmt, iter};

use crate::types;

use super::Position;

/// A set of errors found in a human-readable encoding of a Simplicity program.
///
/// Errors are grouped by the source position at which they occurred; errors
/// without a known position are stored under the `None` key. Optionally the
/// original source text can be attached so that the `Display` output can quote
/// the offending line.
#[derive(Clone, Debug, Default)]
pub struct ErrorSet {
// Complete source text the errors refer to, if it has been attached.
context: Option<Arc<str>>,
// Cache used by the `Display` impl to locate lines inside `context`. It is
// filled lazily the first time the set is formatted, which is why it sits
// behind a `Mutex` (formatting only has `&self`).
// NOTE(review): the derived `Clone` clones the `Arc`, so clones share this
// cache -- confirm this is intended.
line_map: Arc<Mutex<Vec<usize>>>,
// Errors keyed by position; the `None` key holds errors with no position.
errors: BTreeMap<Option<Position>, Vec<Error>>,
}

impl ErrorSet {
/// Constructs a new empty error set.
pub fn new() -> Self {
ErrorSet::default()
}

/// Returns the first (and presumably most important) error in the set, if it
/// is non-empty, along with its position.
pub fn first_error(&self) -> Option<(Option<Position>, &Error)> {
self.errors.iter().next().map(|(a, b)| (*a, &b[0]))
}

/// Constructs a new error set with a single error in it.
pub fn single<P: Into<Position>, E: Into<Error>>(position: P, err: E) -> Self {
let mut errors = BTreeMap::default();
errors.insert(Some(position.into()), vec![err.into()]);
ErrorSet {
context: None,
line_map: Arc::new(Mutex::new(vec![])),
errors,
}
}

/// Constructs a new error set with a single error in it.
pub fn single_no_position<E: Into<Error>>(err: E) -> Self {
let mut errors = BTreeMap::default();
errors.insert(None, vec![err.into()]);
ErrorSet {
context: None,
line_map: Arc::new(Mutex::new(vec![])),
errors,
}
}

/// Adds an error to the error set.
pub fn add<P: Into<Position>, E: Into<Error>>(&mut self, position: P, err: E) {
self.errors
.entry(Some(position.into()))
.or_insert(vec![])
.push(err.into());
}

/// Merges another set of errors into the current set.
///
/// # Panics
///
/// Panics if the two sets have different contexts attached.
pub fn merge(&mut self, other: &Self) {
match (self.context.as_ref(), other.context.as_ref()) {
(None, None) => {}
(Some(_), None) => {}
(None, Some(b)) => self.context = Some(Arc::clone(b)),
(Some(a), Some(b)) => {
assert_eq!(a, b, "cannot merge error sets for different source input");
}
};

for (pos, errs) in &other.errors {
self.errors
.entry(*pos)
.or_insert(vec![])
.extend(errs.iter().cloned());
}
}

/// Attaches the input code to the error set, so that error messages can include
/// line numbers etc.
///
/// # Panics
///
/// Panics if it is called twice on the same error set. You should call this once
/// with the complete input code.
pub fn add_context(&mut self, s: Arc<str>) {
if self.context.is_some() {
panic!("tried to add context to the same error context twice");
}
self.context = Some(s);
}

/// Returns a boolean indicating whether the set is empty.
pub fn is_empty(&self) -> bool {
self.errors.is_empty()
}

/// Returns the number of errors currently in the set.
pub fn len(&self) -> usize {
self.errors.len()
}

/// Converts the error set into a result.
///
/// If the set is empty, returns Ok with the given value. Otherwise
/// returns Err with itself.
pub fn into_result<T>(self, ok: T) -> Result<T, Self> {
if self.is_empty() {
Ok(ok)
} else {
Err(self)
}
}

/// Converts the error set into a result.
///
/// If the set is empty, returns Ok with the result of calling the given closure.
/// Otherwise returns Err with itself.
pub fn into_result_with<T, F: FnOnce() -> T>(self, okfn: F) -> Result<T, Self> {
if self.is_empty() {
Ok(okfn())
} else {
Err(self)
}
}
}

impl error::Error for ErrorSet {
    /// Returns the underlying error of the first error in the set, if any.
    ///
    /// This overrides `source` rather than the deprecated `cause`; the
    /// default implementation of `cause` forwards to `source`, so callers of
    /// either method see the same result.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match self.first_error()?.1 {
            Error::TypeCheck(ref e) => Some(e),
        }
    }
}

impl fmt::Display for ErrorSet {
    /// Writes every error in the set, ordered by position.
    ///
    /// If source context has been attached, each positioned error is rendered
    /// as the offending source line, a caret under the reported column and
    /// the error message. Without context, positioned errors are rendered as
    /// `line:column: message`. Errors without a position are rendered as
    /// `Error: message`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut line_map = self.line_map.lock().unwrap();
        if line_map.is_empty() {
            if let Some(ref s) = self.context {
                // `line_map[n]` is the byte offset at which line `n` starts.
                // Lines are looked up by their 1-based number, so index 0 is
                // only a placeholder and index 1 is the start of the input.
                *line_map = iter::repeat(0)
                    .take(2)
                    .chain(s.char_indices().filter_map(|(n, ch)| {
                        if ch == '\n' {
                            Some(n + 1)
                        } else {
                            None
                        }
                    }))
                    .collect();
            }
        }

        for (pos, errs) in &self.errors {
            if let Some(pos) = pos {
                for err in errs {
                    if let Some(ref s) = self.context {
                        // The line runs from its own start up to (excluding)
                        // the newline that precedes the start of the next line.
                        let start = line_map.get(pos.line).copied().unwrap_or(s.len());
                        let end = pos
                            .line
                            .checked_add(1)
                            .and_then(|next| line_map.get(next))
                            .map(|next_start| next_start.saturating_sub(1))
                            .unwrap_or(s.len());
                        // Positions outside of the attached text are shown with
                        // an empty source line rather than panicking.
                        let line = s.get(start..end).unwrap_or("");
                        writeln!(f, "{:5} | {}", pos.line, line)?;
                        // Pad with the same gutter width as the line above so
                        // the caret ends up under the reported column.
                        writeln!(f, "{:5} | {:>width$}", "", "^", width = pos.column)?;
                        writeln!(f, "{:5} \\-- {}", "", err)?;
                        writeln!(f)?;
                    } else {
                        writeln!(f, "{:4}:{:2}: {}", pos.line, pos.column, err,)?;
                        writeln!(f)?;
                    }
                }
            } else {
                for err in errs {
                    writeln!(f, "Error: {}", err)?;
                }
            }
        }
        Ok(())
    }
}

/// An individual error.
///
/// Generally this structure should not be used on its own, but only wrapped in an
/// [`ErrorSet`]. This is because when parsing the human-readable encoding it is
/// usually possible to continue past individual errors, and the user would prefer
/// to see as many as possible at once.
#[derive(Clone, Debug)]
pub enum Error {
/// Simplicity type-checking error
TypeCheck(types::Error),
}

impl From<types::Error> for Error {
fn from(e: types::Error) -> Self {
Error::TypeCheck(e)
}
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::TypeCheck(ref e) => fmt::Display::fmt(e, f),
}
}
}
17 changes: 17 additions & 0 deletions src/human_encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
//! in a human-readable format.
//!

mod error;
mod named_node;
mod serialize;

Expand All @@ -31,8 +32,20 @@ use std::collections::HashMap;
use std::str;
use std::sync::Arc;

pub use self::error::{Error, ErrorSet};
pub use self::named_node::NamedCommitNode;

/// Line/column pair
///
/// There is a similar type provided by the `santiago` library but it does not implement
/// `Copy`, among many other traits, which makes it unergonomic to use. Santiago positions
/// can be converted using `.into()`.
///
/// The derived ordering compares `line` first and then `column`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Default, Hash)]
pub struct Position {
// Line number within the source text (presumably 1-based, as produced by the
// lexer -- TODO confirm).
line: usize,
// Column number within the line (presumably 1-based -- TODO confirm).
column: usize,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Forest<J: Jet> {
roots: HashMap<Arc<str>, Arc<NamedCommitNode<J>>>,
Expand Down Expand Up @@ -80,6 +93,7 @@ impl<J: Jet> Forest<J> {
let node = data.node;
let name = node.name();
let mut expr_str = match node.inner() {
node::Inner::AssertR(cmr, _) => format!("{} := assertr #{}", name, cmr),
node::Inner::Fail(entropy) => format!("{} := fail {}", name, entropy),
node::Inner::Jet(ref j) => format!("{} := jet_{}", name, j),
node::Inner::Word(ref v) => {
Expand All @@ -94,6 +108,9 @@ impl<J: Jet> Forest<J> {
if let Some(child) = node.right_child() {
expr_str.push(' ');
expr_str.push_str(child.name());
} else if let node::Inner::AssertL(_, cmr) = node.inner() {
expr_str.push_str(" #");
expr_str.push_str(&cmr.to_string());
}

let arrow = node.arrow();
Expand Down
Loading