From 2cdd8105744be6f734ca7b8b3ff793d1fa6d520a Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 12:21:14 -0400 Subject: [PATCH 01/10] doc: bump minimum Rust version to 1.20.0 This also clarifies our policy on increasing the minimum Rust version required. In particular, we reserve the right to increase the minimum Rust version in minor version releases of regexes, but never in patch releases. We will default to a reasonably conservative interpretation of this policy, and not bump the minimum required Rust version lightly. If this policy turns out to be too aggressive, then we may alter it in the future to state that the minimum Rust version is fixed for all of regex 1.y.z, and can only be bumped on major regex version releases. See #457 --- .travis.yml | 2 +- README.md | 18 +++++++++++++++++- ci/script.sh | 9 --------- src/vector/avx2.rs | 18 +++++------------- src/vector/ssse3.rs | 36 ++++++++++++++---------------------- 5 files changed, 37 insertions(+), 46 deletions(-) diff --git a/.travis.yml b/.travis.yml index d63fe13dfa..f6f43acb66 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ dist: trusty sudo: false language: rust rust: -- 1.12.0 +- 1.20.0 - stable - beta - nightly diff --git a/README.md b/README.md index c61ba4a644..8828183014 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ by [RE2](https://github.com/google/re2). [![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex) [![Coverage Status](https://coveralls.io/repos/github/rust-lang/regex/badge.svg?branch=master)](https://coveralls.io/github/rust-lang/regex?branch=master) [![](http://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex) +[![Rust](https://img.shields.io/badge/rust-1.20%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex) ### Documentation @@ -210,7 +211,22 @@ recommended for general use. 
[Documentation `regex-syntax`.](https://docs.rs/regex-syntax) -# License + +### Minimum Rust version policy + +This crate's minimum supported `rustc` version is `1.20.0`. + +The current **tentative** policy is that the minimum Rust version required to +use this crate can be increased in minor version updates. For example, if +regex 1.0.0 requires Rust 1.20.0, then regex 1.0.z for all values of `z` will +also require Rust 1.20.0 or newer. However, regex 1.y for `y > 0` may require +a newer minimum version of Rust. + +In general, this crate will be conservative with respect to the minimum +supported version of Rust. + + +### License This project is licensed under either of diff --git a/ci/script.sh b/ci/script.sh index baaca969e6..64fafc0d37 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -8,15 +8,6 @@ set -ex cargo build --verbose cargo doc --verbose -# If we're testing on an older version of Rust, then only check that we -# can build the crate. This is because the dev dependencies might be updated -# more frequently, and therefore might require a newer version of Rust. -# -# This isn't ideal. It's a compromise. -if [ "$TRAVIS_RUST_VERSION" = "1.12.0" ]; then - exit -fi - # Run tests. If we have nightly, then enable our nightly features. if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then cargo test --verbose --features unstable diff --git a/src/vector/avx2.rs b/src/vector/avx2.rs index 28f89d6a76..db0532c3fe 100644 --- a/src/vector/avx2.rs +++ b/src/vector/avx2.rs @@ -54,21 +54,13 @@ impl AVX2VectorBuilder { } } -// We define our union with a macro so that our code continues to compile on -// Rust 1.12. -macro_rules! 
defunion { - () => { - #[derive(Clone, Copy)] - #[allow(non_camel_case_types)] - pub union u8x32 { - vector: __m256i, - bytes: [u8; 32], - } - } +#[derive(Clone, Copy)] +#[allow(non_camel_case_types)] +pub union u8x32 { + vector: __m256i, + bytes: [u8; 32], } -defunion!(); - impl u8x32 { #[inline] unsafe fn splat(n: u8) -> u8x32 { diff --git a/src/vector/ssse3.rs b/src/vector/ssse3.rs index 4107381d15..0485657f01 100644 --- a/src/vector/ssse3.rs +++ b/src/vector/ssse3.rs @@ -66,30 +66,22 @@ impl SSSE3VectorBuilder { } } -// We define our union with a macro so that our code continues to compile on -// Rust 1.12. -macro_rules! defunion { - () => { - /// A u8x16 is a 128-bit vector with 16 single-byte lanes. - /// - /// It provides a safe API that uses only SSE2 or SSSE3 instructions. - /// The only way for callers to construct a value of this type is - /// through the SSSE3VectorBuilder type, and the only way to get a - /// SSSE3VectorBuilder is if the `ssse3` target feature is enabled. - /// - /// Note that generally speaking, all uses of this type should get - /// inlined, otherwise you probably have a performance bug. - #[derive(Clone, Copy)] - #[allow(non_camel_case_types)] - pub union u8x16 { - vector: __m128i, - bytes: [u8; 16], - } - } +/// A u8x16 is a 128-bit vector with 16 single-byte lanes. +/// +/// It provides a safe API that uses only SSE2 or SSSE3 instructions. +/// The only way for callers to construct a value of this type is +/// through the SSSE3VectorBuilder type, and the only way to get a +/// SSSE3VectorBuilder is if the `ssse3` target feature is enabled. +/// +/// Note that generally speaking, all uses of this type should get +/// inlined, otherwise you probably have a performance bug. 
+#[derive(Clone, Copy)] +#[allow(non_camel_case_types)] +pub union u8x16 { + vector: __m128i, + bytes: [u8; 16], } -defunion!(); - impl u8x16 { #[inline] unsafe fn splat(n: u8) -> u8x16 { From 0c2208773fa5de25557e98895662724affe93167 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 12:35:02 -0400 Subject: [PATCH 02/10] *: switch from try! to ? --- regex-debug/src/main.rs | 20 ++++----- regex-syntax/src/ast/parse.rs | 74 +++++++++++++++---------------- regex-syntax/src/ast/print.rs | 36 +++++++-------- regex-syntax/src/ast/visitor.rs | 28 ++++++------ regex-syntax/src/error.rs | 18 ++++---- regex-syntax/src/hir/print.rs | 68 ++++++++++++++-------------- regex-syntax/src/hir/translate.rs | 32 ++++++------- regex-syntax/src/hir/visitor.rs | 8 ++-- regex-syntax/src/parser.rs | 4 +- regex-syntax/src/unicode.rs | 8 ++-- src/compile.rs | 56 +++++++++++------------ src/error.rs | 10 ++--- src/exec.rs | 16 +++---- src/prog.rs | 25 +++++------ 14 files changed, 200 insertions(+), 203 deletions(-) diff --git a/regex-debug/src/main.rs b/regex-debug/src/main.rs index 5d4c49e1f6..4a3287ee58 100644 --- a/regex-debug/src/main.rs +++ b/regex-debug/src/main.rs @@ -123,7 +123,7 @@ fn cmd_ast(args: &Args) -> Result<()> { use syntax::ast::parse::Parser; let mut parser = Parser::new(); - let ast = try!(parser.parse(&args.arg_pattern)); + let ast = parser.parse(&args.arg_pattern)?; println!("{:#?}", ast); Ok(()) } @@ -134,13 +134,13 @@ fn cmd_hir(args: &Args) -> Result<()> { let mut parser = ParserBuilder::new() .allow_invalid_utf8(false) .build(); - let hir = try!(parser.parse(&args.arg_pattern)); + let hir = parser.parse(&args.arg_pattern)?; println!("{:#?}", hir); Ok(()) } fn cmd_literals(args: &Args) -> Result<()> { - let exprs = try!(args.parse_many()); + let exprs = args.parse_many()?; let mut lits = if args.cmd_prefixes { args.literals(&exprs, |lits, e| lits.union_prefixes(e)) @@ -173,7 +173,7 @@ fn cmd_literals(args: &Args) -> Result<()> { } fn 
cmd_anchors(args: &Args) -> Result<()> { - let expr = try!(args.parse_one()); + let expr = args.parse_one()?; if expr.is_anchored_start() { println!("start"); } @@ -184,8 +184,8 @@ fn cmd_anchors(args: &Args) -> Result<()> { } fn cmd_captures(args: &Args) -> Result<()> { - let expr = try!(args.parse_one()); - let prog = try!(args.compiler().only_utf8(false).compile(&[expr])); + let expr = args.parse_one()?; + let prog = args.compiler().only_utf8(false).compile(&[expr])?; for (i, name) in prog.captures.iter().enumerate() { match *name { None => println!("{}", i), @@ -196,14 +196,14 @@ fn cmd_captures(args: &Args) -> Result<()> { } fn cmd_compile(args: &Args) -> Result<()> { - let exprs = try!(args.parse_many()); + let exprs = args.parse_many()?; let compiler = args.compiler() .bytes(args.flag_bytes) .only_utf8(!args.flag_bytes) .dfa(args.flag_dfa) .reverse(args.flag_dfa_reverse); - let prog = try!(compiler.compile(&exprs)); + let prog = compiler.compile(&exprs)?; print!("{:?}", prog); Ok(()) } @@ -213,9 +213,9 @@ fn cmd_utf8_ranges(args: &Args) -> Result<()> { use syntax::hir::{self, HirKind}; use utf8_ranges::Utf8Sequences; - let hir = try!(ParserBuilder::new() + let hir = ParserBuilder::new() .build() - .parse(&format!("[{}]", args.arg_class))); + .parse(&format!("[{}]", args.arg_class))?; let cls = match hir.into_kind() { HirKind::Class(hir::Class::Unicode(cls)) => cls, _ => return Err( diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 171dd660ae..b73baee735 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -426,9 +426,9 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// If the capture limit is exceeded, then an error is returned. 
fn next_capture_index(&self, span: Span) -> Result { let current = self.parser().capture_index.get(); - let i = try!(current.checked_add(1).ok_or_else(|| { + let i = current.checked_add(1).ok_or_else(|| { self.error(span, ast::ErrorKind::CaptureLimitExceeded) - })); + })?; self.parser().capture_index.set(i); Ok(i) } @@ -695,7 +695,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// is returned. fn push_group(&self, mut concat: ast::Concat) -> Result { assert_eq!(self.char(), '('); - match try!(self.parse_group()) { + match self.parse_group()? { Either::Left(set) => { let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace); if let Some(v) = ignore { @@ -837,7 +837,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { ) -> Result { assert_eq!(self.char(), '['); - let (nested_set, nested_union) = try!(self.parse_set_class_open()); + let (nested_set, nested_union) = self.parse_set_class_open()?; self.parser().stack_class.borrow_mut().push(ClassState::Open { union: parent_union, set: nested_set, @@ -987,33 +987,33 @@ impl<'s, P: Borrow> ParserI<'s, P> { break; } match self.char() { - '(' => concat = try!(self.push_group(concat)), - ')' => concat = try!(self.pop_group(concat)), - '|' => concat = try!(self.push_alternate(concat)), + '(' => concat = self.push_group(concat)?, + ')' => concat = self.pop_group(concat)?, + '|' => concat = self.push_alternate(concat)?, '[' => { - let class = try!(self.parse_set_class()); + let class = self.parse_set_class()?; concat.asts.push(Ast::Class(class)); } '?' 
=> { - concat = try!(self.parse_uncounted_repetition( - concat, ast::RepetitionKind::ZeroOrOne)); + concat = self.parse_uncounted_repetition( + concat, ast::RepetitionKind::ZeroOrOne)?; } '*' => { - concat = try!(self.parse_uncounted_repetition( - concat, ast::RepetitionKind::ZeroOrMore)); + concat = self.parse_uncounted_repetition( + concat, ast::RepetitionKind::ZeroOrMore)?; } '+' => { - concat = try!(self.parse_uncounted_repetition( - concat, ast::RepetitionKind::OneOrMore)); + concat = self.parse_uncounted_repetition( + concat, ast::RepetitionKind::OneOrMore)?; } '{' => { - concat = try!(self.parse_counted_repetition(concat)); + concat = self.parse_counted_repetition(concat)?; } - _ => concat.asts.push(try!(self.parse_primitive()).into_ast()), + _ => concat.asts.push(self.parse_primitive()?.into_ast()), } } - let ast = try!(self.pop_group_end(concat)); - try!(NestLimiter::new(self).check(&ast)); + let ast = self.pop_group_end(concat)?; + NestLimiter::new(self).check(&ast)?; Ok(ast::WithComments { ast: ast, comments: mem::replace( @@ -1106,7 +1106,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { ast::ErrorKind::RepetitionCountUnclosed, )); } - let count_start = try!(self.parse_decimal()); + let count_start = self.parse_decimal()?; let mut range = ast::RepetitionRange::Exactly(count_start); if self.is_eof() { return Err(self.error( @@ -1122,7 +1122,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { )); } if self.char() != '}' { - let count_end = try!(self.parse_decimal()); + let count_end = self.parse_decimal()?; range = ast::RepetitionRange::Bounded(count_start, count_end); } else { range = ast::RepetitionRange::AtLeast(count_start); @@ -1191,8 +1191,8 @@ impl<'s, P: Borrow> ParserI<'s, P> { } let inner_span = self.span(); if self.bump_if("?P<") { - let capture_index = try!(self.next_capture_index(open_span)); - let cap = try!(self.parse_capture_name(capture_index)); + let capture_index = self.next_capture_index(open_span)?; + let cap = self.parse_capture_name(capture_index)?; 
Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureName(cap), @@ -1205,7 +1205,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { ast::ErrorKind::GroupUnclosed, )); } - let flags = try!(self.parse_flags()); + let flags = self.parse_flags()?; let char_end = self.char(); self.bump(); if char_end == ')' { @@ -1230,7 +1230,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { })) } } else { - let capture_index = try!(self.next_capture_index(open_span)); + let capture_index = self.next_capture_index(open_span)?; Ok(Either::Right(ast::Group { span: open_span, kind: ast::GroupKind::CaptureIndex(capture_index), @@ -1291,7 +1291,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { name: name.to_string(), index: capture_index, }; - try!(self.add_capture_name(&capname)); + self.add_capture_name(&capname)?; Ok(capname) } @@ -1334,7 +1334,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { last_was_negation = None; let item = ast::FlagsItem { span: self.span_char(), - kind: ast::FlagsItemKind::Flag(try!(self.parse_flag())), + kind: ast::FlagsItemKind::Flag(self.parse_flag()?), }; if let Some(i) = flags.add_item(item) { return Err(self.error( @@ -1460,12 +1460,12 @@ impl<'s, P: Borrow> ParserI<'s, P> { )); } 'x' | 'u' | 'U' => { - let mut lit = try!(self.parse_hex()); + let mut lit = self.parse_hex()?; lit.span.start = start; return Ok(Primitive::Literal(lit)); } 'p' | 'P' => { - let mut cls = try!(self.parse_unicode_class()); + let mut cls = self.parse_unicode_class()?; cls.span.start = start; return Ok(Primitive::Unicode(cls)); } @@ -1756,10 +1756,10 @@ impl<'s, P: Borrow> ParserI<'s, P> { continue; } } - union = try!(self.push_class_open(union)); + union = self.push_class_open(union)?; } ']' => { - match try!(self.pop_class(union)) { + match self.pop_class(union)? 
{ Either::Left(nested_union) => { union = nested_union; } Either::Right(class) => return Ok(class), } @@ -1780,7 +1780,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { ast::ClassSetBinaryOpKind::SymmetricDifference, union); } _ => { - union.push(try!(self.parse_set_class_range())); + union.push(self.parse_set_class_range()?); } } } @@ -1795,7 +1795,7 @@ impl<'s, P: Borrow> ParserI<'s, P> { /// a simple literal is expected (e.g., in a range), then an error is /// returned. fn parse_set_class_range(&self) -> Result { - let prim1 = try!(self.parse_set_class_item()); + let prim1 = self.parse_set_class_item()?; self.bump_space(); if self.is_eof() { return Err(self.unclosed_class_error()); @@ -1816,11 +1816,11 @@ impl<'s, P: Borrow> ParserI<'s, P> { if !self.bump_and_bump_space() { return Err(self.unclosed_class_error()); } - let prim2 = try!(self.parse_set_class_item()); + let prim2 = self.parse_set_class_item()?; let range = ast::ClassSetRange { span: Span::new(prim1.span().start, prim2.span().end), - start: try!(prim1.into_class_literal(self)), - end: try!(prim2.into_class_literal(self)), + start: prim1.into_class_literal(self)?, + end: prim2.into_class_literal(self)?, }; if !range.is_valid() { return Err(self.error( @@ -2121,10 +2121,10 @@ impl<'p, 's, P: Borrow> NestLimiter<'p, 's, P> { } fn increment_depth(&mut self, span: &Span) -> Result<()> { - let new = try!(self.depth.checked_add(1).ok_or_else(|| self.p.error( + let new = self.depth.checked_add(1).ok_or_else(|| self.p.error( span.clone(), ast::ErrorKind::NestLimitExceeded(::std::u32::MAX), - ))); + ))?; let limit = self.p.parser().nest_limit; if new > limit { return Err(self.p.error( diff --git a/regex-syntax/src/ast/print.rs b/regex-syntax/src/ast/print.rs index ddec734cef..4441b4d580 100644 --- a/regex-syntax/src/ast/print.rs +++ b/regex-syntax/src/ast/print.rs @@ -146,9 +146,9 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { Empty(_) => Ok(()), Literal(ref x) => self.fmt_literal(x), Range(ref x) => { - 
try!(self.fmt_literal(&x.start)); - try!(self.wtr.write_str("-")); - try!(self.fmt_literal(&x.end)); + self.fmt_literal(&x.start)?; + self.wtr.write_str("-")?; + self.fmt_literal(&x.end)?; Ok(()) } Ascii(ref x) => self.fmt_class_ascii(x), @@ -173,15 +173,15 @@ impl<'p, W: fmt::Write> Writer<'p, W> { match ast.kind { CaptureIndex(_) => self.wtr.write_str("("), CaptureName(ref x) => { - try!(self.wtr.write_str("(?P<")); - try!(self.wtr.write_str(&x.name)); - try!(self.wtr.write_str(">")); + self.wtr.write_str("(?P<")?; + self.wtr.write_str(&x.name)?; + self.wtr.write_str(">")?; Ok(()) } NonCapturing(ref flags) => { - try!(self.wtr.write_str("(?")); - try!(self.fmt_flags(flags)); - try!(self.wtr.write_str(":")); + self.wtr.write_str("(?")?; + self.fmt_flags(flags)?; + self.wtr.write_str(":")?; Ok(()) } } @@ -201,9 +201,9 @@ impl<'p, W: fmt::Write> Writer<'p, W> { OneOrMore if ast.greedy => self.wtr.write_str("+"), OneOrMore => self.wtr.write_str("+?"), Range(ref x) => { - try!(self.fmt_repetition_range(x)); + self.fmt_repetition_range(x)?; if !ast.greedy { - try!(self.wtr.write_str("?")); + self.wtr.write_str("?")?; } Ok(()) } @@ -284,9 +284,9 @@ impl<'p, W: fmt::Write> Writer<'p, W> { } fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result { - try!(self.wtr.write_str("(?")); - try!(self.fmt_flags(&ast.flags)); - try!(self.wtr.write_str(")")); + self.wtr.write_str("(?")?; + self.fmt_flags(&ast.flags)?; + self.wtr.write_str(")")?; Ok(()) } @@ -294,7 +294,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { use ast::{Flag, FlagsItemKind}; for item in &ast.items { - try!(match item.kind { + match item.kind { FlagsItemKind::Negation => self.wtr.write_str("-"), FlagsItemKind::Flag(ref flag) => { match *flag { @@ -306,7 +306,7 @@ impl<'p, W: fmt::Write> Writer<'p, W> { Flag::IgnoreWhitespace => self.wtr.write_str("x"), } } - }); + }?; } Ok(()) } @@ -392,9 +392,9 @@ impl<'p, W: fmt::Write> Writer<'p, W> { use ast::ClassUnicodeOpKind::*; if ast.negated { - 
try!(self.wtr.write_str(r"\P")); + self.wtr.write_str(r"\P")?; } else { - try!(self.wtr.write_str(r"\p")); + self.wtr.write_str(r"\p")?; } match ast.kind { OneLetter(c) => self.wtr.write_char(c), diff --git a/regex-syntax/src/ast/visitor.rs b/regex-syntax/src/ast/visitor.rs index 268ac45f1b..9b93a1ae15 100644 --- a/regex-syntax/src/ast/visitor.rs +++ b/regex-syntax/src/ast/visitor.rs @@ -228,8 +228,8 @@ impl<'a> HeapVisitor<'a> { visitor.start(); loop { - try!(visitor.visit_pre(ast)); - if let Some(x) = try!(self.induct(ast, &mut visitor)) { + visitor.visit_pre(ast)?; + if let Some(x) = self.induct(ast, &mut visitor)? { let child = x.child(); self.stack.push((ast, x)); ast = child; @@ -237,7 +237,7 @@ impl<'a> HeapVisitor<'a> { } // No induction means we have a base case, so we can post visit // it now. - try!(visitor.visit_post(ast)); + visitor.visit_post(ast)?; // At this point, we now try to pop our call stack until it is // either empty or we hit another inductive case. @@ -250,7 +250,7 @@ impl<'a> HeapVisitor<'a> { // inductive steps to process. if let Some(x) = self.pop(frame) { if let Frame::Alternation {..} = x { - try!(visitor.visit_alternation_in()); + visitor.visit_alternation_in()?; } ast = x.child(); self.stack.push((post_ast, x)); @@ -258,7 +258,7 @@ impl<'a> HeapVisitor<'a> { } // Otherwise, we've finished visiting all the child nodes for // this AST, so we can post visit it now. 
- try!(visitor.visit_post(post_ast)); + visitor.visit_post(post_ast)?; } } } @@ -275,7 +275,7 @@ impl<'a> HeapVisitor<'a> { ) -> Result>, V::Err> { Ok(match *ast { Ast::Class(ast::Class::Bracketed(ref x)) => { - try!(self.visit_class(x, visitor)); + self.visit_class(x, visitor)?; None } Ast::Repetition(ref x) => Some(Frame::Repetition(x)), @@ -334,14 +334,14 @@ impl<'a> HeapVisitor<'a> { ) -> Result<(), V::Err> { let mut ast = ClassInduct::from_bracketed(ast); loop { - try!(self.visit_class_pre(&ast, visitor)); + self.visit_class_pre(&ast, visitor)?; if let Some(x) = self.induct_class(&ast) { let child = x.child(); self.stack_class.push((ast, x)); ast = child; continue; } - try!(self.visit_class_post(&ast, visitor)); + self.visit_class_post(&ast, visitor)?; // At this point, we now try to pop our call stack until it is // either empty or we hit another inductive case. @@ -354,7 +354,7 @@ impl<'a> HeapVisitor<'a> { // additional inductive steps to process. if let Some(x) = self.pop_class(frame) { if let ClassFrame::BinaryRHS { ref op, .. } = x { - try!(visitor.visit_class_set_binary_op_in(op)); + visitor.visit_class_set_binary_op_in(op)?; } ast = x.child(); self.stack_class.push((post_ast, x)); @@ -362,7 +362,7 @@ impl<'a> HeapVisitor<'a> { } // Otherwise, we've finished visiting all the child nodes for // this class node, so we can post visit it now. 
- try!(self.visit_class_post(&post_ast, visitor)); + self.visit_class_post(&post_ast, visitor)?; } } } @@ -375,10 +375,10 @@ impl<'a> HeapVisitor<'a> { ) -> Result<(), V::Err> { match *ast { ClassInduct::Item(item) => { - try!(visitor.visit_class_set_item_pre(item)); + visitor.visit_class_set_item_pre(item)?; } ClassInduct::BinaryOp(op) => { - try!(visitor.visit_class_set_binary_op_pre(op)); + visitor.visit_class_set_binary_op_pre(op)?; } } Ok(()) @@ -392,10 +392,10 @@ impl<'a> HeapVisitor<'a> { ) -> Result<(), V::Err> { match *ast { ClassInduct::Item(item) => { - try!(visitor.visit_class_set_item_post(item)); + visitor.visit_class_set_item_post(item)?; } ClassInduct::BinaryOp(op) => { - try!(visitor.visit_class_set_binary_op_post(op)); + visitor.visit_class_set_binary_op_post(op)?; } } Ok(()) diff --git a/regex-syntax/src/error.rs b/regex-syntax/src/error.rs index 3f2fade228..1f5b8f817d 100644 --- a/regex-syntax/src/error.rs +++ b/regex-syntax/src/error.rs @@ -105,11 +105,11 @@ impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { if self.pattern.contains('\n') { let divider = repeat_char('~', 79); - try!(writeln!(f, "regex parse error:")); - try!(writeln!(f, "{}", divider)); + writeln!(f, "regex parse error:")?; + writeln!(f, "{}", divider)?; let notated = spans.notate(); - try!(write!(f, "{}", notated)); - try!(writeln!(f, "{}", divider)); + write!(f, "{}", notated)?; + writeln!(f, "{}", divider)?; // If we have error spans that cover multiple lines, then we just // note the line numbers. 
if !spans.multi_line.is_empty() { @@ -120,14 +120,14 @@ impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> { span.start.line, span.start.column, span.end.line, span.end.column - 1)); } - try!(writeln!(f, "{}", notes.join("\n"))); + writeln!(f, "{}", notes.join("\n"))?; } - try!(write!(f, "error: {}", self.err)); + write!(f, "error: {}", self.err)?; } else { - try!(writeln!(f, "regex parse error:")); + writeln!(f, "regex parse error:")?; let notated = Spans::from_formatter(self).notate(); - try!(write!(f, "{}", notated)); - try!(write!(f, "error: {}", self.err)); + write!(f, "{}", notated)?; + write!(f, "error: {}", self.err)?; } Ok(()) } diff --git a/regex-syntax/src/hir/print.rs b/regex-syntax/src/hir/print.rs index f31468504a..a40a670bf7 100644 --- a/regex-syntax/src/hir/print.rs +++ b/regex-syntax/src/hir/print.rs @@ -94,71 +94,71 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { | HirKind::Concat(_) | HirKind::Alternation(_) => {} HirKind::Literal(hir::Literal::Unicode(c)) => { - try!(self.write_literal_char(c)); + self.write_literal_char(c)?; } HirKind::Literal(hir::Literal::Byte(b)) => { - try!(self.write_literal_byte(b)); + self.write_literal_byte(b)?; } HirKind::Class(hir::Class::Unicode(ref cls)) => { - try!(self.wtr.write_str("[")); + self.wtr.write_str("[")?; for range in cls.iter() { if range.start() == range.end() { - try!(self.write_literal_char(range.start())); + self.write_literal_char(range.start())?; } else { - try!(self.write_literal_char(range.start())); - try!(self.wtr.write_str("-")); - try!(self.write_literal_char(range.end())); + self.write_literal_char(range.start())?; + self.wtr.write_str("-")?; + self.write_literal_char(range.end())?; } } - try!(self.wtr.write_str("]")); + self.wtr.write_str("]")?; } HirKind::Class(hir::Class::Bytes(ref cls)) => { - try!(self.wtr.write_str("(?-u:[")); + self.wtr.write_str("(?-u:[")?; for range in cls.iter() { if range.start() == range.end() { - 
try!(self.write_literal_class_byte(range.start())); + self.write_literal_class_byte(range.start())?; } else { - try!(self.write_literal_class_byte(range.start())); - try!(self.wtr.write_str("-")); - try!(self.write_literal_class_byte(range.end())); + self.write_literal_class_byte(range.start())?; + self.wtr.write_str("-")?; + self.write_literal_class_byte(range.end())?; } } - try!(self.wtr.write_str("])")); + self.wtr.write_str("])")?; } HirKind::Anchor(hir::Anchor::StartLine) => { - try!(self.wtr.write_str("(?m:^)")); + self.wtr.write_str("(?m:^)")?; } HirKind::Anchor(hir::Anchor::EndLine) => { - try!(self.wtr.write_str("(?m:$)")); + self.wtr.write_str("(?m:$)")?; } HirKind::Anchor(hir::Anchor::StartText) => { - try!(self.wtr.write_str(r"\A")); + self.wtr.write_str(r"\A")?; } HirKind::Anchor(hir::Anchor::EndText) => { - try!(self.wtr.write_str(r"\z")); + self.wtr.write_str(r"\z")?; } HirKind::WordBoundary(hir::WordBoundary::Unicode) => { - try!(self.wtr.write_str(r"\b")); + self.wtr.write_str(r"\b")?; } HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => { - try!(self.wtr.write_str(r"\B")); + self.wtr.write_str(r"\B")?; } HirKind::WordBoundary(hir::WordBoundary::Ascii) => { - try!(self.wtr.write_str(r"(?-u:\b)")); + self.wtr.write_str(r"(?-u:\b)")?; } HirKind::WordBoundary(hir::WordBoundary::AsciiNegate) => { - try!(self.wtr.write_str(r"(?-u:\B)")); + self.wtr.write_str(r"(?-u:\B)")?; } HirKind::Group(ref x) => { match x.kind { hir::GroupKind::CaptureIndex(_) => { - try!(self.wtr.write_str("(")); + self.wtr.write_str("(")?; } hir::GroupKind::CaptureName { ref name, .. 
} => { - try!(write!(self.wtr, "(?P<{}>", name)); + write!(self.wtr, "(?P<{}>", name)?; } hir::GroupKind::NonCapturing => { - try!(self.wtr.write_str("(?:")); + self.wtr.write_str("(?:")?; } } } @@ -179,34 +179,34 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { HirKind::Repetition(ref x) => { match x.kind { hir::RepetitionKind::ZeroOrOne => { - try!(self.wtr.write_str("?")); + self.wtr.write_str("?")?; } hir::RepetitionKind::ZeroOrMore => { - try!(self.wtr.write_str("*")); + self.wtr.write_str("*")?; } hir::RepetitionKind::OneOrMore => { - try!(self.wtr.write_str("+")); + self.wtr.write_str("+")?; } hir::RepetitionKind::Range(ref x) => { match *x { hir::RepetitionRange::Exactly(m) => { - try!(write!(self.wtr, "{{{}}}", m)); + write!(self.wtr, "{{{}}}", m)?; } hir::RepetitionRange::AtLeast(m) => { - try!(write!(self.wtr, "{{{},}}", m)); + write!(self.wtr, "{{{},}}", m)?; } hir::RepetitionRange::Bounded(m, n) => { - try!(write!(self.wtr, "{{{},{}}}", m, n)); + write!(self.wtr, "{{{},{}}}", m, n)?; } } } } if !x.greedy { - try!(self.wtr.write_str("?")); + self.wtr.write_str("?")?; } } HirKind::Group(_) => { - try!(self.wtr.write_str(")")); + self.wtr.write_str(")")?; } } Ok(()) @@ -220,7 +220,7 @@ impl<'p, W: fmt::Write> Visitor for Writer<'p, W> { impl<'p, W: fmt::Write> Writer<'p, W> { fn write_literal_char(&mut self, c: char) -> fmt::Result { if is_meta_character(c) { - try!(self.wtr.write_str("\\")); + self.wtr.write_str("\\")?; } self.wtr.write_char(c) } diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index dac85c082e..0cb60acfd8 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -289,13 +289,13 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.set_flags(&x.flags); } Ast::Literal(ref x) => { - self.push(HirFrame::Expr(try!(self.hir_literal(x)))); + self.push(HirFrame::Expr(self.hir_literal(x)?)); } Ast::Dot(span) => { - self.push(HirFrame::Expr(try!(self.hir_dot(span)))); + 
self.push(HirFrame::Expr(self.hir_dot(span)?)); } Ast::Assertion(ref x) => { - self.push(HirFrame::Expr(try!(self.hir_assertion(x)))); + self.push(HirFrame::Expr(self.hir_assertion(x)?)); } Ast::Class(ast::Class::Perl(ref x)) => { if self.flags().unicode() { @@ -309,7 +309,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { } } Ast::Class(ast::Class::Unicode(ref x)) => { - let cls = hir::Class::Unicode(try!(self.hir_unicode_class(x))); + let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); self.push(HirFrame::Expr(Hir::class(cls))); } Ast::Class(ast::Class::Bracketed(ref ast)) => { @@ -324,8 +324,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::Expr(expr)); } else { let mut cls = self.pop().unwrap().unwrap_class_bytes(); - try!(self.bytes_fold_and_negate( - &ast.span, ast.negated, &mut cls)); + self.bytes_fold_and_negate( + &ast.span, ast.negated, &mut cls)?; if cls.iter().next().is_none() { return Err(self.error( ast.span, ErrorKind::EmptyClassNotAllowed)); @@ -402,7 +402,7 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::ClassUnicode(cls)); } else { let mut cls = self.pop().unwrap().unwrap_class_bytes(); - let byte = try!(self.class_literal_byte(x)); + let byte = self.class_literal_byte(x)?; cls.push(hir::ClassBytesRange::new(byte, byte)); self.push(HirFrame::ClassBytes(cls)); } @@ -414,8 +414,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::ClassUnicode(cls)); } else { let mut cls = self.pop().unwrap().unwrap_class_bytes(); - let start = try!(self.class_literal_byte(&x.start)); - let end = try!(self.class_literal_byte(&x.end)); + let start = self.class_literal_byte(&x.start)?; + let end = self.class_literal_byte(&x.end)?; cls.push(hir::ClassBytesRange::new(start, end)); self.push(HirFrame::ClassBytes(cls)); } @@ -433,13 +433,13 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { for &(s, e) in ascii_class(&x.kind) { cls.push(hir::ClassBytesRange::new(s as u8, e as u8)); } - 
try!(self.bytes_fold_and_negate( - &x.span, x.negated, &mut cls)); + self.bytes_fold_and_negate( + &x.span, x.negated, &mut cls)?; self.push(HirFrame::ClassBytes(cls)); } } ast::ClassSetItem::Unicode(ref x) => { - let xcls = try!(self.hir_unicode_class(x)); + let xcls = self.hir_unicode_class(x)?; let mut cls = self.pop().unwrap().unwrap_class_unicode(); cls.union(&xcls); self.push(HirFrame::ClassUnicode(cls)); @@ -467,8 +467,8 @@ impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { self.push(HirFrame::ClassUnicode(cls2)); } else { let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); - try!(self.bytes_fold_and_negate( - &ast.span, ast.negated, &mut cls1)); + self.bytes_fold_and_negate( + &ast.span, ast.negated, &mut cls1)?; let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); cls2.union(&cls1); @@ -604,7 +604,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { } fn hir_literal(&self, lit: &ast::Literal) -> Result { - let ch = match try!(self.literal_to_char(lit)) { + let ch = match self.literal_to_char(lit)? { byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), hir::Literal::Unicode(ch) => ch, }; @@ -915,7 +915,7 @@ impl<'t, 'p> TranslatorI<'t, 'p> { /// Return a scalar byte value suitable for use as a literal in a byte /// character class. fn class_literal_byte(&self, ast: &ast::Literal) -> Result { - match try!(self.literal_to_char(ast)) { + match self.literal_to_char(ast)? 
{ hir::Literal::Byte(byte) => Ok(byte), hir::Literal::Unicode(ch) => { if ch <= 0x7F as char { diff --git a/regex-syntax/src/hir/visitor.rs b/regex-syntax/src/hir/visitor.rs index 716a96d9b4..58be7ad078 100644 --- a/regex-syntax/src/hir/visitor.rs +++ b/regex-syntax/src/hir/visitor.rs @@ -119,7 +119,7 @@ impl<'a> HeapVisitor<'a> { visitor.start(); loop { - try!(visitor.visit_pre(hir)); + visitor.visit_pre(hir)?; if let Some(x) = self.induct(hir) { let child = x.child(); self.stack.push((hir, x)); @@ -128,7 +128,7 @@ impl<'a> HeapVisitor<'a> { } // No induction means we have a base case, so we can post visit // it now. - try!(visitor.visit_post(hir)); + visitor.visit_post(hir)?; // At this point, we now try to pop our call stack until it is // either empty or we hit another inductive case. @@ -141,7 +141,7 @@ impl<'a> HeapVisitor<'a> { // inductive steps to process. if let Some(x) = self.pop(frame) { if let Frame::Alternation {..} = x { - try!(visitor.visit_alternation_in()); + visitor.visit_alternation_in()?; } hir = x.child(); self.stack.push((post_hir, x)); @@ -149,7 +149,7 @@ impl<'a> HeapVisitor<'a> { } // Otherwise, we've finished visiting all the child nodes for // this HIR, so we can post visit it now. - try!(visitor.visit_post(post_hir)); + visitor.visit_post(post_hir)?; } } } diff --git a/regex-syntax/src/parser.rs b/regex-syntax/src/parser.rs index e28d7f3263..5b82e3049f 100644 --- a/regex-syntax/src/parser.rs +++ b/regex-syntax/src/parser.rs @@ -199,8 +199,8 @@ impl Parser { /// Parse the regular expression into a high level intermediate /// representation. 
pub fn parse(&mut self, pattern: &str) -> Result { - let ast = try!(self.ast.parse(pattern)); - let hir = try!(self.hir.translate(pattern, &ast)); + let ast = self.ast.parse(pattern)?; + let hir = self.hir.translate(pattern, &ast)?; Ok(hir) } } diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 153c8a50ee..af932f80bd 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -244,7 +244,7 @@ enum CanonicalClassQuery { pub fn class<'a>(query: ClassQuery<'a>) -> Result { use self::CanonicalClassQuery::*; - match try!(query.canonicalize()) { + match query.canonicalize()? { Binary(name) => { property_set(property_bool::BY_NAME, name) .map(hir_class) @@ -255,9 +255,9 @@ pub fn class<'a>(query: ClassQuery<'a>) -> Result { } GeneralCategory("Assigned") => { let mut cls = - try!(property_set(general_category::BY_NAME, "Unassigned") + property_set(general_category::BY_NAME, "Unassigned") .map(hir_class) - .ok_or(Error::PropertyNotFound)); + .ok_or(Error::PropertyNotFound)?; cls.negate(); Ok(cls) } @@ -276,7 +276,7 @@ pub fn class<'a>(query: ClassQuery<'a>) -> Result { } ByValue { property_name: "Age", property_value } => { let mut class = hir::ClassUnicode::empty(); - for set in try!(ages(property_value)) { + for set in ages(property_value)? 
{ class.union(&hir_class(set)); } Ok(class) diff --git a/src/compile.rs b/src/compile.rs index c765f01416..6f2321ec05 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -141,11 +141,11 @@ impl Compiler { self.compiled.is_anchored_start = expr.is_anchored_start(); self.compiled.is_anchored_end = expr.is_anchored_end(); if self.compiled.needs_dotstar() { - dotstar_patch = try!(self.c_dotstar()); + dotstar_patch = self.c_dotstar()?; self.compiled.start = dotstar_patch.entry; } self.compiled.captures = vec![None]; - let patch = try!(self.c_capture(0, expr)); + let patch = self.c_capture(0, expr)?; if self.compiled.needs_dotstar() { self.fill(dotstar_patch.hole, patch.entry); } else { @@ -169,7 +169,7 @@ impl Compiler { exprs.iter().all(|e| e.is_anchored_end()); let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 }; if self.compiled.needs_dotstar() { - dotstar_patch = try!(self.c_dotstar()); + dotstar_patch = self.c_dotstar()?; self.compiled.start = dotstar_patch.entry; } else { self.compiled.start = 0; // first instruction is always split @@ -180,14 +180,14 @@ impl Compiler { for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() { self.fill_to_next(prev_hole); let split = self.push_split_hole(); - let Patch { hole, entry } = try!(self.c_capture(0, expr)); + let Patch { hole, entry } = self.c_capture(0, expr)?; self.fill_to_next(hole); self.compiled.matches.push(self.insts.len()); self.push_compiled(Inst::Match(i)); prev_hole = self.fill_split(split, Some(entry), None); } let i = exprs.len() - 1; - let Patch { hole, entry } = try!(self.c_capture(0, &exprs[i])); + let Patch { hole, entry } = self.c_capture(0, &exprs[i])?; self.fill(prev_hole, entry); self.fill_to_next(hole); self.compiled.matches.push(self.insts.len()); @@ -259,7 +259,7 @@ impl Compiler { use prog; use syntax::hir::HirKind::*; - try!(self.check_size()); + self.check_size()?; match *expr.kind() { Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }), 
Literal(hir::Literal::Unicode(c)) => { @@ -371,7 +371,7 @@ impl Compiler { } else { let entry = self.insts.len(); let hole = self.push_hole(InstHole::Save { slot: first_slot }); - let patch = try!(self.c(expr)); + let patch = self.c(expr)?; self.fill(hole, patch.entry); self.fill_to_next(patch.hole); let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 }); @@ -381,17 +381,17 @@ impl Compiler { fn c_dotstar(&mut self) -> Result { Ok(if !self.compiled.only_utf8() { - try!(self.c(&Hir::repetition(hir::Repetition { + self.c(&Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, greedy: false, hir: Box::new(Hir::any(true)), - }))) + }))? } else { - try!(self.c(&Hir::repetition(hir::Repetition { + self.c(&Hir::repetition(hir::Repetition { kind: hir::RepetitionKind::ZeroOrMore, greedy: false, hir: Box::new(Hir::any(false)), - }))) + }))? }) } @@ -404,9 +404,9 @@ impl Compiler { Box::new(chars.iter()) }; let first = *chars.next().expect("non-empty literal"); - let Patch { mut hole, entry } = try!(self.c_char(first)); + let Patch { mut hole, entry } = self.c_char(first)?; for &c in chars { - let p = try!(self.c_char(c)); + let p = self.c_char(c)?; self.fill(hole, p.entry); hole = p.hole; } @@ -445,9 +445,9 @@ impl Compiler { Box::new(bytes.iter()) }; let first = *bytes.next().expect("non-empty literal"); - let Patch { mut hole, entry } = try!(self.c_byte(first)); + let Patch { mut hole, entry } = self.c_byte(first)?; for &b in bytes { - let p = try!(self.c_byte(b)); + let p = self.c_byte(b)?; self.fill(hole, p.entry); hole = p.hole; } @@ -498,9 +498,9 @@ impl Compiler { return Ok(Patch { hole: Hole::None, entry: self.insts.len() }) } }; - let Patch { mut hole, entry } = try!(self.c(first)); + let Patch { mut hole, entry } = self.c(first)?; for e in exprs { - let p = try!(self.c(e)); + let p = self.c(e)?; self.fill(hole, p.entry); hole = p.hole; } @@ -523,7 +523,7 @@ impl Compiler { self.fill_to_next(prev_hole); let split = 
self.push_split_hole(); let prev_entry = self.insts.len(); - let Patch { hole, entry } = try!(self.c(e)); + let Patch { hole, entry } = self.c(e)?; if prev_entry == self.insts.len() { // TODO(burntsushi): It is kind of silly that we don't support // empty-subexpressions in alternates, but it is supremely @@ -538,7 +538,7 @@ impl Compiler { prev_hole = self.fill_split(split, Some(entry), None); } let prev_entry = self.insts.len(); - let Patch { hole, entry } = try!(self.c(&exprs[exprs.len() - 1])); + let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?; if prev_entry == self.insts.len() { // TODO(burntsushi): See TODO above. return Err(Error::Syntax( @@ -571,7 +571,7 @@ impl Compiler { fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result { let split_entry = self.insts.len(); let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr)); + let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?; let split_hole = if greedy { self.fill_split(split, Some(entry_rep), None) @@ -585,7 +585,7 @@ impl Compiler { fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result { let split_entry = self.insts.len(); let split = self.push_split_hole(); - let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr)); + let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?; self.fill(hole_rep, split_entry); let split_hole = if greedy { @@ -597,7 +597,7 @@ impl Compiler { } fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result { - let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr)); + let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?; self.fill_to_next(hole_rep); let split = self.push_split_hole(); @@ -616,8 +616,8 @@ impl Compiler { min: u32, ) -> Result { let min = u32_to_usize(min); - let patch_concat = try!(self.c_concat(iter::repeat(expr).take(min))); - let patch_rep = try!(self.c_repeat_zero_or_more(expr, greedy)); + let patch_concat 
= self.c_concat(iter::repeat(expr).take(min))?; + let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?; self.fill(patch_concat.hole, patch_rep.entry); Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry }) } @@ -630,7 +630,7 @@ impl Compiler { max: u32, ) -> Result { let (min, max) = (u32_to_usize(min), u32_to_usize(max)); - let patch_concat = try!(self.c_concat(iter::repeat(expr).take(min))); + let patch_concat = self.c_concat(iter::repeat(expr).take(min))?; let initial_entry = patch_concat.entry; if min == max { return Ok(patch_concat); @@ -659,7 +659,7 @@ impl Compiler { for _ in min..max { self.fill_to_next(prev_hole); let split = self.push_split_hole(); - let Patch { hole, entry } = try!(self.c(expr)); + let Patch { hole, entry } = self.c(expr)?; prev_hole = hole; if greedy { holes.push(self.fill_split(split, Some(entry), None)); @@ -889,7 +889,7 @@ impl<'a, 'b> CompileClass<'a, 'b> { Some(utf8_seq) => utf8_seq, }; if is_last_range && it.peek().is_none() { - let Patch { hole, entry } = try!(self.c_utf8_seq(&utf8_seq)); + let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; holes.push(hole); self.c.fill(last_split, entry); last_split = Hole::None; @@ -902,7 +902,7 @@ impl<'a, 'b> CompileClass<'a, 'b> { } self.c.fill_to_next(last_split); last_split = self.c.push_split_hole(); - let Patch { hole, entry } = try!(self.c_utf8_seq(&utf8_seq)); + let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?; holes.push(hole); last_split = self.c.fill_split(last_split, Some(entry), None); } diff --git a/src/error.rs b/src/error.rs index b02aa3e40d..b068201f6d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -66,11 +66,11 @@ impl fmt::Debug for Error { match *self { Error::Syntax(ref err) => { let hr: String = repeat('~').take(79).collect(); - try!(writeln!(f, "Syntax(")); - try!(writeln!(f, "{}", hr)); - try!(writeln!(f, "{}", err)); - try!(writeln!(f, "{}", hr)); - try!(write!(f, ")")); + writeln!(f, "Syntax(")?; + writeln!(f, "{}", hr)?; + writeln!(f, "{}", 
err)?; + writeln!(f, "{}", hr)?; + write!(f, ")")?; Ok(()) } Error::CompiledTooBig(limit) => { diff --git a/src/exec.rs b/src/exec.rs index 1358fd99bb..73f9550970 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -232,7 +232,7 @@ impl ExecBuilder { .allow_invalid_utf8(!self.only_utf8) .nest_limit(self.options.nest_limit) .build(); - let expr = try!(parser.parse(pat)); + let expr = parser.parse(pat)?; bytes = bytes || !expr.is_always_utf8(); if !expr.is_anchored_start() && expr.is_any_anchored_start() { @@ -293,26 +293,26 @@ impl ExecBuilder { }); return Ok(Exec { ro: ro, cache: CachedThreadLocal::new() }); } - let parsed = try!(self.parse()); - let mut nfa = try!( + let parsed = self.parse()?; + let mut nfa = Compiler::new() .size_limit(self.options.size_limit) .bytes(self.bytes || parsed.bytes) .only_utf8(self.only_utf8) - .compile(&parsed.exprs)); - let mut dfa = try!( + .compile(&parsed.exprs)?; + let mut dfa = Compiler::new() .size_limit(self.options.size_limit) .dfa(true) .only_utf8(self.only_utf8) - .compile(&parsed.exprs)); - let mut dfa_reverse = try!( + .compile(&parsed.exprs)?; + let mut dfa_reverse = Compiler::new() .size_limit(self.options.size_limit) .dfa(true) .only_utf8(self.only_utf8) .reverse(true) - .compile(&parsed.exprs)); + .compile(&parsed.exprs)?; let prefixes = parsed.prefixes.unambiguous_prefixes(); let suffixes = parsed.suffixes.unambiguous_suffixes(); diff --git a/src/prog.rs b/src/prog.rs index 4262aa96e6..198e1f45c4 100644 --- a/src/prog.rs +++ b/src/prog.rs @@ -187,25 +187,23 @@ impl fmt::Debug for Program { for (pc, inst) in self.iter().enumerate() { match *inst { Match(slot) => { - try!(write!(f, "{:04} Match({:?})", pc, slot)) + write!(f, "{:04} Match({:?})", pc, slot)? 
} Save(ref inst) => { let s = format!("{:04} Save({})", pc, inst.slot); - try!(write!(f, "{}", with_goto(pc, inst.goto, s))); + write!(f, "{}", with_goto(pc, inst.goto, s))?; } Split(ref inst) => { - try!(write!(f, "{:04} Split({}, {})", - pc, inst.goto1, inst.goto2)); + write!( + f, "{:04} Split({}, {})", pc, inst.goto1, inst.goto2)?; } EmptyLook(ref inst) => { let s = format!("{:?}", inst.look); - try!(write!(f, "{:04} {}", - pc, with_goto(pc, inst.goto, s))); + write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; } Char(ref inst) => { let s = format!("{:?}", inst.c); - try!(write!(f, "{:04} {}", - pc, with_goto(pc, inst.goto, s))); + write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; } Ranges(ref inst) => { let ranges = inst.ranges @@ -213,22 +211,21 @@ impl fmt::Debug for Program { .map(|r| format!("{:?}-{:?}", r.0, r.1)) .collect::>() .join(", "); - try!(write!(f, "{:04} {}", - pc, with_goto(pc, inst.goto, ranges))); + write!( + f, "{:04} {}", pc, with_goto(pc, inst.goto, ranges))?; } Bytes(ref inst) => { let s = format!( "Bytes({}, {})", visible_byte(inst.start), visible_byte(inst.end)); - try!(write!(f, "{:04} {}", - pc, with_goto(pc, inst.goto, s))); + write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?; } } if pc == self.start { - try!(write!(f, " (start)")); + write!(f, " (start)")?; } - try!(write!(f, "\n")); + write!(f, "\n")?; } Ok(()) } From 9510fe1a621753a05f781892c26cf522efbf2e0a Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 12:59:47 -0400 Subject: [PATCH 03/10] regex: disable octal syntax by default This commit disables octal syntax by default, which will permit us to produce useful error messages if a user tried to invoke a backreference. This commit adds a new `octal` method to RegexBuilder and RegexSetBuilder which permits callers to re-enable octal syntax. 
See #457 --- src/exec.rs | 6 +----- src/lib.rs | 5 +++-- src/re_builder.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ tests/test_default.rs | 10 ++++++++++ 4 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/exec.rs b/src/exec.rs index 73f9550970..9133a0ddd4 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -218,11 +218,7 @@ impl ExecBuilder { for pat in &self.options.pats { let mut parser = ParserBuilder::new() - // TODO(burntsushi): Disable octal in regex 1.0. Nobody - // uses it, and we'll get better error messages when - // someone tries to use a backreference. Provide a new - // opt-in toggle for it though. - .octal(true) + .octal(self.options.octal) .case_insensitive(self.options.case_insensitive) .multi_line(self.options.multi_line) .dot_matches_new_line(self.options.dot_matches_new_line) diff --git a/src/lib.rs b/src/lib.rs index f82eb9c1ce..7d768eea76 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -445,7 +445,7 @@ assert_eq!(&cap[0], "abc"); \n new line \r carriage return \v vertical tab (\x0B) -\123 octal character code (up to three digits) +\123 octal character code (up to three digits) (when enabled) \x7F hex character code (exactly two digits) \x{10FFFF} any hex character code corresponding to a Unicode code point \u007F hex character code (exactly four digits) @@ -619,7 +619,8 @@ determine whether a byte is a word byte or not. 5. Hexadecimal notation can be used to specify arbitrary bytes instead of Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that -matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation. +matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when +enabled. 6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value. When the `s` flag is enabled, `.` matches any byte. 
diff --git a/src/re_builder.rs b/src/re_builder.rs index ae60ecffa4..ca3d62536a 100644 --- a/src/re_builder.rs +++ b/src/re_builder.rs @@ -22,6 +22,7 @@ pub struct RegexOptions { pub swap_greed: bool, pub ignore_whitespace: bool, pub unicode: bool, + pub octal: bool, } impl Default for RegexOptions { @@ -37,6 +38,7 @@ impl Default for RegexOptions { swap_greed: false, ignore_whitespace: false, unicode: true, + octal: false, } } } @@ -142,6 +144,26 @@ impl RegexBuilder { self } + /// Whether to support octal syntax or not. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints in + /// a regular expression. For example, `a`, `\x61`, `\u0061` and + /// `\141` are all equivalent regular expressions, where the last example + /// shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, it does + /// make good error messages harder. That is, in PCRE based regex engines, + /// syntax like `\0` invokes a backreference, which is explicitly + /// unsupported in Rust's regex engine. However, many users expect it to + /// be supported. Therefore, when octal support is disabled, the error + /// message will explicitly mention that backreferences aren't supported. + /// + /// Octal syntax is disabled by default. + pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder { + self.0.octal = yes; + self + } + /// Set the approximate size limit of the compiled regular expression. /// /// This roughly corresponds to the number of bytes occupied by a single @@ -283,6 +305,26 @@ impl RegexSetBuilder { self } + /// Whether to support octal syntax or not. + /// + /// Octal syntax is a little-known way of uttering Unicode codepoints in + /// a regular expression. For example, `a`, `\x61`, `\u0061` and + /// `\141` are all equivalent regular expressions, where the last example + /// shows octal syntax. + /// + /// While supporting octal syntax isn't in and of itself a problem, it does + /// make good error messages harder. 
That is, in PCRE based regex engines, + /// syntax like `\0` invokes a backreference, which is explicitly + /// unsupported in Rust's regex engine. However, many users expect it to + /// be supported. Therefore, when octal support is disabled, the error + /// message will explicitly mention that backreferences aren't supported. + /// + /// Octal syntax is disabled by default. + pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder { + self.0.octal = yes; + self + } + /// Set the approximate size limit of the compiled regular expression. /// /// This roughly corresponds to the number of bytes occupied by a single diff --git a/tests/test_default.rs b/tests/test_default.rs index e6cf92fa2e..0f8a0c2dba 100644 --- a/tests/test_default.rs +++ b/tests/test_default.rs @@ -75,3 +75,13 @@ fn disallow_non_utf8() { assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err()); assert!(regex::Regex::new(r"(?-u)☃").is_err()); } + +#[test] +fn disallow_octal() { + assert!(regex::Regex::new(r"\0").is_err()); +} + +#[test] +fn allow_octal() { + assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok()); +} From 8ad256b672113e24e98afa73650dc045dc407aef Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 13:11:29 -0400 Subject: [PATCH 04/10] unicode: remove implementations of encode_utf8 This commit removes our explicit implementations of encode_utf8 and replaces them with uses of `char::encode_utf8`, which was added to the standard library in Rust 1.15. 
--- regex-syntax/src/hir/literal/mod.rs | 8 ++---- regex-syntax/src/unicode.rs | 36 ------------------------ src/utf8.rs | 43 ++++------------------------- 3 files changed, 9 insertions(+), 78 deletions(-) diff --git a/regex-syntax/src/hir/literal/mod.rs b/regex-syntax/src/hir/literal/mod.rs index 3113ec970f..b7d9c1db27 100644 --- a/regex-syntax/src/hir/literal/mod.rs +++ b/regex-syntax/src/hir/literal/mod.rs @@ -19,7 +19,6 @@ use std::mem; use std::ops; use hir::{self, Hir, HirKind}; -use unicode; /// A set of literal byte strings extracted from a regular expression. /// @@ -603,9 +602,8 @@ impl Literals { fn prefixes(expr: &Hir, lits: &mut Literals) { match *expr.kind() { HirKind::Literal(hir::Literal::Unicode(c)) => { - let mut buf = [0u8; 4]; - let i = unicode::encode_utf8(c, &mut buf).unwrap(); - lits.cross_add(&buf[..i]); + let mut buf = [0; 4]; + lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); } HirKind::Literal(hir::Literal::Byte(b)) => { lits.cross_add(&[b]); @@ -685,7 +683,7 @@ fn suffixes(expr: &Hir, lits: &mut Literals) { match *expr.kind() { HirKind::Literal(hir::Literal::Unicode(c)) => { let mut buf = [0u8; 4]; - let i = unicode::encode_utf8(c, &mut buf).unwrap(); + let i = c.encode_utf8(&mut buf).len(); let mut buf = &mut buf[..i]; buf.reverse(); lits.cross_add(buf); diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index af932f80bd..ed84d98b07 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -25,42 +25,6 @@ pub enum Error { PropertyValueNotFound, } -/// Encode the given Unicode character to `dst` as a single UTF-8 sequence. -/// -/// If `dst` is not long enough, then `None` is returned. Otherwise, the number -/// of bytes written is returned. -pub fn encode_utf8(character: char, dst: &mut [u8]) -> Option { - // TODO: Remove this function once we move to at least Rust 1.15, which - // provides char::encode_utf8 for us. 
- const TAG_CONT: u8 = 0b1000_0000; - const TAG_TWO: u8 = 0b1100_0000; - const TAG_THREE: u8 = 0b1110_0000; - const TAG_FOUR: u8 = 0b1111_0000; - - let code = character as u32; - if code <= 0x7F && !dst.is_empty() { - dst[0] = code as u8; - Some(1) - } else if code <= 0x7FF && dst.len() >= 2 { - dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO; - dst[1] = (code & 0x3F) as u8 | TAG_CONT; - Some(2) - } else if code <= 0xFFFF && dst.len() >= 3 { - dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE; - dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - dst[2] = (code & 0x3F) as u8 | TAG_CONT; - Some(3) - } else if dst.len() >= 4 { - dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR; - dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; - dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - dst[3] = (code & 0x3F) as u8 | TAG_CONT; - Some(4) - } else { - None - } -} - /// An iterator over a codepoint's simple case equivalence class. #[derive(Debug)] pub struct SimpleFoldIter(::std::slice::Iter<'static, char>); diff --git a/src/utf8.rs b/src/utf8.rs index 75616f5fbd..ac38e63502 100644 --- a/src/utf8.rs +++ b/src/utf8.rs @@ -38,37 +38,6 @@ pub fn next_utf8(text: &[u8], i: usize) -> usize { i + inc } -/// Encode the given Unicode character to `dst` as a single UTF-8 sequence. -/// -/// If `dst` is not long enough, then `None` is returned. Otherwise, the number -/// of bytes written is returned. 
-#[allow(dead_code)] -#[inline] -pub fn encode_utf8(character: char, dst: &mut [u8]) -> Option { - let code = character as u32; - if code <= 0x7F && !dst.is_empty() { - dst[0] = code as u8; - Some(1) - } else if code <= 0x7FF && dst.len() >= 2 { - dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO; - dst[1] = (code & 0x3F) as u8 | TAG_CONT; - Some(2) - } else if code <= 0xFFFF && dst.len() >= 3 { - dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE; - dst[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - dst[2] = (code & 0x3F) as u8 | TAG_CONT; - Some(3) - } else if dst.len() >= 4 { - dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR; - dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; - dst[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - dst[3] = (code & 0x3F) as u8 | TAG_CONT; - Some(4) - } else { - None - } -} - /// Decode a single UTF-8 sequence into a single Unicode codepoint from `src`. /// /// If no valid UTF-8 sequence could be found, then `None` is returned. @@ -184,14 +153,14 @@ mod tests { use super::{ TAG_CONT, TAG_TWO, TAG_THREE, TAG_FOUR, - decode_utf8, decode_last_utf8, encode_utf8, + decode_utf8, decode_last_utf8, }; #[test] fn prop_roundtrip() { fn p(given_cp: char) -> bool { let mut tmp = [0; 4]; - let encoded_len = encode_utf8(given_cp, &mut tmp).unwrap(); + let encoded_len = given_cp.encode_utf8(&mut tmp).len(); let (got_cp, got_len) = decode_utf8(&tmp[..encoded_len]).unwrap(); encoded_len == got_len && given_cp == got_cp } @@ -202,7 +171,7 @@ mod tests { fn prop_roundtrip_last() { fn p(given_cp: char) -> bool { let mut tmp = [0; 4]; - let encoded_len = encode_utf8(given_cp, &mut tmp).unwrap(); + let encoded_len = given_cp.encode_utf8(&mut tmp).len(); let (got_cp, got_len) = decode_last_utf8(&tmp[..encoded_len]).unwrap(); encoded_len == got_len && given_cp == got_cp @@ -214,7 +183,7 @@ mod tests { fn prop_encode_matches_std() { fn p(cp: char) -> bool { let mut got = [0; 4]; - let n = encode_utf8(cp, &mut got).unwrap(); + let n = cp.encode_utf8(&mut got).len(); let expected 
= cp.to_string(); &got[..n] == expected.as_bytes() } @@ -225,7 +194,7 @@ mod tests { fn prop_decode_matches_std() { fn p(given_cp: char) -> bool { let mut tmp = [0; 4]; - let n = encode_utf8(given_cp, &mut tmp).unwrap(); + let n = given_cp.encode_utf8(&mut tmp).len(); let (got_cp, _) = decode_utf8(&tmp[..n]).unwrap(); let expected_cp = str::from_utf8(&tmp[..n]).unwrap().chars().next().unwrap(); @@ -238,7 +207,7 @@ mod tests { fn prop_decode_last_matches_std() { fn p(given_cp: char) -> bool { let mut tmp = [0; 4]; - let n = encode_utf8(given_cp, &mut tmp).unwrap(); + let n = given_cp.encode_utf8(&mut tmp).len(); let (got_cp, _) = decode_last_utf8(&tmp[..n]).unwrap(); let expected_cp = str::from_utf8(&tmp[..n]).unwrap() From 7a52acb6031cc532a2d97e5fc3ddf460dff8e7d8 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 13:20:09 -0400 Subject: [PATCH 05/10] regex: ban (?-u:\B) for Unicode regexes The issue with the ASCII version of \B is that it can match between code units of UTF-8, which means it can cause match indices reported to be on invalid UTF-8 boundaries. Therefore, similar to things like `(?-u:\xFF)`, we ban negated ASCII word boundaries from Unicode regular expressions. Normal ASCII word boundaries remain accessible from Unicode regular expressions. See #457 --- regex-syntax/src/hir/translate.rs | 20 ++++++++------------ tests/bytes.rs | 10 ++++++++++ tests/regression.rs | 5 ----- tests/word_boundary_unicode.rs | 2 -- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 0cb60acfd8..8427fb2408 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -724,13 +724,10 @@ impl<'t, 'p> TranslatorI<'t, 'p> { // It is possible for negated ASCII word boundaries to // match at invalid UTF-8 boundaries, even when searching // valid UTF-8. - // - // TODO(ag): Enable this error when regex goes to 1.0. 
- // Otherwise, it is too steep of a breaking change. - // if !self.trans().allow_invalid_utf8 { - // return Err(self.error( - // asst.span, ErrorKind::InvalidUtf8)); - // } + if !self.trans().allow_invalid_utf8 { + return Err(self.error( + asst.span, ErrorKind::InvalidUtf8)); + } hir::WordBoundary::AsciiNegate }) } @@ -1511,11 +1508,10 @@ mod tests { t_bytes(r"(?-u)\B"), hir_word(hir::WordBoundary::AsciiNegate)); - // TODO(ag): Enable this tests when regex goes to 1.0. - // assert_eq!(t_err(r"(?-u)\B"), TestError { - // kind: hir::ErrorKind::InvalidUtf8, - // span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)), - // }); + assert_eq!(t_err(r"(?-u)\B"), TestError { + kind: hir::ErrorKind::InvalidUtf8, + span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)), + }); } #[test] diff --git a/tests/bytes.rs b/tests/bytes.rs index 0285950a1a..0b0f008d6e 100644 --- a/tests/bytes.rs +++ b/tests/bytes.rs @@ -60,3 +60,13 @@ matiter!(invalidutf8_anchor3, fn negated_full_byte_range() { assert!(::regex::bytes::Regex::new(r#"(?-u)[^\x00-\xff]"#).is_err()); } + +matiter!(word_boundary_ascii1, r"(?-u:\B)x(?-u:\B)", "áxβ"); +matiter!(word_boundary_ascii2, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5)); + +// See: https://github.com/rust-lang/regex/issues/264 +mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0))); +mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0))); + +// See: https://github.com/rust-lang/regex/issues/271 +mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8))); diff --git a/tests/regression.rs b/tests/regression.rs index 1bc79ac7e7..a09333e2c3 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -61,10 +61,6 @@ matiter!(word_boundary_dfa, r"\b", "a b c", // See: https://github.com/rust-lang/regex/issues/268 matiter!(partial_anchor, r"^a|b", "ba", (0, 1)); -// See: https://github.com/rust-lang/regex/issues/264 -mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 
0))); -mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0))); - // See: https://github.com/rust-lang/regex/issues/280 ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false); ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false); @@ -77,7 +73,6 @@ mat!(lits_unambiguous2, r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P[0-9]+)$", "CIMG2341", Some((0, 8)), Some((0, 4)), None, Some((0, 4)), Some((4, 8))); // See: https://github.com/rust-lang/regex/issues/271 -mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8))); mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4))); mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4))); mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4))); diff --git a/tests/word_boundary_unicode.rs b/tests/word_boundary_unicode.rs index 43612a91ac..c41355ffc4 100644 --- a/tests/word_boundary_unicode.rs +++ b/tests/word_boundary_unicode.rs @@ -4,5 +4,3 @@ matiter!(unicode1, r"\bx\b", "áxβ"); matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3)); matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3)); -matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ"); -matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (5, 5)); From d1c536aa77d50f5a2edf8d9de6e1a4910ebd1060 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 13:24:52 -0400 Subject: [PATCH 06/10] syntax: fixes for Rust 1.20.0 Make sure we can run tests for regex-syntax on Rust 1.20.0. 
--- regex-syntax/src/hir/translate.rs | 7 +++---- regex-syntax/src/lib.rs | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 8427fb2408..7939ab59ad 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -1023,8 +1023,8 @@ fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { use ast::ClassAsciiKind::*; - // TODO: Get rid of these consts, which appear necessary for older - // versions of Rust. + // The contortions below with `const` appear necessary for older versions + // of Rust. type T = &'static [(char, char)]; match *kind { Alnum => { @@ -1599,8 +1599,7 @@ mod tests { fn escape() { assert_eq!( t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), - hir_lit(r"\.+*?()|[]{}^$#"), - ); + hir_lit(r"\.+*?()|[]{}^$#")); } #[test] diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs index 27a1d45fdb..2c785bfc5f 100644 --- a/regex-syntax/src/lib.rs +++ b/regex-syntax/src/lib.rs @@ -205,8 +205,7 @@ mod tests { fn escape_meta() { assert_eq!( escape(r"\.+*?()|[]{}^$#&-~"), - r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string(), - ); + r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string()); } #[test] From 0d93dc8505f1919b89cd48e1d0acf47fa2334210 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 13:31:30 -0400 Subject: [PATCH 07/10] regex: remove From impl This removes a public `From` impl that automatically converts errors from the regex-syntax crate to a regex::Error. This actually causes regex-syntax to be a public dependency of regex, which was an oversight. We now remove it, which completely breaks any source code coupling between regex and regex-syntax. 
See #457 --- src/error.rs | 8 -------- src/exec.rs | 4 +++- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/error.rs b/src/error.rs index b068201f6d..fff1969b47 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,8 +11,6 @@ use std::fmt; use std::iter::repeat; -use syntax; - /// An error that occurred during parsing or compiling a regular expression. #[derive(Clone, PartialEq)] pub enum Error { @@ -84,9 +82,3 @@ impl fmt::Debug for Error { } } } - -impl From for Error { - fn from(err: syntax::Error) -> Error { - Error::Syntax(err.to_string()) - } -} diff --git a/src/exec.rs b/src/exec.rs index 9133a0ddd4..e6079e6c7a 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -228,7 +228,9 @@ impl ExecBuilder { .allow_invalid_utf8(!self.only_utf8) .nest_limit(self.options.nest_limit) .build(); - let expr = parser.parse(pat)?; + let expr = parser + .parse(pat) + .map_err(|e| Error::Syntax(e.to_string()))?; bytes = bytes || !expr.is_always_utf8(); if !expr.is_anchored_start() && expr.is_any_anchored_start() { From a307c8bdb75bbdb9644bbbce34739715f5d3fcf4 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 13:50:24 -0400 Subject: [PATCH 08/10] build: remove simd-accel feature This feature is no longer used, instead we rely on runtime CPU feature detection. We kept the feature around as a no-op for backwards compatibility, but no longer need to support it for regex 1.0. --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a899854e55..e3aa7579ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,9 +54,6 @@ unstable = ["pattern"] # Enable to use the unstable pattern traits defined in std. This is enabled # by default if the unstable feature is enabled. pattern = [] -# Enable to use simd acceleration. -# Note that this is deprecated and is a no-op. 
-simd-accel = [] [lib] # There are no benchmarks in the library code itself From f704c79fd621ed117d175c5d6b32311e10ac3644 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 28 Apr 2018 13:51:55 -0400 Subject: [PATCH 09/10] build: gate regex API on 'use_std' feature This commit adds a new 'use_std' feature and enables it by default. This permits us to one day add support for building regex without 'use_std' (but with 'alloc', probably) by avoiding the introduction of incompatibilities. Namely, this setup ensures that all of today's uses of '--no-default-features' won't compile without also adding the 'use_std' feature. Closes #457 --- Cargo.toml | 11 +++++++---- build.rs | 2 +- src/lib.rs | 8 ++++++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e3aa7579ff..bff622759a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,10 +43,13 @@ quickcheck = { version = "0.6", default-features = false } rand = "0.4" [features] -# We don't enable any features by default currently, but if the compiler -# supports a specific type of feature, then regex's build.rs might enable -# some default features. -default = [] +default = ["use_std"] +# The 'use_std' feature permits the regex crate to use the standard library. +# This is intended to support future use cases where the regex crate may be +# able to compile without std, and instead just rely on 'core' and 'alloc' +# (for example). Currently, this isn't supported, and removing the 'use_std' +# feature will prevent regex from compiling. +use_std = [] # A blanket feature that governs whether unstable features are enabled or not. # Unstable features are disabled by default, and typically rely on unstable # features in rustc itself. diff --git a/build.rs b/build.rs index ad27991206..a5d12a108b 100644 --- a/build.rs +++ b/build.rs @@ -12,7 +12,7 @@ fn main() { let version = String::from_utf8(output).unwrap(); // If we're using nightly Rust, then we can enable vector optimizations. 
- // Note that these aren't actually activated unless the `nightly` feature + // Note that these aren't actually activated unless the `unstable` feature // is enabled. // // We also don't activate these if we've explicitly disabled auto diff --git a/src/lib.rs b/src/lib.rs index 7d768eea76..8123eba6a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -530,11 +530,17 @@ extern crate quickcheck; extern crate regex_syntax as syntax; extern crate utf8_ranges; +#[cfg(feature = "use_std")] pub use error::Error; +#[cfg(feature = "use_std")] pub use re_builder::unicode::*; +#[cfg(feature = "use_std")] pub use re_builder::set_unicode::*; +#[cfg(feature = "use_std")] pub use re_set::unicode::*; +#[cfg(feature = "use_std")] pub use re_trait::Locations; +#[cfg(feature = "use_std")] pub use re_unicode::{ Regex, Match, Captures, CaptureNames, Matches, CaptureMatches, SubCaptureMatches, @@ -629,6 +635,7 @@ When the `s` flag is enabled, `.` matches any byte. In general, one should expect performance on `&[u8]` to be roughly similar to performance on `&str`. */ +#[cfg(feature = "use_std")] pub mod bytes { pub use re_builder::bytes::*; pub use re_builder::set_bytes::*; @@ -664,6 +671,7 @@ mod vector; /// testing different matching engines and supporting the `regex-debug` CLI /// utility. #[doc(hidden)] +#[cfg(feature = "use_std")] pub mod internal { pub use compile::Compiler; pub use exec::{Exec, ExecBuilder}; From 613a5f3c3ad5d4c01618c5e2f5dc3d1997f3fd04 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 1 May 2018 13:41:28 -0400 Subject: [PATCH 10/10] changelog 1.0.0 --- CHANGELOG.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b63625fff9..9432c314a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,35 @@ +1.0.0 (2018-05-01) +================== +This release marks the 1.0 release of regex. 
+ +While this release includes some breaking changes, most users of older versions +of the regex library should be able to migrate to 1.0 by simply bumping the +version number. The important changes are as follows: + +* We adopt Rust 1.20 as the new minimum supported version of Rust for regex. + We also tentatively adopt a policy that permits bumping the minimum supported + version of Rust in minor version releases of regex, but not in patch releases. + That is, with respect to semver, we do not strictly consider bumping the + minimum version of Rust to be a breaking change, but adopt a conservative + stance as a compromise. +* Octal syntax in regular expressions has been disabled by default. This + permits better error messages that inform users that backreferences aren't + available. Octal syntax can be re-enabled via the corresponding option on + `RegexBuilder`. +* `(?-u:\B)` is no longer allowed in Unicode regexes since it can match at + invalid UTF-8 code unit boundaries. `(?-u:\b)` is still allowed in Unicode + regexes. +* The `From<regex_syntax::Error>` impl has been removed. This formally removes + the public dependency on `regex-syntax`. +* A new feature, `use_std`, has been added and enabled by default. Disabling + the feature will result in a compilation error. In the future, this may + permit us to support `no_std` environments (w/ `alloc`) in a backwards + compatible way. + +For more information and discussion, please see +[1.0 release tracking issue](https://github.com/rust-lang/regex/issues/457). + + 0.2.11 (2018-05-01) =================== This release primarily contains bug fixes. Some of them resolve bugs where