Skip to content

Commit 9967e07

Browse files
committed
Add ExprBuilder, which can set the default values of flags when parsing.
Closes #172.
1 parent 7a5224f commit 9967e07

File tree

4 files changed

+152
-49
lines changed

4 files changed

+152
-49
lines changed

regex-syntax/src/lib.rs

Lines changed: 88 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,7 @@ use unicode::case_folding;
8282
use self::Expr::*;
8383
use self::Repeater::*;
8484

85-
pub use parser::is_punct;
86-
87-
/// The maximum number of nested expressions allowed.
88-
const NEST_LIMIT: usize = 200;
85+
use parser::{Flags, Parser};
8986

9087
/// A regular expression abstract syntax tree.
9188
///
@@ -236,10 +233,86 @@ pub struct ClassRange {
236233
pub end: char,
237234
}
238235

236+
/// A builder for configuring regular expression parsing.
237+
///
238+
/// This allows setting the default values of flags and other options, such
239+
/// as the maximum nesting depth.
240+
#[derive(Clone, Debug)]
241+
pub struct ExprBuilder {
242+
flags: Flags,
243+
nest_limit: usize,
244+
}
245+
246+
impl ExprBuilder {
247+
/// Create a new builder for configuring expression parsing.
248+
///
249+
/// Note that all flags are disabled by default.
250+
pub fn new() -> ExprBuilder {
251+
ExprBuilder {
252+
flags: Flags {
253+
casei: false,
254+
multi: false,
255+
dotnl: false,
256+
swap_greed: false,
257+
ignore_space: false,
258+
},
259+
nest_limit: 200,
260+
}
261+
}
262+
263+
/// Set the default value for the case insensitive (`i`) flag.
264+
pub fn case_insensitive(mut self, yes: bool) -> ExprBuilder {
265+
self.flags.casei = yes;
266+
self
267+
}
268+
269+
/// Set the default value for the multi-line matching (`m`) flag.
270+
pub fn multi_line(mut self, yes: bool) -> ExprBuilder {
271+
self.flags.multi = yes;
272+
self
273+
}
274+
275+
/// Set the default value for the any character (`s`) flag.
276+
pub fn dot_matches_new_line(mut self, yes: bool) -> ExprBuilder {
277+
self.flags.dotnl = yes;
278+
self
279+
}
280+
281+
/// Set the default value for the greedy swap (`U`) flag.
282+
pub fn swap_greed(mut self, yes: bool) -> ExprBuilder {
283+
self.flags.swap_greed = yes;
284+
self
285+
}
286+
287+
/// Set the default value for the ignore whitespace (`x`) flag.
288+
pub fn ignore_whitespace(mut self, yes: bool) -> ExprBuilder {
289+
self.flags.ignore_space = yes;
290+
self
291+
}
292+
293+
/// Set the nesting limit for regular expression parsing.
294+
///
295+
/// Regular expressions that nest more than this limit will result in a
296+
/// `StackExhausted` error.
297+
pub fn nest_limit(mut self, limit: usize) -> ExprBuilder {
298+
self.nest_limit = limit;
299+
self
300+
}
301+
302+
/// Parse a string as a regular expression using the current configuration.
303+
pub fn parse(self, s: &str) -> Result<Expr> {
304+
Parser::parse(s, self.flags).and_then(|e| e.simplify(self.nest_limit))
305+
}
306+
}
307+
239308
impl Expr {
240309
/// Parses a string into a regular expression syntax tree.
310+
///
311+
/// This is a convenience method for parsing an expression using the
312+
/// default configuration. To tweak parsing options (such as which flags
313+
/// are enabled by default), use the `ExprBuilder` type.
241314
pub fn parse(s: &str) -> Result<Expr> {
242-
parser::Parser::parse(s).and_then(|e| e.simplify())
315+
ExprBuilder::new().parse(s)
243316
}
244317

245318
/// Returns true iff the expression can be repeated by a quantifier.
@@ -257,7 +330,7 @@ impl Expr {
257330
}
258331
}
259332

260-
fn simplify(self) -> Result<Expr> {
333+
fn simplify(self, nest_limit: usize) -> Result<Expr> {
261334
fn combine_literals(es: &mut Vec<Expr>, e: Expr) {
262335
match (es.pop(), e) {
263336
(None, e) => es.push(e),
@@ -277,15 +350,15 @@ impl Expr {
277350
}
278351
}
279352
}
280-
fn simp(expr: Expr, recurse: usize) -> Result<Expr> {
281-
if recurse > NEST_LIMIT {
353+
fn simp(expr: Expr, recurse: usize, limit: usize) -> Result<Expr> {
354+
if recurse > limit {
282355
return Err(Error {
283356
pos: 0,
284357
surround: "".to_owned(),
285358
kind: ErrorKind::StackExhausted,
286359
});
287360
}
288-
let simplify = |e| simp(e, recurse + 1);
361+
let simplify = |e| simp(e, recurse + 1, limit);
289362
Ok(match expr {
290363
Repeat { e, r, greedy } => Repeat {
291364
e: Box::new(try!(simplify(*e))),
@@ -321,7 +394,7 @@ impl Expr {
321394
e => e,
322395
})
323396
}
324-
simp(self, 0)
397+
simp(self, 0, nest_limit)
325398
}
326399

327400
/// Returns true if and only if the expression is required to match from
@@ -1044,7 +1117,7 @@ mod properties;
10441117

10451118
#[cfg(test)]
10461119
mod tests {
1047-
use {NEST_LIMIT, CharClass, ClassRange, Expr};
1120+
use {CharClass, ClassRange, Expr};
10481121

10491122
fn class(ranges: &[(char, char)]) -> CharClass {
10501123
let ranges = ranges.iter().cloned()
@@ -1060,12 +1133,12 @@ mod tests {
10601133
fn stack_exhaustion() {
10611134
use std::iter::repeat;
10621135

1063-
let open: String = repeat('(').take(NEST_LIMIT).collect();
1064-
let close: String = repeat(')').take(NEST_LIMIT).collect();
1136+
let open: String = repeat('(').take(200).collect();
1137+
let close: String = repeat(')').take(200).collect();
10651138
assert!(Expr::parse(&format!("{}a{}", open, close)).is_ok());
10661139

1067-
let open: String = repeat('(').take(NEST_LIMIT + 1).collect();
1068-
let close: String = repeat(')').take(NEST_LIMIT + 1).collect();
1140+
let open: String = repeat('(').take(200 + 1).collect();
1141+
let close: String = repeat(')').take(200 + 1).collect();
10691142
assert!(Expr::parse(&format!("{}a{}", open, close)).is_err());
10701143
}
10711144

regex-syntax/src/parser.rs

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,22 @@ pub struct Parser {
3333
flags: Flags,
3434
}
3535

36-
/// An empheral type for representing the expression stack.
36+
/// Flag state used in the parser.
37+
#[derive(Clone, Copy, Debug, Default)]
38+
pub struct Flags {
39+
/// i
40+
pub casei: bool,
41+
/// m
42+
pub multi: bool,
43+
/// s
44+
pub dotnl: bool,
45+
/// U
46+
pub swap_greed: bool,
47+
/// x
48+
pub ignore_space: bool,
49+
}
50+
51+
/// An ephemeral type for representing the expression stack.
3752
///
3853
/// Everything on the stack is either a regular expression or a marker
3954
/// indicating the opening of a group (possibly non-capturing). The opening
@@ -50,32 +65,16 @@ enum Build {
5065
},
5166
}
5267

53-
/// Flag state.
54-
#[derive(Clone, Copy, Debug)]
55-
struct Flags {
56-
casei: bool,
57-
multi: bool,
58-
dotnl: bool,
59-
swap_greed: bool,
60-
ignore_space: bool,
61-
}
62-
6368
// Primary expression parsing routines.
6469
impl Parser {
65-
pub fn parse(s: &str) -> Result<Expr> {
70+
pub fn parse(s: &str, flags: Flags) -> Result<Expr> {
6671
Parser {
6772
chars: s.chars().collect(),
6873
chari: 0,
6974
stack: vec![],
7075
caps: 0,
7176
names: vec![],
72-
flags: Flags {
73-
casei: false,
74-
multi: false,
75-
dotnl: false,
76-
swap_greed: false,
77-
ignore_space: false,
78-
},
77+
flags: flags,
7978
}.parse_expr()
8079
}
8180

@@ -1048,7 +1047,6 @@ fn is_valid_capture_char(c: char) -> bool {
10481047
}
10491048

10501049
/// Returns true if the given character has significance in a regex.
1051-
#[doc(hidden)]
10521050
pub fn is_punct(c: char) -> bool {
10531051
match c {
10541052
'\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
@@ -1134,14 +1132,14 @@ const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
11341132
mod tests {
11351133
use { CharClass, ClassRange, Expr, Repeater, ErrorKind };
11361134
use unicode::regex::{PERLD, PERLS, PERLW};
1137-
use super::Parser;
1138-
use super::{LOWER, UPPER};
1135+
use super::{LOWER, UPPER, Flags, Parser};
11391136

11401137
static YI: &'static [(char, char)] = &[
11411138
('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}'),
11421139
];
11431140

1144-
fn p(s: &str) -> Expr { Parser::parse(s).unwrap() }
1141+
fn p(s: &str) -> Expr { Parser::parse(s, Flags::default()).unwrap() }
1142+
fn pf(s: &str, flags: Flags) -> Expr { Parser::parse(s, flags).unwrap() }
11451143
fn lit(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: false } }
11461144
fn liti(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: true } }
11471145
fn b<T>(v: T) -> Box<T> { Box::new(v) }
@@ -1539,6 +1537,40 @@ mod tests {
15391537
]));
15401538
}
15411539

1540+
#[test]
1541+
fn flags_default_casei() {
1542+
let flags = Flags { casei: true, .. Flags::default() };
1543+
assert_eq!(pf("a", flags), liti('a'));
1544+
}
1545+
1546+
#[test]
1547+
fn flags_default_multi() {
1548+
let flags = Flags { multi: true, .. Flags::default() };
1549+
assert_eq!(pf("^", flags), Expr::StartLine);
1550+
}
1551+
1552+
#[test]
1553+
fn flags_default_dotnl() {
1554+
let flags = Flags { dotnl: true, .. Flags::default() };
1555+
assert_eq!(pf(".", flags), Expr::AnyChar);
1556+
}
1557+
1558+
#[test]
1559+
fn flags_default_swap_greed() {
1560+
let flags = Flags { swap_greed: true, .. Flags::default() };
1561+
assert_eq!(pf("a+", flags), Expr::Repeat {
1562+
e: b(lit('a')),
1563+
r: Repeater::OneOrMore,
1564+
greedy: false,
1565+
});
1566+
}
1567+
1568+
#[test]
1569+
fn flags_default_ignore_space() {
1570+
let flags = Flags { ignore_space: true, .. Flags::default() };
1571+
assert_eq!(pf(" a ", flags), lit('a'));
1572+
}
1573+
15421574
#[test]
15431575
fn escape_simple() {
15441576
assert_eq!(p(r"\a\f\t\n\r\v"), c(&[
@@ -1907,6 +1939,11 @@ mod tests {
19071939
Expr::Class(class(&[('x', 'x')]).case_fold().negate()));
19081940
}
19091941

1942+
#[test]
1943+
fn ignore_space_empty() {
1944+
assert_eq!(p("(?x) "), Expr::Empty);
1945+
}
1946+
19101947
#[test]
19111948
fn ignore_space_literal() {
19121949
assert_eq!(p("(?x) a b c"), Expr::Concat(vec![
@@ -1992,7 +2029,7 @@ mod tests {
19922029

19932030
macro_rules! test_err {
19942031
($re:expr, $pos:expr, $kind:expr) => {{
1995-
let err = Parser::parse($re).unwrap_err();
2032+
let err = Parser::parse($re, Flags::default()).unwrap_err();
19962033
assert_eq!($pos, err.pos);
19972034
assert_eq!($kind, err.kind);
19982035
assert!($re.contains(&err.surround));

regex-syntax/src/properties.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ fn parser_never_panics() {
129129
impl Arbitrary for Expr {
130130
fn arbitrary<G: Gen>(g: &mut G) -> Expr {
131131
let e = fix_capture_indices(gen_expr(g, 0, ExprType::Anything));
132-
e.simplify().unwrap()
132+
e.simplify(200).unwrap()
133133
}
134134

135135
fn shrink(&self) -> Box<Iterator<Item=Expr>> {
@@ -193,7 +193,7 @@ impl Arbitrary for Expr {
193193
}))
194194
}
195195
};
196-
Box::new(es.map(|e| fix_capture_indices(e).simplify().unwrap()))
196+
Box::new(es.map(|e| fix_capture_indices(e).simplify(200).unwrap()))
197197
}
198198
}
199199

src/re.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,7 @@ const REPLACE_EXPAND: &'static str = r"(?x)
3535
/// The string returned may be safely used as a literal in a regular
3636
/// expression.
3737
pub fn quote(text: &str) -> String {
38-
let mut quoted = String::with_capacity(text.len());
39-
for c in text.chars() {
40-
if syntax::is_punct(c) {
41-
quoted.push('\\')
42-
}
43-
quoted.push(c);
44-
}
45-
quoted
38+
syntax::quote(text)
4639
}
4740

4841
/// Tests if the given regular expression matches somewhere in the text given.

0 commit comments

Comments
 (0)