Skip to content

Commit b2ff8bb

Browse files
committed
rollup merge of rust-lang#19818: emk/regex_at_name_opt
Hello! This is my first Rust patch, and I fear that I've probably skipped at least 7 critical steps. I'd appreciate your feedback and advice about how to contribute to Rust. This patch is based on a discussion with @BurntSushi in rust-lang#14602 a while back. I'm happy to revise it as needed to fit into the modern world. :-) As discussed in that issue, the existing `at` and `name` functions represent two different results with the empty string: 1. Matched the empty string. 2. Did not match anything. Consider the following example. This regex has two named matched groups, `key` and `value`. `value` is optional: ```rust // Matches "foo", "foo;v=bar" and "foo;v=". regex!(r"(?P<key>[a-z]+)(;v=(?P<value>[a-z]*))?"); ``` We can access `value` using `caps.name("value")`, but there's no way for us to distinguish between the `"foo"` and `"foo;v="` cases. Early this year, @BurntSushi recommended modifying the existing `at` and `name` functions to return `Option`, instead of adding new functions to the API. This is a [breaking-change], but the fix is easy: - `refs.at(1)` becomes `refs.at(1).unwrap_or("")`. - `refs.name(name)` becomes `refs.name(name).unwrap_or("")`.
2 parents 3bc3ae3 + c2b0d7d commit b2ff8bb

File tree

5 files changed

+38
-37
lines changed

5 files changed

+38
-37
lines changed

src/compiletest/compiletest.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ fn extract_gdb_version(full_version_line: Option<String>) -> Option<String> {
394394

395395
match re.captures(full_version_line) {
396396
Some(captures) => {
397-
Some(captures.at(2).to_string())
397+
Some(captures.at(2).unwrap_or("").to_string())
398398
}
399399
None => {
400400
println!("Could not extract GDB version from line '{}'",
@@ -428,7 +428,7 @@ fn extract_lldb_version(full_version_line: Option<String>) -> Option<String> {
428428

429429
match re.captures(full_version_line) {
430430
Some(captures) => {
431-
Some(captures.at(1).to_string())
431+
Some(captures.at(1).unwrap_or("").to_string())
432432
}
433433
None => {
434434
println!("Could not extract LLDB version from line '{}'",

src/compiletest/errors.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ fn parse_expected(last_nonfollow_error: Option<uint>,
6666
line: &str,
6767
re: &Regex) -> Option<(WhichLine, ExpectedError)> {
6868
re.captures(line).and_then(|caps| {
69-
let adjusts = caps.name("adjusts").len();
70-
let kind = caps.name("kind").to_ascii_lower();
71-
let msg = caps.name("msg").trim().to_string();
72-
let follow = caps.name("follow").len() > 0;
69+
let adjusts = caps.name("adjusts").unwrap_or("").len();
70+
let kind = caps.name("kind").unwrap_or("").to_ascii_lower();
71+
let msg = caps.name("msg").unwrap_or("").trim().to_string();
72+
let follow = caps.name("follow").unwrap_or("").len() > 0;
7373

7474
let (which, line) = if follow {
7575
assert!(adjusts == 0, "use either //~| or //~^, not both.");

src/grammar/verify.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,10 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>) -> TokenAn
173173
);
174174

175175
let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
176-
let start = m.name("start");
177-
let end = m.name("end");
178-
let toknum = m.name("toknum");
179-
let content = m.name("content");
176+
let start = m.name("start").unwrap_or("");
177+
let end = m.name("end").unwrap_or("");
178+
let toknum = m.name("toknum").unwrap_or("");
179+
let content = m.name("content").unwrap_or("");
180180

181181
let proto_tok = tokens.get(toknum).expect(format!("didn't find token {} in the map",
182182
toknum).as_slice());

src/libregex/lib.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,9 @@
103103
//! let re = regex!(r"(\d{4})-(\d{2})-(\d{2})");
104104
//! let text = "2012-03-14, 2013-01-01 and 2014-07-05";
105105
//! for cap in re.captures_iter(text) {
106-
//! println!("Month: {} Day: {} Year: {}", cap.at(2), cap.at(3), cap.at(1));
106+
//! println!("Month: {} Day: {} Year: {}",
107+
//! cap.at(2).unwrap_or(""), cap.at(3).unwrap_or(""),
108+
//! cap.at(1).unwrap_or(""));
107109
//! }
108110
//! // Output:
109111
//! // Month: 03 Day: 14 Year: 2012
@@ -285,7 +287,7 @@
285287
//! # fn main() {
286288
//! let re = regex!(r"(?i)a+(?-i)b+");
287289
//! let cap = re.captures("AaAaAbbBBBb").unwrap();
288-
//! assert_eq!(cap.at(0), "AaAaAbb");
290+
//! assert_eq!(cap.at(0), Some("AaAaAbb"));
289291
//! # }
290292
//! ```
291293
//!

src/libregex/re.rs

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -273,9 +273,9 @@ impl Regex {
273273
/// let re = regex!(r"'([^']+)'\s+\((\d{4})\)");
274274
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
275275
/// let caps = re.captures(text).unwrap();
276-
/// assert_eq!(caps.at(1), "Citizen Kane");
277-
/// assert_eq!(caps.at(2), "1941");
278-
/// assert_eq!(caps.at(0), "'Citizen Kane' (1941)");
276+
/// assert_eq!(caps.at(1), Some("Citizen Kane"));
277+
/// assert_eq!(caps.at(2), Some("1941"));
278+
/// assert_eq!(caps.at(0), Some("'Citizen Kane' (1941)"));
279279
/// # }
280280
/// ```
281281
///
@@ -291,9 +291,9 @@ impl Regex {
291291
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
292292
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
293293
/// let caps = re.captures(text).unwrap();
294-
/// assert_eq!(caps.name("title"), "Citizen Kane");
295-
/// assert_eq!(caps.name("year"), "1941");
296-
/// assert_eq!(caps.at(0), "'Citizen Kane' (1941)");
294+
/// assert_eq!(caps.name("title"), Some("Citizen Kane"));
295+
/// assert_eq!(caps.name("year"), Some("1941"));
296+
/// assert_eq!(caps.at(0), Some("'Citizen Kane' (1941)"));
297297
/// # }
298298
/// ```
299299
///
@@ -434,7 +434,7 @@ impl Regex {
434434
/// # use regex::Captures; fn main() {
435435
/// let re = regex!(r"([^,\s]+),\s+(\S+)");
436436
/// let result = re.replace("Springsteen, Bruce", |&: caps: &Captures| {
437-
/// format!("{} {}", caps.at(2), caps.at(1))
437+
/// format!("{} {}", caps.at(2).unwrap_or(""), caps.at(1).unwrap_or(""))
438438
/// });
439439
/// assert_eq!(result.as_slice(), "Bruce Springsteen");
440440
/// # }
@@ -712,27 +712,25 @@ impl<'t> Captures<'t> {
712712
Some((self.locs[s].unwrap(), self.locs[e].unwrap()))
713713
}
714714

715-
/// Returns the matched string for the capture group `i`.
716-
/// If `i` isn't a valid capture group or didn't match anything, then the
717-
/// empty string is returned.
718-
pub fn at(&self, i: uint) -> &'t str {
715+
/// Returns the matched string for the capture group `i`. If `i` isn't
716+
/// a valid capture group or didn't match anything, then `None` is
717+
/// returned.
718+
pub fn at(&self, i: uint) -> Option<&'t str> {
719719
match self.pos(i) {
720-
None => "",
721-
Some((s, e)) => {
722-
self.text.slice(s, e)
723-
}
720+
None => None,
721+
Some((s, e)) => Some(self.text.slice(s, e))
724722
}
725723
}
726724

727-
/// Returns the matched string for the capture group named `name`.
728-
/// If `name` isn't a valid capture group or didn't match anything, then
729-
/// the empty string is returned.
730-
pub fn name(&self, name: &str) -> &'t str {
725+
/// Returns the matched string for the capture group named `name`. If
726+
/// `name` isn't a valid capture group or didn't match anything, then
727+
/// `None` is returned.
728+
pub fn name(&self, name: &str) -> Option<&'t str> {
731729
match self.named {
732-
None => "",
730+
None => None,
733731
Some(ref h) => {
734732
match h.get(name) {
735-
None => "",
733+
None => None,
736734
Some(i) => self.at(*i),
737735
}
738736
}
@@ -769,11 +767,12 @@ impl<'t> Captures<'t> {
769767
// FIXME: Don't use regexes for this. It's completely unnecessary.
770768
let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap();
771769
let text = re.replace_all(text, |&mut: refs: &Captures| -> String {
772-
let (pre, name) = (refs.at(1), refs.at(2));
770+
let pre = refs.at(1).unwrap_or("");
771+
let name = refs.at(2).unwrap_or("");
773772
format!("{}{}", pre,
774773
match from_str::<uint>(name.as_slice()) {
775-
None => self.name(name).to_string(),
776-
Some(i) => self.at(i).to_string(),
774+
None => self.name(name).unwrap_or("").to_string(),
775+
Some(i) => self.at(i).unwrap_or("").to_string(),
777776
})
778777
});
779778
let re = Regex::new(r"\$\$").unwrap();
@@ -802,7 +801,7 @@ impl<'t> Iterator<&'t str> for SubCaptures<'t> {
802801
fn next(&mut self) -> Option<&'t str> {
803802
if self.idx < self.caps.len() {
804803
self.idx += 1;
805-
Some(self.caps.at(self.idx - 1))
804+
Some(self.caps.at(self.idx - 1).unwrap_or(""))
806805
} else {
807806
None
808807
}

0 commit comments

Comments
 (0)