Skip to content

Commit 1378899

Browse files
author
Ethan Pailes
committed
Address @BurntSushi's comments.
1 parent 7eec64e commit 1378899

File tree

6 files changed

+122
-110
lines changed

6 files changed

+122
-110
lines changed

Cargo.toml

-3
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,6 @@ pattern = []
5757
# Enable to use simd acceleration.
5858
# Note that this is deprecated and is a no-op.
5959
simd-accel = []
60-
# When testing, run the expensive randomized testing suites in addition
61-
# to the fast unit tests.
62-
random-test = []
6360

6461
[lib]
6562
# There are no benchmarks in the library code itself

HACKING.md

+9
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,11 @@ matching engine we want to test. The entry points are:
249249
backtracking on every regex and use *arbitrary* byte based programs.
250250
* `tests/test_backtrack_utf8bytes.rs` - tests `Regex::new`, forced to use
251251
backtracking on every regex and use *UTF-8* byte based programs.
252+
* `tests/test_crates_regex.rs` - tests to make sure that all of the
253+
backends behave in the same way against a number of quickcheck
254+
generated random inputs. These tests need to be enabled through
255+
the `RUST_REGEX_RUN_RANDOM_TESTING` environment variable (see
256+
below).
252257

253258
The lazy DFA and pure literal engines are absent from this list because
254259
they cannot be used on every regular expression. Instead, we rely on
@@ -259,6 +264,10 @@ entry points, it can take a while to compile everything. To reduce compile
259264
times slightly, try using `cargo test --test default`, which will only use the
260265
`tests/test_default.rs` entry point.
261266

267+
The random testing takes quite a while, so it is not enabled by default.
268+
In order to run the random testing you can set the
269+
`RUST_REGEX_RUN_RANDOM_TESTING` environment variable to anything before
270+
invoking `cargo test`.
262271

263272
## Benchmarking
264273

ci/script.sh

+6-9
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,16 @@ if [ "$TRAVIS_RUST_VERSION" = "1.12.0" ]; then
1717
exit
1818
fi
1919

20-
# Run tests.
21-
#
22-
# If we have nightly, then enable our nightly features.
23-
# If we are part of a nightly build, then run expensive random tests.
24-
CARGO_TEST_ARGS=""
2520
if [ "$TRAVIS_EVENT_TYPE" = "cron" ]; then
26-
CARGO_TEST_ARGS="${CARGO_TEST_ARGS} --features random-test"
21+
export RUST_REGEX_RUN_RANDOM_TESTING=1
2722
fi
23+
24+
# Run tests. If we have nightly, then enable our nightly features.
2825
if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
29-
CARGO_TEST_ARGS="${CARGO_TEST_ARGS} --features unstable"
26+
cargo test --verbose --features unstable
27+
else
28+
cargo test --verbose
3029
fi
31-
cargo test --verbose ${CARGO_TEST_ARGS}
3230

3331
# Run a test that confirms the shootout benchmarks are correct.
3432
ci/run-shootout-test
@@ -50,4 +48,3 @@ if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
5048
(cd bench && ./run $x --no-run --verbose)
5149
done
5250
fi
53-

scripts/scrape_crates_io.py

+20-14
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
#!/usr/bin/env python3
22

3-
import urllib3
3+
from subprocess import call
4+
import argparse
5+
import datetime
46
import glob
7+
import json
58
import os
69
import re
7-
import pdb
810
import shutil
9-
import json
1011
import tempfile
11-
import argparse
12-
import datetime
1312
import time
14-
from subprocess import call
13+
import urllib3
1514

1615
CRATES_IO_INDEX_GIT_LOC = "https://github.com/rust-lang/crates.io-index.git"
1716
RE_REGEX = re.compile(r"Regex::new\((r?\".*?\")\)")
@@ -21,17 +20,19 @@
2120
urllib3.disable_warnings()
2221
http = urllib3.PoolManager()
2322

23+
2424
def argparser():
2525
p = argparse.ArgumentParser("A script to scrape crates.io for regex.")
2626
p.add_argument("-c", "--crates-index", metavar="CRATES_INDEX_DIR",
27-
help=("A directory where we can find crates.io-index "
28-
+ "(if this isn't set it will be automatically "
29-
+ "downloaded)."))
27+
help=("A directory where we can find crates.io-index "
28+
+ "(if this isn't set it will be automatically "
29+
+ "downloaded)."))
3030
p.add_argument("-o", "--output-file", metavar="OUTPUT",
31-
default="crates_regex.rs",
32-
help="The name of the output file to create.")
31+
default="crates_regex.rs",
32+
help="The name of the output file to create.")
3333
return p
3434

35+
3536
PRELUDE = """// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
3637
// file at the top-level directory of this distribution and at
3738
// http://rust-lang.org/COPYRIGHT.
@@ -49,6 +50,7 @@ def argparser():
4950
5051
"""
5152

53+
5254
def main():
5355
args = argparser().parse_args()
5456
out = open(os.path.abspath(args.output_file), "w")
@@ -86,12 +88,14 @@ def main():
8688
shutil.rmtree(work_dir)
8789
out.close()
8890

91+
8992
def download_crates_index():
9093
if call(["git", "clone", CRATES_IO_INDEX_GIT_LOC]) != 0:
9194
print("Error cloning the crates.io index")
9295
exit(1)
9396
return "crates.io-index"
9497

98+
9599
def iter_crates(crates_index):
96100
exclude = set(["config.json", ".git"])
97101
for crate_index_file in iter_files(crates_index, exclude=exclude):
@@ -107,6 +111,7 @@ def iter_crates(crates_index):
107111
continue
108112
yield (crate_info["name"], crate_info["vers"])
109113

114+
110115
def iter_files(d, exclude=set()):
111116
for x in os.listdir(d):
112117
if x in exclude:
@@ -124,8 +129,8 @@ class Crate(object):
124129
def __init__(self, work_dir, name, version):
125130
self.name = name
126131
self.version = version
127-
self.url = "https://crates.io/api/v1/crates/{name}/{version}/download".format(
128-
name=self.name, version=self.version)
132+
self.url = ("https://crates.io/api/v1/crates/{name}/{version}/download"
133+
.format(name=self.name, version=self.version))
129134
self.filename = "{}/{}-{}.tar.gz".format(
130135
work_dir, self.name, self.version)
131136

@@ -164,7 +169,7 @@ def __exit__(self, ty, value, tb):
164169
try:
165170
shutil.rmtree(self.filename[:-len(".tar.gz")])
166171
os.remove(self.filename)
167-
except:
172+
except _:
168173
pass
169174

170175
def iter_srcs(self):
@@ -178,5 +183,6 @@ def iter_lines(self):
178183
for line in f:
179184
yield line
180185

186+
181187
if __name__ == "__main__":
182188
main()

tests/consistent.rs

+79-79
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
extern crate regex;
2-
extern crate quickcheck;
3-
4-
use self::regex::internal::ExecBuilder;
1+
use regex::internal::ExecBuilder;
52

63
/// Given a regex, check if all of the backends produce the same
74
/// results on a number of different inputs.
@@ -104,102 +101,105 @@ pub fn backends_are_consistent(re: &str) -> Result<u64, String> {
104101

105102
macro_rules! checker {
106103
($module_name:ident, $regex_type:path, $mk_input:expr) => {
107-
mod $module_name {
108-
// use $regex_type;
109-
use super::quickcheck;
110-
use super::quickcheck::{TestResult, Arbitrary};
111-
112-
pub fn check_backends(backends: &[(&str, $regex_type)]) -> Result<u64, String> {
113-
let mut total_passed = 0;
114-
for regex in backends[1..].iter() {
115-
total_passed += try!(quickcheck_regex_eq(&backends[0], regex));
116-
}
117104

118-
Ok(total_passed)
119-
}
120-
121-
fn quickcheck_regex_eq(
122-
&(name1, ref re1): &(&str, $regex_type),
123-
&(name2, ref re2): &(&str, $regex_type),
124-
) -> Result<u64, String> {
125-
quickcheck::QuickCheck::new()
126-
.quicktest(RegexEqualityTest::new(re1.clone(), re2.clone()))
127-
.map_err(|err|
128-
format!("{}(/{}/) and {}(/{}/) are inconsistent.\nQuickCheck Err: {:?}",
129-
name1, re1, name2, re2, err))
130-
}
105+
mod $module_name {
106+
use quickcheck;
107+
use quickcheck::{TestResult, Arbitrary};
131108

132-
struct RegexEqualityTest {
133-
re1: $regex_type,
134-
re2: $regex_type,
135-
}
136-
impl RegexEqualityTest {
137-
fn new(re1: $regex_type, re2: $regex_type) -> Self {
138-
RegexEqualityTest {
139-
re1: re1,
140-
re2: re2,
109+
pub fn check_backends(
110+
backends: &[(&str, $regex_type)]
111+
) -> Result<u64, String> {
112+
let mut total_passed = 0;
113+
for regex in backends[1..].iter() {
114+
total_passed += try!(quickcheck_regex_eq(&backends[0], regex));
141115
}
116+
117+
Ok(total_passed)
142118
}
143-
}
144119

145-
impl quickcheck::Testable for RegexEqualityTest {
146-
fn result<G: quickcheck::Gen>(&self, gen: &mut G) -> TestResult {
147-
let input = $mk_input(gen);
148-
let input = &input;
120+
fn quickcheck_regex_eq(
121+
&(name1, ref re1): &(&str, $regex_type),
122+
&(name2, ref re2): &(&str, $regex_type),
123+
) -> Result<u64, String> {
124+
quickcheck::QuickCheck::new()
125+
.quicktest(RegexEqualityTest::new(re1.clone(), re2.clone()))
126+
.map_err(|err|
127+
format!("{}(/{}/) and {}(/{}/) are inconsistent.\
128+
QuickCheck Err: {:?}",
129+
name1, re1, name2, re2, err))
130+
}
149131

150-
if self.re1.find(&input) != self.re2.find(input) {
151-
return TestResult::error(
152-
format!("find mismatch input={:?}", input));
132+
struct RegexEqualityTest {
133+
re1: $regex_type,
134+
re2: $regex_type,
135+
}
136+
impl RegexEqualityTest {
137+
fn new(re1: $regex_type, re2: $regex_type) -> Self {
138+
RegexEqualityTest {
139+
re1: re1,
140+
re2: re2,
141+
}
153142
}
143+
}
144+
145+
impl quickcheck::Testable for RegexEqualityTest {
146+
fn result<G: quickcheck::Gen>(&self, gen: &mut G) -> TestResult {
147+
let input = $mk_input(gen);
148+
let input = &input;
149+
150+
if self.re1.find(&input) != self.re2.find(input) {
151+
return TestResult::error(
152+
format!("find mismatch input={:?}", input));
153+
}
154+
155+
let cap1 = self.re1.captures(input);
156+
let cap2 = self.re2.captures(input);
157+
match (cap1, cap2) {
158+
(None, None) => {}
159+
(Some(cap1), Some(cap2)) => {
160+
for (c1, c2) in cap1.iter().zip(cap2.iter()) {
161+
if c1 != c2 {
162+
return TestResult::error(
163+
format!("captures mismatch input={:?}", input));
164+
}
165+
}
166+
}
167+
_ => return TestResult::error(
168+
format!("captures mismatch input={:?}", input)),
169+
}
154170

155-
let cap1 = self.re1.captures(input);
156-
let cap2 = self.re2.captures(input);
157-
match (cap1, cap2) {
158-
(None, None) => {}
159-
(Some(cap1), Some(cap2)) => {
171+
let fi1 = self.re1.find_iter(input);
172+
let fi2 = self.re2.find_iter(input);
173+
for (m1, m2) in fi1.zip(fi2) {
174+
if m1 != m2 {
175+
return TestResult::error(
176+
format!("find_iter mismatch input={:?}", input));
177+
}
178+
}
179+
180+
let ci1 = self.re1.captures_iter(input);
181+
let ci2 = self.re2.captures_iter(input);
182+
for (cap1, cap2) in ci1.zip(ci2) {
160183
for (c1, c2) in cap1.iter().zip(cap2.iter()) {
161184
if c1 != c2 {
162185
return TestResult::error(
163-
format!("captures mismatch input={:?}", input));
186+
format!("captures_iter mismatch input={:?}", input));
164187
}
165188
}
166189
}
167-
_ => return TestResult::error(
168-
format!("captures mismatch input={:?}", input)),
169-
}
170190

171-
let fi1 = self.re1.find_iter(input);
172-
let fi2 = self.re2.find_iter(input);
173-
for (m1, m2) in fi1.zip(fi2) {
174-
if m1 != m2 {
175-
return TestResult::error(
176-
format!("find_iter mismatch input={:?}", input));
177-
}
178-
}
179-
180-
let ci1 = self.re1.captures_iter(input);
181-
let ci2 = self.re2.captures_iter(input);
182-
for (cap1, cap2) in ci1.zip(ci2) {
183-
for (c1, c2) in cap1.iter().zip(cap2.iter()) {
184-
if c1 != c2 {
191+
let s1 = self.re1.split(input);
192+
let s2 = self.re2.split(input);
193+
for (chunk1, chunk2) in s1.zip(s2) {
194+
if chunk1 != chunk2 {
185195
return TestResult::error(
186-
format!("captures_iter mismatch input={:?}", input));
196+
format!("split mismatch input={:?}", input));
187197
}
188198
}
189-
}
190199

191-
let s1 = self.re1.split(input);
192-
let s2 = self.re2.split(input);
193-
for (chunk1, chunk2) in s1.zip(s2) {
194-
if chunk1 != chunk2 {
195-
return TestResult::error(
196-
format!("split mismatch input={:?}", input));
197-
}
200+
TestResult::from_bool(true)
198201
}
199-
200-
TestResult::from_bool(true)
201202
}
202-
}
203203

204204
} // mod
205205
} // rule case

tests/test_crates_regex.rs

+8-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
extern crate regex;
2+
extern crate quickcheck;
23

34
///////////////////////////////////////////////////////////////////////
45
// //
@@ -38,20 +39,22 @@ fn word_boundary_backtracking_default_mismatch() {
3839
// //
3940
///////////////////////////////////////////////////////////////////////
4041

41-
#[cfg(feature="random-test")]
4242
mod consistent;
4343

44-
#[cfg(feature="random-test")]
4544
mod crates_regex {
4645

4746
macro_rules! consistent {
4847
($test_name:ident, $regex_src:expr) => {
4948
#[test]
5049
fn $test_name() {
5150
use super::consistent::backends_are_consistent;
52-
match backends_are_consistent($regex_src) {
53-
Ok(_) => {},
54-
Err(err) => panic!("{}", err),
51+
52+
let run_random = option_env!("RUST_REGEX_RUN_RANDOM_TESTING");
53+
if run_random.is_some() {
54+
match backends_are_consistent($regex_src) {
55+
Ok(_) => {},
56+
Err(err) => panic!("{}", err),
57+
}
5558
}
5659
}
5760
}

0 commit comments

Comments
 (0)