Skip to content

Commit df8919e

Browse files
committed
Merge branch 'just-one-more-benchmark-suite' into temp
2 parents 3a2b324 + fdf2c23 commit df8919e

16 files changed

+18906
-34
lines changed

Sources/RegexBenchmark/Benchmark.swift

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,12 @@ struct CrossBenchmark {
6969
/// TODO: Probably better to have a whole-line vs search anywhere, maybe
7070
/// accommodate multi-line matching, etc.
7171
var isWhole: Bool = false
72+
73+
/// Whether to also benchmark firstMatch, or only allMatches
74+
var includeFirst: Bool = false
7275

7376
func register(_ runner: inout BenchmarkRunner) {
7477
let swiftRegex = try! Regex(regex)
75-
76-
let nsPattern = isWhole ? "^" + regex + "$" : regex
7778
let nsRegex: NSRegularExpression
7879
if isWhole {
7980
nsRegex = try! NSRegularExpression(pattern: "^" + regex + "$")
@@ -95,30 +96,32 @@ struct CrossBenchmark {
9596
type: .first,
9697
target: input))
9798
} else {
98-
runner.register(
99-
Benchmark(
100-
name: baseName + "First",
101-
regex: swiftRegex,
102-
type: .first,
103-
target: input))
10499
runner.register(
105100
Benchmark(
106101
name: baseName + "All",
107102
regex: swiftRegex,
108103
type: .allMatches,
109104
target: input))
110-
runner.register(
111-
NSBenchmark(
112-
name: baseName + "First_NS",
113-
regex: nsRegex,
114-
type: .first,
115-
target: input))
116105
runner.register(
117106
NSBenchmark(
118107
name: baseName + "All_NS",
119108
regex: nsRegex,
120109
type: .allMatches,
121110
target: input))
111+
if includeFirst {
112+
runner.register(
113+
Benchmark(
114+
name: baseName + "First",
115+
regex: swiftRegex,
116+
type: .first,
117+
target: input))
118+
runner.register(
119+
NSBenchmark(
120+
name: baseName + "First_NS",
121+
regex: nsRegex,
122+
type: .first,
123+
target: input))
124+
}
122125
}
123126
}
124127
}

Sources/RegexBenchmark/BenchmarkRegistration.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ extension BenchmarkRunner {
1616
benchmark.addHTML()
1717
benchmark.addEmail()
1818
benchmark.addCustomCharacterClasses()
19+
benchmark.addDna()
20+
benchmark.addUnicode()
1921
// -- end of registrations --
2022
return benchmark
2123
}

Sources/RegexBenchmark/CLI.swift

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import ArgumentParser
22

33
@main
44
struct Runner: ParsableCommand {
5-
@Argument(help: "Names of benchmarks to run")
5+
@Argument(help: "Patterns for benchmarks to run")
66
var specificBenchmarks: [String] = []
77

88
@Flag(help: "Run only once for profiling purposes")
@@ -20,19 +20,30 @@ struct Runner: ParsableCommand {
2020
@Flag(help: "Should the results be saved")
2121
var save = false
2222

23-
@Flag(help: "Compare this result with the latest saved result")
23+
@Flag(help: "Compare this result with a saved result")
2424
var compare = false
2525

2626
@Option(help: "The result file to compare against, if this flag is not set it will compare against the most recent result file")
2727
var compareFile: String?
2828

29+
@Flag(help: "Exclude the comparisons to NSRegex")
30+
var excludeNs = false
31+
2932
mutating func run() throws {
3033
var runner = BenchmarkRunner.makeRunner(samples, outputPath)
31-
32-
// todo: regex based filter
34+
3335
if !self.specificBenchmarks.isEmpty {
34-
runner.suite = runner.suite.filter { b in specificBenchmarks.contains(b.name) }
36+
runner.suite = runner.suite.filter { b in
37+
specificBenchmarks.contains { pattern in
38+
try! Regex(pattern).wholeMatch(in: b.name) != nil
39+
}
40+
}
3541
}
42+
43+
if excludeNs {
44+
runner.suite = runner.suite.filter { b in !b.name.contains("NS") }
45+
}
46+
3647
switch (profile, debug) {
3748
case (true, true): print("Cannot run both profile and debug")
3849
case (true, false): runner.profile()

Sources/RegexBenchmark/Inputs/DnaFASTA.swift

Lines changed: 16676 additions & 0 deletions
Large diffs are not rendered by default.

Sources/RegexBenchmark/Inputs/TaggedUnicode.swift

Lines changed: 2008 additions & 0 deletions
Large diffs are not rendered by default.

Sources/RegexBenchmark/Suite/CssRegex.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ extension BenchmarkRunner {
66
let r = #"--([a-zA-Z0-9_-]+)\s*:\s*(.*?);"#
77

88
let css = CrossBenchmark(
9-
baseName: "css", regex: r, input: Inputs.swiftOrgCSS)
9+
baseName: "Css", regex: r, input: Inputs.swiftOrgCSS)
1010
css.register(&self)
1111
}
1212
}

Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,37 +13,37 @@ extension BenchmarkRunner {
1313
let input = Inputs.graphemeBreakData
1414

1515
register(Benchmark(
16-
name: "basicCCC",
16+
name: "BasicCCC",
1717
regex: try! Regex(basic),
1818
type: .allMatches,
1919
target: input))
2020

2121
register(Benchmark(
22-
name: "basicRangeCCC",
22+
name: "BasicRangeCCC",
2323
regex: try! Regex(basicRange),
2424
type: .allMatches,
2525
target: input))
2626

2727
register(Benchmark(
28-
name: "caseInsensitiveCCC",
28+
name: "CaseInsensitiveCCC",
2929
regex: try! Regex(caseInsensitive),
3030
type: .allMatches,
3131
target: input))
3232

3333
register(Benchmark(
34-
name: "invertedCCC",
34+
name: "InvertedCCC",
3535
regex: try! Regex(inverted),
3636
type: .allMatches,
3737
target: input))
3838

3939
register(Benchmark(
40-
name: "subtractionCCC",
40+
name: "SubtractionCCC",
4141
regex: try! Regex(subtraction),
4242
type: .allMatches,
4343
target: input))
4444

4545
register(Benchmark(
46-
name: "intersectionCCC",
46+
name: "IntersectionCCC",
4747
regex: try! Regex(intersection),
4848
type: .allMatches,
4949
target: input))
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import _StringProcessing
2+
3+
extension BenchmarkRunner {
4+
mutating func addDna() {
5+
// regex-redux from the benchmarks game
6+
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/description/regexredux.html#regexredux
7+
let dna = "agg[act]taaa|ttta[agt]cct"
8+
let ends = "aND|caN|Ha[DS]|WaS"
9+
10+
let dnaMatching = CrossBenchmark(
11+
baseName: "DnaMatch",
12+
regex: dna,
13+
input: Inputs.dnaFASTA,
14+
includeFirst: true)
15+
16+
let sequenceEnds = CrossBenchmark(
17+
baseName: "DnaEndsMatch",
18+
regex: ends,
19+
input: Inputs.dnaFASTA,
20+
includeFirst: true)
21+
22+
dnaMatching.register(&self)
23+
sequenceEnds.register(&self)
24+
}
25+
}

Sources/RegexBenchmark/Suite/EmailRegex.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,22 @@ extension BenchmarkRunner {
1313
let emailWithLookaheads = #"(?=[A-z0-9][A-z0-9@._%+-]{5,253})[A-z0-9._%+-]{1,64}@(?:(?=[A-z0-9-]{1,63}\.)[A-z0-9]+(?:-[A-z0-9]+)*\.){1,8}[A-z]{2,63}"#
1414

1515
let emailRFCValid = CrossBenchmark(
16-
baseName: "emailRFC", regex: emailRFC, input: Inputs.validEmails)
16+
baseName: "EmailRFC", regex: emailRFC, input: Inputs.validEmails)
1717

1818
let emailRFCInvalid = CrossBenchmark(
19-
baseName: "emailRFCNoMatches",
19+
baseName: "EmailRFCNoMatches",
2020
regex: emailRFC,
2121
input: Inputs.graphemeBreakData
2222
)
2323

2424
let emailValid = CrossBenchmark(
25-
baseName: "emailLookahead",
25+
baseName: "EmailLookahead",
2626
regex: emailWithLookaheads,
2727
input: Inputs.validEmails
2828
)
2929

3030
let emailInvalid = CrossBenchmark(
31-
baseName: "emailLookaheadNoMatches",
31+
baseName: "EmailLookaheadNoMatches",
3232
regex: emailWithLookaheads,
3333
input: Inputs.graphemeBreakData
3434
)

Sources/RegexBenchmark/Suite/GraphemeBreak.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ extension BenchmarkRunner {
1818
let regex = #"HANGUL SYLLABLE [A-Z]+(?:\.\.HANGUL SYLLABLE [A-Z]+)?"#
1919

2020
let benchmark = CrossBenchmark(
21-
baseName: "HangulSyllable", regex: regex, input: input)
21+
baseName: "HangulSyllable", regex: regex, input: input, includeFirst: true)
2222
benchmark.register(&self)
2323
}
2424
}

0 commit comments

Comments
 (0)