Skip to content

Commit e9017c2

Browse files
rsc authored and cagedmantis committed
[release-branch.go1.18] regexp: limit size of parsed regexps
Set a 128 MB limit on the amount of space used by []syntax.Inst in the compiled form corresponding to a given regexp.

Also set a 128 MB limit on the rune storage in the *syntax.Regexp tree itself.

Thanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue.

Fixes CVE-2022-41715.
Updates #55949.
Fixes #55950.

Change-Id: Ia656baed81564436368cf950e1c5409752f28e1b
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136
TryBot-Result: Security TryBots <[email protected]>
Reviewed-by: Damien Neil <[email protected]>
Run-TryBot: Roland Shoemaker <[email protected]>
Reviewed-by: Julie Qiu <[email protected]>
Reviewed-on: https://go-review.googlesource.com/c/go/+/438501
Run-TryBot: Carlos Amedee <[email protected]>
Reviewed-by: Carlos Amedee <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Dmitri Shuralyov <[email protected]>
1 parent 0a72381 commit e9017c2

File tree

2 files changed

+148
-10
lines changed

2 files changed

+148
-10
lines changed

src/regexp/syntax/parse.go

Lines changed: 140 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,49 @@ const (
9090
// until we've allocated at least maxHeight Regexp structures.
9191
const maxHeight = 1000
9292

93+
// maxSize is the maximum size of a compiled regexp in Insts.
94+
// It too is somewhat arbitrarily chosen, but the idea is to be large enough
95+
// to allow significant regexps while at the same time small enough that
96+
// the compiled form will not take up too much memory.
97+
// 128 MB is enough for about 3.3 million Inst structures, which roughly
98+
// corresponds to a 3.3 MB regexp.
99+
const (
100+
maxSize = 128 << 20 / instSize
101+
instSize = 5 * 8 // bytes per Inst: byte, 2 uint32, slice is 5 64-bit words
102+
)
103+
104+
// maxRunes is the maximum number of runes allowed in a regexp tree
105+
// counting the runes in all the nodes.
106+
// Ignoring character classes, p.numRunes is always less than the length of the regexp.
107+
// Character classes can make it much larger: each \pL adds 1292 runes.
108+
// 128 MB is enough for 32M runes, which is nearly 26k \pL instances.
109+
// Note that repetitions do not make copies of the rune slices,
110+
// so \pL{1000} is only one rune slice, not 1000.
111+
// We could keep a cache of character classes we've seen,
112+
// so that all the \pL we see use the same rune list,
113+
// but that doesn't remove the problem entirely:
114+
// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
115+
// And because the Rune slice is exposed directly in the Regexp,
116+
// there is not an opportunity to change the representation to allow
117+
// partial sharing between different character classes.
118+
// So the limit is the best we can do.
119+
const (
120+
maxRunes = 128 << 20 / runeSize
121+
runeSize = 4 // bytes per rune: rune is int32
122+
)
123+
93124
type parser struct {
94125
flags Flags // parse mode flags
95126
stack []*Regexp // stack of parsed expressions
96127
free *Regexp
97128
numCap int // number of capturing groups seen
98129
wholeRegexp string
99-
tmpClass []rune // temporary char class work space
100-
numRegexp int // number of regexps allocated
101-
height map[*Regexp]int // regexp height for height limit check
130+
tmpClass []rune // temporary char class work space
131+
numRegexp int // number of regexps allocated
132+
numRunes int // number of runes in char classes
133+
repeats int64 // product of all repetitions seen
134+
height map[*Regexp]int // regexp height, for height limit check
135+
size map[*Regexp]int64 // regexp compiled size, for size limit check
102136
}
103137

104138
func (p *parser) newRegexp(op Op) *Regexp {
@@ -122,6 +156,104 @@ func (p *parser) reuse(re *Regexp) {
122156
p.free = re
123157
}
124158

159+
func (p *parser) checkLimits(re *Regexp) {
160+
if p.numRunes > maxRunes {
161+
panic(ErrInternalError)
162+
}
163+
p.checkSize(re)
164+
p.checkHeight(re)
165+
}
166+
167+
func (p *parser) checkSize(re *Regexp) {
168+
if p.size == nil {
169+
// We haven't started tracking size yet.
170+
// Do a relatively cheap check to see if we need to start.
171+
// Maintain the product of all the repeats we've seen
172+
// and don't track if the total number of regexp nodes
173+
// we've seen times the repeat product is in budget.
174+
if p.repeats == 0 {
175+
p.repeats = 1
176+
}
177+
if re.Op == OpRepeat {
178+
n := re.Max
179+
if n == -1 {
180+
n = re.Min
181+
}
182+
if n <= 0 {
183+
n = 1
184+
}
185+
if int64(n) > maxSize/p.repeats {
186+
p.repeats = maxSize
187+
} else {
188+
p.repeats *= int64(n)
189+
}
190+
}
191+
if int64(p.numRegexp) < maxSize/p.repeats {
192+
return
193+
}
194+
195+
// We need to start tracking size.
196+
// Make the map and belatedly populate it
197+
// with info about everything we've constructed so far.
198+
p.size = make(map[*Regexp]int64)
199+
for _, re := range p.stack {
200+
p.checkSize(re)
201+
}
202+
}
203+
204+
if p.calcSize(re, true) > maxSize {
205+
panic(ErrInternalError)
206+
}
207+
}
208+
209+
func (p *parser) calcSize(re *Regexp, force bool) int64 {
210+
if !force {
211+
if size, ok := p.size[re]; ok {
212+
return size
213+
}
214+
}
215+
216+
var size int64
217+
switch re.Op {
218+
case OpLiteral:
219+
size = int64(len(re.Rune))
220+
case OpCapture, OpStar:
221+
// star can be 1+ or 2+; assume 2 pessimistically
222+
size = 2 + p.calcSize(re.Sub[0], false)
223+
case OpPlus, OpQuest:
224+
size = 1 + p.calcSize(re.Sub[0], false)
225+
case OpConcat:
226+
for _, sub := range re.Sub {
227+
size += p.calcSize(sub, false)
228+
}
229+
case OpAlternate:
230+
for _, sub := range re.Sub {
231+
size += p.calcSize(sub, false)
232+
}
233+
if len(re.Sub) > 1 {
234+
size += int64(len(re.Sub)) - 1
235+
}
236+
case OpRepeat:
237+
sub := p.calcSize(re.Sub[0], false)
238+
if re.Max == -1 {
239+
if re.Min == 0 {
240+
size = 2 + sub // x*
241+
} else {
242+
size = 1 + int64(re.Min)*sub // xxx+
243+
}
244+
break
245+
}
246+
// x{2,5} = xx(x(x(x)?)?)?
247+
size = int64(re.Max)*sub + int64(re.Max-re.Min)
248+
}
249+
250+
if size < 1 {
251+
size = 1
252+
}
253+
p.size[re] = size
254+
return size
255+
}
256+
125257
func (p *parser) checkHeight(re *Regexp) {
126258
if p.numRegexp < maxHeight {
127259
return
@@ -158,6 +290,7 @@ func (p *parser) calcHeight(re *Regexp, force bool) int {
158290

159291
// push pushes the regexp re onto the parse stack and returns the regexp.
160292
func (p *parser) push(re *Regexp) *Regexp {
293+
p.numRunes += len(re.Rune)
161294
if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
162295
// Single rune.
163296
if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
@@ -189,7 +322,7 @@ func (p *parser) push(re *Regexp) *Regexp {
189322
}
190323

191324
p.stack = append(p.stack, re)
192-
p.checkHeight(re)
325+
p.checkLimits(re)
193326
return re
194327
}
195328

@@ -299,7 +432,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
299432
re.Sub = re.Sub0[:1]
300433
re.Sub[0] = sub
301434
p.stack[n-1] = re
302-
p.checkHeight(re)
435+
p.checkLimits(re)
303436

304437
if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
305438
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
@@ -503,6 +636,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
503636

504637
for j := start; j < i; j++ {
505638
sub[j] = p.removeLeadingString(sub[j], len(str))
639+
p.checkLimits(sub[j])
506640
}
507641
suffix := p.collapse(sub[start:i], OpAlternate) // recurse
508642

@@ -560,6 +694,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
560694
for j := start; j < i; j++ {
561695
reuse := j != start // prefix came from sub[start]
562696
sub[j] = p.removeLeadingRegexp(sub[j], reuse)
697+
p.checkLimits(sub[j])
563698
}
564699
suffix := p.collapse(sub[start:i], OpAlternate) // recurse
565700

src/regexp/syntax/parse_test.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -484,12 +484,15 @@ var invalidRegexps = []string{
484484
`(?P<>a)`,
485485
`[a-Z]`,
486486
`(?i)[a-Z]`,
487-
`a{100000}`,
488-
`a{100000,}`,
489-
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
490-
strings.Repeat("(", 1000) + strings.Repeat(")", 1000),
491-
strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000),
492487
`\Q\E*`,
488+
`a{100000}`, // too much repetition
489+
`a{100000,}`, // too much repetition
490+
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", // too much repetition
491+
strings.Repeat("(", 1000) + strings.Repeat(")", 1000), // too deep
492+
strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep
493+
"(" + strings.Repeat("(xx?)", 1000) + "){1000}", // too long
494+
strings.Repeat("(xx?){1000}", 1000), // too long
495+
strings.Repeat(`\pL`, 27000), // too many runes
493496
}
494497

495498
var onlyPerl = []string{

0 commit comments

Comments
 (0)