Skip to content

Commit af6be75

Browse files
Valid email address should only start with alphanumeric (#28174)
This fixes issue #27847, where the regular expression allowed an email address to start with special symbols. A valid email address should start with an alphanumeric character, and only then will it be rendered as an email link. Test cases from the bug report were added to validate that such input is no longer rendered as an email address. --------- Co-authored-by: wxiaoguang <[email protected]>
1 parent 6d3c674 commit af6be75

File tree

3 files changed

+43
-10
lines changed

3 files changed

+43
-10
lines changed

modules/markup/html.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
7171
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
7272
// http://spec.commonmark.org/0.28/#email-address
7373
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
74-
v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
74+
// At the moment, we use a stricter rule for rendering purposes: the "name" part is only allowed to start after a word boundary
75+
v.emailRegex = regexp.MustCompile(`\b([-\w.!#$%&'*+/=?^{|}~]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)\b`)
7576

7677
// emojiShortCodeRegex find emoji by alias like :smile:
7778
v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)

modules/markup/html_email.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33

44
package markup
55

6-
import "golang.org/x/net/html"
6+
import (
7+
"strings"
8+
9+
"golang.org/x/net/html"
10+
)
711

812
// emailAddressProcessor replaces raw email addresses with a mailto: link.
913
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
@@ -14,6 +18,14 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
1418
return
1519
}
1620

21+
var nextByte byte
22+
if len(node.Data) > m[3] {
23+
nextByte = node.Data[m[3]]
24+
}
25+
if strings.IndexByte(":/", nextByte) != -1 {
26+
// for cases: "[email protected]:owner/repo.git", "https://[email protected]/owner/repo.git"
27+
return
28+
}
1729
mail := node.Data[m[2]:m[3]]
1830
replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/))
1931
node = node.NextSibling.NextSibling

modules/markup/html_test.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -225,10 +225,10 @@ func TestRender_email(t *testing.T) {
225225
test := func(input, expected string) {
226226
res, err := markup.RenderString(markup.NewTestRenderContext().WithRelativePath("a.md"), input)
227227
assert.NoError(t, err)
228-
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res))
228+
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(res), "input: %s", input)
229229
}
230-
// Text that should be turned into email link
231230

231+
// Text that should be turned into email link
232232
test(
233233
234234
`<p><a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`)
@@ -260,28 +260,48 @@ func TestRender_email(t *testing.T) {
260260
<a href="mailto:[email protected]" rel="nofollow">[email protected]</a>?
261261
<a href="mailto:[email protected]" rel="nofollow">[email protected]</a>!</p>`)
262262

263+
// match GitHub behavior
264+
test("email@[email protected]", `<p>email@<a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`)
265+
266+
// match GitHub behavior
267+
test(`"[email protected]"`, `<p>&#34;<a href="mailto:[email protected]" rel="nofollow">[email protected]</a>&#34;</p>`)
268+
263269
// Test that should *not* be turned into email links
264-
test(
265-
266-
`<p>&#34;[email protected]&#34;</p>`)
267270
test(
268271
"/home/gitea/mailstore/info@gitea/com",
269272
`<p>/home/gitea/mailstore/info@gitea/com</p>`)
270273
test(
271274
"[email protected]:go-gitea/gitea.git",
272275
`<p>[email protected]:go-gitea/gitea.git</p>`)
276+
test(
277+
"https://foo:[email protected]",
278+
`<p><a href="https://foo:[email protected]" rel="nofollow">https://foo:[email protected]</a></p>`)
273279
test(
274280
"gitea@3",
275281
`<p>gitea@3</p>`)
276282
test(
277283
278284
279-
test(
280-
281-
`<p>email@[email protected]</p>`)
282285
test(
283286
284287
288+
289+
cases := []struct {
290+
input, expected string
291+
}{
292+
// match GitHub behavior
293+
{"[email protected]", `<p>?<a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`},
294+
{"*[email protected]", `<p>*<a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`},
295+
{"[email protected]", `<p>~<a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`},
296+
297+
// the following cases don't match GitHub behavior, but they are valid email addresses ...
298+
// maybe we should reduce the candidate characters for the "name" part in the future
299+
{"a*[email protected]", `<p><a href="mailto:a*[email protected]" rel="nofollow">a*[email protected]</a></p>`},
300+
{"[email protected]", `<p><a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`},
301+
}
302+
for _, c := range cases {
303+
test(c.input, c.expected)
304+
}
285305
}
286306

287307
func TestRender_emoji(t *testing.T) {

0 commit comments

Comments
 (0)