Skip to content

Commit 6293736

Browse files
mrsdizzie authored and zeripath committed
Use stricter boundaries for auto-link detection (#6522)
* Use stricter boundaries for auto-link detection

  Currently autolinks use \W for boundary detection, which creates many situations where links are inserted into places they don't belong (paths, URLs, UUIDs, etc.). This fixes that by replacing \W and only allowing these matches to touch an open paren or bracket (matching what seems to be GitHub behavior) in addition to whitespace and start of line. The ending boundary is handled similarly. Fixes #6149 (and probably others).

* Update test

  Replace incorrect test with a value that is a valid username, based on: "Username should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters."

* Also allow for period at the end

  Matching GitHub behavior.

* Fix email regex to work properly with specified boundaries

  Create a specific capture group for the email address and then use FindStringSubmatchIndex to allow for non-matching patterns as boundaries.

* Add Tests

  Add tests for new behavior -- including tests for email addresses, which were absent before.
1 parent 5422f23 commit 6293736

File tree

3 files changed

+70
-11
lines changed

3 files changed

+70
-11
lines changed

modules/markup/html.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,20 @@ var (
3535
// TODO: fix invalid linking issue
3636

3737
// mentionPattern matches all mentions in the form of "@user"
38-
mentionPattern = regexp.MustCompile(`(?:\s|^|\W)(@[0-9a-zA-Z-_\.]+)`)
38+
mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`)
3939

4040
// issueNumericPattern matches strings that reference a numeric issue, e.g. #1287
41-
issueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)(#[0-9]+)\b`)
41+
issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
4242
// issueAlphanumericPattern matches strings that reference an alphanumeric issue, e.g. ABC-1234
43-
issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\W)([A-Z]{1,10}-[1-9][0-9]*)\b`)
43+
issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|\.(\s|$))`)
4444
// crossReferenceIssueNumericPattern matches strings that reference a numeric issue in a different repository
4545
// e.g. gogits/gogs#12345
46-
crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)\b`)
46+
crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
4747

4848
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
4949
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
5050
// so that abbreviated hash links can be used as well. This matches git and GitHub usability.
51-
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-f]{7,40})\b`)
51+
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|\.(\s|$))`)
5252

5353
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
5454
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
@@ -63,7 +63,7 @@ var (
6363
// well as the HTML5 spec:
6464
// http://spec.commonmark.org/0.28/#email-address
6565
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
66-
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
66+
emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)(?:\\s|$|\\)|\\]|\\.(\\s|$))")
6767

6868
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
6969
)
@@ -656,12 +656,12 @@ func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) {
656656

657657
// emailAddressProcessor replaces raw email addresses with a mailto: link.
658658
func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) {
659-
m := emailRegex.FindStringIndex(node.Data)
659+
m := emailRegex.FindStringSubmatchIndex(node.Data)
660660
if m == nil {
661661
return
662662
}
663-
mail := node.Data[m[0]:m[1]]
664-
replaceContent(node, m[0], m[1], createLink("mailto:"+mail, mail))
663+
mail := node.Data[m[2]:m[3]]
664+
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail))
665665
}
666666

667667
// linkProcessor creates links for any HTTP or HTTPS URL not captured by

modules/markup/html_internal_test.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ func TestRender_IssueIndexPattern(t *testing.T) {
7171
test("test#1234")
7272
test("#1234test")
7373
test(" test #1234test")
74+
test("/home/gitea/#1234")
7475

7576
// should not render issue mention without leading space
7677
test("test#54321 issue")
@@ -103,9 +104,11 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
103104
test("#1234 test", "%s test", 1234)
104105
test("test #8 issue", "test %s issue", 8)
105106
test("test issue #1234", "test issue %s", 1234)
107+
test("fixes issue #1234.", "fixes issue %s.", 1234)
106108

107-
// should render mentions in parentheses
109+
// should render mentions in parentheses / brackets
108110
test("(#54321 issue)", "(%s issue)", 54321)
111+
test("[#54321 issue]", "[%s issue]", 54321)
109112
test("test (#9801 extra) issue", "test (%s extra) issue", 9801)
110113
test("test (#1)", "test (%s)", 1)
111114

@@ -253,10 +256,14 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
253256
trueTestCases := []string{
254257
"d8a994ef243349f321568f9e36d5c3f444b99cae",
255258
"abcdefabcdefabcdefabcdefabcdefabcdefabcd",
259+
"(abcdefabcdefabcdefabcdefabcdefabcdefabcd)",
260+
"[abcdefabcdefabcdefabcdefabcdefabcdefabcd]",
261+
"abcdefabcdefabcdefabcdefabcdefabcdefabcd.",
256262
}
257263
falseTestCases := []string{
258264
"test",
259265
"abcdefg",
266+
"e59ff077-2d03-4e6b-964d-63fbaea81f",
260267
"abcdefghijklmnopqrstuvwxyzabcdefghijklmn",
261268
"abcdefghijklmnopqrstuvwxyzabcdefghijklmO",
262269
}
@@ -309,14 +316,18 @@ func TestRegExp_mentionPattern(t *testing.T) {
309316
"@ANT_123",
310317
"@xxx-DiN0-z-A..uru..s-xxx",
311318
" @lol ",
312-
" @Te/st",
319+
" @Te-st",
320+
"(@gitea)",
321+
"[@gitea]",
313322
}
314323
falseTestCases := []string{
315324
"@ 0",
316325
"@ ",
317326
"@",
318327
"",
319328
"ABC",
329+
"/home/gitea/@gitea",
330+
"\"@gitea\"",
320331
}
321332

322333
for _, testCase := range trueTestCases {
@@ -335,6 +346,9 @@ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
335346
"A-1",
336347
"RC-80",
337348
"ABCDEFGHIJ-1234567890987654321234567890",
349+
"ABC-123.",
350+
"(ABC-123)",
351+
"[ABC-123]",
338352
}
339353
falseTestCases := []string{
340354
"RC-08",
@@ -347,6 +361,8 @@ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
347361
"ABC",
348362
"GG-",
349363
"rm-1",
364+
"/home/gitea/ABC-1234",
365+
"MY-STRING-ABC-123",
350366
}
351367

352368
for _, testCase := range trueTestCases {

modules/markup/html_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ func TestRender_Commits(t *testing.T) {
3636
test(commit, `<p><a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`)
3737
test(tree, `<p><a href="`+tree+`" rel="nofollow">b6dd6210ea/src</a></p>`)
3838
test("commit "+sha, `<p>commit <a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`)
39+
test("/home/gitea/"+sha, "<p>/home/gitea/"+sha+"</p>")
40+
3941
}
4042

4143
func TestRender_CrossReferences(t *testing.T) {
@@ -53,6 +55,9 @@ func TestRender_CrossReferences(t *testing.T) {
5355
test(
5456
"go-gitea/gitea#12345",
5557
`<p><a href="`+util.URLJoin(AppURL, "go-gitea", "gitea", "issues", "12345")+`" rel="nofollow">go-gitea/gitea#12345</a></p>`)
58+
test(
59+
"/home/gitea/go-gitea/gitea#12345",
60+
`<p>/home/gitea/go-gitea/gitea#12345</p>`)
5661
}
5762

5863
func TestMisc_IsSameDomain(t *testing.T) {
@@ -144,6 +149,44 @@ func TestRender_links(t *testing.T) {
144149
`<p>www</p>`)
145150
}
146151

152+
func TestRender_email(t *testing.T) {
153+
setting.AppURL = AppURL
154+
setting.AppSubURL = AppSubURL
155+
156+
test := func(input, expected string) {
157+
buffer := RenderString("a.md", input, setting.AppSubURL, nil)
158+
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer)))
159+
}
160+
// Text that should be turned into an email link
161+
162+
test(
163+
"info@gitea.com",
164+
`<p><a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a></p>`)
165+
test(
166+
"(info@gitea.com)",
167+
`<p>(<a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>)</p>`)
168+
test(
169+
"[info@gitea.com]",
170+
`<p>[<a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>]</p>`)
171+
test(
172+
"info@gitea.com.",
173+
`<p><a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>.</p>`)
174+
test(
175+
"send email to info@gitea.com.",
176+
`<p>send email to <a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>.</p>`)
177+
178+
// Text that should *not* be turned into email links
179+
test(
180+
"\"info@gitea.com\"",
181+
`<p>“info@gitea.com”</p>`)
182+
test(
183+
"/home/gitea/mailstore/info@gitea/com",
184+
`<p>/home/gitea/mailstore/info@gitea/com</p>`)
185+
test(
186+
"[email protected]:go-gitea/gitea.git",
187+
`<p>[email protected]:go-gitea/gitea.git</p>`)
188+
}
189+
147190
func TestRender_ShortLinks(t *testing.T) {
148191
setting.AppURL = AppURL
149192
setting.AppSubURL = AppSubURL

0 commit comments

Comments
 (0)