Skip to content

Use stricter boundaries for auto-link detection #6522

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions modules/markup/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ var (
// TODO: fix invalid linking issue

// mentionPattern matches all mentions in the form of "@user"
mentionPattern = regexp.MustCompile(`(?:\s|^|\W)(@[0-9a-zA-Z-_\.]+)`)
mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`)

// issueNumericPattern matches a string that references a numeric issue, e.g. #1287
issueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)(#[0-9]+)\b`)
issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
// issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\W)([A-Z]{1,10}-[1-9][0-9]*)\b`)
issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|\.(\s|$))`)
// crossReferenceIssueNumericPattern matches a string that references a numeric issue in a different repository
// e.g. gogits/gogs#12345
crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)\b`)
crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)

// sha1CurrentPattern matches a string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
// so that abbreviated hash links can be used as well. This matches git and github usability.
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-f]{7,40})\b`)
sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|\.(\s|$))`)

// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
Expand All @@ -63,7 +63,7 @@ var (
// well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)(?:\\s|$|\\)|\\]|\\.(\\s|$))")

linkRegex, _ = xurls.StrictMatchingScheme("https?://")
)
Expand Down Expand Up @@ -656,12 +656,12 @@ func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) {

// emailAddressProcessor replaces raw email addresses with a mailto: link.
func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) {
m := emailRegex.FindStringIndex(node.Data)
m := emailRegex.FindStringSubmatchIndex(node.Data)
if m == nil {
return
}
mail := node.Data[m[0]:m[1]]
replaceContent(node, m[0], m[1], createLink("mailto:"+mail, mail))
mail := node.Data[m[2]:m[3]]
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail))
}

// linkProcessor creates links for any HTTP or HTTPS URL not captured by
Expand Down
20 changes: 18 additions & 2 deletions modules/markup/html_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func TestRender_IssueIndexPattern(t *testing.T) {
test("test#1234")
test("#1234test")
test(" test #1234test")
test("/home/gitea/#1234")

// should not render issue mention without leading space
test("test#54321 issue")
Expand Down Expand Up @@ -103,9 +104,11 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
test("#1234 test", "%s test", 1234)
test("test #8 issue", "test %s issue", 8)
test("test issue #1234", "test issue %s", 1234)
test("fixes issue #1234.", "fixes issue %s.", 1234)

// should render mentions in parentheses
// should render mentions in parentheses / brackets
test("(#54321 issue)", "(%s issue)", 54321)
test("[#54321 issue]", "[%s issue]", 54321)
test("test (#9801 extra) issue", "test (%s extra) issue", 9801)
test("test (#1)", "test (%s)", 1)

Expand Down Expand Up @@ -253,10 +256,14 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd",
"(abcdefabcdefabcdefabcdefabcdefabcdefabcd)",
"[abcdefabcdefabcdefabcdefabcdefabcdefabcd]",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd.",
}
falseTestCases := []string{
"test",
"abcdefg",
"e59ff077-2d03-4e6b-964d-63fbaea81f",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmn",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmO",
}
Expand Down Expand Up @@ -309,14 +316,18 @@ func TestRegExp_mentionPattern(t *testing.T) {
"@ANT_123",
"@xxx-DiN0-z-A..uru..s-xxx",
" @lol ",
" @Te/st",
" @Te-st",
"(@gitea)",
"[@gitea]",
}
falseTestCases := []string{
"@ 0",
"@ ",
"@",
"",
"ABC",
"/home/gitea/@gitea",
"\"@gitea\"",
}

for _, testCase := range trueTestCases {
Expand All @@ -335,6 +346,9 @@ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
"A-1",
"RC-80",
"ABCDEFGHIJ-1234567890987654321234567890",
"ABC-123.",
"(ABC-123)",
"[ABC-123]",
}
falseTestCases := []string{
"RC-08",
Expand All @@ -347,6 +361,8 @@ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
"ABC",
"GG-",
"rm-1",
"/home/gitea/ABC-1234",
"MY-STRING-ABC-123",
}

for _, testCase := range trueTestCases {
Expand Down
43 changes: 43 additions & 0 deletions modules/markup/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ func TestRender_Commits(t *testing.T) {
test(commit, `<p><a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`)
test(tree, `<p><a href="`+tree+`" rel="nofollow">b6dd6210ea/src</a></p>`)
test("commit "+sha, `<p>commit <a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`)
test("/home/gitea/"+sha, "<p>/home/gitea/"+sha+"</p>")

}

func TestRender_CrossReferences(t *testing.T) {
Expand All @@ -53,6 +55,9 @@ func TestRender_CrossReferences(t *testing.T) {
test(
"go-gitea/gitea#12345",
`<p><a href="`+util.URLJoin(AppURL, "go-gitea", "gitea", "issues", "12345")+`" rel="nofollow">go-gitea/gitea#12345</a></p>`)
test(
"/home/gitea/go-gitea/gitea#12345",
`<p>/home/gitea/go-gitea/gitea#12345</p>`)
}

func TestMisc_IsSameDomain(t *testing.T) {
Expand Down Expand Up @@ -144,6 +149,44 @@ func TestRender_links(t *testing.T) {
`<p>www</p>`)
}

// TestRender_email renders a series of markdown inputs through RenderString
// and compares the whitespace-trimmed output with the expected HTML. The first
// group of cases expects the address to be wrapped in a mailto: link (bare, in
// parentheses, in brackets, and followed by a period); the second group
// expects the input to come back without any link.
//
// NOTE(review): the literal "[email protected]" looks like a redaction
// placeholder introduced by the page this text was captured from rather than
// the real test addresses — confirm against the repository before relying on
// these strings.
func TestRender_email(t *testing.T) {
// Point the global settings at the test URLs before rendering.
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL

// test renders input as if it were the file "a.md" and asserts that the
// result equals expected, ignoring leading and trailing whitespace on both.
test := func(input, expected string) {
buffer := RenderString("a.md", input, setting.AppSubURL, nil)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer)))
}
// Text that should be turned into an email link

test(
"[email protected]",
`<p><a href="mailto:[email protected]" rel="nofollow">[email protected]</a></p>`)
test(
"([email protected])",
`<p>(<a href="mailto:[email protected]" rel="nofollow">[email protected]</a>)</p>`)
test(
"[[email protected]]",
`<p>[<a href="mailto:[email protected]" rel="nofollow">[email protected]</a>]</p>`)
test(
"[email protected].",
`<p><a href="mailto:[email protected]" rel="nofollow">[email protected]</a>.</p>`)
test(
"send email to [email protected].",
`<p>send email to <a href="mailto:[email protected]" rel="nofollow">[email protected]</a>.</p>`)

// Text that should *not* be turned into email links
test(
"\"[email protected]\"",
`<p>“[email protected]”</p>`)
test(
"/home/gitea/mailstore/info@gitea/com",
`<p>/home/gitea/mailstore/info@gitea/com</p>`)
test(
"[email protected]:go-gitea/gitea.git",
`<p>[email protected]:go-gitea/gitea.git</p>`)
}

func TestRender_ShortLinks(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL
Expand Down