Skip to content

Fix markdown URL parsing for commit ID (#30812) #30855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 65 additions & 55 deletions modules/markup/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"path"
"path/filepath"
"regexp"
"slices"
"strings"
"sync"

Expand Down Expand Up @@ -54,7 +55,7 @@ var (
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

// anyHashPattern splits url containing SHA into parts
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)

// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
Expand Down Expand Up @@ -591,7 +592,8 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {

func mentionProcessor(ctx *RenderContext, node *html.Node) {
start := 0
for node != nil {
nodeStop := node.NextSibling
for node != nodeStop {
found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:]))
if !found {
node = node.NextSibling
Expand Down Expand Up @@ -962,57 +964,68 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
}
}

// fullHashPatternProcessor renders SHA containing URLs
func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil {
return
// anyHashPatternResult holds the pieces of a commit-hash URL matched by
// anyHashPattern, as extracted by anyHashPatternExtract.
type anyHashPatternResult struct {
// PosStart is the byte offset in the searched string where the matched URL begins.
PosStart int
// PosEnd is the exclusive byte offset where the matched URL ends; it is moved
// back by one when a trailing '.' is trimmed from the match.
PosEnd int
// FullURL is the matched URL text (searched string sliced by PosStart:PosEnd),
// without a trailing sentence-ending '.'.
FullURL string
// CommitID is the hex commit hash captured by the first group of the pattern.
CommitID string
// SubPath is the optional path that follows the commit hash, including its
// leading '/'; empty when the URL has no sub path.
SubPath string
// QueryHash is the URL fragment taken from the last capture group with the
// leading '#' removed. Despite the name, the "?..." query string is matched
// by the pattern but is not stored here.
QueryHash string
}

func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
m := anyHashPattern.FindStringSubmatchIndex(s)
if m == nil {
return ret, false
}

next := node.NextSibling
for node != nil && node != next {
m := anyHashPattern.FindStringSubmatchIndex(node.Data)
if m == nil {
return
ret.PosStart, ret.PosEnd = m[0], m[1]
ret.FullURL = s[ret.PosStart:ret.PosEnd]
if strings.HasSuffix(ret.FullURL, ".") {
// if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence.
ret.PosEnd--
ret.FullURL = ret.FullURL[:len(ret.FullURL)-1]
for i := 0; i < len(m); i++ {
m[i] = min(m[i], ret.PosEnd)
}
}

urlFull := node.Data[m[0]:m[1]]
text := base.ShortSha(node.Data[m[2]:m[3]])
ret.CommitID = s[m[2]:m[3]]
if m[5] > 0 {
ret.SubPath = s[m[4]:m[5]]
}

// 3rd capture group matches an optional path
subpath := ""
if m[5] > 0 {
subpath = node.Data[m[4]:m[5]]
}
lastStart, lastEnd := m[len(m)-2], m[len(m)-1]
if lastEnd > 0 {
ret.QueryHash = s[lastStart:lastEnd][1:]
}
return ret, true
}

// 4th capture group matches an optional url hash
hash := ""
if m[7] > 0 {
hash = node.Data[m[6]:m[7]][1:]
// fullHashPatternProcessor renders SHA containing URLs
func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil {
return
}
nodeStop := node.NextSibling
for node != nodeStop {
if node.Type != html.TextNode {
node = node.NextSibling
continue
}

start := m[0]
end := m[1]

// If url ends in '.', it's very likely that it is not part of the
// actual url but used to finish a sentence.
if strings.HasSuffix(urlFull, ".") {
end--
urlFull = urlFull[:len(urlFull)-1]
if hash != "" {
hash = hash[:len(hash)-1]
} else if subpath != "" {
subpath = subpath[:len(subpath)-1]
}
ret, ok := anyHashPatternExtract(node.Data)
if !ok {
node = node.NextSibling
continue
}

if subpath != "" {
text += subpath
text := base.ShortSha(ret.CommitID)
if ret.SubPath != "" {
text += ret.SubPath
}

if hash != "" {
text += " (" + hash + ")"
if ret.QueryHash != "" {
text += " (" + ret.QueryHash + ")"
}
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit"))
node = node.NextSibling.NextSibling
}
}
Expand All @@ -1021,19 +1034,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil {
return
}

next := node.NextSibling
for node != nil && node != next {
m := comparePattern.FindStringSubmatchIndex(node.Data)
if m == nil {
return
nodeStop := node.NextSibling
for node != nodeStop {
if node.Type != html.TextNode {
node = node.NextSibling
continue
}

// Ensure that every group (m[0]...m[7]) has a match
for i := 0; i < 8; i++ {
if m[i] == -1 {
return
}
m := comparePattern.FindStringSubmatchIndex(node.Data)
if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match
node = node.NextSibling
continue
}

urlFull := node.Data[m[0]:m[1]]
Expand Down
3 changes: 2 additions & 1 deletion modules/markup/html_codepreview.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosSt
}

func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
for node != nil {
nodeStop := node.NextSibling
for node != nodeStop {
if node.Type != html.TextNode {
node = node.NextSibling
continue
Expand Down
59 changes: 42 additions & 17 deletions modules/markup/html_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,36 +399,61 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
}

func TestRegExp_anySHA1Pattern(t *testing.T) {
testCases := map[string][]string{
testCases := map[string]anyHashPatternResult{
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
"/test/unit/event.js",
"#L2703",
CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
SubPath: "/test/unit/event.js",
QueryHash: "L2703",
},
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
"/test/unit/event.js",
"",
CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
SubPath: "/test/unit/event.js",
},
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
"0705be475092aede1eddae01319ec931fb9c65fc",
"",
"",
CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
},
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
"0705be475092aede1eddae01319ec931fb9c65fc",
"/src",
"",
CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
SubPath: "/src",
},
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"",
"#diff-2",
CommitID: "d8a994ef243349f321568f9e36d5c3f444b99cae",
QueryHash: "diff-2",
},
"non-url": {},
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
QueryHash: "L1-L2",
},
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
},
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
SubPath: "/sub",
},
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
},
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
},
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": {
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
QueryHash: "hash",
},
}

for k, v := range testCases {
assert.Equal(t, anyHashPattern.FindStringSubmatch(k)[1:], v)
ret, ok := anyHashPatternExtract(k)
if v.CommitID == "" {
assert.False(t, ok)
} else {
assert.EqualValues(t, strings.TrimSuffix(k, "."), ret.FullURL)
assert.EqualValues(t, v.CommitID, ret.CommitID)
assert.EqualValues(t, v.SubPath, ret.SubPath)
assert.EqualValues(t, v.QueryHash, ret.QueryHash)
}
}
}

Expand Down
7 changes: 6 additions & 1 deletion modules/markup/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ func TestRender_CrossReferences(t *testing.T) {
test(
util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345"),
`<p><a href="`+util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogitea/some-repo-name#12345</a></p>`)

inputURL := "https://host/a/b/commit/0123456789012345678901234567890123456789/foo.txt?a=b#L2-L3"
test(
inputURL,
`<p><a href="`+inputURL+`" rel="nofollow"><code>0123456789/foo.txt (L2-L3)</code></a></p>`)
}

func TestMisc_IsSameDomain(t *testing.T) {
Expand Down Expand Up @@ -695,7 +700,7 @@ func TestIssue18471(t *testing.T) {
}, strings.NewReader(data), &res)

assert.NoError(t, err)
assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
assert.Equal(t, `<a href="http://domain/org/repo/compare/783b039...da951ce" class="compare"><code class="nohighlight">783b039...da951ce</code></a>`, res.String())
}

func TestIsFullURL(t *testing.T) {
Expand Down