Skip to content

Commit 471b411

Browse files
GiteaBot and wxiaoguang authored
Fix markdown URL parsing for commit ID (#30812) (#30855)
Backport #30812 by wxiaoguang
Co-authored-by: wxiaoguang <[email protected]>
1 parent a82e630 commit 471b411

File tree

4 files changed

+115
-74
lines changed

4 files changed

+115
-74
lines changed

modules/markup/html.go

Lines changed: 65 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ import (
1010
"path"
1111
"path/filepath"
1212
"regexp"
13+
"slices"
1314
"strings"
1415
"sync"
1516

@@ -54,7 +55,7 @@ var (
5455
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
5556

5657
// anyHashPattern splits url containing SHA into parts
57-
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
58+
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)
5859

5960
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
6061
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
@@ -591,7 +592,8 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
591592

592593
func mentionProcessor(ctx *RenderContext, node *html.Node) {
593594
start := 0
594-
for node != nil {
595+
nodeStop := node.NextSibling
596+
for node != nodeStop {
595597
found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:]))
596598
if !found {
597599
node = node.NextSibling
@@ -962,57 +964,68 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
962964
}
963965
}
964966

965-
// fullHashPatternProcessor renders SHA containing URLs
966-
func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
967-
if ctx.Metas == nil {
968-
return
967+
type anyHashPatternResult struct {
968+
PosStart int
969+
PosEnd int
970+
FullURL string
971+
CommitID string
972+
SubPath string
973+
QueryHash string
974+
}
975+
976+
func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
977+
m := anyHashPattern.FindStringSubmatchIndex(s)
978+
if m == nil {
979+
return ret, false
969980
}
970981

971-
next := node.NextSibling
972-
for node != nil && node != next {
973-
m := anyHashPattern.FindStringSubmatchIndex(node.Data)
974-
if m == nil {
975-
return
982+
ret.PosStart, ret.PosEnd = m[0], m[1]
983+
ret.FullURL = s[ret.PosStart:ret.PosEnd]
984+
if strings.HasSuffix(ret.FullURL, ".") {
985+
// if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence.
986+
ret.PosEnd--
987+
ret.FullURL = ret.FullURL[:len(ret.FullURL)-1]
988+
for i := 0; i < len(m); i++ {
989+
m[i] = min(m[i], ret.PosEnd)
976990
}
991+
}
977992

978-
urlFull := node.Data[m[0]:m[1]]
979-
text := base.ShortSha(node.Data[m[2]:m[3]])
993+
ret.CommitID = s[m[2]:m[3]]
994+
if m[5] > 0 {
995+
ret.SubPath = s[m[4]:m[5]]
996+
}
980997

981-
// 3rd capture group matches a optional path
982-
subpath := ""
983-
if m[5] > 0 {
984-
subpath = node.Data[m[4]:m[5]]
985-
}
998+
lastStart, lastEnd := m[len(m)-2], m[len(m)-1]
999+
if lastEnd > 0 {
1000+
ret.QueryHash = s[lastStart:lastEnd][1:]
1001+
}
1002+
return ret, true
1003+
}
9861004

987-
// 4th capture group matches a optional url hash
988-
hash := ""
989-
if m[7] > 0 {
990-
hash = node.Data[m[6]:m[7]][1:]
1005+
// fullHashPatternProcessor renders SHA containing URLs
1006+
func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
1007+
if ctx.Metas == nil {
1008+
return
1009+
}
1010+
nodeStop := node.NextSibling
1011+
for node != nodeStop {
1012+
if node.Type != html.TextNode {
1013+
node = node.NextSibling
1014+
continue
9911015
}
992-
993-
start := m[0]
994-
end := m[1]
995-
996-
// If url ends in '.', it's very likely that it is not part of the
997-
// actual url but used to finish a sentence.
998-
if strings.HasSuffix(urlFull, ".") {
999-
end--
1000-
urlFull = urlFull[:len(urlFull)-1]
1001-
if hash != "" {
1002-
hash = hash[:len(hash)-1]
1003-
} else if subpath != "" {
1004-
subpath = subpath[:len(subpath)-1]
1005-
}
1016+
ret, ok := anyHashPatternExtract(node.Data)
1017+
if !ok {
1018+
node = node.NextSibling
1019+
continue
10061020
}
1007-
1008-
if subpath != "" {
1009-
text += subpath
1021+
text := base.ShortSha(ret.CommitID)
1022+
if ret.SubPath != "" {
1023+
text += ret.SubPath
10101024
}
1011-
1012-
if hash != "" {
1013-
text += " (" + hash + ")"
1025+
if ret.QueryHash != "" {
1026+
text += " (" + ret.QueryHash + ")"
10141027
}
1015-
replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
1028+
replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit"))
10161029
node = node.NextSibling.NextSibling
10171030
}
10181031
}
@@ -1021,19 +1034,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
10211034
if ctx.Metas == nil {
10221035
return
10231036
}
1024-
1025-
next := node.NextSibling
1026-
for node != nil && node != next {
1027-
m := comparePattern.FindStringSubmatchIndex(node.Data)
1028-
if m == nil {
1029-
return
1037+
nodeStop := node.NextSibling
1038+
for node != nodeStop {
1039+
if node.Type != html.TextNode {
1040+
node = node.NextSibling
1041+
continue
10301042
}
1031-
1032-
// Ensure that every group (m[0]...m[7]) has a match
1033-
for i := 0; i < 8; i++ {
1034-
if m[i] == -1 {
1035-
return
1036-
}
1043+
m := comparePattern.FindStringSubmatchIndex(node.Data)
1044+
if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match
1045+
node = node.NextSibling
1046+
continue
10371047
}
10381048

10391049
urlFull := node.Data[m[0]:m[1]]

modules/markup/html_codepreview.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosSt
6060
}
6161

6262
func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
63-
for node != nil {
63+
nodeStop := node.NextSibling
64+
for node != nodeStop {
6465
if node.Type != html.TextNode {
6566
node = node.NextSibling
6667
continue

modules/markup/html_internal_test.go

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -399,36 +399,61 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
399399
}
400400

401401
func TestRegExp_anySHA1Pattern(t *testing.T) {
402-
testCases := map[string][]string{
402+
testCases := map[string]anyHashPatternResult{
403403
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
404-
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
405-
"/test/unit/event.js",
406-
"#L2703",
404+
CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
405+
SubPath: "/test/unit/event.js",
406+
QueryHash: "L2703",
407407
},
408408
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
409-
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
410-
"/test/unit/event.js",
411-
"",
409+
CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
410+
SubPath: "/test/unit/event.js",
412411
},
413412
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
414-
"0705be475092aede1eddae01319ec931fb9c65fc",
415-
"",
416-
"",
413+
CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
417414
},
418415
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
419-
"0705be475092aede1eddae01319ec931fb9c65fc",
420-
"/src",
421-
"",
416+
CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
417+
SubPath: "/src",
422418
},
423419
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
424-
"d8a994ef243349f321568f9e36d5c3f444b99cae",
425-
"",
426-
"#diff-2",
420+
CommitID: "d8a994ef243349f321568f9e36d5c3f444b99cae",
421+
QueryHash: "diff-2",
422+
},
423+
"non-url": {},
424+
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
425+
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
426+
QueryHash: "L1-L2",
427+
},
428+
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": {
429+
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
430+
},
431+
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": {
432+
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
433+
SubPath: "/sub",
434+
},
435+
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": {
436+
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
437+
},
438+
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": {
439+
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
440+
},
441+
"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": {
442+
CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
443+
QueryHash: "hash",
427444
},
428445
}
429446

430447
for k, v := range testCases {
431-
assert.Equal(t, anyHashPattern.FindStringSubmatch(k)[1:], v)
448+
ret, ok := anyHashPatternExtract(k)
449+
if v.CommitID == "" {
450+
assert.False(t, ok)
451+
} else {
452+
assert.EqualValues(t, strings.TrimSuffix(k, "."), ret.FullURL)
453+
assert.EqualValues(t, v.CommitID, ret.CommitID)
454+
assert.EqualValues(t, v.SubPath, ret.SubPath)
455+
assert.EqualValues(t, v.QueryHash, ret.QueryHash)
456+
}
432457
}
433458
}
434459

modules/markup/html_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ func TestRender_CrossReferences(t *testing.T) {
124124
test(
125125
util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345"),
126126
`<p><a href="`+util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogitea/some-repo-name#12345</a></p>`)
127+
128+
inputURL := "https://host/a/b/commit/0123456789012345678901234567890123456789/foo.txt?a=b#L2-L3"
129+
test(
130+
inputURL,
131+
`<p><a href="`+inputURL+`" rel="nofollow"><code>0123456789/foo.txt (L2-L3)</code></a></p>`)
127132
}
128133

129134
func TestMisc_IsSameDomain(t *testing.T) {
@@ -695,7 +700,7 @@ func TestIssue18471(t *testing.T) {
695700
}, strings.NewReader(data), &res)
696701

697702
assert.NoError(t, err)
698-
assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
703+
assert.Equal(t, `<a href="http://domain/org/repo/compare/783b039...da951ce" class="compare"><code class="nohighlight">783b039...da951ce</code></a>`, res.String())
699704
}
700705

701706
func TestIsFullURL(t *testing.T) {

0 commit comments

Comments
 (0)