Skip to content

Commit f2de5dc

Browse files
mrsdizzie, techknowlogick
mrsdizzie
authored and committed
Replace linkRegex with xurls library (#6261)
* Replace linkRegex with xurls library

  Rather than maintaining a complicated regex to match URLs for autolinking, Gitea can use this existing Go library, which takes care of the matching with very little code change to Gitea itself. After spending a while trying to find the perfect regex for all cases, this library still works better, as it is more flexible than a single regex ever will be.

  This will also fix the following issues: #5844, #3095, #3381.

  This passes all our current tests, and I've added new ones mentioned in those issues as well.

* Use xurls.StrictMatchingScheme instead of xurls.Strict

  This is much faster, and we only care about https? links to preserve existing behavior.
1 parent 01bd1fc commit f2de5dc

File tree

9 files changed

+2038
-3
lines changed

9 files changed

+2038
-3
lines changed

Gopkg.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Gopkg.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,7 @@ ignored = ["google.golang.org/appengine*"]
113113
[[constraint]]
114114
name = "github.com/prometheus/client_golang"
115115
version = "0.9.0"
116+
117+
[[constraint]]
118+
name = "github.com/mvdan/xurls"
119+
version = "2.0.0"

modules/markup/html.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"code.gitea.io/gitea/modules/util"
1818

1919
"github.com/Unknwon/com"
20+
"github.com/mvdan/xurls"
2021
"golang.org/x/net/html"
2122
"golang.org/x/net/html/atom"
2223
)
@@ -64,9 +65,7 @@ var (
6465
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
6566
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
6667

67-
// matches http/https links. used for autlinking those. partly modified from
68-
// the original present in autolink.js
69-
linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`)
68+
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
7069
)
7170

7271
// regexp for full links to issues/pulls

modules/markup/html_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) {
104104
test(
105105
"http://142.42.1.1/",
106106
`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`)
107+
test(
108+
"https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd",
109+
`<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`)
110+
test(
111+
"https://en.wikipedia.org/wiki/URL_(disambiguation)",
112+
`<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`)
113+
test(
114+
"https://foo_bar.example.com/",
115+
`<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`)
107116

108117
// Test that should *not* be turned into URL
109118
test(

vendor/github.com/mvdan/xurls/LICENSE

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/mvdan/xurls/schemes.go

Lines changed: 299 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)