From bd25bd68c4b0d4c63dd18f44cb96a01073b8f3dc Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 9 Nov 2020 22:06:17 +0000 Subject: [PATCH 1/4] Fix panic bug in handling multiple references in commit The issue lay in determining the position of matches on a second run round a commit message in FindAllIssueReferences. Fix #13483 Signed-off-by: Andrew Thornton --- modules/references/references.go | 39 ++++++++++++++++++++++++++- modules/references/references_test.go | 7 +++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/modules/references/references.go b/modules/references/references.go index 070c6e566a8e7..9f8a5a952ffb6 100644 --- a/modules/references/references.go +++ b/modules/references/references.go @@ -240,30 +240,67 @@ func FindAllIssueReferences(content string) []IssueReference { // Need to convert fully qualified html references to local system to #/! short codes contentBytes := []byte(content) if re := getGiteaIssuePullPattern(); re != nil { + // We will iterate through the content, rewrite and simplify full references. + // + // We want to transform something like: + // + // 0 1 2 3 4 5 6 + // 012345678901234567890123456789012345678901234567890123456789012345789 + // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo + // https://ourgitea.com/git/owner/repo/pulls/123456789 + // + // Into something like: + // + // this is a #123456789, foo + // !123456789 + pos := 0 for { + // re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$) match := re.FindSubmatchIndex(contentBytes[pos:]) if match == nil { break } + // match is a bunch of indices into the content from pos onwards so if our content is: + + // To simplify things let's just add pos to all of the indices in match + for i := range match { + match[i] += pos + } + // match[0]-match[1] is whole string // match[2]-match[3] is preamble - pos += match[3] + + // move the position to the end of the preamble + pos = match[3] + // match[4]-match[5] is owner/repo + // now copy the owner/repo to end of the preamble endPos := pos + match[5] - match[4] copy(contentBytes[pos:endPos], contentBytes[match[4]:match[5]]) + + // move the current position to the end of the newly copied owner/repo pos = endPos + + // Now set the issue/pull marker: + // // match[6]-match[7] == 'issues' contentBytes[pos] = '#' if string(contentBytes[match[6]:match[7]]) == "pulls" { contentBytes[pos] = '!' } pos++ + + // Then add the issue/pull number + // // match[8]-match[9] is the number endPos = pos + match[9] - match[8] copy(contentBytes[pos:endPos], contentBytes[match[8]:match[9]]) + + // Now copy what's left at the end of the string to the new end position copy(contentBytes[endPos:], contentBytes[match[9]:]) // now we reset the length + // our new section has length endPos - match[3] // our old section has length match[9] - match[3] contentBytes = contentBytes[:len(contentBytes)-match[9]+endPos] diff --git a/modules/references/references_test.go b/modules/references/references_test.go index 0c4037f1204af..75b1d0eb0b26d 100644 --- a/modules/references/references_test.go +++ b/modules/references/references_test.go @@ -106,6 +106,13 @@ func TestFindAllIssueReferences(t *testing.T) { {202, "user4", "repo5", "202", true, XRefActionNone, nil, nil, ""}, }, }, + { + "This http://gitea.com:3000/user4/repo5/pulls/202 yes. http://gitea.com:3000/user4/repo5/pulls/203 no", + []testResult{ + {202, "user4", "repo5", "202", true, XRefActionNone, nil, nil, ""}, + {203, "user4", "repo5", "203", true, XRefActionNone, nil, nil, ""}, + }, + }, { "This http://GiTeA.COM:3000/user4/repo6/pulls/205 yes.", []testResult{ From dfd51744fc89f13d473ac1ebfd8d5a51fa5c8ecb Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 9 Nov 2020 22:17:06 +0000 Subject: [PATCH 2/4] Extract function and make testable Signed-off-by: Andrew Thornton --- modules/references/references.go | 132 +++++++++++++------------- modules/references/references_test.go | 21 ++++ 2 files changed, 89 insertions(+), 64 deletions(-) diff --git a/modules/references/references.go b/modules/references/references.go index 9f8a5a952ffb6..a98bbcb723fc7 100644 --- a/modules/references/references.go +++ b/modules/references/references.go @@ -235,77 +235,81 @@ func findAllIssueReferencesMarkdown(content string) []*rawReference { return findAllIssueReferencesBytes(bcontent, links) } -// FindAllIssueReferences returns a list of unvalidated references found in a string. -func FindAllIssueReferences(content string) []IssueReference { - // Need to convert fully qualified html references to local system to #/! short codes - contentBytes := []byte(content) - if re := getGiteaIssuePullPattern(); re != nil { - // We will iterate through the content, rewrite and simplify full references. - // - // We want to transform something like: - // - // 0 1 2 3 4 5 6 - // 012345678901234567890123456789012345678901234567890123456789012345789 - // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo - // https://ourgitea.com/git/owner/repo/pulls/123456789 - // - // Into something like: - // - // this is a #123456789, foo - // !123456789 - - pos := 0 - for { - // re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$) - match := re.FindSubmatchIndex(contentBytes[pos:]) - if match == nil { - break - } - // match is a bunch of indices into the content from pos onwards so if our content is: +func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) { + // We will iterate through the content, rewrite and simplify full references. + // + // We want to transform something like: + // + // 0 1 2 3 4 5 6 + // 012345678901234567890123456789012345678901234567890123456789012345789 + // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo + // https://ourgitea.com/git/owner/repo/pulls/123456789 + // + // Into something like: + // + // this is a #123456789, foo + // !123456789 - // To simplify things let's just add pos to all of the indices in match - for i := range match { - match[i] += pos - } + pos := 0 + for { + // re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$) + match := re.FindSubmatchIndex((*contentBytes)[pos:]) + if match == nil { + break + } + // match is a bunch of indices into the content from pos onwards so if our content is: + + // To simplify things let's just add pos to all of the indices in match + for i := range match { + match[i] += pos + } - // match[0]-match[1] is whole string - // match[2]-match[3] is preamble + // match[0]-match[1] is whole string + // match[2]-match[3] is preamble - // move the position to the end of the preamble - pos = match[3] + // move the position to the end of the preamble + pos = match[3] - // match[4]-match[5] is owner/repo - // now copy the owner/repo to end of the preamble - endPos := pos + match[5] - match[4] - copy(contentBytes[pos:endPos], contentBytes[match[4]:match[5]]) + // match[4]-match[5] is owner/repo + // now copy the owner/repo to end of the preamble + endPos := pos + match[5] - match[4] + copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]]) - // move the current position to the end of the newly copied owner/repo - pos = endPos + // move the current position to the end of the newly copied owner/repo + pos = endPos - // Now set the issue/pull marker: - // - // match[6]-match[7] == 'issues' - contentBytes[pos] = '#' - if string(contentBytes[match[6]:match[7]]) == "pulls" { - contentBytes[pos] = '!' - } - pos++ - - // Then add the issue/pull number - // - // match[8]-match[9] is the number - endPos = pos + match[9] - match[8] - copy(contentBytes[pos:endPos], contentBytes[match[8]:match[9]]) - - // Now copy what's left at the end of the string to the new end position - copy(contentBytes[endPos:], contentBytes[match[9]:]) - // now we reset the length - - // our new section has length endPos - match[3] - // our old section has length match[9] - match[3] - contentBytes = contentBytes[:len(contentBytes)-match[9]+endPos] - pos = endPos + // Now set the issue/pull marker: + // + // match[6]-match[7] == 'issues' + (*contentBytes)[pos] = '#' + if string((*contentBytes)[match[6]:match[7]]) == "pulls" { + (*contentBytes)[pos] = '!' } + pos++ + + // Then add the issue/pull number + // + // match[8]-match[9] is the number + endPos = pos + match[9] - match[8] + copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]]) + + // Now copy what's left at the end of the string to the new end position + copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:]) + // now we reset the length + + // our new section has length endPos - match[3] + // our old section has length match[9] - match[3] + (*contentBytes) = (*contentBytes)[:len((*contentBytes))-match[9]+endPos] + pos = endPos + } +} + +// FindAllIssueReferences returns a list of unvalidated references found in a string. +func FindAllIssueReferences(content string) []IssueReference { + // Need to convert fully qualified html references to local system to #/! short codes + contentBytes := []byte(content) + if re := getGiteaIssuePullPattern(); re != nil { + convertFullHTMLReferencesToShortRefs(re, &contentBytes) } else { log.Debug("No GiteaIssuePullPattern pattern") } diff --git a/modules/references/references_test.go b/modules/references/references_test.go index 75b1d0eb0b26d..f51379e4c332c 100644 --- a/modules/references/references_test.go +++ b/modules/references/references_test.go @@ -5,6 +5,7 @@ package references import ( + "regexp" "testing" "code.gitea.io/gitea/modules/setting" @@ -29,6 +30,26 @@ type testResult struct { TimeLog string } +func TestConvertFullHTMLReferencesToShortRefs(t *testing.T) { + re := regexp.MustCompile(`(\s|^|\(|\[)` + + regexp.QuoteMeta("https://ourgitea.com/git/") + + `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` + + `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`) + test := `this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo +https://ourgitea.com/git/owner/repo/pulls/123456789 + And https://ourgitea.com/git/owner/repo/pulls/123 +` + expect := `this is a owner/repo#123456789, foo +owner/repo!123456789 + And owner/repo!123 +` + + contentBytes := []byte(test) + convertFullHTMLReferencesToShortRefs(re, &contentBytes) + result := string(contentBytes) + assert.EqualValues(t, expect, result) +} + func TestFindAllIssueReferences(t *testing.T) { fixtures := []testFixture{ From 9cf3daef472f03a3f45910c45b98f22d0b68fd50 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 9 Nov 2020 22:19:19 +0000 Subject: [PATCH 3/4] Fix the comment Signed-off-by: Andrew Thornton --- modules/references/references.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/references/references.go b/modules/references/references.go index a98bbcb723fc7..ee93e5a688812 100644 --- a/modules/references/references.go +++ b/modules/references/references.go @@ -257,9 +257,8 @@ func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byt if match == nil { break } - // match is a bunch of indices into the content from pos onwards so if our content is: - - // To simplify things let's just add pos to all of the indices in match + // match is a bunch of indices into the content from pos onwards so + // to simplify things let's just add pos to all of the indices in match for i := range match { match[i] += pos } From e36643fd92d7bd760322ca9501404d38a18055da Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 9 Nov 2020 22:21:02 +0000 Subject: [PATCH 4/4] cleaning up the comments a bit more Signed-off-by: Andrew Thornton --- modules/references/references.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/references/references.go b/modules/references/references.go index ee93e5a688812..6e0baefc6e1b8 100644 --- a/modules/references/references.go +++ b/modules/references/references.go @@ -240,8 +240,6 @@ func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byt // // We want to transform something like: // - // 0 1 2 3 4 5 6 - // 012345678901234567890123456789012345678901234567890123456789012345789 // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo // https://ourgitea.com/git/owner/repo/pulls/123456789 //