Skip to content

Alternative fix for HTML diff entity split (#13425) #13427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 30 additions & 84 deletions services/gitdiff/gitdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ var (
codeTagSuffix = []byte(`</span>`)
)
// trailingSpanRegex matches a span tag left open at the very end of a string: the literal "<span",
// optional whitespace, any run of letters, '=' or '"' characters, and an optional closing '>'.
var trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
// entityRegex matches the start of an HTML entity at the very end of a string: '&', any number of '#',
// then any run of digits or letters. The pattern contains no ';', so it only matches text that is not yet terminated.
var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)

// shouldWriteInline represents combinations where we manually write inline changes
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
Expand All @@ -205,14 +206,40 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT
match = ""
}
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
// Since inline changes might split in the middle of a chroma span tag, we manually put it back together
// before writing so we don't try to insert added/removed code spans in the middle of an existing chroma span
// and create broken HTML.
// Since inline changes might split in the middle of a chroma span tag or HTML entity, we manually put it back together
// before writing so we don't try to insert added/removed code spans in the middle of one of those
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
// a line ending with an incomplete HTML entity or partial/opening <span>.

// EX:
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
// {Type: dmp.DiffEqual, Text: "c"},
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>"}]

// After first iteration
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, // write out
// {Type: dmp.DiffEqual, Text: "<span c"},
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>"}]

// After second iteration
// diffs[{Type: dmp.DiffEqual, Text: ""}, // write out
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>"}]

// Final
// diffs[{Type: dmp.DiffDelete, Text: "<span class="p">,</span>"}]
// end up writing <span class="removed-code"><span class="p">,</span></span>
// instead of <span class="removed-code">lass="p">,</span></span>

m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
if m != nil {
match = diff.Text[m[0]:m[1]]
diff.Text = strings.TrimSuffix(diff.Text, match)
}
m = entityRegex.FindStringSubmatchIndex(diff.Text)
if m != nil {
match = diff.Text[m[0]:m[1]]
diff.Text = strings.TrimSuffix(diff.Text, match)
}
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
if strings.HasPrefix(diff.Text, "</span>") {
buf.WriteString("</span>")
Expand Down Expand Up @@ -290,9 +317,6 @@ func init() {
diffMatchPatch.DiffEditCost = 100
}

// unterminatedEntityRE matches an '&' followed by any run of characters other than a space or ';'
// reaching the end of the string, i.e. an entity that was started but not closed.
var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`)
// unstartedEntiyRE matches, at the start of the string, any run of characters other than a space or ';'
// followed by a ';', i.e. the closing part of an entity whose '&' is not in this string.
var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`)

// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
if setting.Git.DisableDiffHighlight {
Expand Down Expand Up @@ -333,89 +357,11 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)

// Now we need to clean up the split entities
diffRecord = unsplitEntities(diffRecord)
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)

return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
}

// unsplitEntities looks for HTML entities that were broken up across diff records (for example
// "&#3" at the end of one record and "4;" at the start of the next) and moves text between
// neighbouring records so that each entity ends up whole inside a single record.
//
// It relies on the records being pre-simplified and on the diffed text being valid HTML.
// The passed-in records are modified in place and extra records may be inserted, so callers
// must use the returned slice.
func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff {
	// Iterate through all but the last record: the last record has no successor to trade text with.
	for i := 0; i+1 < len(records); i++ {
		record := &records[i]

		// Look for an unterminated entity at the end of this record.
		unterminated := unterminatedEntityRE.FindString(record.Text)
		if len(unterminated) == 0 {
			continue
		}

		switch record.Type {
		case diffmatchpatch.DiffEqual:
			// The entity start is shared by both sides, so give it to the delete and the insert that follow.
			record.Text = record.Text[:len(record.Text)-len(unterminated)]
			records[i+1].Text = unterminated + records[i+1].Text

			nextType := records[i+1].Type
			if nextType == diffmatchpatch.DiffEqual {
				continue
			}

			// If the next record is a delete, the one after it has to be an insert, and vice versa.
			oneAfterType := diffmatchpatch.DiffInsert
			if nextType == diffmatchpatch.DiffInsert {
				oneAfterType = diffmatchpatch.DiffDelete
			}

			if i+2 < len(records) && records[i+2].Type == oneAfterType {
				records[i+2].Text = unterminated + records[i+2].Text
			} else {
				// There is no counterpart record yet; create one that carries the entity start.
				records = append(records[:i+2], append([]diffmatchpatch.Diff{{Type: oneAfterType, Text: unterminated}}, records[i+2:]...)...)
			}
		case diffmatchpatch.DiffDelete, diffmatchpatch.DiffInsert:
			// An insert or delete claims the terminal bit of its entity from the next equal in line.
			targetType := diffmatchpatch.DiffInsert
			if record.Type == diffmatchpatch.DiffInsert {
				targetType = diffmatchpatch.DiffDelete
			}
			next := &records[i+1]
			if next.Type == diffmatchpatch.DiffEqual {
				// The next record is an equal: take the entity end off its start.
				if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 {
					record.Text += terminal
					next.Text = next.Text[len(terminal):]
					// The opposite side also contained that terminal text (it was part of the equal),
					// so hand it back as a record of the opposite type placed before the equal.
					// This keeps both sides reconstructible from the records.
					records = append(records[:i+1], append([]diffmatchpatch.Diff{{Type: targetType, Text: terminal}}, records[i+1:]...)...)
				}
			} else if next.Type == targetType {
				// The next record is the opposite change: take the entity end off the equal after it
				// and add it to both this record and the next one.
				if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual {
					if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 {
						record.Text += terminal
						next.Text += terminal
						records[i+2].Text = records[i+2].Text[len(terminal):]
					}
				}
			}
		}
	}
	return records
}

// DiffFile represents a file diff.
type DiffFile struct {
Name string
Expand Down
45 changes: 15 additions & 30 deletions services/gitdiff/gitdiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"github.com/sergi/go-diff/diffmatchpatch"
dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/stretchr/testify/assert"
"gopkg.in/ini.v1"
Expand All @@ -27,35 +26,6 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
}
}

// TestUnsplitEntities checks that, after unsplitEntities has run, no diff record ends in an
// unterminated HTML entity and that both original strings can still be rebuilt from the records.
func TestUnsplitEntities(t *testing.T) {
	left := "sh &#34;useradd -u 111 jenkins&#34;"
	right := "sh &#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;"

	// Diff and simplify, then repair the split entities and simplify once more.
	records := diffMatchPatch.DiffCleanupEfficiency(diffMatchPatch.DiffMain(left, right, true))
	records = diffMatchPatch.DiffCleanupEfficiency(unsplitEntities(records))

	var gotLeft, gotRight string
	for i := range records {
		text := records[i].Text
		assert.False(t, unterminatedEntityRE.MatchString(text), "")

		// Everything except insertions belongs to the left side,
		// everything except deletions belongs to the right side.
		if records[i].Type != diffmatchpatch.DiffInsert {
			gotLeft += text
		}
		if records[i].Type != diffmatchpatch.DiffDelete {
			gotRight += text
		}
	}

	assert.EqualValues(t, left, gotLeft)
	assert.EqualValues(t, right, gotRight)
}

func TestDiffToHTML(t *testing.T) {
setting.Cfg = ini.Empty()
assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{
Expand Down Expand Up @@ -113,6 +83,21 @@ func TestDiffToHTML(t *testing.T) {
{Type: dmp.DiffEqual, Text: "<span class=\"sa\"></span><span class=\"s2\">&#34;</span><span class=\"s2\">// </span><span class=\"s2\">&#34;</span><span class=\"p\">,</span> <span class=\"n\">sys</span><span class=\"o\">.</span><span class=\"n\">argv</span>"},
{Type: dmp.DiffInsert, Text: "<span class=\"p\">)</span>"},
}, DiffLineAdd))

assertEqual(t, "sh <span class=\"added-code\">&#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins</span>&#39;", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffEqual, Text: "sh &#3"},
{Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins&#34"},
{Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39"},
{Type: dmp.DiffEqual, Text: ";"},
}, DiffLineAdd))

assertEqual(t, "<span class=\"x\"> &lt;h<span class=\"added-code\">4 class=</span><span class=\"added-code\">&#34;release-list-title df ac&#34;</span>&gt;</span>", diffToHTML("", []dmp.Diff{
{Type: dmp.DiffEqual, Text: "<span class=\"x\"> &lt;h"},
{Type: dmp.DiffInsert, Text: "4 class=&#"},
{Type: dmp.DiffEqual, Text: "3"},
{Type: dmp.DiffInsert, Text: "4;release-list-title df ac&#34;"},
{Type: dmp.DiffEqual, Text: "&gt;</span>"},
}, DiffLineAdd))
}

func TestParsePatch_singlefile(t *testing.T) {
Expand Down