From 52f38a74674a6f55d6d3ed6af2d8b0b43d3ec057 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 16 Jun 2022 23:32:17 +0800 Subject: [PATCH 1/7] refactor --- modules/highlight/highlight.go | 143 ++++++++++++++---------- modules/highlight/highlight_test.go | 166 +++++++++++++++------------- routers/web/repo/view.go | 26 ++--- 3 files changed, 189 insertions(+), 146 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 146264c7244b5..86d1366d3a2a5 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -10,6 +10,7 @@ import ( "bytes" "fmt" gohtml "html" + "io" "path/filepath" "strings" "sync" @@ -26,7 +27,7 @@ import ( ) // don't index files larger than this many bytes for performance purposes -const sizeLimit = 1000000 +const sizeLimit = 1024 * 1024 var ( // For custom user mapping @@ -40,11 +41,12 @@ var ( // NewContext loads custom highlight map from local config func NewContext() { once.Do(func() { - keys := setting.Cfg.Section("highlight.mapping").Keys() - for i := range keys { - highlightMapping[keys[i].Name()] = keys[i].Value() + if setting.Cfg != nil { + keys := setting.Cfg.Section("highlight.mapping").Keys() + for i := range keys { + highlightMapping[keys[i].Name()] = keys[i].Value() + } } - // The size 512 is simply a conservative rule of thumb c, err := lru.New2Q(512) if err != nil { @@ -132,30 +134,26 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { return strings.TrimSuffix(htmlbuf.String(), "\n") } -// File returns a slice of chroma syntax highlighted lines of code -func File(numLines int, fileName, language string, code []byte) []string { +// File returns a slice of chroma syntax highlighted HTML lines of code +func File(fileName, language string, code []byte) ([]string, error) { NewContext() if len(code) > sizeLimit { - return plainText(string(code), numLines) + return PlainText(code), nil } + formatter := html.New(html.WithClasses(true), html.WithLineNumbers(false), html.PreventSurroundingPre(true), ) - if formatter == nil { - log.Error("Couldn't create chroma formatter") - return plainText(string(code), numLines) - } - - htmlbuf := bytes.Buffer{} - htmlw := bufio.NewWriter(&htmlbuf) + htmlBuf := bytes.Buffer{} + htmlWriter := bufio.NewWriter(&htmlBuf) var lexer chroma.Lexer // provided language overrides everything - if len(language) > 0 { + if language != "" { lexer = lexers.Get(language) } @@ -166,9 +164,9 @@ func File(numLines int, fileName, language string, code []byte) []string { } if lexer == nil { - language := analyze.GetCodeLanguage(fileName, code) + guessLanguage := analyze.GetCodeLanguage(fileName, code) - lexer = lexers.Get(language) + lexer = lexers.Get(guessLanguage) if lexer == nil { lexer = lexers.Match(fileName) if lexer == nil { @@ -179,61 +177,90 @@ func File(numLines int, fileName, language string, code []byte) []string { iterator, err := lexer.Tokenise(nil, string(code)) if err != nil { - log.Error("Can't tokenize code: %v", err) - return plainText(string(code), numLines) + return nil, fmt.Errorf("can't tokenize code: %w", err) } - err = formatter.Format(htmlw, styles.GitHub, iterator) + err = formatter.Format(htmlWriter, styles.GitHub, iterator) if err != nil { - log.Error("Can't format code: %v", err) - return plainText(string(code), numLines) - } - - htmlw.Flush() - finalNewLine := false - if len(code) > 0 { - finalNewLine = code[len(code)-1] == '\n' + return nil, fmt.Errorf("can't format code: %w", err) } - m := make([]string, 0, numLines) - for i, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) { - content := string(v) + _ = htmlWriter.Flush() - // remove useless wrapper nodes that are always present - content = strings.Replace(content, "", "", 1) - content = strings.TrimPrefix(content, ``) + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) - // if there's no final newline, closing tags will be on last line - if !finalNewLine && i == numLines-1 { - content = strings.TrimSuffix(content, ``) + htmlStr := htmlBuf.String() + line := strings.Builder{} + insideLine := 0 // every makes it increase one level, every closed makes it decrease one level + tagStack := make([]string, 0, 4) + for len(htmlStr) > 0 { + pos1 := strings.IndexByte(htmlStr, '<') + pos2 := strings.IndexByte(htmlStr, '>') + if pos1 == -1 || pos2 == -1 || pos1 > pos2 { + break } - - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" - } else if content == `` { - content += "\n" + tag := htmlStr[pos1 : pos2+1] + if insideLine > 0 { + line.WriteString(htmlStr[:pos1]) } - - m = append(m, content) + if tag[1] == '/' { + if len(tagStack) == 0 { + return nil, fmt.Errorf("can't find matched tag: %q", tag) + } + popped := tagStack[len(tagStack)-1] + tagStack = tagStack[:len(tagStack)-1] + if popped == `` { + insideLine-- + lineStr := line.String() + if lineStr != "" && lineStr[len(lineStr)-1] == '\n' { + lineStr = lineStr[:len(lineStr)-1] + " " + } + m = append(m, lineStr) + line = strings.Builder{} + } + if insideLine > 0 { + line.WriteString(tag) + } + } else { + tagStack = append(tagStack, tag) + if insideLine > 0 { + line.WriteString(tag) + } + if tag == `` { + insideLine++ + } + } + htmlStr = htmlStr[pos2+1:] } - if finalNewLine { - m = append(m, "\n") + + if len(m) == 0 { + m = append(m, "") // maybe we do not want to return 0 lines } - return m + return m, nil } -// return unhiglighted map -func plainText(code string, numLines int) []string { - m := make([]string, 0, numLines) - for _, v := range strings.SplitN(string(code), "\n", numLines) { - content := string(v) - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" +// PlainText returns non-highlighted HTML for code +func PlainText(code []byte) []string { + r := bufio.NewReader(bytes.NewReader(code)) + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) + for { + content, err := r.ReadString('\n') + if err != nil && err != io.EOF { + log.Error("failed to read string from buffer: %v", err) + break + } + if content == "" && err == io.EOF { + break } - m = append(m, gohtml.EscapeString(content)) + s := gohtml.EscapeString(content) + s = strings.ReplaceAll(s, "\n", " ") + m = append(m, s) } + + if len(m) == 0 { + m = append(m, "") // maybe we do not want to return 0 lines + } + return m } diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index bbe1b716b83ab..6875109bdebbe 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -8,100 +8,116 @@ import ( "strings" "testing" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" - "github.com/stretchr/testify/assert" - "gopkg.in/ini.v1" ) func TestFile(t *testing.T) { - setting.Cfg = ini.Empty() + tests := []struct { - name string - numLines int - fileName string - code string - want string + name string + code string + want []string }{ { - name: ".drone.yml", - numLines: 12, - fileName: ".drone.yml", - code: util.Dedent(` - kind: pipeline - name: default + name: "empty.py", + code: "", + want: []string{""}, + }, + { + name: "eol-no.py", + code: "a=1", + want: []string{`a=1`}, + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: []string{ + `a=1 `, + }, + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: []string{ + `a=1 `, + ` `, + }, + }, + { + name: "empty-line.py", + code: strings.TrimSpace(` +a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic +b='' `), - want: util.Dedent(` - kind: pipeline - name: default - + want: []string{ + `a=1 `, + ` `, + `b=''`, + }, + }, + } - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `), + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + lines, err := File(tt.name, "", []byte(tt.code)) + assert.NoError(t, err) + assert.EqualValues(t, tt.want, lines) + }) + } +} + +func TestPlainText(t *testing.T) { + + tests := []struct { + name string + code string + want []string + }{ + { + name: "empty.py", + code: "", + want: []string{""}, }, { - name: ".drone.yml - trailing space", - numLines: 13, - fileName: ".drone.yml", - code: strings.Replace(util.Dedent(` - kind: pipeline - name: default - - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `)+"\n", "name: default", "name: default ", 1), - want: util.Dedent(` - kind: pipeline - name: default - + name: "eol-no.py", + code: "a=1", + want: []string{`a=1`}, + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: []string{ + `a=1 `, + }, + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: []string{ + `a=1 `, + ` `, + }, + }, + { + name: "empty-line.py", + code: strings.TrimSpace(` +a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - - - +b='' `), + want: []string{ + `a=1 `, + ` `, + `b=''`, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := strings.Join(File(tt.numLines, tt.fileName, "", []byte(tt.code)), "\n") - assert.Equal(t, tt.want, got) - assert.Equal(t, strings.Count(got, "")) + lines := PlainText([]byte(tt.code)) + assert.EqualValues(t, tt.want, lines) }) } } diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 01bd2d89234f5..1586dd4bf58db 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -15,7 +15,6 @@ import ( "net/http" "net/url" "path" - "strconv" "strings" "time" @@ -58,15 +57,6 @@ type namedBlob struct { blob *git.Blob } -func linesBytesCount(s []byte) int { - nl := []byte{'\n'} - n := bytes.Count(s, nl) - if len(s) > 0 && !bytes.HasSuffix(s, nl) { - n++ - } - return n -} - // FIXME: There has to be a more efficient way of doing this func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) { tree, err := commit.SubTree(treePath) @@ -552,8 +542,14 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ) } else { buf, _ := io.ReadAll(rd) - lineNums := linesBytesCount(buf) - ctx.Data["NumLines"] = strconv.Itoa(lineNums) + + // empty: 0 line; "a": one line; "a\n": two lines; "a\nb": two lines; + // the NumLines is only used for the display on the UI: "xxx lines" + if len(buf) == 0 { + ctx.Data["NumLines"] = 0 + } else { + ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 + } ctx.Data["NumLinesSet"] = true language := "" @@ -581,7 +577,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st language = "" } } - fileContent := highlight.File(lineNums, blob.Name(), language, buf) + fileContent, err := highlight.File(blob.Name(), language, buf) + if err != nil { + log.Error("highlight.File failed, fallback to plain text: %v", err) + fileContent = highlight.PlainText(buf) + } status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) ctx.Data["EscapeStatus"] = status statuses := make([]charset.EscapeStatus, len(fileContent)) From 71519137f1e25de924c4e664d7f8d2cf14c50130 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 16 Jun 2022 23:38:50 +0800 Subject: [PATCH 2/7] remove unused Dedent --- modules/util/util.go | 33 --------------------------------- modules/util/util_test.go | 7 ------- 2 files changed, 40 deletions(-) diff --git a/modules/util/util.go b/modules/util/util.go index be60fe4b4bbdf..97ed56d3cf0ee 100644 --- a/modules/util/util.go +++ b/modules/util/util.go @@ -9,7 +9,6 @@ import ( "crypto/rand" "errors" "math/big" - "regexp" "strconv" "strings" @@ -193,38 +192,6 @@ func ToTitleCase(s string) string { return titleCaser.String(s) } -var ( - whitespaceOnly = regexp.MustCompile("(?m)^[ \t]+$") - leadingWhitespace = regexp.MustCompile("(?m)(^[ \t]*)(?:[^ \t\n])") -) - -// Dedent removes common indentation of a multi-line string along with whitespace around it -// Based on https://github.com/lithammer/dedent -func Dedent(s string) string { - var margin string - - s = whitespaceOnly.ReplaceAllString(s, "") - indents := leadingWhitespace.FindAllStringSubmatch(s, -1) - - for i, indent := range indents { - if i == 0 { - margin = indent[1] - } else if strings.HasPrefix(indent[1], margin) { - continue - } else if strings.HasPrefix(margin, indent[1]) { - margin = indent[1] - } else { - margin = "" - break - } - } - - if margin != "" { - s = regexp.MustCompile("(?m)^"+margin).ReplaceAllString(s, "") - } - return strings.TrimSpace(s) -} - // NumberIntoInt64 transform a given int into int64. func NumberIntoInt64(number interface{}) int64 { var value int64 diff --git a/modules/util/util_test.go b/modules/util/util_test.go index 91b0ef9455e1d..ca5bd87eaebe3 100644 --- a/modules/util/util_test.go +++ b/modules/util/util_test.go @@ -225,10 +225,3 @@ func TestToTitleCase(t *testing.T) { assert.Equal(t, ToTitleCase(`foo bar baz`), `Foo Bar Baz`) assert.Equal(t, ToTitleCase(`FOO BAR BAZ`), `Foo Bar Baz`) } - -func TestDedent(t *testing.T) { - assert.Equal(t, Dedent(` - foo - bar - `), "foo\n\tbar") -} From 413461f1bbb442d429e3aa7ee96d5c3c03e20d9d Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 17 Jun 2022 08:13:40 +0800 Subject: [PATCH 3/7] add tests for special HTML chars --- modules/highlight/highlight_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 6875109bdebbe..5c137a3bd06fe 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -23,6 +23,16 @@ func TestFile(t *testing.T) { code: "", want: []string{""}, }, + { + name: "tags.txt", + code: "<>", + want: []string{"<>"}, + }, + { + name: "tags.py", + code: "<>", + want: []string{`<>`}, + }, { name: "eol-no.py", code: "a=1", @@ -79,6 +89,11 @@ func TestPlainText(t *testing.T) { code: "", want: []string{""}, }, + { + name: "tags.py", + code: "<>", + want: []string{"<>"}, + }, { name: "eol-no.py", code: "a=1", From a1daefa469e8ae61fb9bbba5b70f91bd0ed78f48 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 17 Jun 2022 08:43:48 +0800 Subject: [PATCH 4/7] more tests for space and indent --- modules/highlight/highlight_test.go | 37 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 5c137a3bd06fe..7485216355cbe 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -54,16 +54,22 @@ func TestFile(t *testing.T) { }, }, { - name: "empty-line.py", - code: strings.TrimSpace(` -a=1 + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 b='' - `), +{space} +c=2 + `), "{space}", " "), want: []string{ - `a=1 `, + `def: `, + ` a=1 `, ` `, - `b=''`, + `b='' `, + ` `, + `c=2`, }, }, } @@ -72,7 +78,7 @@ b='' t.Run(tt.name, func(t *testing.T) { lines, err := File(tt.name, "", []byte(tt.code)) assert.NoError(t, err) - assert.EqualValues(t, tt.want, lines) + assert.EqualValues(t, strings.Join(tt.want, "\n"), strings.Join(lines, "\n")) }) } } @@ -116,23 +122,22 @@ func TestPlainText(t *testing.T) { }, { name: "empty-line.py", - code: strings.TrimSpace(` -a=1 + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 b='' - `), - want: []string{ - `a=1 `, - ` `, - `b=''`, - }, +{space} +c=2 + `), "{space}", " "), + want: strings.Split("def: \n a=1 \n \nb='' \n \nc=2", "\n"), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { lines := PlainText([]byte(tt.code)) - assert.EqualValues(t, tt.want, lines) + assert.EqualValues(t, strings.Join(tt.want, "\n"), strings.Join(lines, "\n")) }) } } From 4e87838ae52db324913297ea871c568f606bee2b Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 17 Jun 2022 08:52:00 +0800 Subject: [PATCH 5/7] introduce `lines` helper function to support test case in string --- modules/highlight/highlight_test.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 7485216355cbe..5d371bb26c643 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -11,6 +11,10 @@ import ( "github.com/stretchr/testify/assert" ) +func lines(s string) []string { + return strings.Split(strings.TrimSpace(s), "\n") +} + func TestFile(t *testing.T) { tests := []struct { @@ -63,14 +67,14 @@ b='' {space} c=2 `), "{space}", " "), - want: []string{ - `def: `, - ` a=1 `, - ` `, - `b='' `, - ` `, - `c=2`, - }, + want: lines(` +def: + a=1 + +b='' + +c=2`, + ), }, } @@ -121,7 +125,7 @@ func TestPlainText(t *testing.T) { }, }, { - name: "empty-line.py", + name: "empty-line-with-space.py", code: strings.ReplaceAll(strings.TrimSpace(` def: a=1 From b60c044a09d04ae173e426272a3b7f8694c9d3d3 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Wed, 13 Jul 2022 08:49:29 +0800 Subject: [PATCH 6/7] fix --- modules/highlight/highlight_test.go | 21 +++++++++++++----- modules/util/util.go | 33 +++++++++++++++++++++++++++++ modules/util/util_test.go | 7 ++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index 5d371bb26c643..bd4aae6ee8288 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -80,9 +80,12 @@ c=2 for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - lines, err := File(tt.name, "", []byte(tt.code)) + out, err := File(tt.name, "", []byte(tt.code)) assert.NoError(t, err) - assert.EqualValues(t, strings.Join(tt.want, "\n"), strings.Join(lines, "\n")) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.Equal(t, strings.Count(actual, "")) + assert.EqualValues(t, expected, actual) }) } } @@ -134,14 +137,22 @@ b='' {space} c=2 `), "{space}", " "), - want: strings.Split("def: \n a=1 \n \nb='' \n \nc=2", "\n"), + want: lines(` +def: + a=1 + +b='' + +c=2`), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - lines := PlainText([]byte(tt.code)) - assert.EqualValues(t, strings.Join(tt.want, "\n"), strings.Join(lines, "\n")) + out := PlainText([]byte(tt.code)) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.EqualValues(t, expected, actual) }) } } diff --git a/modules/util/util.go b/modules/util/util.go index 97ed56d3cf0ee..be60fe4b4bbdf 100644 --- a/modules/util/util.go +++ b/modules/util/util.go @@ -9,6 +9,7 @@ import ( "crypto/rand" "errors" "math/big" + "regexp" "strconv" "strings" @@ -192,6 +193,38 @@ func ToTitleCase(s string) string { return titleCaser.String(s) } +var ( + whitespaceOnly = regexp.MustCompile("(?m)^[ \t]+$") + leadingWhitespace = regexp.MustCompile("(?m)(^[ \t]*)(?:[^ \t\n])") +) + +// Dedent removes common indentation of a multi-line string along with whitespace around it +// Based on https://github.com/lithammer/dedent +func Dedent(s string) string { + var margin string + + s = whitespaceOnly.ReplaceAllString(s, "") + indents := leadingWhitespace.FindAllStringSubmatch(s, -1) + + for i, indent := range indents { + if i == 0 { + margin = indent[1] + } else if strings.HasPrefix(indent[1], margin) { + continue + } else if strings.HasPrefix(margin, indent[1]) { + margin = indent[1] + } else { + margin = "" + break + } + } + + if margin != "" { + s = regexp.MustCompile("(?m)^"+margin).ReplaceAllString(s, "") + } + return strings.TrimSpace(s) +} + // NumberIntoInt64 transform a given int into int64. func NumberIntoInt64(number interface{}) int64 { var value int64 diff --git a/modules/util/util_test.go b/modules/util/util_test.go index ca5bd87eaebe3..91b0ef9455e1d 100644 --- a/modules/util/util_test.go +++ b/modules/util/util_test.go @@ -225,3 +225,10 @@ func TestToTitleCase(t *testing.T) { assert.Equal(t, ToTitleCase(`foo bar baz`), `Foo Bar Baz`) assert.Equal(t, ToTitleCase(`FOO BAR BAZ`), `Foo Bar Baz`) } + +func TestDedent(t *testing.T) { + assert.Equal(t, Dedent(` + foo + bar + `), "foo\n\tbar") +} From a98a18599042d345f42fc3a5c0e90593b2dce63f Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Thu, 14 Jul 2022 10:21:30 +0800 Subject: [PATCH 7/7] introduce newLineInHTML: make it easier to switch back and make it clear to do tests --- modules/highlight/highlight.go | 22 +++++++++++++++------- modules/highlight/highlight_test.go | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 86d1366d3a2a5..8838176eabd32 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -29,6 +29,12 @@ import ( // don't index files larger than this many bytes for performance purposes const sizeLimit = 1024 * 1024 +// newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept +// this option is here for 2 purposes: +// (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future +// (2) make it clear to do tests: " " is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored +var newLineInHTML = " " + var ( // For custom user mapping highlightMapping = map[string]string{} @@ -60,7 +66,7 @@ func NewContext() { func Code(fileName, language, code string) string { NewContext() - // diff view newline will be passed as empty, change to literal \n so it can be copied + // diff view newline will be passed as empty, change to literal '\n' so it can be copied // preserve literal newline in blame view if code == "" || code == "\n" { return "\n" @@ -116,7 +122,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { htmlbuf := bytes.Buffer{} htmlw := bufio.NewWriter(&htmlbuf) - iterator, err := lexer.Tokenise(nil, string(code)) + iterator, err := lexer.Tokenise(nil, code) if err != nil { log.Error("Can't tokenize code: %v", err) return code @@ -128,9 +134,9 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { return code } - htmlw.Flush() + _ = htmlw.Flush() // Chroma will add newlines for certain lexers in order to highlight them properly - // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output + // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output return strings.TrimSuffix(htmlbuf.String(), "\n") } @@ -212,8 +218,8 @@ func File(fileName, language string, code []byte) ([]string, error) { if popped == `` { insideLine-- lineStr := line.String() - if lineStr != "" && lineStr[len(lineStr)-1] == '\n' { - lineStr = lineStr[:len(lineStr)-1] + " " + if newLineInHTML != "" && lineStr != "" && lineStr[len(lineStr)-1] == '\n' { + lineStr = lineStr[:len(lineStr)-1] + newLineInHTML } m = append(m, lineStr) line = strings.Builder{} @@ -254,7 +260,9 @@ func PlainText(code []byte) []string { break } s := gohtml.EscapeString(content) - s = strings.ReplaceAll(s, "\n", " ") + if newLineInHTML != "" && s != "" && s[len(s)-1] == '\n' { + s = s[:len(s)-1] + newLineInHTML + } m = append(m, s) } diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index bd4aae6ee8288..d37d81ff6a313 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -16,7 +16,12 @@ func lines(s string) []string { } func TestFile(t *testing.T) { + defaultNewLineInHTML := newLineInHTML + defer func() { + newLineInHTML = defaultNewLineInHTML + }() + newLineInHTML = " " tests := []struct { name string code string @@ -88,10 +93,20 @@ c=2 assert.EqualValues(t, expected, actual) }) } + + newLineInHTML = "" + out, err := File("test-original-newline.py", "", []byte("a=1\n")) + assert.NoError(t, err) + assert.EqualValues(t, `a=1`+"\n", strings.Join(out, "")) } func TestPlainText(t *testing.T) { + defaultNewLineInHTML := newLineInHTML + defer func() { + newLineInHTML = defaultNewLineInHTML + }() + newLineInHTML = " " tests := []struct { name string code string @@ -155,4 +170,8 @@ c=2`), assert.EqualValues(t, expected, actual) }) } + + newLineInHTML = "" + out := PlainText([]byte("a=1\n")) + assert.EqualValues(t, "a=1\n", strings.Join(out, "")) }