Skip to content

Commit b5065b7

Browse files
committed
refactor
1 parent cec21e6 commit b5065b7

File tree

2 files changed

+24
-18
lines changed

2 files changed

+24
-18
lines changed

modules/charset/escape.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ func EscapeControlBytes(text []byte) (EscapeStatus, []byte) {
6363
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
6464
buf := make([]byte, 4096)
6565
readStart := 0
66+
runeCount := 0
6667
var n int
6768
var writePos int
6869

@@ -79,6 +80,8 @@ readingloop:
7980

8081
for i < len(bs) {
8182
r, size := utf8.DecodeRune(bs[i:])
83+
runeCount++
84+
8285
// Now handle the codepoints
8386
switch {
8487
case r == utf8.RuneError:
@@ -113,6 +116,8 @@ readingloop:
113116
lineHasRTLScript = false
114117
lineHasLTRScript = false
115118

119+
case runeCount == 1 && r == 0xFEFF: // UTF BOM
120+
// a BOM is safe only when it is the very first rune of the input
116121
case r == '\r' || r == '\t' || r == ' ':
117122
// These are acceptable control characters and space characters
118123
case unicode.IsSpace(r):
@@ -144,8 +149,7 @@ readingloop:
144149
return
145150
}
146151
writePos = i + size
147-
// 65279 == BOM rune.
148-
case r != rune(65279) && unicode.Is(unicode.C, r):
152+
case unicode.Is(unicode.C, r):
149153
escaped.Escaped = true
150154
escaped.HasControls = true
151155
if writePos < i {

modules/charset/escape_test.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -130,18 +130,12 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
130130
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
131131
},
132132
{
133-
name: "BOM encoding UTF-8",
134-
text: string([]byte{'\xef', '\xbb', '\xbf'}),
135-
result: string([]byte{'\xef', '\xbb', '\xbf'}),
136-
status: EscapeStatus{},
137-
},
138-
{
139-
name: "BOM encoding UTF-16",
140-
text: string([]byte{239, 187, 191, 228, 189, 160, 229, 165, 189, 239, 188, 140, 228, 184, 150, 231, 149, 140, 10, 104, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 10}),
141-
result: string([]byte{239, 187, 191, 228, 189, 160, 229, 165, 189, 239, 188, 140, 228, 184, 150, 231, 149, 140, 10, 104, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 10}),
142-
status: EscapeStatus{
143-
HasLTRScript: true,
144-
},
133+
// UTF-8/16/32 all use the same codepoint for BOM
134+
// Gitea can read UTF-16/32 content and convert it to UTF-8 internally before rendering it, so we only process UTF-8 here
135+
name: "UTF BOM",
136+
text: "\xef\xbb\xbftest",
137+
result: "\xef\xbb\xbftest",
138+
status: EscapeStatus{HasLTRScript: true},
145139
},
146140
}
147141

@@ -177,19 +171,27 @@ func TestEscapeControlReader(t *testing.T) {
177171
// let's add some control characters to the tests
178172
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
179173
copy(tests, escapeControlTests)
174+
175+
// if the text starts with a BOM, keep the BOM first and insert the prefix right after it
176+
addPrefix := func(prefix string, s string) string {
177+
if strings.HasPrefix(s, "\xef\xbb\xbf") {
178+
return s[:3] + prefix + s[3:]
179+
}
180+
return prefix + s
181+
}
180182
for _, test := range escapeControlTests {
181183
test.name += " (+Control)"
182-
test.text = "\u001E" + test.text
183-
test.result = `<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">` + "\u001e" + `</span></span>` + test.result
184+
test.text = addPrefix("\u001E", test.text)
185+
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result)
184186
test.status.Escaped = true
185187
test.status.HasControls = true
186188
tests = append(tests, test)
187189
}
188190

189191
for _, test := range escapeControlTests {
190192
test.name += " (+Mark)"
191-
test.text = "\u0300" + test.text
192-
test.result = `<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">` + "\u0300" + `</span></span>` + test.result
193+
test.text = addPrefix("\u0300", test.text)
194+
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">`+"\u0300"+`</span></span>`, test.result)
193195
test.status.Escaped = true
194196
test.status.HasMarks = true
195197
tests = append(tests, test)

0 commit comments

Comments
 (0)