Skip to content

Commit 304f29a

Browse files
authored
Add basic content sanitizer (#1344)
1 parent dc53810 commit 304f29a

File tree

5 files changed

+284
-3
lines changed

5 files changed

+284
-3
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@ bin/
1717
.DS_Store
1818

1919
# binary
20-
github-mcp-server
20+
github-mcp-server
21+
22+
.history

pkg/github/issues.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"time"
1111

1212
ghErrors "github.com/github/github-mcp-server/pkg/errors"
13+
"github.com/github/github-mcp-server/pkg/sanitize"
1314
"github.com/github/github-mcp-server/pkg/translations"
1415
"github.com/go-viper/mapstructure/v2"
1516
"github.com/google/go-github/v76/github"
@@ -211,15 +212,15 @@ func fragmentToIssue(fragment IssueFragment) *github.Issue {
211212

212213
return &github.Issue{
213214
Number: github.Ptr(int(fragment.Number)),
214-
Title: github.Ptr(string(fragment.Title)),
215+
Title: github.Ptr(sanitize.FilterInvisibleCharacters(string(fragment.Title))),
215216
CreatedAt: &github.Timestamp{Time: fragment.CreatedAt.Time},
216217
UpdatedAt: &github.Timestamp{Time: fragment.UpdatedAt.Time},
217218
User: &github.User{
218219
Login: github.Ptr(string(fragment.Author.Login)),
219220
},
220221
State: github.Ptr(string(fragment.State)),
221222
ID: github.Ptr(fragment.DatabaseID),
222-
Body: github.Ptr(string(fragment.Body)),
223+
Body: github.Ptr(sanitize.FilterInvisibleCharacters(string(fragment.Body))),
223224
Labels: foundLabels,
224225
Comments: github.Ptr(int(fragment.Comments.TotalCount)),
225226
}
@@ -323,6 +324,16 @@ func GetIssue(ctx context.Context, client *github.Client, owner string, repo str
323324
return mcp.NewToolResultError(fmt.Sprintf("failed to get issue: %s", string(body))), nil
324325
}
325326

327+
// Sanitize title/body on response
328+
if issue != nil {
329+
if issue.Title != nil {
330+
issue.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*issue.Title))
331+
}
332+
if issue.Body != nil {
333+
issue.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*issue.Body))
334+
}
335+
}
336+
326337
r, err := json.Marshal(issue)
327338
if err != nil {
328339
return nil, fmt.Errorf("failed to marshal issue: %w", err)

pkg/github/pullrequests.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/shurcooL/githubv4"
1515

1616
ghErrors "github.com/github/github-mcp-server/pkg/errors"
17+
"github.com/github/github-mcp-server/pkg/sanitize"
1718
"github.com/github/github-mcp-server/pkg/translations"
1819
)
1920

@@ -123,6 +124,16 @@ func GetPullRequest(ctx context.Context, client *github.Client, owner, repo stri
123124
return mcp.NewToolResultError(fmt.Sprintf("failed to get pull request: %s", string(body))), nil
124125
}
125126

127+
// sanitize title/body on response
128+
if pr != nil {
129+
if pr.Title != nil {
130+
pr.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Title))
131+
}
132+
if pr.Body != nil {
133+
pr.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Body))
134+
}
135+
}
136+
126137
r, err := json.Marshal(pr)
127138
if err != nil {
128139
return nil, fmt.Errorf("failed to marshal response: %w", err)
@@ -804,6 +815,19 @@ func ListPullRequests(getClient GetClientFn, t translations.TranslationHelperFun
804815
return mcp.NewToolResultError(fmt.Sprintf("failed to list pull requests: %s", string(body))), nil
805816
}
806817

818+
// sanitize title/body on each PR
819+
for _, pr := range prs {
820+
if pr == nil {
821+
continue
822+
}
823+
if pr.Title != nil {
824+
pr.Title = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Title))
825+
}
826+
if pr.Body != nil {
827+
pr.Body = github.Ptr(sanitize.FilterInvisibleCharacters(*pr.Body))
828+
}
829+
}
830+
807831
r, err := json.Marshal(prs)
808832
if err != nil {
809833
return nil, fmt.Errorf("failed to marshal response: %w", err)

pkg/sanitize/sanitize.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package sanitize
2+
3+
// FilterInvisibleCharacters removes invisible or control characters that should not appear
4+
// in user-facing titles or bodies. This includes:
5+
// - Unicode tag characters: U+E0001, U+E0020–U+E007F
6+
// - BiDi control characters: U+202A–U+202E, U+2066–U+2069
7+
// - Hidden modifier characters: U+200B, U+200C, U+200E, U+200F, U+00AD, U+FEFF, U+180E, U+2060–U+2064
8+
func FilterInvisibleCharacters(input string) string {
9+
if input == "" {
10+
return input
11+
}
12+
13+
// Filter runes
14+
out := make([]rune, 0, len(input))
15+
for _, r := range input {
16+
if !shouldRemoveRune(r) {
17+
out = append(out, r)
18+
}
19+
}
20+
return string(out)
21+
}
22+
23+
func shouldRemoveRune(r rune) bool {
24+
switch r {
25+
case 0x200B, // ZERO WIDTH SPACE
26+
0x200C, // ZERO WIDTH NON-JOINER
27+
0x200E, // LEFT-TO-RIGHT MARK
28+
0x200F, // RIGHT-TO-LEFT MARK
29+
0x00AD, // SOFT HYPHEN
30+
0xFEFF, // ZERO WIDTH NO-BREAK SPACE
31+
0x180E: // MONGOLIAN VOWEL SEPARATOR
32+
return true
33+
case 0xE0001: // TAG
34+
return true
35+
}
36+
37+
// Ranges
38+
// Unicode tags: U+E0020–U+E007F
39+
if r >= 0xE0020 && r <= 0xE007F {
40+
return true
41+
}
42+
// BiDi controls: U+202A–U+202E
43+
if r >= 0x202A && r <= 0x202E {
44+
return true
45+
}
46+
// BiDi isolates: U+2066–U+2069
47+
if r >= 0x2066 && r <= 0x2069 {
48+
return true
49+
}
50+
// Hidden modifiers: U+2060–U+2064
51+
if r >= 0x2060 && r <= 0x2064 {
52+
return true
53+
}
54+
55+
return false
56+
}

pkg/sanitize/sanitize_test.go

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
package sanitize
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestFilterInvisibleCharacters(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
input string
13+
expected string
14+
}{
15+
{
16+
name: "empty string",
17+
input: "",
18+
expected: "",
19+
},
20+
{
21+
name: "normal text without invisible characters",
22+
input: "Hello World",
23+
expected: "Hello World",
24+
},
25+
{
26+
name: "text with zero width space",
27+
input: "Hello\u200BWorld",
28+
expected: "HelloWorld",
29+
},
30+
{
31+
name: "text with zero width non-joiner",
32+
input: "Hello\u200CWorld",
33+
expected: "HelloWorld",
34+
},
35+
{
36+
name: "text with left-to-right mark",
37+
input: "Hello\u200EWorld",
38+
expected: "HelloWorld",
39+
},
40+
{
41+
name: "text with right-to-left mark",
42+
input: "Hello\u200FWorld",
43+
expected: "HelloWorld",
44+
},
45+
{
46+
name: "text with soft hyphen",
47+
input: "Hello\u00ADWorld",
48+
expected: "HelloWorld",
49+
},
50+
{
51+
name: "text with zero width no-break space (BOM)",
52+
input: "Hello\uFEFFWorld",
53+
expected: "HelloWorld",
54+
},
55+
{
56+
name: "text with mongolian vowel separator",
57+
input: "Hello\u180EWorld",
58+
expected: "HelloWorld",
59+
},
60+
{
61+
name: "text with unicode tag character",
62+
input: "Hello\U000E0001World",
63+
expected: "HelloWorld",
64+
},
65+
{
66+
name: "text with unicode tag range characters",
67+
input: "Hello\U000E0020World\U000E007FTest",
68+
expected: "HelloWorldTest",
69+
},
70+
{
71+
name: "text with bidi control characters",
72+
input: "Hello\u202AWorld\u202BTest\u202CEnd\u202DMore\u202EFinal",
73+
expected: "HelloWorldTestEndMoreFinal",
74+
},
75+
{
76+
name: "text with bidi isolate characters",
77+
input: "Hello\u2066World\u2067Test\u2068End\u2069Final",
78+
expected: "HelloWorldTestEndFinal",
79+
},
80+
{
81+
name: "text with hidden modifier characters",
82+
input: "Hello\u2060World\u2061Test\u2062End\u2063More\u2064Final",
83+
expected: "HelloWorldTestEndMoreFinal",
84+
},
85+
{
86+
name: "multiple invisible characters mixed",
87+
input: "Hello\u200B\u200C\u200E\u200F\u00AD\uFEFF\u180E\U000E0001World",
88+
expected: "HelloWorld",
89+
},
90+
{
91+
name: "text with normal unicode characters (should be preserved)",
92+
input: "Hello 世界 🌍 αβγ",
93+
expected: "Hello 世界 🌍 αβγ",
94+
},
95+
{
96+
name: "invisible characters at start and end",
97+
input: "\u200BHello World\u200C",
98+
expected: "Hello World",
99+
},
100+
{
101+
name: "only invisible characters",
102+
input: "\u200B\u200C\u200E\u200F",
103+
expected: "",
104+
},
105+
{
106+
name: "real-world example with title",
107+
input: "Fix\u200B bug\u00AD in\u202A authentication\u202C",
108+
expected: "Fix bug in authentication",
109+
},
110+
{
111+
name: "issue body with mixed content",
112+
input: "This is a\u200B bug report.\n\nSteps to reproduce:\u200C\n1. Do this\u200E\n2. Do that\u200F",
113+
expected: "This is a bug report.\n\nSteps to reproduce:\n1. Do this\n2. Do that",
114+
},
115+
}
116+
117+
for _, tt := range tests {
118+
t.Run(tt.name, func(t *testing.T) {
119+
result := FilterInvisibleCharacters(tt.input)
120+
assert.Equal(t, tt.expected, result)
121+
})
122+
}
123+
}
124+
125+
func TestShouldRemoveRune(t *testing.T) {
126+
tests := []struct {
127+
name string
128+
rune rune
129+
expected bool
130+
}{
131+
// Individual characters that should be removed
132+
{name: "zero width space", rune: 0x200B, expected: true},
133+
{name: "zero width non-joiner", rune: 0x200C, expected: true},
134+
{name: "left-to-right mark", rune: 0x200E, expected: true},
135+
{name: "right-to-left mark", rune: 0x200F, expected: true},
136+
{name: "soft hyphen", rune: 0x00AD, expected: true},
137+
{name: "zero width no-break space", rune: 0xFEFF, expected: true},
138+
{name: "mongolian vowel separator", rune: 0x180E, expected: true},
139+
{name: "unicode tag", rune: 0xE0001, expected: true},
140+
141+
// Range tests - Unicode tags: U+E0020–U+E007F
142+
{name: "unicode tag range start", rune: 0xE0020, expected: true},
143+
{name: "unicode tag range middle", rune: 0xE0050, expected: true},
144+
{name: "unicode tag range end", rune: 0xE007F, expected: true},
145+
{name: "before unicode tag range", rune: 0xE001F, expected: false},
146+
{name: "after unicode tag range", rune: 0xE0080, expected: false},
147+
148+
// Range tests - BiDi controls: U+202A–U+202E
149+
{name: "bidi control range start", rune: 0x202A, expected: true},
150+
{name: "bidi control range middle", rune: 0x202C, expected: true},
151+
{name: "bidi control range end", rune: 0x202E, expected: true},
152+
{name: "before bidi control range", rune: 0x2029, expected: false},
153+
{name: "after bidi control range", rune: 0x202F, expected: false},
154+
155+
// Range tests - BiDi isolates: U+2066–U+2069
156+
{name: "bidi isolate range start", rune: 0x2066, expected: true},
157+
{name: "bidi isolate range middle", rune: 0x2067, expected: true},
158+
{name: "bidi isolate range end", rune: 0x2069, expected: true},
159+
{name: "before bidi isolate range", rune: 0x2065, expected: false},
160+
{name: "after bidi isolate range", rune: 0x206A, expected: false},
161+
162+
// Range tests - Hidden modifiers: U+2060–U+2064
163+
{name: "hidden modifier range start", rune: 0x2060, expected: true},
164+
{name: "hidden modifier range middle", rune: 0x2062, expected: true},
165+
{name: "hidden modifier range end", rune: 0x2064, expected: true},
166+
{name: "before hidden modifier range", rune: 0x205F, expected: false},
167+
{name: "after hidden modifier range", rune: 0x2065, expected: false},
168+
169+
// Characters that should NOT be removed
170+
{name: "regular ascii letter", rune: 'A', expected: false},
171+
{name: "regular ascii digit", rune: '1', expected: false},
172+
{name: "regular ascii space", rune: ' ', expected: false},
173+
{name: "newline", rune: '\n', expected: false},
174+
{name: "tab", rune: '\t', expected: false},
175+
{name: "unicode letter", rune: '世', expected: false},
176+
{name: "emoji", rune: '🌍', expected: false},
177+
{name: "greek letter", rune: 'α', expected: false},
178+
{name: "punctuation", rune: '.', expected: false},
179+
{name: "hyphen (normal)", rune: '-', expected: false},
180+
}
181+
182+
for _, tt := range tests {
183+
t.Run(tt.name, func(t *testing.T) {
184+
result := shouldRemoveRune(tt.rune)
185+
assert.Equal(t, tt.expected, result, "rune: U+%04X (%c)", tt.rune, tt.rune)
186+
})
187+
}
188+
}

0 commit comments

Comments
 (0)