Skip to content

Commit 16e51a3

Browse files
csweichelroboquat
authored andcommitted
[agent-smith] Use fewer allocations
1 parent 1b603c1 commit 16e51a3

File tree

2 files changed

+272
-6
lines changed

2 files changed

+272
-6
lines changed

components/ee/agent-smith/pkg/detector/proc.go

Lines changed: 145 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55
package detector
66

77
import (
8+
"bufio"
9+
"bytes"
810
"context"
911
"encoding/binary"
1012
"fmt"
13+
"io"
14+
"os"
1115
"path/filepath"
1216
"sort"
1317
"strconv"
@@ -49,7 +53,7 @@ func (fs realProcfs) Discover() map[int]*process {
4953
log.WithField("pid", p.PID).WithError(err).Debug("cannot get commandline of process")
5054
continue
5155
}
52-
stat, err := p.Stat()
56+
stat, err := statProc(p.PID)
5357
if err != nil {
5458
log.WithField("pid", p.PID).WithError(err).Debug("cannot stat process")
5559
continue
@@ -76,7 +80,7 @@ func (fs realProcfs) Discover() map[int]*process {
7680
proc.Path = path
7781
parent.Children = append(parent.Children, proc)
7882

79-
binary.LittleEndian.PutUint64(digest[0:8], uint64(stat.PID))
83+
binary.LittleEndian.PutUint64(digest[0:8], uint64(p.PID))
8084
binary.LittleEndian.PutUint64(digest[8:16], uint64(stat.PPID))
8185
binary.LittleEndian.PutUint64(digest[16:24], stat.Starttime)
8286
proc.Hash = xxhash.Sum64(digest)
@@ -86,16 +90,151 @@ func (fs realProcfs) Discover() map[int]*process {
8690
return idx
8791
}
8892

89-
func (p realProcfs) Environ(pid int) ([]string, error) {
90-
proc, err := procfs.NewProc(pid)
93+
type stat struct {
94+
PPID int
95+
Starttime uint64
96+
}
97+
98+
// statProc returns a limited set of /proc/<pid>/stat content.
99+
func statProc(pid int) (*stat, error) {
100+
f, err := os.Open(fmt.Sprintf("/proc/%d/stat", pid))
91101
if err != nil {
92102
return nil, err
93103
}
94-
env, err := proc.Environ()
104+
defer f.Close()
105+
106+
return parseStat(f)
107+
}
108+
109+
func parseStat(r io.Reader) (res *stat, err error) {
110+
var (
111+
ppid uint64
112+
starttime uint64
113+
i = -1
114+
)
115+
116+
scan := bufio.NewScanner(r)
117+
// We use a fixed buffer size assuming that none of the env vars we're interested in is any larger.
118+
// This is part of the trick to keep allocs down.
119+
scan.Buffer(make([]byte, 512), 512)
120+
scan.Split(scanFixedSpace(512))
121+
for scan.Scan() {
122+
text := scan.Bytes()
123+
if text[len(text)-1] == ')' {
124+
i = 0
125+
}
126+
127+
if i == 2 {
128+
ppid, err = strconv.ParseUint(string(text), 10, 64)
129+
}
130+
if i == 20 {
131+
starttime, err = strconv.ParseUint(string(text), 10, 64)
132+
}
133+
if err != nil {
134+
return
135+
}
136+
137+
if i >= 0 {
138+
i++
139+
}
140+
}
141+
if err := scan.Err(); err != nil {
142+
return nil, err
143+
}
144+
145+
if ppid == 0 || starttime == 0 {
146+
return nil, fmt.Errorf("cannot parse stat")
147+
}
148+
149+
return &stat{
150+
PPID: int(ppid),
151+
Starttime: starttime,
152+
}, nil
153+
}
154+
155+
func (p realProcfs) Environ(pid int) ([]string, error) {
156+
// Note: procfs.Environ is too expensive becuase it uses io.ReadAll which leaks
157+
// memory over time.
158+
159+
f, err := os.Open(fmt.Sprintf("/proc/%d/environ", pid))
95160
if err != nil {
96161
return nil, err
97162
}
98-
return env, nil
163+
defer f.Close()
164+
165+
return parseGitpodEnviron(f)
166+
}
167+
168+
func parseGitpodEnviron(r io.Reader) ([]string, error) {
169+
// Note: this function is benchmarked in BenchmarkParseGitPodEnviron.
170+
// At the time of this wriging it consumed 3+N allocs where N is the number of
171+
// env vars starting with GITPOD_.
172+
//
173+
// When making changes to this function, ensure you're not causing more allocs
174+
// which could have a too drastic resource usage effect in prod.
175+
176+
scan := bufio.NewScanner(r)
177+
// We use a fixed buffer size assuming that none of the env vars we're interested in is any larger.
178+
// This is part of the trick to keep allocs down.
179+
scan.Buffer(make([]byte, 512), 512)
180+
scan.Split(scanNullTerminatedLines(512))
181+
182+
// we expect at least 10 relevant env vars
183+
res := make([]string, 0, 10)
184+
for scan.Scan() {
185+
// we only keep GITPOD_ variables for optimisation
186+
text := scan.Bytes()
187+
if !bytes.HasPrefix(text, []byte("GITPOD_")) {
188+
continue
189+
}
190+
191+
res = append(res, string(text))
192+
}
193+
return res, nil
194+
}
195+
196+
func scanNullTerminatedLines(fixedBufferSize int) func(data []byte, atEOF bool) (advance int, token []byte, err error) {
197+
return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
198+
if atEOF && len(data) == 0 {
199+
return 0, nil, nil
200+
}
201+
if i := bytes.IndexByte(data, 0); i >= 0 {
202+
// We have a full null-terminated line.
203+
return i + 1, data[:i], nil
204+
}
205+
// If we're at EOF, we have a final, non-terminated line. Return it.
206+
if atEOF {
207+
return len(data), data, nil
208+
}
209+
if len(data) == 512 {
210+
return len(data), data, nil
211+
}
212+
// Request more data.
213+
return 0, nil, nil
214+
}
215+
}
216+
217+
func scanFixedSpace(fixedBufferSize int) func(data []byte, atEOF bool) (advance int, token []byte, err error) {
218+
// The returned function behaves like bufio.ScanLines except that it doesn't try to
219+
// request lines longer than fixedBufferSize.
220+
return func(data []byte, atEOF bool) (advance int, token []byte, err error) {
221+
if atEOF && len(data) == 0 {
222+
return 0, nil, nil
223+
}
224+
if i := bytes.IndexByte(data, ' '); i >= 0 {
225+
// We have a full null-terminated line.
226+
return i + 1, data[:i], nil
227+
}
228+
// If we're at EOF, we have a final, non-terminated line. Return it.
229+
if atEOF {
230+
return len(data), data, nil
231+
}
232+
if len(data) == 512 {
233+
return len(data), data, nil
234+
}
235+
// Request more data.
236+
return 0, nil, nil
237+
}
99238
}
100239

101240
var _ ProcessDetector = &ProcfsDetector{}

components/ee/agent-smith/pkg/detector/proc_test.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package detector
66

77
import (
8+
"bytes"
89
"fmt"
910
"sort"
1011
"sync"
@@ -276,3 +277,129 @@ func TestDiscovery(t *testing.T) {
276277
t.Fatal("did not discover any process")
277278
}
278279
}
280+
281+
func TestParseGitpodEnviron(t *testing.T) {
282+
tests := []struct {
283+
Name string
284+
Content string
285+
Expectation []string
286+
}{
287+
{
288+
Name: "empty set",
289+
Expectation: []string{},
290+
},
291+
{
292+
Name: "happy path",
293+
Content: "GITPOD_INSTANCE_ID=foobar\000GITPOD_SOMETHING=blabla\000SOMETHING_ELSE\000",
294+
Expectation: []string{
295+
"GITPOD_INSTANCE_ID=foobar",
296+
"GITPOD_SOMETHING=blabla",
297+
},
298+
},
299+
{
300+
Name: "exceed token size",
301+
Content: func() string {
302+
r := "12345678"
303+
for i := 0; i < 7; i++ {
304+
r += r
305+
}
306+
return "SOME_ENV_VAR=" + r + "\000GITPOD_FOOBAR=bar"
307+
}(),
308+
Expectation: []string{
309+
"GITPOD_FOOBAR=bar",
310+
},
311+
},
312+
}
313+
for _, test := range tests {
314+
t.Run(test.Name, func(t *testing.T) {
315+
act, err := parseGitpodEnviron(bytes.NewReader([]byte(test.Content)))
316+
if err != nil {
317+
t.Fatal(err)
318+
}
319+
320+
if diff := cmp.Diff(test.Expectation, act); diff != "" {
321+
t.Errorf("unexpected parseGitpodEnviron (-want +got):\n%s", diff)
322+
}
323+
})
324+
}
325+
}
326+
327+
func benchmarkParseGitPodEnviron(content string, b *testing.B) {
328+
b.ReportAllocs()
329+
b.ResetTimer()
330+
for n := 0; n < b.N; n++ {
331+
parseGitpodEnviron(bytes.NewReader([]byte(content)))
332+
}
333+
}
334+
335+
func BenchmarkParseGitPodEnvironP0(b *testing.B) { benchmarkParseGitPodEnviron("", b) }
336+
func BenchmarkParseGitPodEnvironP1(b *testing.B) {
337+
benchmarkParseGitPodEnviron("GITPOD_INSTANCE_ID=foobar\000", b)
338+
}
339+
func BenchmarkParseGitPodEnvironP2(b *testing.B) {
340+
benchmarkParseGitPodEnviron("GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000", b)
341+
}
342+
func BenchmarkParseGitPodEnvironP4(b *testing.B) {
343+
benchmarkParseGitPodEnviron("GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000", b)
344+
}
345+
func BenchmarkParseGitPodEnvironP8(b *testing.B) {
346+
benchmarkParseGitPodEnviron("GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000GITPOD_INSTANCE_ID=foobar\000", b)
347+
}
348+
func BenchmarkParseGitPodEnvironN1(b *testing.B) { benchmarkParseGitPodEnviron("NOT_ME\000", b) }
349+
func BenchmarkParseGitPodEnvironN2(b *testing.B) {
350+
benchmarkParseGitPodEnviron("NOT_ME\000NOT_ME\000", b)
351+
}
352+
func BenchmarkParseGitPodEnvironN4(b *testing.B) {
353+
benchmarkParseGitPodEnviron("NOT_ME\000NOT_ME\000NOT_ME\000NOT_ME\000", b)
354+
}
355+
func BenchmarkParseGitPodEnvironN8(b *testing.B) {
356+
benchmarkParseGitPodEnviron("NOT_ME\000NOT_ME\000NOT_ME\000NOT_ME\000NOT_ME\000NOT_ME\000NOT_ME\000NOT_ME\000", b)
357+
}
358+
359+
func TestParseStat(t *testing.T) {
360+
type Expectation struct {
361+
S *stat
362+
Err string
363+
}
364+
tests := []struct {
365+
Name string
366+
Content string
367+
Expectation Expectation
368+
}{
369+
{
370+
Name: "empty set",
371+
Expectation: Expectation{Err: "cannot parse stat"},
372+
},
373+
{
374+
Name: "happy path",
375+
Content: "80275 (cat) R 717 80275 717 34817 80275 4194304 85 0 0 0 0 0 0 0 26 6 1 0 4733826 5771264 135 18446744073709551615 94070799228928 94070799254577 140722983793472 0 0 0 0 0 0 0 0 0 17 14 0 0 0 0 0 94070799272592 94070799274176 94070803738624 140722983801930 140722983801950 140722983801950 140722983821291 0",
376+
Expectation: Expectation{S: &stat{PPID: 717, Starttime: 4733826}},
377+
},
378+
}
379+
for _, test := range tests {
380+
t.Run(test.Name, func(t *testing.T) {
381+
var (
382+
act Expectation
383+
err error
384+
)
385+
act.S, err = parseStat(bytes.NewReader([]byte(test.Content)))
386+
if err != nil {
387+
act.Err = err.Error()
388+
}
389+
390+
if diff := cmp.Diff(test.Expectation, act); diff != "" {
391+
t.Errorf("unexpected parseStat (-want +got):\n%s", diff)
392+
}
393+
})
394+
}
395+
}
396+
397+
func BenchmarkParseStat(b *testing.B) {
398+
r := bytes.NewReader([]byte("80275 (cat) R 717 80275 717 34817 80275 4194304 85 0 0 0 0 0 0 0 26 6 1 0 4733826 5771264 135 18446744073709551615 94070799228928 94070799254577 140722983793472 0 0 0 0 0 0 0 0 0 17 14 0 0 0 0 0 94070799272592 94070799274176 94070803738624 140722983801930 140722983801950 140722983801950 140722983821291 0"))
399+
400+
b.ReportAllocs()
401+
b.ResetTimer()
402+
for n := 0; n < b.N; n++ {
403+
parseStat(r)
404+
}
405+
}

0 commit comments

Comments
 (0)