Skip to content

Commit 2404e92

Browse files
authored
CLOUDP-317911 Add pprof integration in operator (#101)
# Summary

pprof can be configured with two new environment variables:
- `MDB_OPERATOR_PPROF_ENABLED` - together with `OPERATOR_ENV`, controls whether the pprof server is enabled. The rule for enabling pprof is defined in the `IsPprofEnabled` function: https://github.com/mongodb/mongodb-kubernetes/blob/94fc9eac3f36cc7df096e86b5eef8ce9fed02f58/pkg/pprof/pprof.go#L66-L70
- `MDB_OPERATOR_PPROF_PORT` - set to 10081 by default

It's more than a `_ "net/http/pprof"` one-liner for a couple of reasons:
- being able to enable pprof in the production environment is necessary for debugging memory issues, and it does not add much overhead either: https://stackoverflow.com/a/64057856. Previously it was only enabled for dev and local
- exposing the pprof server by default in production is a no-go in many organisations, either because of the sensitive information it exposes or because merely opening a port is enough to alert security staff: https://cwe.mitre.org/data/definitions/200.html

The standard way of starting pprof (shown below) is discouraged for its lack of configurability and for security issues:
- G114: Use of net/http serve function that has no support for setting timeouts (gosec)
- G108: Profiling endpoint is automatically exposed on /debug/pprof (gosec)

```go
import _ "net/http/pprof"
[...]
go func() {
	log.Println(http.ListenAndServe("localhost:10081", nil))
}()
```

## Proof of Work

The pprof debug page is available at the default `localhost:10081` port

![Screenshot 2025-05-09 at 16 33 59](https://github.com/user-attachments/assets/44c0c900-2dc5-45ff-b1de-917302a052b7)

Added unit tests that verify the `IsPprofEnabled` function.
Shutdown is also working: ```go 2025-05-09T16:32:56.750+0200 INFO pprof/pprof.go:57 Stopping pprof server 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:235 Shutdown signal received, waiting for all workers to finish {"controller": "mongodbuser-controller"} 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:237 All workers finished {"controller": "mongodbmulticluster-controller"} 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:237 All workers finished {"controller": "mongodbreplicaset-controller"} 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:237 All workers finished {"controller": "mongodbstandalone-controller"} 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:237 All workers finished {"controller": "mongodbuser-controller"} 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:237 All workers finished {"controller": "opsmanager-controller"} 2025-05-09T16:32:56.750+0200 INFO controller/controller.go:237 All workers finished {"controller": "mongodbshardedcluster-controller"} 2025-05-09T16:32:56.750+0200 INFO manager/internal.go:537 Stopping and waiting for caches 2025-05-09T16:32:56.750+0200 INFO pprof/pprof.go:52 pprof server stopped ``` ## Checklist - [x] Have you linked a jira ticket and/or is the ticket in the title? - [ ] Have you checked whether your jira ticket required DOCSP changes? - [ ] Have you checked for release_note changes? ## Reminder (Please remove this when merging) - Please try to Approve or Reject Changes the PR, keep PRs in review as short as possible - Our Short Guide for PRs: [Link](https://docs.google.com/document/d/1T93KUtdvONq43vfTfUt8l92uo4e4SEEvFbIEKOxGr44/edit?tab=t.0) - Remember the following Communication Standards - use comment prefixes for clarity: * **blocking**: Must be addressed before approval. * **follow-up**: Can be addressed in a later PR or ticket. * **q**: Clarifying question. * **nit**: Non-blocking suggestions. * **note**: Side-note, non-actionable. 
Example: Praise * --> no prefix is considered a question
1 parent 7fa6687 commit 2404e92

File tree

4 files changed

+178
-0
lines changed

4 files changed

+178
-0
lines changed

main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646
"github.com/mongodb/mongodb-kubernetes/mongodb-community-operator/pkg/util/envvar"
4747
"github.com/mongodb/mongodb-kubernetes/pkg/images"
4848
"github.com/mongodb/mongodb-kubernetes/pkg/multicluster"
49+
"github.com/mongodb/mongodb-kubernetes/pkg/pprof"
4950
"github.com/mongodb/mongodb-kubernetes/pkg/telemetry"
5051
"github.com/mongodb/mongodb-kubernetes/pkg/util"
5152
"github.com/mongodb/mongodb-kubernetes/pkg/util/architectures"
@@ -277,6 +278,16 @@ func main() {
277278
log.Info("Not running telemetry component!")
278279
}
279280

281+
pprofEnabledString := env.ReadOrDefault(util.OperatorPprofEnabledEnv, "")
282+
if pprofEnabled, err := pprof.IsPprofEnabled(pprofEnabledString, getOperatorEnv()); err != nil {
283+
log.Errorf("Unable to check if pprof is enabled: %s", err)
284+
} else if pprofEnabled {
285+
port := env.ReadIntOrDefault(util.OperatorPprofPortEnv, util.OperatorPprofDefaultPort)
286+
if err := mgr.Add(pprof.NewRunnable(port, log)); err != nil {
287+
log.Errorf("Unable to start pprof server: %s", err)
288+
}
289+
}
290+
280291
log.Info("Starting the Cmd.")
281292

282293
// Start the Manager

pkg/pprof/pprof.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
package pprof
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"net/http"
8+
"net/http/pprof"
9+
"strconv"
10+
"time"
11+
12+
"go.uber.org/zap"
13+
14+
"github.com/mongodb/mongodb-kubernetes/pkg/util"
15+
)
16+
17+
type Runnable struct {
18+
port int
19+
log *zap.SugaredLogger
20+
}
21+
22+
func NewRunnable(port int, log *zap.SugaredLogger) *Runnable {
23+
return &Runnable{
24+
port: port,
25+
log: log,
26+
}
27+
}
28+
29+
func (p *Runnable) Start(ctx context.Context) error {
30+
pprofAddress := fmt.Sprintf("localhost:%d", p.port)
31+
32+
handler := http.NewServeMux()
33+
handler.HandleFunc("GET /debug/pprof/", pprof.Index)
34+
handler.HandleFunc("GET /debug/pprof/cmdline/", pprof.Cmdline)
35+
handler.HandleFunc("GET /debug/pprof/profile/", pprof.Profile)
36+
handler.HandleFunc("GET /debug/pprof/symbol/", pprof.Symbol)
37+
handler.HandleFunc("GET /debug/pprof/trace/", pprof.Trace)
38+
39+
server := &http.Server{
40+
Addr: pprofAddress,
41+
ReadHeaderTimeout: 10 * time.Second,
42+
Handler: handler,
43+
}
44+
45+
go func() {
46+
p.log.Infof("Starting pprof server at %s", pprofAddress)
47+
if err := server.ListenAndServe(); err != nil {
48+
if !errors.Is(err, http.ErrServerClosed) {
49+
p.log.Errorf("unable to start pprof server: %s", err.Error())
50+
}
51+
}
52+
p.log.Info("pprof server stopped")
53+
}()
54+
55+
go func() {
56+
<-ctx.Done()
57+
p.log.Info("Stopping pprof server")
58+
if err := server.Shutdown(context.Background()); err != nil {
59+
p.log.Errorf("unable to shutdown pprof server: %s", err.Error())
60+
}
61+
}()
62+
63+
return nil
64+
}
65+
66+
// IsPprofEnabled checks if pprof is enabled based on the MDB_OPERATOR_PPROF_ENABLED
67+
// and OPERATOR_ENV environment variables. It returns true if:
68+
// - MDB_OPERATOR_PPROF_ENABLED is set to true
69+
// - OPERATOR_ENV is set to dev or local and MDB_OPERATOR_PPROF_ENABLED is not set
70+
// Otherwise, it returns false.
71+
func IsPprofEnabled(pprofEnabledString string, operatorEnv util.OperatorEnvironment) (bool, error) {
72+
if pprofEnabledString != "" {
73+
pprofEnabled, err := strconv.ParseBool(pprofEnabledString)
74+
if err != nil {
75+
return false, fmt.Errorf("unable to parse %s environment variable: %w", util.OperatorPprofEnabledEnv, err)
76+
}
77+
78+
return pprofEnabled, nil
79+
}
80+
81+
return operatorEnv == util.OperatorEnvironmentDev || operatorEnv == util.OperatorEnvironmentLocal, nil
82+
}

pkg/pprof/pprof_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package pprof
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
"github.com/stretchr/testify/require"
8+
9+
"github.com/mongodb/mongodb-kubernetes/pkg/util"
10+
)
11+
12+
func TestIsPprofEnabled(t *testing.T) {
13+
tests := map[string]struct {
14+
pprofEnabledString string
15+
operatorEnv util.OperatorEnvironment
16+
expected bool
17+
expectedErrMsg string
18+
}{
19+
"pprof enabled by default in dev": {
20+
operatorEnv: util.OperatorEnvironmentDev,
21+
expected: true,
22+
},
23+
"pprof enabled by default in local": {
24+
operatorEnv: util.OperatorEnvironmentLocal,
25+
expected: true,
26+
},
27+
"pprof disabled by default in prod": {
28+
operatorEnv: util.OperatorEnvironmentProd,
29+
expected: false,
30+
},
31+
"pprof enabled in prod": {
32+
pprofEnabledString: "true",
33+
operatorEnv: util.OperatorEnvironmentProd,
34+
expected: true,
35+
},
36+
"pprof explicitly enabled in dev": {
37+
pprofEnabledString: "true",
38+
operatorEnv: util.OperatorEnvironmentDev,
39+
expected: true,
40+
},
41+
"pprof explicitly enabled in local": {
42+
pprofEnabledString: "true",
43+
operatorEnv: util.OperatorEnvironmentLocal,
44+
expected: true,
45+
},
46+
"pprof disabled in dev": {
47+
pprofEnabledString: "false",
48+
operatorEnv: util.OperatorEnvironmentDev,
49+
expected: false,
50+
},
51+
"pprof disabled in local": {
52+
pprofEnabledString: "false",
53+
operatorEnv: util.OperatorEnvironmentLocal,
54+
expected: false,
55+
},
56+
"pprof disabled explicitly in prod": {
57+
pprofEnabledString: "false",
58+
operatorEnv: util.OperatorEnvironmentProd,
59+
expected: false,
60+
},
61+
"pprof misconfigured": {
62+
pprofEnabledString: "false11",
63+
operatorEnv: util.OperatorEnvironmentProd,
64+
expected: false,
65+
expectedErrMsg: "unable to parse MDB_OPERATOR_PPROF_ENABLED environment variable: strconv.ParseBool: parsing \"false11\": invalid syntax",
66+
},
67+
}
68+
69+
for name, test := range tests {
70+
t.Run(name, func(t *testing.T) {
71+
result, err := IsPprofEnabled(test.pprofEnabledString, test.operatorEnv)
72+
if test.expectedErrMsg != "" {
73+
require.Error(t, err)
74+
assert.Equal(t, test.expectedErrMsg, err.Error())
75+
}
76+
77+
assert.Equal(t, test.expected, result)
78+
})
79+
}
80+
}

pkg/util/constants.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,11 @@ const (
154154
LDAP = "LDAP"
155155
MinimumScramSha256MdbVersion = "4.0.0"
156156

157+
// pprof variables
158+
OperatorPprofEnabledEnv = "MDB_OPERATOR_PPROF_ENABLED"
159+
OperatorPprofPortEnv = "MDB_OPERATOR_PPROF_PORT"
160+
OperatorPprofDefaultPort = 10081
161+
157162
// these were historically used and constituted a security issue—if set they should be changed
158163
InvalidKeyFileContents = "DUMMYFILE"
159164
InvalidAutomationAgentPassword = "D9XK2SfdR2obIevI9aKsYlVH" //nolint //Part of the algorithm

0 commit comments

Comments
 (0)