Skip to content

Commit 7b02e8d

Browse files
authored
Merge pull request #140 from DataDog/fricounet/upstream/otel-tracing-grpc
Add support for otel tracing of grpc calls
2 parents 0911089 + 47cfaa9 commit 7b02e8d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+20513
-12
lines changed

connection/connection.go

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727

2828
"github.com/kubernetes-csi/csi-lib-utils/metrics"
2929
"github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
30+
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
3031
"google.golang.org/grpc"
3132
"k8s.io/klog/v2"
3233
)
@@ -56,6 +57,8 @@ func SetMaxGRPCLogLength(characterCount int) {
5657
//
5758
// The function tries to connect for 30 seconds, and returns an error if no connection has been established at that point.
5859
// The function automatically disables TLS and adds interceptor for logging of all gRPC messages at level 5.
60+
// If the metricsManager is 'nil', no metrics will be recorded on the gRPC calls.
61+
// The function behaviour can be tweaked with options.
5962
//
6063
// For a connection to a Unix Domain socket, the behavior after
6164
// losing the connection is configurable. The default is to
@@ -70,12 +73,20 @@ func SetMaxGRPCLogLength(characterCount int) {
7073
// For other connections, the default behavior from gRPC is used and
7174
// loss of connection is not detected reliably.
7275
func Connect(address string, metricsManager metrics.CSIMetricsManager, options ...Option) (*grpc.ClientConn, error) {
73-
return connect(address, metricsManager, []grpc.DialOption{grpc.WithTimeout(time.Second * 30)}, options)
76+
// Prepend default options
77+
options = append([]Option{WithTimeout(time.Second * 30)}, options...)
78+
if metricsManager != nil {
79+
options = append([]Option{WithMetrics(metricsManager)}, options...)
80+
}
81+
return connect(address, options)
7482
}
7583

7684
// ConnectWithoutMetrics behaves exactly like Connect except no metrics are recorded.
85+
//
// Deprecated: use Connect with `nil` as the metricsManager instead.
7786
func ConnectWithoutMetrics(address string, options ...Option) (*grpc.ClientConn, error) {
78-
return connect(address, nil, []grpc.DialOption{grpc.WithTimeout(time.Second * 30)}, options)
87+
// Prepend default options
88+
options = append([]Option{WithTimeout(time.Second * 30)}, options...)
89+
return connect(address, options)
7990
}
8091

8192
// Option is the type of all optional parameters for Connect.
@@ -105,29 +116,59 @@ func ExitOnConnectionLoss() func() bool {
105116
}
106117
}
107118

119+
// WithTimeout sets the timeout for establishing the gRPC connection (it is passed to grpc.WithTimeout when dialing); a timeout <= 0 adds no dial timeout.
120+
func WithTimeout(timeout time.Duration) Option {
121+
return func(o *options) {
122+
o.timeout = timeout
123+
}
124+
}
125+
126+
// WithMetrics enables the recording of metrics on the gRPC calls with the provided CSIMetricsManager.
127+
func WithMetrics(metricsManager metrics.CSIMetricsManager) Option {
128+
return func(o *options) {
129+
o.metricsManager = metricsManager
130+
}
131+
}
132+
133+
// WithOtelTracing enables the recording of traces on the gRPC calls with opentelemetry gRPC interceptor.
134+
func WithOtelTracing() Option {
135+
return func(o *options) {
136+
o.enableOtelTracing = true
137+
}
138+
}
139+
108140
type options struct {
109-
reconnect func() bool
141+
reconnect func() bool
142+
timeout time.Duration
143+
metricsManager metrics.CSIMetricsManager
144+
enableOtelTracing bool
110145
}
111146

112147
// connect is the internal implementation of Connect. It has more options to enable testing.
113148
func connect(
114149
address string,
115-
metricsManager metrics.CSIMetricsManager,
116-
dialOptions []grpc.DialOption, connectOptions []Option) (*grpc.ClientConn, error) {
150+
connectOptions []Option) (*grpc.ClientConn, error) {
117151
var o options
118152
for _, option := range connectOptions {
119153
option(&o)
120154
}
121155

122-
dialOptions = append(dialOptions,
156+
dialOptions := []grpc.DialOption{
123157
grpc.WithInsecure(), // Don't use TLS, it's usually local Unix domain socket in a container.
124158
grpc.WithBackoffMaxDelay(time.Second), // Retry every second after failure.
125159
grpc.WithBlock(), // Block until connection succeeds.
126-
)
160+
}
161+
162+
if o.timeout > 0 {
163+
dialOptions = append(dialOptions, grpc.WithTimeout(o.timeout))
164+
}
127165

128166
interceptors := []grpc.UnaryClientInterceptor{LogGRPC}
129-
if metricsManager != nil {
130-
interceptors = append(interceptors, ExtendedCSIMetricsManager{metricsManager}.RecordMetricsClientInterceptor)
167+
if o.metricsManager != nil {
168+
interceptors = append(interceptors, ExtendedCSIMetricsManager{o.metricsManager}.RecordMetricsClientInterceptor)
169+
}
170+
if o.enableOtelTracing {
171+
interceptors = append(interceptors, otelgrpc.UnaryClientInterceptor())
131172
}
132173
dialOptions = append(dialOptions, grpc.WithChainUnaryInterceptor(interceptors...))
133174

connection/connection_test.go

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"testing"
2828
"time"
2929

30+
"go.opentelemetry.io/otel/trace"
3031
"google.golang.org/grpc"
3132
"google.golang.org/grpc/codes"
3233
"google.golang.org/grpc/connectivity"
@@ -138,13 +139,38 @@ func TestConnectWithoutMetrics(t *testing.T) {
138139
addr, stopServer := startServer(t, tmp, nil, nil, nil)
139140
defer stopServer()
140141

141-
conn, err := ConnectWithoutMetrics("unix:///" + addr)
142+
// With Connect
143+
conn, err := Connect("unix:///"+addr, nil)
142144
if assert.NoError(t, err, "connect with unix:/// prefix") &&
143145
assert.NotNil(t, conn, "got a connection") {
144146
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
145147
err = conn.Close()
146148
assert.NoError(t, err, "closing connection")
147149
}
150+
151+
// With ConnectWithoutMetrics
152+
conn, err = ConnectWithoutMetrics("unix:///" + addr)
153+
if assert.NoError(t, err, "connect with unix:/// prefix") &&
154+
assert.NotNil(t, conn, "got a connection") {
155+
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
156+
err = conn.Close()
157+
assert.NoError(t, err, "closing connection")
158+
}
159+
}
160+
161+
func TestConnectWithOtelTracing(t *testing.T) {
162+
tmp := tmpDir(t)
163+
defer os.RemoveAll(tmp)
164+
addr, stopServer := startServer(t, tmp, nil, nil, nil)
165+
defer stopServer()
166+
167+
conn, err := Connect(addr, metrics.NewCSIMetricsManager("fake.csi.driver.io"), WithOtelTracing())
168+
if assert.NoError(t, err, "connect via absolute path") &&
169+
assert.NotNil(t, conn, "got a connection") {
170+
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
171+
err = conn.Close()
172+
assert.NoError(t, err, "closing connection")
173+
}
148174
}
149175

150176
func TestWaitForServer(t *testing.T) {
@@ -191,13 +217,13 @@ func TestWaitForServer(t *testing.T) {
191217
}
192218
}
193219

194-
func TestTimout(t *testing.T) {
220+
func TestTimeout(t *testing.T) {
195221
tmp := tmpDir(t)
196222
defer os.RemoveAll(tmp)
197223

198224
startTime := time.Now()
199225
timeout := 5 * time.Second
200-
conn, err := connect(path.Join(tmp, "no-such.sock"), metrics.NewCSIMetricsManager("fake.csi.driver.io"), []grpc.DialOption{grpc.WithTimeout(timeout)}, nil)
226+
conn, err := connect(path.Join(tmp, "no-such.sock"), []Option{WithTimeout(timeout)})
201227
endTime := time.Now()
202228
if assert.Error(t, err, "connection should fail") {
203229
assert.InEpsilon(t, timeout, endTime.Sub(startTime), 1, "connection timeout")
@@ -491,3 +517,32 @@ func verifyMetricsError(t *testing.T, err error, metricToIgnore string) error {
491517

492518
return nil
493519
}
520+
521+
func TestConnectWithOtelGrpcInterceptorTraces(t *testing.T) {
522+
t.Logf("Running regular connection test")
523+
tmp := tmpDir(t)
524+
defer os.RemoveAll(tmp)
525+
// We have to have a real implementation of the gRPC call, otherwise the trace
526+
// interceptor is not called. The CSI identity service is used because it's simple.
527+
addr, stopServer := startServer(t, tmp, &identityServer{}, nil, nil)
528+
defer stopServer()
529+
530+
conn, err := Connect(addr, nil, WithOtelTracing())
531+
532+
if assert.NoError(t, err, "connect via absolute path") &&
533+
assert.NotNil(t, conn, "got a connection") {
534+
defer conn.Close()
535+
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
536+
537+
identityClient := csi.NewIdentityClient(conn)
538+
ctx := context.Background()
539+
if _, err := identityClient.GetPluginInfo(ctx, &csi.GetPluginInfoRequest{}); assert.Error(t, err) {
540+
errStatus, _ := status.FromError(err)
541+
assert.Equal(t, codes.Unimplemented, errStatus.Code(), "not implemented")
542+
}
543+
544+
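// ctx is a plain context.Background() that never has a span attached, so the
// TraceID read from it is the zero value 00000000000000000000000000000000.
// NOTE(review): this does not observe any span created by the otel interceptor — confirm intent.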
545+
assert.Equal(t, "00000000000000000000000000000000", trace.SpanContextFromContext(ctx).TraceID().String())
546+
}
547+
548+
}

go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ require (
66
github.com/container-storage-interface/spec v1.8.0
77
github.com/golang/protobuf v1.5.3
88
github.com/stretchr/testify v1.8.2
9+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.0
10+
go.opentelemetry.io/otel/trace v1.15.0
911
golang.org/x/net v0.13.0
1012
google.golang.org/grpc v1.54.0
1113
k8s.io/api v0.28.0
@@ -21,6 +23,7 @@ require (
2123
github.com/davecgh/go-spew v1.1.1 // indirect
2224
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
2325
github.com/go-logr/logr v1.2.4 // indirect
26+
github.com/go-logr/stdr v1.2.2 // indirect
2427
github.com/go-openapi/jsonpointer v0.19.6 // indirect
2528
github.com/go-openapi/jsonreference v0.20.2 // indirect
2629
github.com/go-openapi/swag v0.22.3 // indirect
@@ -43,6 +46,8 @@ require (
4346
github.com/prometheus/common v0.44.0 // indirect
4447
github.com/prometheus/procfs v0.10.1 // indirect
4548
github.com/spf13/pflag v1.0.5 // indirect
49+
go.opentelemetry.io/otel v1.15.0 // indirect
50+
go.opentelemetry.io/otel/metric v0.38.0 // indirect
4651
golang.org/x/oauth2 v0.8.0 // indirect
4752
golang.org/x/sys v0.10.0 // indirect
4853
golang.org/x/term v0.10.0 // indirect

go.sum

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
cloud.google.com/go/compute v1.15.1 h1:7UGq3QknM33pw5xATlpzeoomNxsacIVvTqTTvbfajmE=
2+
cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY=
13
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
24
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
35
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
@@ -14,8 +16,11 @@ github.com/emicklei/go-restful/v3 v3.9.0 h1:XwGDlfxEnQZzuopoqxwSEllNcCOM9DhhFyhF
1416
github.com/emicklei/go-restful/v3 v3.9.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
1517
github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U=
1618
github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
19+
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
1720
github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
1821
github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
22+
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
23+
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
1924
github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE=
2025
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
2126
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
@@ -93,6 +98,14 @@ github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ
9398
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
9499
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
95100
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
101+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.0 h1:pWlIooxHVVdetyXFDsuzfqV42lXVIDmVGBCHeaXzDyI=
102+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.0/go.mod h1:YjmsSWM1VTcWXFSgyrmLADPMZZohioz9onjgkikk59w=
103+
go.opentelemetry.io/otel v1.15.0 h1:NIl24d4eiLJPM0vKn4HjLYM+UZf6gSfi9Z+NmCxkWbk=
104+
go.opentelemetry.io/otel v1.15.0/go.mod h1:qfwLEbWhLPk5gyWrne4XnF0lC8wtywbuJbgfAE3zbek=
105+
go.opentelemetry.io/otel/metric v0.38.0 h1:vv/Nv/44S3GzMMmeUhaesBKsAenE6xLkTVWL+zuv30w=
106+
go.opentelemetry.io/otel/metric v0.38.0/go.mod h1:uAtxN5hl8aXh5irD8afBtSwQU5Zjg64WWSz6KheZxBg=
107+
go.opentelemetry.io/otel/trace v1.15.0 h1:5Fwje4O2ooOxkfyqI/kJwxWotggDLix4BSAvpE1wlpo=
108+
go.opentelemetry.io/otel/trace v1.15.0/go.mod h1:CUsmE2Ht1CRkvE8OsMESvraoZrrcgD1J2W8GV1ev0Y4=
96109
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
97110
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
98111
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=

0 commit comments

Comments
 (0)