Skip to content

Commit 1b06877

Browse files
cskiralyfjl
authored andcommitted
p2p: fix dial metrics not picking up the right error (ethereum#31621)
Our metrics related to dial errors were off. The original error was not wrapped, so the caller function had no chance of picking it up. Therefore the most common error, which is "TooManyPeers", was not correctly counted. The metrics were originally introduced in ethereum#27621 I was thinking of various possible solutions. - the one proposed here wraps both the new error and the origial error. It is not a pattern we use in other parts of the code, but works. This is maybe the smallest possible change. - as an alternate, I could write a proper `errProtoHandshakeError` with it's own wrapped error - finally, I'm not even sure we need `errProtoHandshakeError`, maybe we could just pass up the original error. --------- Signed-off-by: Csaba Kiraly <[email protected]> Co-authored-by: Felix Lange <[email protected]>
1 parent 8334b2c commit 1b06877

File tree

3 files changed

+34
-23
lines changed

3 files changed

+34
-23
lines changed

p2p/metrics.go

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ var (
4949
serveSuccessMeter metrics.Meter = metrics.NilMeter{}
5050
dialMeter metrics.Meter = metrics.NilMeter{}
5151
dialSuccessMeter metrics.Meter = metrics.NilMeter{}
52-
dialConnectionError metrics.Meter = metrics.NilMeter{}
52+
dialConnectionError metrics.Meter = metrics.NilMeter{} // dial timeout; no route to host; connection refused; network is unreachable
5353

5454
// count peers that stayed connected for at least 1 min
5555
serve1MinSuccessMeter metrics.Meter = metrics.NilMeter{}
@@ -61,8 +61,11 @@ var (
6161
dialSelf = metrics.NewRegisteredMeter("p2p/dials/error/self", nil)
6262
dialUselessPeer = metrics.NewRegisteredMeter("p2p/dials/error/useless", nil)
6363
dialUnexpectedIdentity = metrics.NewRegisteredMeter("p2p/dials/error/id/unexpected", nil)
64-
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil)
65-
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil)
64+
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil) // EOF; connection reset during handshake; message too big; i/o timeout
65+
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil) // EOF
66+
67+
// capture the rest of errors that are not handled by the above meters
68+
dialOtherError = metrics.NewRegisteredMeter("p2p/dials/error/other", nil)
6669
)
6770

6871
func init() {
@@ -84,30 +87,34 @@ func init() {
8487
dial1MinSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success/1min", nil)
8588
}
8689

87-
// markDialError matches errors that occur while setting up a dial connection
88-
// to the corresponding meter.
90+
// markDialError matches errors that occur while setting up a dial connection to the
91+
// corresponding meter. We don't maintain meters for evert possible error, just for
92+
// the most interesting ones.
8993
func markDialError(err error) {
9094
if !metrics.Enabled {
9195
return
9296
}
93-
if err2 := errors.Unwrap(err); err2 != nil {
94-
err = err2
95-
}
96-
switch err {
97-
case DiscTooManyPeers:
97+
98+
var reason DiscReason
99+
var handshakeErr *protoHandshakeError
100+
d := errors.As(err, &reason)
101+
switch {
102+
case d && reason == DiscTooManyPeers:
98103
dialTooManyPeers.Mark(1)
99-
case DiscAlreadyConnected:
104+
case d && reason == DiscAlreadyConnected:
100105
dialAlreadyConnected.Mark(1)
101-
case DiscSelf:
106+
case d && reason == DiscSelf:
102107
dialSelf.Mark(1)
103-
case DiscUselessPeer:
108+
case d && reason == DiscUselessPeer:
104109
dialUselessPeer.Mark(1)
105-
case DiscUnexpectedIdentity:
110+
case d && reason == DiscUnexpectedIdentity:
106111
dialUnexpectedIdentity.Mark(1)
107-
case errEncHandshakeError:
108-
dialEncHandshakeError.Mark(1)
109-
case errProtoHandshakeError:
112+
case errors.As(err, &handshakeErr):
110113
dialProtoHandshakeError.Mark(1)
114+
case errors.Is(err, errEncHandshakeError):
115+
dialEncHandshakeError.Mark(1)
116+
default:
117+
dialOtherError.Mark(1)
111118
}
112119
}
113120

p2p/server.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,15 @@ const (
6666
)
6767

6868
var (
69-
errServerStopped = errors.New("server stopped")
70-
errEncHandshakeError = errors.New("rlpx enc error")
71-
errProtoHandshakeError = errors.New("rlpx proto error")
69+
errServerStopped = errors.New("server stopped")
70+
errEncHandshakeError = errors.New("rlpx enc error")
7271
)
7372

73+
type protoHandshakeError struct{ err error }
74+
75+
func (e *protoHandshakeError) Error() string { return fmt.Sprintf("rlpx proto error: %v", e.err) }
76+
func (e *protoHandshakeError) Unwrap() error { return e.err }
77+
7478
// Server manages all peer connections.
7579
type Server struct {
7680
// Config fields may not be modified while the server is running.
@@ -907,7 +911,7 @@ func (srv *Server) setupConn(c *conn, dialDest *enode.Node) error {
907911
phs, err := c.doProtoHandshake(srv.ourHandshake)
908912
if err != nil {
909913
clog.Trace("Failed p2p handshake", "err", err)
910-
return fmt.Errorf("%w: %v", errProtoHandshakeError, err)
914+
return &protoHandshakeError{err: err}
911915
}
912916
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
913917
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))

p2p/server_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,11 +410,11 @@ func TestServerSetupConn(t *testing.T) {
410410
wantCloseErr: DiscUnexpectedIdentity,
411411
},
412412
{
413-
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: errProtoHandshakeError},
413+
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: DiscTooManyPeers},
414414
dialDest: enode.NewV4(clientpub, nil, 0, 0),
415415
flags: dynDialedConn,
416416
wantCalls: "doEncHandshake,doProtoHandshake,close,",
417-
wantCloseErr: errProtoHandshakeError,
417+
wantCloseErr: DiscTooManyPeers,
418418
},
419419
{
420420
tt: &setupTransport{pubkey: srvpub, phs: protoHandshake{ID: crypto.FromECDSAPub(srvpub)[1:]}},

0 commit comments

Comments
 (0)