Skip to content

Commit 3529769

Browse files
committed
otel: update the retry stats as per gRFC A96
1 parent 1fa0bbd commit 3529769

File tree

12 files changed

+116
-47
lines changed

12 files changed

+116
-47
lines changed

api/src/main/java/io/grpc/ClientStreamTracer.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,15 @@ public static final class StreamInfo {
132132
private final CallOptions callOptions;
133133
private final int previousAttempts;
134134
private final boolean isTransparentRetry;
135+
private final boolean isHedging;
135136

136137
StreamInfo(
137-
CallOptions callOptions, int previousAttempts, boolean isTransparentRetry) {
138+
CallOptions callOptions, int previousAttempts, boolean isTransparentRetry,
139+
boolean isHedging) {
138140
this.callOptions = checkNotNull(callOptions, "callOptions");
139141
this.previousAttempts = previousAttempts;
140142
this.isTransparentRetry = isTransparentRetry;
143+
this.isHedging = isHedging;
141144
}
142145

143146
/**
@@ -165,6 +168,15 @@ public boolean isTransparentRetry() {
165168
return isTransparentRetry;
166169
}
167170

171+
/**
172+
* Whether the stream is hedging.
173+
*
174+
* @since 1.74.0
175+
*/
176+
public boolean isHedging() {
177+
return isHedging;
178+
}
179+
168180
/**
169181
* Converts this StreamInfo into a new Builder.
170182
*
@@ -174,7 +186,9 @@ public Builder toBuilder() {
174186
return new Builder()
175187
.setCallOptions(callOptions)
176188
.setPreviousAttempts(previousAttempts)
177-
.setIsTransparentRetry(isTransparentRetry);
189+
.setIsTransparentRetry(isTransparentRetry)
190+
.setIsHedging(isHedging);
191+
178192
}
179193

180194
/**
@@ -192,6 +206,7 @@ public String toString() {
192206
.add("callOptions", callOptions)
193207
.add("previousAttempts", previousAttempts)
194208
.add("isTransparentRetry", isTransparentRetry)
209+
.add("isHedging", isHedging)
195210
.toString();
196211
}
197212

@@ -204,6 +219,7 @@ public static final class Builder {
204219
private CallOptions callOptions = CallOptions.DEFAULT;
205220
private int previousAttempts;
206221
private boolean isTransparentRetry;
222+
private boolean isHedging;
207223

208224
Builder() {
209225
}
@@ -236,11 +252,21 @@ public Builder setIsTransparentRetry(boolean isTransparentRetry) {
236252
return this;
237253
}
238254

255+
/**
256+
* Sets whether the stream is hedging.
257+
*
258+
* @since 1.74.0
259+
*/
260+
public Builder setIsHedging(boolean isHedging) {
261+
this.isHedging = isHedging;
262+
return this;
263+
}
264+
239265
/**
240266
* Builds a new StreamInfo.
241267
*/
242268
public StreamInfo build() {
243-
return new StreamInfo(callOptions, previousAttempts, isTransparentRetry);
269+
return new StreamInfo(callOptions, previousAttempts, isTransparentRetry, isHedging);
244270
}
245271
}
246272
}

core/src/main/java/io/grpc/internal/ClientCallImpl.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ public void runInContext() {
250250
stream = clientStreamProvider.newStream(method, callOptions, headers, context);
251251
} else {
252252
ClientStreamTracer[] tracers =
253-
GrpcUtil.getClientStreamTracers(callOptions, headers, 0, false);
253+
GrpcUtil.getClientStreamTracers(callOptions, headers, 0,
254+
false, false);
254255
String deadlineName = contextIsDeadlineSource ? "Context" : "CallOptions";
255256
Long nameResolutionDelay = callOptions.getOption(NAME_RESOLUTION_DELAYED);
256257
String description = String.format(

core/src/main/java/io/grpc/internal/GrpcUtil.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,13 +757,15 @@ public ListenableFuture<SocketStats> getStats() {
757757

758758
/** Gets stream tracers based on CallOptions. */
759759
public static ClientStreamTracer[] getClientStreamTracers(
760-
CallOptions callOptions, Metadata headers, int previousAttempts, boolean isTransparentRetry) {
760+
CallOptions callOptions, Metadata headers, int previousAttempts, boolean isTransparentRetry,
761+
boolean isHedging) {
761762
List<ClientStreamTracer.Factory> factories = callOptions.getStreamTracerFactories();
762763
ClientStreamTracer[] tracers = new ClientStreamTracer[factories.size() + 1];
763764
StreamInfo streamInfo = StreamInfo.newBuilder()
764765
.setCallOptions(callOptions)
765766
.setPreviousAttempts(previousAttempts)
766767
.setIsTransparentRetry(isTransparentRetry)
768+
.setIsHedging(isHedging)
767769
.build();
768770
for (int i = 0; i < factories.size(); i++) {
769771
tracers[i] = factories.get(i).newClientStreamTracer(streamInfo, headers);

core/src/main/java/io/grpc/internal/ManagedChannelImpl.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,8 @@ public ClientStream newStream(
480480
// the delayed transport or a real transport will go in-use and cancel the idle timer.
481481
if (!retryEnabled) {
482482
ClientStreamTracer[] tracers = GrpcUtil.getClientStreamTracers(
483-
callOptions, headers, 0, /* isTransparentRetry= */ false);
483+
callOptions, headers, 0, /* isTransparentRetry= */ false,
484+
/* isHedging= */false);
484485
Context origContext = context.attach();
485486
try {
486487
return delayedTransport.newStream(method, headers, callOptions, tracers);
@@ -520,10 +521,10 @@ void postCommit() {
520521
@Override
521522
ClientStream newSubstream(
522523
Metadata newHeaders, ClientStreamTracer.Factory factory, int previousAttempts,
523-
boolean isTransparentRetry) {
524+
boolean isTransparentRetry, boolean isHedging) {
524525
CallOptions newOptions = callOptions.withStreamTracerFactory(factory);
525526
ClientStreamTracer[] tracers = GrpcUtil.getClientStreamTracers(
526-
newOptions, newHeaders, previousAttempts, isTransparentRetry);
527+
newOptions, newHeaders, previousAttempts, isTransparentRetry, isHedging);
527528
Context origContext = context.attach();
528529
try {
529530
return delayedTransport.newStream(method, newHeaders, newOptions, tracers);

core/src/main/java/io/grpc/internal/OobChannel.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ final class OobChannel extends ManagedChannel implements InternalInstrumented<Ch
8888
public ClientStream newStream(MethodDescriptor<?, ?> method,
8989
CallOptions callOptions, Metadata headers, Context context) {
9090
ClientStreamTracer[] tracers = GrpcUtil.getClientStreamTracers(
91-
callOptions, headers, 0, /* isTransparentRetry= */ false);
91+
callOptions, headers, 0, /* isTransparentRetry= */ false,
92+
/* isHedging= */ false);
9293
Context origContext = context.attach();
9394
// delayed transport's newStream() always acquires a lock, but concurrent performance doesn't
9495
// matter here because OOB communication should be sparse, and it's not on application RPC's

core/src/main/java/io/grpc/internal/RetriableStream.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,8 @@ public ClientStreamTracer newClientStreamTracer(
266266

267267
Metadata newHeaders = updateHeaders(headers, previousAttemptCount);
268268
// NOTICE: This set _must_ be done before stream.start() and it actually is.
269-
sub.stream = newSubstream(newHeaders, tracerFactory, previousAttemptCount, isTransparentRetry);
269+
sub.stream = newSubstream(newHeaders, tracerFactory, previousAttemptCount, isTransparentRetry,
270+
isHedging);
270271
return sub;
271272
}
272273

@@ -276,7 +277,7 @@ public ClientStreamTracer newClientStreamTracer(
276277
*/
277278
abstract ClientStream newSubstream(
278279
Metadata headers, ClientStreamTracer.Factory tracerFactory, int previousAttempts,
279-
boolean isTransparentRetry);
280+
boolean isTransparentRetry, boolean isHedging);
280281

281282
/** Adds grpc-previous-rpc-attempts in the headers of a retry/hedging RPC. */
282283
@VisibleForTesting

core/src/main/java/io/grpc/internal/SubchannelChannel.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ public ClientStream newStream(MethodDescriptor<?, ?> method,
5959
transport = notReadyTransport;
6060
}
6161
ClientStreamTracer[] tracers = GrpcUtil.getClientStreamTracers(
62-
callOptions, headers, 0, /* isTransparentRetry= */ false);
62+
callOptions, headers, 0, /* isTransparentRetry= */ false,
63+
/* isHedging= */ false);
6364
Context origContext = context.attach();
6465
try {
6566
return transport.newStream(method, headers, callOptions, tracers);

core/src/test/java/io/grpc/internal/RetriableStreamTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ ClientStream newSubstream(
186186
Metadata metadata,
187187
ClientStreamTracer.Factory tracerFactory,
188188
int previousAttempts,
189-
boolean isTransparentRetry) {
189+
boolean isTransparentRetry,
190+
boolean isHedging) {
190191
bufferSizeTracer =
191192
tracerFactory.newClientStreamTracer(STREAM_INFO, metadata);
192193
int actualPreviousRpcAttemptsInHeader = metadata.get(GRPC_PREVIOUS_RPC_ATTEMPTS) == null

opentelemetry/src/main/java/io/grpc/opentelemetry/GrpcOpenTelemetry.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818

1919
import static com.google.common.base.Preconditions.checkNotNull;
2020
import static io.grpc.internal.GrpcUtil.IMPLEMENTATION_VERSION;
21+
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.HEDGE_BUCKETS;
2122
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.LATENCY_BUCKETS;
2223
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.RETRY_BUCKETS;
2324
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.SIZE_BUCKETS;
25+
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.TRANSPARENT_RETRY_BUCKETS;
2426

2527
import com.google.common.annotations.VisibleForTesting;
2628
import com.google.common.base.Stopwatch;
@@ -247,12 +249,37 @@ static OpenTelemetryMetricsResource createMetricInstruments(Meter meter,
247249
meter.histogramBuilder(
248250
"grpc.client.call.retries")
249251
.setUnit("{retry}")
250-
.setDescription("Number of retry attempts made during the client call")
252+
.setDescription("Number of retries during the client call. "
253+
+ "If there were no retries, 0 is not reported.")
251254
.ofLongs()
252255
.setExplicitBucketBoundariesAdvice(RETRY_BUCKETS)
253256
.build());
254257
}
255258

259+
if (isMetricEnabled("grpc.client.call.transparent_retries", enableMetrics, disableDefault)) {
260+
builder.clientCallRetriesCounter(
261+
meter.histogramBuilder(
262+
"grpc.client.call.transparent_retries")
263+
.setUnit("{transparent_retry}")
264+
.setDescription("Number of transparent retries during the client call. "
265+
+ "If there were no transparent retries, 0 is not reported.")
266+
.ofLongs()
267+
.setExplicitBucketBoundariesAdvice(TRANSPARENT_RETRY_BUCKETS)
268+
.build());
269+
}
270+
271+
if (isMetricEnabled("grpc.client.call.hedges", enableMetrics, disableDefault)) {
272+
builder.clientCallRetriesCounter(
273+
meter.histogramBuilder(
274+
"grpc.client.call.hedges")
275+
.setUnit("{hedge}")
276+
.setDescription("Number of hedges during the client call. "
277+
+ "If there were no hedges, 0 is not reported.")
278+
.ofLongs()
279+
.setExplicitBucketBoundariesAdvice(HEDGE_BUCKETS)
280+
.build());
281+
}
282+
256283
if (isMetricEnabled("grpc.client.call.retry_delay", enableMetrics, disableDefault)) {
257284
builder.clientCallRetryDelayCounter(
258285
meter.histogramBuilder(

opentelemetry/src/main/java/io/grpc/opentelemetry/OpenTelemetryMetricsModule.java

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.BACKEND_SERVICE_KEY;
2121
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.LOCALITY_KEY;
2222
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.METHOD_KEY;
23-
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.RETRY_TYPE_KEY;
2423
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.STATUS_KEY;
2524
import static io.grpc.opentelemetry.internal.OpenTelemetryConstants.TARGET_KEY;
2625

@@ -45,7 +44,6 @@
4544
import io.grpc.Status;
4645
import io.grpc.Status.Code;
4746
import io.grpc.StreamTracer;
48-
import io.grpc.opentelemetry.internal.OpenTelemetryConstants;
4947
import io.opentelemetry.api.common.AttributesBuilder;
5048
import java.util.ArrayList;
5149
import java.util.Collection;
@@ -299,6 +297,7 @@ static final class CallAttemptsTracerFactory extends ClientStreamTracer.Factory
299297
private long callLatencyNanos;
300298
private final Object lock = new Object();
301299
private final AtomicLong attemptsPerCall = new AtomicLong();
300+
private final AtomicLong hedgedAttemptsPerCall = new AtomicLong();
302301
private final AtomicLong transparentRetriesPerCall = new AtomicLong();
303302
@GuardedBy("lock")
304303
private int activeStreams;
@@ -352,7 +351,10 @@ public ClientStreamTracer newClientStreamTracer(StreamInfo info, Metadata metada
352351
}
353352
if (info.isTransparentRetry()) {
354353
transparentRetriesPerCall.incrementAndGet();
355-
} else {
354+
} else if (info.isHedging()) {
355+
hedgedAttemptsPerCall.incrementAndGet();
356+
}
357+
else {
356358
attemptsPerCall.incrementAndGet();
357359
}
358360
return newClientTracer(info);
@@ -442,19 +444,30 @@ void recordFinishedCall() {
442444
retriesPerCall = attempts - 1;
443445
}
444446

445-
module.resource.clientCallRetriesCounter().record(
446-
retriesPerCall,
447-
baseAttributes.toBuilder()
448-
.put(RETRY_TYPE_KEY, OpenTelemetryConstants.RetryType.RETRY.getValue())
449-
.build()
450-
);
447+
if (retriesPerCall > 0) {
448+
module.resource.clientCallRetriesCounter().record(retriesPerCall, baseAttributes);
449+
}
450+
}
451+
452+
// Hedge counts
453+
if (module.resource.clientCallHedgesCounter() != null) {
451454

455+
long hedgesPerCall = 0;
456+
long attempts = hedgedAttemptsPerCall.get();
457+
if (attempts > 0) {
458+
hedgesPerCall = attempts - 1;
459+
}
460+
461+
if (hedgesPerCall > 0) {
462+
module.resource.clientCallHedgesCounter().record(hedgesPerCall, baseAttributes);
463+
}
464+
}
465+
466+
// Transparent Retry counts
467+
if (module.resource.clientCallTransparentRetriesCounter() != null
468+
&& transparentRetriesPerCall.get() > 0) {
452469
module.resource.clientCallRetriesCounter().record(
453-
transparentRetriesPerCall.get(),
454-
baseAttributes.toBuilder()
455-
.put(RETRY_TYPE_KEY, OpenTelemetryConstants.RetryType.TRANSPARENT.getValue())
456-
.build()
457-
);
470+
transparentRetriesPerCall.get(), baseAttributes);
458471
}
459472

460473
// Retry delay

opentelemetry/src/main/java/io/grpc/opentelemetry/OpenTelemetryMetricsResource.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ abstract class OpenTelemetryMetricsResource {
4444
@Nullable
4545
abstract LongHistogram clientCallRetriesCounter();
4646

47+
@Nullable
48+
abstract LongHistogram clientCallTransparentRetriesCounter();
49+
50+
@Nullable
51+
abstract LongHistogram clientCallHedgesCounter();
52+
4753
@Nullable
4854
abstract DoubleHistogram clientCallRetryDelayCounter();
4955

@@ -80,6 +86,10 @@ abstract Builder clientTotalReceivedCompressedMessageSizeCounter(
8086

8187
abstract Builder clientCallRetriesCounter(LongHistogram counter);
8288

89+
abstract Builder clientCallTransparentRetriesCounter(LongHistogram counter);
90+
91+
abstract Builder clientCallHedgesCounter(LongHistogram counter);
92+
8393
abstract Builder clientCallRetryDelayCounter(DoubleHistogram counter);
8494

8595
abstract Builder serverCallCountCounter(LongCounter counter);

opentelemetry/src/main/java/io/grpc/opentelemetry/internal/OpenTelemetryConstants.java

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@ public final class OpenTelemetryConstants {
3636
public static final AttributeKey<String> BACKEND_SERVICE_KEY =
3737
AttributeKey.stringKey("grpc.lb.backend_service");
3838

39-
public static final AttributeKey<String> RETRY_TYPE_KEY =
40-
AttributeKey.stringKey("grpc.retry_type");
41-
4239
public static final List<Double> LATENCY_BUCKETS =
4340
ImmutableList.of(
4441
0d, 0.00001d, 0.00005d, 0.0001d, 0.0003d, 0.0006d, 0.0008d, 0.001d, 0.002d,
@@ -52,24 +49,12 @@ public final class OpenTelemetryConstants {
5249
0L, 1024L, 2048L, 4096L, 16384L, 65536L, 262144L, 1048576L, 4194304L, 16777216L,
5350
67108864L, 268435456L, 1073741824L, 4294967296L);
5451

55-
public static final List<Long> RETRY_BUCKETS =
56-
ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 10L, 100L, 1000L);
57-
58-
public enum RetryType {
59-
RETRY("retry"),
60-
HEDGE("hedge"),
61-
TRANSPARENT("transparent");
52+
public static final List<Long> RETRY_BUCKETS = ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L);
6253

63-
private final String value;
54+
public static final List<Long> TRANSPARENT_RETRY_BUCKETS =
55+
ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 10L);
6456

65-
RetryType(String value) {
66-
this.value = value;
67-
}
68-
69-
public String getValue() {
70-
return value;
71-
}
72-
}
57+
public static final List<Long> HEDGE_BUCKETS = ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L);
7358

7459
private OpenTelemetryConstants() {
7560
}

0 commit comments

Comments
 (0)