Skip to content

Commit d17e31e

Browse files
committed
HDDS-1214. Enable tracing for the datanode read/write path. Contributed by Elek, Marton.
Closes #550.
1 parent c730786 commit d17e31e

File tree

6 files changed

+88
-54
lines changed

6 files changed

+88
-54
lines changed

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/tracing/TracingUtil.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import io.jaegertracing.Configuration;
2323
import io.jaegertracing.internal.JaegerTracer;
2424
import io.opentracing.Scope;
25+
import io.opentracing.Span;
2526
import io.opentracing.SpanContext;
2627
import io.opentracing.Tracer;
2728
import io.opentracing.util.GlobalTracer;
@@ -64,6 +65,19 @@ public static String exportCurrentSpan() {
6465
return builder.toString();
6566
}
6667

68+
/**
69+
* Export the given span as a string by injecting its context through the global tracer.
70+
* @param span the span whose context is exported; when null, nothing is injected.
71+
* @return encoded tracing context, or an empty string when span is null.
72+
*/
73+
public static String exportSpan(Span span) {
74+
StringBuilder builder = new StringBuilder();
75+
if (span != null) {
76+
GlobalTracer.get().inject(span.context(), StringCodec.FORMAT, builder);
77+
}
78+
return builder.toString();
79+
}
80+
6781
/**
6882
* Create a new scope and use the imported span as the parent.
6983
*

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
.InvalidContainerStateException;
3434
import org.apache.hadoop.hdds.scm.container.common.helpers
3535
.StorageContainerException;
36+
import org.apache.hadoop.hdds.tracing.TracingUtil;
3637
import org.apache.hadoop.ozone.audit.AuditAction;
3738
import org.apache.hadoop.ozone.audit.AuditEventStatus;
3839
import org.apache.hadoop.ozone.audit.AuditLogger;
@@ -61,6 +62,8 @@
6162
ContainerDataProto.State;
6263
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result;
6364
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
65+
66+
import io.opentracing.Scope;
6467
import org.slf4j.Logger;
6568
import org.slf4j.LoggerFactory;
6669

@@ -137,10 +140,19 @@ public void buildMissingContainerSet(Set<Long> createdContainerSet) {
137140
containerSet.buildMissingContainerSet(createdContainerSet);
138141
}
139142

140-
@SuppressWarnings("methodlength")
141143
@Override
142144
public ContainerCommandResponseProto dispatch(
143145
ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) {
146+
String spanName = "HddsDispatcher." + msg.getCmdType().name(); // span is named after the command type. NOTE(review): msg is dereferenced here, before dispatchRequest reaches Preconditions.checkNotNull(msg)
147+
try (Scope scope = TracingUtil
148+
.importAndCreateScope(spanName, msg.getTraceID())) {
149+
return dispatchRequest(msg, dispatcherContext); // the scope is closed by try-with-resources after dispatchRequest returns or throws
150+
}
151+
}
152+
153+
@SuppressWarnings("methodlength")
154+
private ContainerCommandResponseProto dispatchRequest(
155+
ContainerCommandRequestProto msg, DispatcherContext dispatcherContext) {
144156
Preconditions.checkNotNull(msg);
145157
LOG.trace("Command {}, trace ID: {} ", msg.getCmdType().toString(),
146158
msg.getTraceID());

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CloseContainerCommandHandler.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
2525
import org.apache.hadoop.hdds.protocol.proto
2626
.StorageContainerDatanodeProtocolProtos.CloseContainerCommandProto;
27+
import org.apache.hadoop.hdds.tracing.TracingUtil;
2728
import org.apache.hadoop.ozone.container.common.interfaces.Container;
2829
import org.apache.hadoop.ozone.container.common.statemachine
2930
.SCMConnectionManager;
@@ -133,6 +134,7 @@ private ContainerCommandRequestProto getContainerCommandRequestProto(
133134
final ContainerCommandRequestProto.Builder command =
134135
ContainerCommandRequestProto.newBuilder();
135136
command.setCmdType(ContainerProtos.Type.CloseContainer);
137+
command.setTraceID(TracingUtil.exportCurrentSpan());
136138
command.setContainerID(containerId);
137139
command.setCloseContainer(
138140
ContainerProtos.CloseContainerRequestProto.getDefaultInstance());

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@
3131
import org.apache.hadoop.hdds.scm.container.common.helpers.
3232
StorageContainerException;
3333
import org.apache.hadoop.hdds.tracing.GrpcServerInterceptor;
34+
import org.apache.hadoop.hdds.tracing.TracingUtil;
3435
import org.apache.hadoop.ozone.OzoneConfigKeys;
3536
import org.apache.hadoop.ozone.OzoneConsts;
3637
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
3738

39+
import io.opentracing.Scope;
3840
import org.apache.ratis.thirdparty.io.grpc.BindableService;
3941
import org.apache.ratis.thirdparty.io.grpc.Server;
4042
import org.apache.ratis.thirdparty.io.grpc.ServerBuilder;
@@ -168,12 +170,18 @@ public void stop() {
168170
@Override
169171
public void submitRequest(ContainerCommandRequestProto request,
170172
HddsProtos.PipelineID pipelineID) throws IOException {
171-
super.submitRequest(request, pipelineID);
172-
ContainerProtos.ContainerCommandResponseProto response =
173-
storageContainer.dispatch(request, null);
174-
if (response.getResult() != ContainerProtos.Result.SUCCESS) {
175-
throw new StorageContainerException(response.getMessage(),
176-
response.getResult());
173+
try (Scope scope = TracingUtil
174+
.importAndCreateScope(
175+
"XceiverServerGrpc." + request.getCmdType().name(),
176+
request.getTraceID())) {
177+
178+
super.submitRequest(request, pipelineID);
179+
ContainerProtos.ContainerCommandResponseProto response =
180+
storageContainer.dispatch(request, null);
181+
if (response.getResult() != ContainerProtos.Result.SUCCESS) {
182+
throw new StorageContainerException(response.getMessage(),
183+
response.getResult());
184+
}
177185
}
178186
}
179187

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java

Lines changed: 43 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.apache.hadoop.hdds.HddsUtils;
2626
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
2727

28-
import io.opentracing.Scope;
2928
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
3029
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
3130
import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
@@ -51,7 +50,6 @@
5150
.ReadChunkRequestProto;
5251
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
5352
.ReadChunkResponseProto;
54-
import org.apache.hadoop.hdds.tracing.TracingUtil;
5553
import org.apache.hadoop.ozone.container.common.interfaces.ContainerDispatcher;
5654
import org.apache.hadoop.hdds.security.token.TokenVerifier;
5755
import org.apache.hadoop.security.UserGroupInformation;
@@ -271,51 +269,50 @@ public TransactionContext startTransaction(RaftClientRequest request)
271269
final ContainerCommandRequestProto proto =
272270
getContainerCommandRequestProto(request.getMessage().getContent());
273271
Preconditions.checkArgument(request.getRaftGroupId().equals(gid));
274-
try (Scope scope = TracingUtil
275-
.importAndCreateScope(proto.getCmdType().name(), proto.getTraceID())) {
276-
try {
277-
dispatcher.validateContainerCommand(proto);
278-
} catch (IOException ioe) {
279-
TransactionContext ctxt = TransactionContext.newBuilder()
280-
.setClientRequest(request)
281-
.setStateMachine(this)
282-
.setServerRole(RaftPeerRole.LEADER)
283-
.build();
284-
ctxt.setException(ioe);
285-
return ctxt;
286-
}
287-
if (proto.getCmdType() == Type.WriteChunk) {
288-
final WriteChunkRequestProto write = proto.getWriteChunk();
289-
// create the log entry proto
290-
final WriteChunkRequestProto commitWriteChunkProto =
291-
WriteChunkRequestProto.newBuilder()
292-
.setBlockID(write.getBlockID())
293-
.setChunkData(write.getChunkData())
294-
// skipping the data field as it is
295-
// already set in statemachine data proto
296-
.build();
297-
ContainerCommandRequestProto commitContainerCommandProto =
298-
ContainerCommandRequestProto
299-
.newBuilder(proto)
300-
.setWriteChunk(commitWriteChunkProto)
301-
.build();
302-
303-
return TransactionContext.newBuilder()
304-
.setClientRequest(request)
305-
.setStateMachine(this)
306-
.setServerRole(RaftPeerRole.LEADER)
307-
.setStateMachineData(write.getData())
308-
.setLogData(commitContainerCommandProto.toByteString())
309-
.build();
310-
} else {
311-
return TransactionContext.newBuilder()
312-
.setClientRequest(request)
313-
.setStateMachine(this)
314-
.setServerRole(RaftPeerRole.LEADER)
315-
.setLogData(request.getMessage().getContent())
316-
.build();
317-
}
272+
try {
273+
dispatcher.validateContainerCommand(proto);
274+
} catch (IOException ioe) {
275+
TransactionContext ctxt = TransactionContext.newBuilder()
276+
.setClientRequest(request)
277+
.setStateMachine(this)
278+
.setServerRole(RaftPeerRole.LEADER)
279+
.build();
280+
ctxt.setException(ioe);
281+
return ctxt;
318282
}
283+
if (proto.getCmdType() == Type.WriteChunk) {
284+
final WriteChunkRequestProto write = proto.getWriteChunk();
285+
// create the log entry proto
286+
final WriteChunkRequestProto commitWriteChunkProto =
287+
WriteChunkRequestProto.newBuilder()
288+
.setBlockID(write.getBlockID())
289+
.setChunkData(write.getChunkData())
290+
// skipping the data field as it is
291+
// already set in statemachine data proto
292+
.build();
293+
ContainerCommandRequestProto commitContainerCommandProto =
294+
ContainerCommandRequestProto
295+
.newBuilder(proto)
296+
.setWriteChunk(commitWriteChunkProto)
297+
.setTraceID(proto.getTraceID())
298+
.build();
299+
300+
return TransactionContext.newBuilder()
301+
.setClientRequest(request)
302+
.setStateMachine(this)
303+
.setServerRole(RaftPeerRole.LEADER)
304+
.setStateMachineData(write.getData())
305+
.setLogData(commitContainerCommandProto.toByteString())
306+
.build();
307+
} else {
308+
return TransactionContext.newBuilder()
309+
.setClientRequest(request)
310+
.setStateMachine(this)
311+
.setServerRole(RaftPeerRole.LEADER)
312+
.setLogData(request.getMessage().getContent())
313+
.build();
314+
}
315+
319316
}
320317

321318
private ByteString getStateMachineData(StateMachineLogEntryProto entryProto) {

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,8 @@ public void submitRequest(ContainerCommandRequestProto request,
486486
super.submitRequest(request, pipelineID);
487487
RaftClientReply reply;
488488
try (Scope scope = TracingUtil
489-
.importAndCreateScope(request.getCmdType().name(),
489+
.importAndCreateScope(
490+
"XceiverServerRatis." + request.getCmdType().name(),
490491
request.getTraceID())) {
491492

492493
RaftClientRequest raftClientRequest =

0 commit comments

Comments
 (0)