Skip to content

Commit f8f85fc

Browse files
committed
HDDS-1318. Fix MalformedTracerStateStringException on DN logs. Contributed by Xiaoyu Yao.
This closes #641 (cherry picked from commit ca5e4ce)
1 parent 5f8ded5 commit f8f85fc

File tree

12 files changed

+114
-58
lines changed

12 files changed

+114
-58
lines changed

hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
3434
import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
3535
import org.apache.hadoop.hdds.security.x509.SecurityConfig;
36+
import org.apache.hadoop.hdds.tracing.GrpcClientInterceptor;
37+
import org.apache.hadoop.hdds.tracing.TracingUtil;
3638
import org.apache.hadoop.ozone.OzoneConfigKeys;
3739
import org.apache.hadoop.ozone.OzoneConsts;
3840
import org.apache.hadoop.security.UserGroupInformation;
@@ -136,7 +138,8 @@ private void connectToDatanode(DatanodeDetails dn, String encodedToken)
136138
NettyChannelBuilder channelBuilder = NettyChannelBuilder.forAddress(dn
137139
.getIpAddress(), port).usePlaintext()
138140
.maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE)
139-
.intercept(new ClientCredentialInterceptor(userName, encodedToken));
141+
.intercept(new ClientCredentialInterceptor(userName, encodedToken),
142+
new GrpcClientInterceptor());
140143
if (secConfig.isGrpcTlsEnabled()) {
141144
File trustCertCollectionFile = secConfig.getTrustStoreFile();
142145
File privateKeyFile = secConfig.getClientPrivateKeyFile();
@@ -204,7 +207,7 @@ public ContainerCommandResponseProto sendCommand(
204207
ContainerCommandRequestProto request) throws IOException {
205208
try {
206209
XceiverClientReply reply;
207-
reply = sendCommandWithRetry(request, null);
210+
reply = sendCommandWithTraceIDAndRetry(request, null);
208211
ContainerCommandResponseProto responseProto = reply.getResponse().get();
209212
return responseProto;
210213
} catch (ExecutionException | InterruptedException e) {
@@ -217,7 +220,21 @@ public XceiverClientReply sendCommand(
217220
ContainerCommandRequestProto request, List<DatanodeDetails> excludeDns)
218221
throws IOException {
219222
Preconditions.checkState(HddsUtils.isReadOnly(request));
220-
return sendCommandWithRetry(request, excludeDns);
223+
return sendCommandWithTraceIDAndRetry(request, excludeDns);
224+
}
225+
226+
private XceiverClientReply sendCommandWithTraceIDAndRetry(
227+
ContainerCommandRequestProto request, List<DatanodeDetails> excludeDns)
228+
throws IOException {
229+
try (Scope scope = GlobalTracer.get()
230+
.buildSpan("XceiverClientGrpc." + request.getCmdType().name())
231+
.startActive(true)) {
232+
ContainerCommandRequestProto finalPayload =
233+
ContainerCommandRequestProto.newBuilder(request)
234+
.setTraceID(TracingUtil.exportCurrentSpan())
235+
.build();
236+
return sendCommandWithRetry(finalPayload, excludeDns);
237+
}
221238
}
222239

223240
private XceiverClientReply sendCommandWithRetry(

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/tracing/StringCodec.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,27 @@
2525
import io.jaegertracing.internal.exceptions.TraceIdOutOfBoundException;
2626
import io.jaegertracing.spi.Codec;
2727
import io.opentracing.propagation.Format;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
2830

2931
/**
30-
* A jaeger codec to save the current tracing context t a string.
32+
* A jaeger codec to save the current tracing context as a string.
3133
*/
3234
public class StringCodec implements Codec<StringBuilder> {
3335

36+
public static final Logger LOG = LoggerFactory.getLogger(StringCodec.class);
3437
public static final StringFormat FORMAT = new StringFormat();
3538

3639
@Override
3740
public JaegerSpanContext extract(StringBuilder s) {
41+
if (s == null) {
42+
throw new EmptyTracerStateStringException();
43+
}
3844
String value = s.toString();
3945
if (value != null && !value.equals("")) {
4046
String[] parts = value.split(":");
4147
if (parts.length != 4) {
48+
LOG.trace("MalformedTracerStateString: {}", value);
4249
throw new MalformedTracerStateStringException(value);
4350
} else {
4451
String traceId = parts[0];
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hdds.tracing;
19+
20+
import io.jaegertracing.internal.JaegerSpanContext;
21+
import io.jaegertracing.internal.exceptions.EmptyTracerStateStringException;
22+
import io.jaegertracing.internal.exceptions.MalformedTracerStateStringException;
23+
import org.apache.hadoop.test.LambdaTestUtils;
24+
import org.junit.jupiter.api.Test;
25+
26+
import static org.junit.jupiter.api.Assertions.assertTrue;
27+
28+
class TestStringCodec {
29+
30+
@Test
31+
void testExtract() throws Exception {
32+
StringCodec codec = new StringCodec();
33+
34+
LambdaTestUtils.intercept(EmptyTracerStateStringException.class,
35+
() -> codec.extract(null));
36+
37+
StringBuilder sb = new StringBuilder().append("123");
38+
LambdaTestUtils.intercept(MalformedTracerStateStringException.class,
39+
"String does not match tracer state format",
40+
() -> codec.extract(sb));
41+
42+
sb.append(":456:789");
43+
LambdaTestUtils.intercept(MalformedTracerStateStringException.class,
44+
"String does not match tracer state format",
45+
() -> codec.extract(sb));
46+
sb.append(":66");
47+
JaegerSpanContext context = codec.extract(sb);
48+
String expectedContextString = new String("123:456:789:66");
49+
assertTrue(context.getTraceId().equals("123"));
50+
assertTrue(context.toString().equals(expectedContextString));
51+
}
52+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hdds.tracing;
19+
/**
20+
Test cases for ozone tracing.
21+
*/

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,6 @@ public static ContainerCommandRequestProto getBlockRequest(
545545
*/
546546
public static void verifyGetBlock(ContainerCommandRequestProto request,
547547
ContainerCommandResponseProto response, int expectedChunksCount) {
548-
Assert.assertEquals(request.getTraceID(), response.getTraceID());
549548
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
550549
Assert.assertEquals(expectedChunksCount,
551550
response.getGetBlock().getBlockData().getChunksCount());

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReplication.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,6 @@ public void testContainerReplication() throws Exception {
112112

113113
Assert.assertNotNull(response);
114114
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
115-
Assert.assertTrue(
116-
putBlockRequest.getTraceID().equals(response.getTraceID()));
117115

118116
HddsDatanodeService destinationDatanode =
119117
chooseDatanodeWithoutContainer(sourcePipelines,

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/metrics/TestContainerMetrics.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ public void testContainerMetrics() throws Exception {
123123
ContainerCommandRequestProto request = ContainerTestHelper
124124
.getCreateContainerRequest(containerID, pipeline);
125125
ContainerCommandResponseProto response = client.sendCommand(request);
126-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
127126
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
128127
response.getResult());
129128

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainer.java

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@ static void runTestOzoneContainerViaDataNode(
158158
response = client.sendCommand(request);
159159
Assert.assertNotNull(response);
160160
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
161-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
162161

163162
// Put Block
164163
putBlockRequest = ContainerTestHelper.getPutBlockRequest(
@@ -168,8 +167,6 @@ static void runTestOzoneContainerViaDataNode(
168167
response = client.sendCommand(putBlockRequest);
169168
Assert.assertNotNull(response);
170169
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
171-
Assert.assertTrue(putBlockRequest.getTraceID()
172-
.equals(response.getTraceID()));
173170

174171
// Get Block
175172
request = ContainerTestHelper.
@@ -187,7 +184,6 @@ static void runTestOzoneContainerViaDataNode(
187184
response = client.sendCommand(request);
188185
Assert.assertNotNull(response);
189186
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
190-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
191187

192188
//Delete Chunk
193189
request = ContainerTestHelper.getDeleteChunkRequest(
@@ -196,7 +192,6 @@ static void runTestOzoneContainerViaDataNode(
196192
response = client.sendCommand(request);
197193
Assert.assertNotNull(response);
198194
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
199-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
200195

201196
//Update an existing container
202197
Map<String, String> containerUpdate = new HashMap<String, String>();
@@ -259,8 +254,6 @@ static void runTestBothGetandPutSmallFile(
259254
ContainerProtos.ContainerCommandResponseProto response
260255
= client.sendCommand(smallFileRequest);
261256
Assert.assertNotNull(response);
262-
Assert.assertTrue(smallFileRequest.getTraceID()
263-
.equals(response.getTraceID()));
264257

265258
final ContainerProtos.ContainerCommandRequestProto getSmallFileRequest
266259
= ContainerTestHelper.getReadSmallFileRequest(client.getPipeline(),
@@ -310,16 +303,13 @@ public void testCloseContainer() throws Exception {
310303
Assert.assertNotNull(response);
311304
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
312305
response.getResult());
313-
Assert.assertTrue(
314-
putBlockRequest.getTraceID().equals(response.getTraceID()));
315306

316307
// Close the contianer.
317308
request = ContainerTestHelper.getCloseContainer(
318309
client.getPipeline(), containerID);
319310
response = client.sendCommand(request);
320311
Assert.assertNotNull(response);
321312
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
322-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
323313

324314

325315
// Assert that none of the write operations are working after close.
@@ -330,25 +320,19 @@ public void testCloseContainer() throws Exception {
330320
Assert.assertNotNull(response);
331321
Assert.assertEquals(ContainerProtos.Result.CLOSED_CONTAINER_IO,
332322
response.getResult());
333-
Assert.assertTrue(
334-
writeChunkRequest.getTraceID().equals(response.getTraceID()));
335323

336324
// Read chunk must work on a closed container.
337325
request = ContainerTestHelper.getReadChunkRequest(client.getPipeline(),
338326
writeChunkRequest.getWriteChunk());
339327
response = client.sendCommand(request);
340328
Assert.assertNotNull(response);
341329
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
342-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
343-
344330

345331
// Put block will fail on a closed container.
346332
response = client.sendCommand(putBlockRequest);
347333
Assert.assertNotNull(response);
348334
Assert.assertEquals(ContainerProtos.Result.CLOSED_CONTAINER_IO,
349335
response.getResult());
350-
Assert.assertTrue(putBlockRequest.getTraceID()
351-
.equals(response.getTraceID()));
352336

353337
// Get block must work on the closed container.
354338
request = ContainerTestHelper.getBlockRequest(client.getPipeline(),
@@ -366,7 +350,6 @@ public void testCloseContainer() throws Exception {
366350
Assert.assertNotNull(response);
367351
Assert.assertEquals(ContainerProtos.Result.CLOSED_CONTAINER_IO,
368352
response.getResult());
369-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
370353
} finally {
371354
if (client != null) {
372355
client.close();
@@ -407,8 +390,6 @@ public void testDeleteContainer() throws Exception {
407390
Assert.assertNotNull(response);
408391
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
409392
response.getResult());
410-
Assert.assertTrue(
411-
putBlockRequest.getTraceID().equals(response.getTraceID()));
412393

413394
// Container cannot be deleted because force flag is set to false and
414395
// the container is still open
@@ -419,7 +400,6 @@ public void testDeleteContainer() throws Exception {
419400
Assert.assertNotNull(response);
420401
Assert.assertEquals(ContainerProtos.Result.DELETE_ON_OPEN_CONTAINER,
421402
response.getResult());
422-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
423403

424404
// Container can be deleted, by setting force flag, even with out closing
425405
request = ContainerTestHelper.getDeleteContainer(
@@ -429,7 +409,6 @@ public void testDeleteContainer() throws Exception {
429409
Assert.assertNotNull(response);
430410
Assert.assertEquals(ContainerProtos.Result.SUCCESS,
431411
response.getResult());
432-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
433412

434413
} finally {
435414
if (client != null) {
@@ -524,7 +503,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
524503
ContainerProtos.ContainerCommandResponseProto response =
525504
client.sendCommand(request);
526505
Assert.assertNotNull(response);
527-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
528506
}
529507

530508
public static ContainerProtos.ContainerCommandRequestProto
@@ -539,30 +517,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
539517
client.sendCommand(writeChunkRequest);
540518
Assert.assertNotNull(response);
541519
Assert.assertEquals(ContainerProtos.Result.SUCCESS, response.getResult());
542-
Assert.assertTrue(response.getTraceID().equals(response.getTraceID()));
543520
return writeChunkRequest;
544521
}
545-
546-
static void runRequestWithoutTraceId(
547-
long containerID, XceiverClientSpi client) throws Exception {
548-
try {
549-
client.connect();
550-
createContainerForTesting(client, containerID);
551-
BlockID blockID = ContainerTestHelper.getTestBlockID(containerID);
552-
final ContainerProtos.ContainerCommandRequestProto smallFileRequest
553-
= ContainerTestHelper.getWriteSmallFileRequest(
554-
client.getPipeline(), blockID, 1024);
555-
556-
ContainerProtos.ContainerCommandResponseProto response
557-
= client.sendCommand(smallFileRequest);
558-
Assert.assertNotNull(response);
559-
Assert.assertTrue(smallFileRequest.getTraceID()
560-
.equals(response.getTraceID()));
561-
} finally {
562-
if (client != null) {
563-
client.close();
564-
}
565-
}
566-
}
567-
568522
}

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOzoneContainerWithTLS.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
178178
ContainerProtos.ContainerCommandResponseProto response =
179179
client.sendCommand(request);
180180
Assert.assertNotNull(response);
181-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
182181
}
183182

184183
private StateContext getContext(DatanodeDetails datanodeDetails) {

hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestSecureOzoneContainer.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ public static void createContainerForTesting(XceiverClientSpi client,
212212
ContainerProtos.ContainerCommandResponseProto response =
213213
client.sendCommand(request);
214214
Assert.assertNotNull(response);
215-
Assert.assertTrue(request.getTraceID().equals(response.getTraceID()));
216215
}
217216

218217
private StateContext getContext(DatanodeDetails datanodeDetails) {

0 commit comments

Comments
 (0)