Skip to content

Commit 4025000

Browse files
authored
Change hash computation for protobuf to better represent impacting changes + save proto number in schema (#8201)
* change hash computation for protobuf to be closer to impacting changes * add protobuf number as extension to the saved schema * update test
1 parent 82d8045 commit 4025000

File tree

7 files changed

+75
-33
lines changed

7 files changed

+75
-33
lines changed

dd-java-agent/instrumentation/avro/src/main/java/datadog/trace/instrumentation/avro/SchemaExtractor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public static boolean extractProperty(
104104
}
105105

106106
return builder.addProperty(
107-
schemaName, fieldName, array, type, description, ref, format, enumValues);
107+
schemaName, fieldName, array, type, description, ref, format, enumValues, null);
108108
}
109109

110110
public static boolean extractSchema(

dd-java-agent/instrumentation/protobuf/src/main/java/datadog/trace/instrumentation/protobuf_java/SchemaExtractor.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import datadog.trace.bootstrap.instrumentation.api.Schema;
1111
import datadog.trace.bootstrap.instrumentation.api.SchemaBuilder;
1212
import datadog.trace.bootstrap.instrumentation.api.SchemaIterator;
13+
import java.util.Comparator;
14+
import java.util.HashMap;
1315
import java.util.List;
1416
import java.util.stream.Collectors;
1517

@@ -57,7 +59,8 @@ public static boolean extractProperty(
5759
if (field.isRepeated()) {
5860
array = true;
5961
}
60-
switch (field.getType().toProto().getNumber()) {
62+
int typeCode = field.getType().toProto().getNumber();
63+
switch (typeCode) {
6164
case TYPE_DOUBLE:
6265
type = "number";
6366
format = "double";
@@ -107,6 +110,7 @@ public static boolean extractProperty(
107110
if (!extractSchema(field.getMessageType(), builder, depth)) {
108111
return false;
109112
}
113+
builder.addToHash(field.getMessageType().getFullName());
110114
break;
111115
case TYPE_BYTES:
112116
type = "string";
@@ -123,6 +127,7 @@ public static boolean extractProperty(
123127
enumValues =
124128
field.getEnumType().getValues().stream()
125129
.map(Descriptors.EnumValueDescriptor::getName)
130+
.peek(builder::addToHash)
126131
.collect(Collectors.toList());
127132
break;
128133
case TYPE_SFIXED32:
@@ -140,8 +145,13 @@ public static boolean extractProperty(
140145
description = "Unknown type";
141146
break;
142147
}
148+
builder.addToHash(field.getNumber());
149+
builder.addToHash(typeCode);
150+
builder.addToHash(depth);
151+
HashMap<String, String> extensions = new HashMap<String, String>(1);
152+
extensions.put("x-protobuf-number", Integer.toString(field.getNumber()));
143153
return builder.addProperty(
144-
schemaName, fieldName, array, type, description, ref, format, enumValues);
154+
schemaName, fieldName, array, type, description, ref, format, enumValues, extensions);
145155
}
146156

147157
public static boolean extractSchema(Descriptor descriptor, SchemaBuilder builder, int depth) {
@@ -150,7 +160,11 @@ public static boolean extractSchema(Descriptor descriptor, SchemaBuilder builder
150160
if (!builder.shouldExtractSchema(schemaName, depth)) {
151161
return false;
152162
}
153-
for (FieldDescriptor field : descriptor.getFields()) {
163+
// iterate fields in number order to ensure hash stability
164+
for (FieldDescriptor field :
165+
descriptor.getFields().stream()
166+
.sorted(Comparator.comparingInt(FieldDescriptor::getNumber))
167+
.collect(Collectors.toList())) {
154168
if (!extractProperty(field, schemaName, field.getName(), builder, depth)) {
155169
return false;
156170
}

dd-java-agent/instrumentation/protobuf/src/test/groovy/com/datadog/instrumentation/protobuf/AbstractMessageInstrumentationTest.groovy

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
package com.datadog.instrumentation.protobuf
22

3+
import com.datadog.instrumentation.protobuf.generated.Message.MyMessage
4+
import com.datadog.instrumentation.protobuf.generated.Message.OtherMessage
35
import com.google.protobuf.InvalidProtocolBufferException
46
import datadog.trace.agent.test.AgentTestRunner
57
import datadog.trace.api.DDTags
68
import datadog.trace.bootstrap.instrumentation.api.AgentSpan
79

810
import static datadog.trace.agent.test.utils.TraceUtils.runUnderTrace
911
import static datadog.trace.bootstrap.instrumentation.api.AgentTracer.activeSpan
10-
import com.datadog.instrumentation.protobuf.generated.Message.MyMessage
11-
import com.datadog.instrumentation.protobuf.generated.Message.OtherMessage
1212

1313
class AbstractMessageInstrumentationTest extends AgentTestRunner {
1414
@Override
1515
protected boolean isDataStreamsEnabled() {
1616
return true
1717
}
1818

19-
String schema = "{\"components\":{\"schemas\":{\"com.datadog.instrumentation.protobuf.generated.MyMessage\":{\"properties\":{\"id\":{\"type\":\"string\"},\"value\":{\"type\":\"string\"},\"other_message\":{\"items\":{\"\$ref\":\"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage\"},\"type\":\"array\"}},\"type\":\"object\"},\"com.datadog.instrumentation.protobuf.generated.OtherMessage\":{\"properties\":{\"name\":{\"type\":\"string\"},\"age\":{\"format\":\"int32\",\"type\":\"integer\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}"
20-
String schemaID = "9054678588020233022"
19+
String expectedSchema = "{\"components\":{\"schemas\":{\"com.datadog.instrumentation.protobuf.generated.MyMessage\":{\"properties\":{\"id\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"value\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"type\":\"string\"},\"other_message\":{\"extensions\":{\"x-protobuf-number\":\"3\"},\"items\":{\"\$ref\":\"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage\"},\"type\":\"array\"}},\"type\":\"object\"},\"com.datadog.instrumentation.protobuf.generated.OtherMessage\":{\"properties\":{\"name\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"age\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"format\":\"int32\",\"type\":\"integer\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}"
20+
String expectedSchemaID = "4690647329509494987"
2121

2222

2323
void 'test extract protobuf schema on serialize & deserialize'() {
@@ -50,12 +50,12 @@ class AbstractMessageInstrumentationTest extends AgentTestRunner {
5050
errored false
5151
measured false
5252
tags {
53-
"$DDTags.SCHEMA_DEFINITION" schema
53+
"$DDTags.SCHEMA_DEFINITION" expectedSchema
5454
"$DDTags.SCHEMA_WEIGHT" 1
5555
"$DDTags.SCHEMA_TYPE" "protobuf"
5656
"$DDTags.SCHEMA_NAME" "com.datadog.instrumentation.protobuf.generated.MyMessage"
5757
"$DDTags.SCHEMA_OPERATION" "serialization"
58-
"$DDTags.SCHEMA_ID" schemaID
58+
"$DDTags.SCHEMA_ID" expectedSchemaID
5959
defaultTags(false)
6060
}
6161
}
@@ -68,12 +68,12 @@ class AbstractMessageInstrumentationTest extends AgentTestRunner {
6868
errored false
6969
measured false
7070
tags {
71-
"$DDTags.SCHEMA_DEFINITION" schema
71+
"$DDTags.SCHEMA_DEFINITION" expectedSchema
7272
"$DDTags.SCHEMA_WEIGHT" 1
7373
"$DDTags.SCHEMA_TYPE" "protobuf"
7474
"$DDTags.SCHEMA_NAME" "com.datadog.instrumentation.protobuf.generated.MyMessage"
7575
"$DDTags.SCHEMA_OPERATION" "deserialization"
76-
"$DDTags.SCHEMA_ID" schemaID
76+
"$DDTags.SCHEMA_ID" expectedSchemaID
7777
defaultTags(false)
7878
}
7979
}

dd-java-agent/instrumentation/protobuf/src/test/groovy/com/datadog/instrumentation/protobuf/DynamicMessageInstrumentationTest.groovy

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
package com.datadog.instrumentation.protobuf
22

3+
import com.datadog.instrumentation.protobuf.generated.Message.MyMessage
34
import com.google.protobuf.DynamicMessage
45
import datadog.trace.agent.test.AgentTestRunner
56
import datadog.trace.api.DDTags
67
import datadog.trace.bootstrap.instrumentation.api.AgentSpan
7-
import com.datadog.instrumentation.protobuf.generated.Message.MyMessage
88

99
import static datadog.trace.agent.test.utils.TraceUtils.runUnderTrace
1010
import static datadog.trace.bootstrap.instrumentation.api.AgentTracer.activeSpan
@@ -22,8 +22,8 @@ class DynamicMessageInstrumentationTest extends AgentTestRunner {
2222
.setValue("Hello from Protobuf!")
2323
.build()
2424
when:
25-
String schema = "{\"components\":{\"schemas\":{\"com.datadog.instrumentation.protobuf.generated.MyMessage\":{\"properties\":{\"id\":{\"type\":\"string\"},\"value\":{\"type\":\"string\"},\"other_message\":{\"items\":{\"\$ref\":\"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage\"},\"type\":\"array\"}},\"type\":\"object\"},\"com.datadog.instrumentation.protobuf.generated.OtherMessage\":{\"properties\":{\"name\":{\"type\":\"string\"},\"age\":{\"format\":\"int32\",\"type\":\"integer\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}"
26-
String schemaID = "9054678588020233022"
25+
String expectedSchema = "{\"components\":{\"schemas\":{\"com.datadog.instrumentation.protobuf.generated.MyMessage\":{\"properties\":{\"id\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"value\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"type\":\"string\"},\"other_message\":{\"extensions\":{\"x-protobuf-number\":\"3\"},\"items\":{\"\$ref\":\"#/components/schemas/com.datadog.instrumentation.protobuf.generated.OtherMessage\"},\"type\":\"array\"}},\"type\":\"object\"},\"com.datadog.instrumentation.protobuf.generated.OtherMessage\":{\"properties\":{\"name\":{\"extensions\":{\"x-protobuf-number\":\"1\"},\"type\":\"string\"},\"age\":{\"extensions\":{\"x-protobuf-number\":\"2\"},\"format\":\"int32\",\"type\":\"integer\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}"
26+
String expectedSchemaID = "4690647329509494987"
2727
var bytes
2828
runUnderTrace("parent_serialize") {
2929
AgentSpan span = activeSpan()
@@ -46,12 +46,12 @@ class DynamicMessageInstrumentationTest extends AgentTestRunner {
4646
errored false
4747
measured false
4848
tags {
49-
"$DDTags.SCHEMA_DEFINITION" schema
49+
"$DDTags.SCHEMA_DEFINITION" expectedSchema
5050
"$DDTags.SCHEMA_WEIGHT" 1
5151
"$DDTags.SCHEMA_TYPE" "protobuf"
5252
"$DDTags.SCHEMA_NAME" "com.datadog.instrumentation.protobuf.generated.MyMessage"
5353
"$DDTags.SCHEMA_OPERATION" "serialization"
54-
"$DDTags.SCHEMA_ID" schemaID
54+
"$DDTags.SCHEMA_ID" expectedSchemaID
5555
defaultTags(false)
5656
}
5757
}
@@ -64,12 +64,12 @@ class DynamicMessageInstrumentationTest extends AgentTestRunner {
6464
errored false
6565
measured false
6666
tags {
67-
"$DDTags.SCHEMA_DEFINITION" schema
67+
"$DDTags.SCHEMA_DEFINITION" expectedSchema
6868
"$DDTags.SCHEMA_WEIGHT" 1
6969
"$DDTags.SCHEMA_TYPE" "protobuf"
7070
"$DDTags.SCHEMA_NAME" "com.datadog.instrumentation.protobuf.generated.MyMessage"
7171
"$DDTags.SCHEMA_OPERATION" "deserialization"
72-
"$DDTags.SCHEMA_ID" schemaID
72+
"$DDTags.SCHEMA_ID" expectedSchemaID
7373
defaultTags(false)
7474
}
7575
}

dd-trace-core/src/main/java/datadog/trace/core/datastreams/SchemaBuilder.java

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ public class SchemaBuilder implements datadog.trace.bootstrap.instrumentation.ap
1717
private static final DDCache<String, Schema> CACHE = DDCaches.newFixedSizeCache(32);
1818
private static final int maxDepth = 10;
1919
private static final int maxProperties = 1000;
20+
private static final long HASH_INIT = FNV64Hash.generateHash(new byte[0], FNV64Hash.Version.v1A);
21+
private long currentHash = HASH_INIT;
2022
private int properties;
2123
private final SchemaIterator iterator;
2224

@@ -33,27 +35,41 @@ public boolean addProperty(
3335
String description,
3436
String ref,
3537
String format,
36-
List<String> enumValues) {
38+
List<String> enumValues,
39+
Map<String, String> extensions) {
3740
if (properties >= maxProperties) {
3841
return false;
3942
}
4043
properties++;
4144
OpenApiSchema.Property property =
42-
new OpenApiSchema.Property(type, description, ref, format, enumValues, null);
45+
new OpenApiSchema.Property(
46+
type, description, ref, format, enumValues, isArray ? null : extensions, null);
4347
if (isArray) {
44-
property = new OpenApiSchema.Property("array", null, null, null, null, property);
48+
property = new OpenApiSchema.Property("array", null, null, null, null, extensions, property);
4549
}
4650
schema.components.schemas.get(schemaName).properties.put(fieldName, property);
4751
return true;
4852
}
4953

54+
public void addToHash(int value) {
55+
addToHash(Integer.toString(value));
56+
}
57+
58+
public void addToHash(String value) {
59+
currentHash = FNV64Hash.continueHash(currentHash, value, FNV64Hash.Version.v1A);
60+
}
61+
5062
public Schema build() {
5163
this.iterator.iterateOverSchema(this);
5264
Moshi moshi = new Moshi.Builder().build();
5365
JsonAdapter<OpenApiSchema> jsonAdapter = moshi.adapter(OpenApiSchema.class);
5466
String definition = jsonAdapter.toJson(this.schema);
55-
String id = Long.toUnsignedString(FNV64Hash.generateHash(definition, FNV64Hash.Version.v1A));
56-
return new Schema(definition, id);
67+
if (currentHash == HASH_INIT) {
68+
// if hash was not computed along the way,
69+
// we fall back to computing it from the json representation of the schema
70+
currentHash = FNV64Hash.generateHash(definition, FNV64Hash.Version.v1A);
71+
}
72+
return new Schema(definition, Long.toUnsignedString(currentHash));
5773
}
5874

5975
@Override
@@ -93,6 +109,7 @@ public static class Property {
93109
@Json(name = "enum")
94110
public List<String> enumValues;
95111

112+
public final Map<String, String> extensions;
96113
public Property items;
97114

98115
public Property(
@@ -101,12 +118,14 @@ public Property(
101118
String ref,
102119
String format,
103120
List<String> enumValues,
121+
Map<String, String> extensions,
104122
Property items) {
105123
this.type = type;
106124
this.description = description;
107125
this.ref = ref;
108126
this.format = format;
109127
this.enumValues = enumValues;
128+
this.extensions = extensions;
110129
this.items = items;
111130
}
112131
}

dd-trace-core/src/test/groovy/datadog/trace/core/datastreams/SchemaBuilderTest.groovy

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,15 @@ class SchemaBuilderTest extends DDCoreSpecification {
1010

1111
@Override
1212
void iterateOverSchema(datadog.trace.bootstrap.instrumentation.api.SchemaBuilder builder) {
13-
builder.addProperty("person", "name", false, "string", "name of the person", null, null, null)
14-
builder.addProperty("person", "phone_numbers", true, "string", null, null, null, null)
15-
builder.addProperty("person", "person_name", false, "string", null, null, null, null)
16-
builder.addProperty("person", "address", false, "object", null, "#/components/schemas/address", null, null)
17-
builder.addProperty("address", "zip", false, "number", null, null, "int", null)
18-
builder.addProperty("address", "street", false, "string", null, null, null, null)
13+
HashMap<String, String> extension = new HashMap<String, String>(1)
14+
extension.put("x-test-extension-1", "hello")
15+
extension.put("x-test-extension-2", "world")
16+
builder.addProperty("person", "name", false, "string", "name of the person", null, null, null, null)
17+
builder.addProperty("person", "phone_numbers", true, "string", null, null, null, null, null)
18+
builder.addProperty("person", "person_name", false, "string", null, null, null, null, null)
19+
builder.addProperty("person", "address", false, "object", null, "#/components/schemas/address", null, null, null)
20+
builder.addProperty("address", "zip", false, "number", null, null, "int", null, null)
21+
builder.addProperty("address", "street", false, "string", null, null, null, null, extension)
1922
}
2023
}
2124

@@ -31,8 +34,8 @@ class SchemaBuilderTest extends DDCoreSpecification {
3134
Schema schema = builder.build()
3235

3336
then:
34-
"{\"components\":{\"schemas\":{\"person\":{\"properties\":{\"name\":{\"description\":\"name of the person\",\"type\":\"string\"},\"phone_numbers\":{\"items\":{\"type\":\"string\"},\"type\":\"array\"},\"person_name\":{\"type\":\"string\"},\"address\":{\"\$ref\":\"#/components/schemas/address\",\"type\":\"object\"}},\"type\":\"object\"},\"address\":{\"properties\":{\"zip\":{\"format\":\"int\",\"type\":\"number\"},\"street\":{\"type\":\"string\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}" == schema.definition
35-
"14950130709604290100" == schema.id
37+
"{\"components\":{\"schemas\":{\"person\":{\"properties\":{\"name\":{\"description\":\"name of the person\",\"type\":\"string\"},\"phone_numbers\":{\"items\":{\"type\":\"string\"},\"type\":\"array\"},\"person_name\":{\"type\":\"string\"},\"address\":{\"\$ref\":\"#/components/schemas/address\",\"type\":\"object\"}},\"type\":\"object\"},\"address\":{\"properties\":{\"zip\":{\"format\":\"int\",\"type\":\"number\"},\"street\":{\"extensions\":{\"x-test-extension-1\":\"hello\",\"x-test-extension-2\":\"world\"},\"type\":\"string\"}},\"type\":\"object\"}}},\"openapi\":\"3.0.0\"}" == schema.definition
38+
"16548065305426330543" == schema.id
3639
shouldExtractPerson
3740
shouldExtractAddress
3841
!shouldExtractPerson2

internal-api/src/main/java/datadog/trace/bootstrap/instrumentation/api/SchemaBuilder.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package datadog.trace.bootstrap.instrumentation.api;
22

33
import java.util.List;
4+
import java.util.Map;
45

56
public interface SchemaBuilder {
67
boolean addProperty(
@@ -11,7 +12,12 @@ boolean addProperty(
1112
String description,
1213
String ref,
1314
String format,
14-
List<String> enumValues);
15+
List<String> enumValue,
16+
Map<String, String> extensions);
17+
18+
void addToHash(int value);
19+
20+
void addToHash(String value);
1521

1622
boolean shouldExtractSchema(String schemaName, int depth);
1723
}

0 commit comments

Comments
 (0)