@@ -36,6 +36,7 @@
 import java.util.Optional;
 import java.util.Properties;
 import java.util.UUID;
+import java.util.function.Consumer;
 import org.apache.spark.ExceptionFailure;
 import org.apache.spark.SparkConf;
 import org.apache.spark.TaskFailedReason;
@@ -51,6 +52,7 @@
 import org.apache.spark.sql.streaming.StateOperatorProgress;
 import org.apache.spark.sql.streaming.StreamingQueryListener;
 import org.apache.spark.sql.streaming.StreamingQueryProgress;
+import org.apache.spark.util.Utils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import scala.Tuple2;
@@ -68,12 +70,16 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
   private static final Logger log = LoggerFactory.getLogger(AbstractDatadogSparkListener.class);
   private static final ObjectMapper objectMapper = new ObjectMapper();
   public static volatile AbstractDatadogSparkListener listener = null;
+  public static volatile SparkListenerInterface openLineageSparkListener = null;
+  public static volatile SparkConf openLineageSparkConf = null;
+
   public static volatile boolean finishTraceOnApplicationEnd = true;
   public static volatile boolean isPysparkShell = false;
 
   private final int MAX_COLLECTION_SIZE = 5000;
   private final int MAX_ACCUMULATOR_SIZE = 50000;
   private final String RUNTIME_TAGS_PREFIX = "spark.datadog.tags.";
+  private static final String AGENT_OL_ENDPOINT = "openlineage/api/v1/lineage";
 
   private final SparkConf sparkConf;
   private final String sparkVersion;
@@ -109,6 +115,7 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
   private final Map<Long, SparkSQLUtils.AccumulatorWithStage> accumulators =
       new RemoveEldestHashMap<>(MAX_ACCUMULATOR_SIZE);
 
+  private volatile boolean isStreamingJob = false;
   private final boolean isRunningOnDatabricks;
   private final String databricksClusterName;
   private final String databricksServiceName;
@@ -151,8 +158,62 @@ public AbstractDatadogSparkListener(SparkConf sparkConf, String appId, String sp
151
158
finishApplication (System .currentTimeMillis (), null , 0 , null );
152
159
}
153
160
}));
161
+ }
162
+
163
+ static void setupSparkConf (SparkConf sparkConf ) {
164
+ sparkConf .set ("spark.openlineage.transport.type" , "composite" );
165
+ sparkConf .set ("spark.openlineage.transport.continueOnFailure" , "true" );
166
+ sparkConf .set ("spark.openlineage.transport.transports.agent.type" , "http" );
167
+ sparkConf .set ("spark.openlineage.transport.transports.agent.url" , getAgentHttpUrl ());
168
+ sparkConf .set ("spark.openlineage.transport.transports.agent.endpoint" , AGENT_OL_ENDPOINT );
169
+ sparkConf .set ("spark.openlineage.transport.transports.agent.compression" , "gzip" );
170
+ sparkConf .set (
171
+ "spark.openlineage.run.tags" ,
172
+ "_dd.trace_id:"
173
+ + listener .applicationSpan .context ().getTraceId ().toString ()
174
+ + ";_dd.ol_intake.emit_spans:false" );
175
+ for (Tuple2 <String , String > tuple : sparkConf .getAll ()) {
176
+ log .error ("Set Spark conf: Key: " + tuple ._1 + ", Value: " + tuple ._2 );
177
+ }
178
+ }
179
+
180
+ public void setupOpenLineage () {
181
+ log .error ("Setting up OpenLineage-Datadog integration" );
182
+ if (openLineageSparkListener != null ) {
183
+ log .error ("No init needed" );
184
+ setupSparkConf (openLineageSparkConf );
185
+ return ;
186
+ }
187
+
188
+ String className = "io.openlineage.spark.agent.OpenLineageSparkListener" ;
189
+ Class clazz ;
190
+ try {
191
+ try {
192
+ clazz = Class .forName (className , true , Thread .currentThread ().getContextClassLoader ());
193
+ } catch (ClassNotFoundException e ) {
194
+ clazz = Class .forName (className , true , Utils .class .getClassLoader ());
195
+ }
196
+ } catch (ClassNotFoundException e ) {
197
+ log .info ("OpenLineage integration is not present on the classpath" );
198
+ return ;
199
+ }
200
+
201
+ openLineageSparkConf = sparkConf ;
202
+ if (clazz == null ) {
203
+ log .info ("OpenLineage integration is not present on the classpath: class is null" );
204
+ return ;
205
+ }
206
+ try {
207
+ setupSparkConf (openLineageSparkConf );
208
+ openLineageSparkListener =
209
+ (SparkListenerInterface )
210
+ clazz .getConstructor (SparkConf .class ).newInstance (openLineageSparkConf );
154
211
155
- log .info ("Created datadog spark listener: {}" , this .getClass ().getSimpleName ());
212
+ log .info (
213
+ "Created OL spark listener: {}" , openLineageSparkListener .getClass ().getSimpleName ());
214
+ } catch (Exception e ) {
215
+ log .warn ("Failed to instantiate OL Spark Listener: {}" , e .toString ());
216
+ }
156
217
}
157
218
158
219
/** Resource name of the spark job. Provide an implementation based on a specific scala version */
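Note on the reflective lookup above: the listener class is resolved first through the thread context classloader and then, as a fallback, through Spark's own classloader (hence the new org.apache.spark.util.Utils import), so the OpenLineage jar stays a soft dependency. A minimal standalone sketch of the same "load only if present" pattern, simplified to a single classloader and an Optional return (the class and helper names here are illustrative, not part of the patch):

    import java.util.Optional;
    import org.apache.spark.SparkConf;
    import org.apache.spark.scheduler.SparkListenerInterface;

    final class OpenLineageLoader {
      static Optional<SparkListenerInterface> tryLoad(SparkConf conf) {
        String className = "io.openlineage.spark.agent.OpenLineageSparkListener";
        try {
          // Resolve and instantiate reflectively so the OpenLineage jar remains optional.
          Class<?> clazz =
              Class.forName(className, true, Thread.currentThread().getContextClassLoader());
          return Optional.of(
              (SparkListenerInterface) clazz.getConstructor(SparkConf.class).newInstance(conf));
        } catch (ReflectiveOperationException e) {
          return Optional.empty(); // not on the classpath: run without lineage events
        }
      }
    }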
@@ -183,6 +244,8 @@ private void initApplicationSpanIfNotInitialized() {
       return;
     }
 
+    log.debug("Starting tracer application span.");
+
     AgentTracer.SpanBuilder builder = buildSparkSpan("spark.application", null);
 
     if (applicationStart != null) {
@@ -202,6 +265,9 @@ private void initApplicationSpanIfNotInitialized() {
     applicationSpan = builder.start();
     setDataJobsSamplingPriority(applicationSpan);
     applicationSpan.setMeasured(true);
+    // Set this up after the application span is created so the trace IDs correlate.
+    setupOpenLineage();
+    notifyOl(x -> openLineageSparkListener.onApplicationStart(x), applicationStart);
   }
 
   private void captureOpenlineageContextIfPresent(AgentTracer.SpanBuilder builder) {
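Note: the ordering comment in this hunk is load-bearing. setupSparkConf dereferences listener.applicationSpan to embed the trace id into spark.openlineage.run.tags, so calling setupOpenLineage() before builder.start() would hit a null span. Roughly what the resulting tag value looks like (the trace id below is invented for illustration):

    // Illustrative value only; the real id comes from applicationSpan.context().getTraceId().
    String runTags = "_dd.trace_id:6603952627731125886;_dd.ol_intake.emit_spans:false";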
@@ -233,6 +299,7 @@ public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) {
     log.info(
         "Received spark application end event, finish trace on this event: {}",
         finishTraceOnApplicationEnd);
+    notifyOl(x -> openLineageSparkListener.onApplicationEnd(x), applicationEnd);
 
     if (finishTraceOnApplicationEnd) {
       finishApplication(applicationEnd.time(), null, 0, null);
@@ -405,6 +472,7 @@ public synchronized void onJobStart(SparkListenerJobStart jobStart) {
     if (sqlSpan != null) {
       jobSpanBuilder.asChildOf(sqlSpan.context());
     } else if (batchKey != null) {
+      isStreamingJob = true;
       AgentSpan batchSpan =
           getOrCreateStreamingBatchSpan(batchKey, jobStart.time(), jobStart.properties());
       jobSpanBuilder.asChildOf(batchSpan.context());
@@ -426,6 +494,7 @@ public synchronized void onJobStart(SparkListenerJobStart jobStart) {
       stageToJob.put(stageId, jobStart.jobId());
     }
     jobSpans.put(jobStart.jobId(), jobSpan);
+    notifyOl(x -> openLineageSparkListener.onJobStart(x), jobStart);
   }
 
   @Override
@@ -456,6 +525,7 @@ public synchronized void onJobEnd(SparkListenerJobEnd jobEnd) {
     if (metrics != null) {
       metrics.setSpanMetrics(jobSpan);
     }
+    notifyOl(x -> openLineageSparkListener.onJobEnd(x), jobEnd);
 
     jobSpan.finish(jobEnd.time() * 1000);
   }
@@ -624,6 +694,8 @@ public void onTaskEnd(SparkListenerTaskEnd taskEnd) {
 
     Properties props = stageProperties.get(stageSpanKey);
     sendTaskSpan(stageSpan, taskEnd, props);
+
+    notifyOl(x -> openLineageSparkListener.onTaskEnd(x), taskEnd);
   }
 
   private void sendTaskSpan(
@@ -705,6 +777,20 @@ public void onOtherEvent(SparkListenerEvent event) {
     updateAdaptiveSQLPlan(event);
   }
 
+  private <T extends SparkListenerEvent> void notifyOl(Consumer<T> ol, T event) {
+    if (isRunningOnDatabricks || isStreamingJob) {
+      log.debug("Not emitting OpenLineage events on Databricks or for streaming jobs");
+      return;
+    }
+    initApplicationSpanIfNotInitialized();
+    if (openLineageSparkListener != null) {
+      log.debug("Notifying with event `{}`", event.getClass().getCanonicalName());
+      ol.accept(event);
+    } else {
+      log.debug("OpenLineageSparkListener is null");
+    }
+  }
+
   private static final Class<?> adaptiveExecutionUpdateClass;
   private static final MethodHandle adaptiveExecutionIdMethod;
   private static final MethodHandle adaptiveSparkPlanMethod;
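Note on notifyOl: one generic guard wraps every forwarded callback, and the explicit lambdas at the call sites (rather than method references) appear deliberate, since openLineageSparkListener is a volatile field that may still be null when the lambda is built. A bound method reference such as openLineageSparkListener::onJobStart evaluates its receiver eagerly and would throw before the null check inside notifyOl ever runs; the lambda defers the field read until accept(). A self-contained sketch of that difference (all names here are illustrative):

    import java.util.function.Consumer;

    public class LambdaVsMethodRef {
      static String receiver = null; // stands in for the volatile listener field

      static void notify(Consumer<String> c, String event) {
        if (receiver != null) { // mirrors the null check in notifyOl
          c.accept(event);
        }
      }

      public static void main(String[] args) {
        // Deferred: the field is read only inside accept(), after the null check,
        // so this call is safely skipped while receiver is null.
        notify(s -> System.out.println(receiver.concat(s)), "event");

        // Eager: evaluating receiver::concat throws NullPointerException right here,
        // before notify() gets a chance to check anything.
        // Consumer<String> eager = receiver::concat;
      }
    }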
@@ -753,6 +839,7 @@ private synchronized void updateAdaptiveSQLPlan(SparkListenerEvent event) {
   private synchronized void onSQLExecutionStart(SparkListenerSQLExecutionStart sqlStart) {
     sqlPlans.put(sqlStart.executionId(), sqlStart.sparkPlanInfo());
     sqlQueries.put(sqlStart.executionId(), sqlStart);
+    notifyOl(x -> openLineageSparkListener.onOtherEvent(x), sqlStart);
   }
 
   private synchronized void onSQLExecutionEnd(SparkListenerSQLExecutionEnd sqlEnd) {
@@ -765,6 +852,7 @@ private synchronized void onSQLExecutionEnd(SparkListenerSQLExecutionEnd sqlEnd)
     if (metrics != null) {
      metrics.setSpanMetrics(span);
     }
+    notifyOl(x -> openLineageSparkListener.onOtherEvent(x), sqlEnd);
 
     span.finish(sqlEnd.time() * 1000);
   }
@@ -1260,6 +1348,15 @@ private static String getDatabricksRunName(SparkConf conf) {
     return null;
   }
 
+  private static String getAgentHttpUrl() {
+    StringBuilder sb =
+        new StringBuilder("http://")
+            .append(Config.get().getAgentHost())
+            .append(":")
+            .append(Config.get().getAgentPort());
+    return sb.toString();
+  }
+
   @SuppressForbidden // called at most once per spark application
   private static String removeUuidFromEndOfString(String input) {
     return input.replaceAll(
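Note: assuming stock agent settings (Config.get().getAgentHost() resolving to localhost and the trace port to 8126, which are assumptions about the defaults rather than something this patch pins down), the helper above combines with AGENT_OL_ENDPOINT into the lineage intake URL:

    // Assumed defaults shown literally; real values come from Config.get().
    String base = "http://localhost:8126";             // getAgentHttpUrl()
    String url = base + "/openlineage/api/v1/lineage"; // base + AGENT_OL_ENDPOINT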