From f5d53734225448abe3c34e3f109face4c0fea0fd Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Tue, 8 Apr 2014 19:26:02 -0500 Subject: [PATCH 1/3] SPARK-1408 Modify Spark on Yarn to point to the history server when app finishes --- docs/configuration.md | 7 +++++++ .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 3 +-- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 57bda20edcdf..14ea41fa9932 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -627,6 +627,13 @@ Apart from these, the following properties are also available, and may be useful Number of cores to allocate for each task. + + spark.historyserver.address + localhost:18080 + + The URL of the Spark history server. The history server is optional. + + ## Viewing Spark Properties diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 910484ed5432..b55d086a234e 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -366,8 +366,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, finishReq.setAppAttemptId(appAttemptId) finishReq.setFinishApplicationStatus(status) finishReq.setDiagnostics(diagnostics) - // Set tracking url to empty since we don't have a history server. 
- finishReq.setTrackingUrl("") + finishReq.setTrackingUrl(sparkConf.get("spark.historyserver.address", "")) resourceManager.finishApplicationMaster(finishReq) } } diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 30735cbfdf26..0ea714b7c59b 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -347,8 +347,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, logInfo("finishApplicationMaster with " + status) if (registered) { - // Set tracking URL to empty since we don't have a history server. - amClient.unregisterApplicationMaster(status, "" /* appMessage */ , "" /* appTrackingUrl */) + val trackingUrl = sparkConf.get("spark.historyserver.address", "") + amClient.unregisterApplicationMaster(status, diagnostics, trackingUrl) } } } From 446122defcee55712718c2236e94266308c7947c Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Mon, 14 Apr 2014 11:13:49 -0500 Subject: [PATCH 2/3] Make config yarn specific --- docs/configuration.md | 7 ------- docs/running-on-yarn.md | 1 + .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 14ea41fa9932..57bda20edcdf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -627,13 +627,6 @@ Apart from these, the following properties are also available, and may be useful Number of cores to allocate for each task. - - spark.historyserver.address - localhost:18080 - - The URL of the Spark history server. The history server is optional. 
- - ## Viewing Spark Properties diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 982514391ac0..271de8cb8f08 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -42,6 +42,7 @@ System Properties: * `spark.yarn.preserve.staging.files`, set to true to preserve the staged files(spark jar, app jar, distributed cache files) at the end of the job rather then delete them. * `spark.yarn.scheduler.heartbeat.interval-ms`, the interval in ms in which the Spark application master heartbeats into the YARN ResourceManager. Default is 5 seconds. * `spark.yarn.max.executor.failures`, the maximum number of executor failures before failing the application. Default is the number of executors requested times 2 with minimum of 3. +* `spark.yarn.historyServer.address`, the address of the Spark history server (i.e. host.com:18080). The address should not contain a scheme (http://). Defaults to not being set since the history server is an optional service. This address is given to the Yarn ResourceManager when the Spark application finishes to link the application from the ResourceManaer UI to the Spark history server UI. 
# Launching Spark on YARN diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index b55d086a234e..058cdd99e597 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -366,7 +366,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, finishReq.setAppAttemptId(appAttemptId) finishReq.setFinishApplicationStatus(status) finishReq.setDiagnostics(diagnostics) - finishReq.setTrackingUrl(sparkConf.get("spark.historyserver.address", "")) + finishReq.setTrackingUrl(sparkConf.get("spark.yarn.historyServer.address", "")) resourceManager.finishApplicationMaster(finishReq) } } diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 0ea714b7c59b..f869ef93824e 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -347,7 +347,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, logInfo("finishApplicationMaster with " + status) if (registered) { - val trackingUrl = sparkConf.get("spark.historyserver.address", "") + val trackingUrl = sparkConf.get("spark.yarn.historyServer.address", "") amClient.unregisterApplicationMaster(status, diagnostics, trackingUrl) } } From ec8970534fbac113b66bdc9c6a5c0ce6b840fd86 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Thu, 17 Apr 2014 14:21:47 -0500 Subject: [PATCH 3/3] Fix typo. 
--- docs/running-on-yarn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 271de8cb8f08..9765062ec689 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -42,7 +42,7 @@ System Properties: * `spark.yarn.preserve.staging.files`, set to true to preserve the staged files(spark jar, app jar, distributed cache files) at the end of the job rather then delete them. * `spark.yarn.scheduler.heartbeat.interval-ms`, the interval in ms in which the Spark application master heartbeats into the YARN ResourceManager. Default is 5 seconds. * `spark.yarn.max.executor.failures`, the maximum number of executor failures before failing the application. Default is the number of executors requested times 2 with minimum of 3. -* `spark.yarn.historyServer.address`, the address of the Spark history server (i.e. host.com:18080). The address should not contain a scheme (http://). Defaults to not being set since the history server is an optional service. This address is given to the Yarn ResourceManager when the Spark application finishes to link the application from the ResourceManaer UI to the Spark history server UI. +* `spark.yarn.historyServer.address`, the address of the Spark history server (e.g. host.com:18080). The address should not contain a scheme (http://). Defaults to not being set since the history server is an optional service. This address is given to the YARN ResourceManager when the Spark application finishes to link the application from the ResourceManager UI to the Spark history server UI. # Launching Spark on YARN