From 6560ddcdde5cdd91d8d566a831d314de0a7b37c7 Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Mon, 22 Aug 2016 11:03:39 +0800 Subject: [PATCH 1/6] [SPARKR][SPARKSUBMIT] Allow to set sparkr shell command through --conf --- .../apache/spark/launcher/SparkLauncher.java | 2 ++ .../launcher/SparkSubmitCommandBuilder.java | 3 ++- .../SparkSubmitCommandBuilderSuite.java | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java index 7b7a7bf57b110..ea56214d2390c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java @@ -68,6 +68,8 @@ public class SparkLauncher { static final String PYSPARK_PYTHON = "spark.pyspark.python"; + static final String SPARKR_R_SHELL = "spark.r.shell.command"; + /** Logger name to use when launching a child process. 
*/ public static final String CHILD_PROCESS_LOGGER_NAME = "spark.launcher.childProcLoggerName"; diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index f6da644e4c376..8d486bd0bc7f9 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -336,7 +336,8 @@ private List buildSparkRCommand(Map env) throws IOExcept join(File.separator, sparkHome, "R", "lib", "SparkR", "profile", "shell.R")); List args = new ArrayList<>(); - args.add(firstNonEmpty(System.getenv("SPARKR_DRIVER_R"), "R")); + args.add(firstNonEmpty(firstNonEmpty(conf.get(SparkLauncher.SPARKR_R_SHELL), + System.getenv("SPARKR_DRIVER_R"), "R"))); return args; } diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 16e5a22401ca8..ad2e7a70c4eae 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -172,6 +172,24 @@ public void testPySparkFallback() throws Exception { assertEquals("arg1", cmd.get(cmd.size() - 1)); } + @Test + public void testSparkRShell() throws Exception { + List sparkSubmitArgs = Arrays.asList( + SparkSubmitCommandBuilder.SPARKR_SHELL, + "--master=foo", + "--deploy-mode=bar", + "--conf", "spark.r.shell.command=/usr/bin/R"); + + Map env = new HashMap<>(); + List cmd = buildCommand(sparkSubmitArgs, env); + assertEquals("/usr/bin/R", cmd.get(cmd.size() - 1)); + assertEquals( + String.format( + "\"%s\" \"foo\" \"%s\" \"bar\" \"--conf\" \"spark.r.shell.command=/usr/bin/R\" \"%s\"", + parser.MASTER, parser.DEPLOY_MODE, SparkSubmitCommandBuilder.SPARKR_SHELL_RESOURCE), + 
env.get("SPARKR_SUBMIT_ARGS")); + } + @Test public void testExamplesRunner() throws Exception { List sparkSubmitArgs = Arrays.asList( From f0231f9e0b63a5860ece0e67ee57f4074292e53e Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Mon, 22 Aug 2016 11:12:31 +0800 Subject: [PATCH 2/6] add doc --- docs/configuration.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 96e8c6d08a1e3..8d78d0a5d810d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1752,6 +1752,15 @@ showDF(properties, numRows = 200, truncate = FALSE) Executable for executing R scripts in client modes for driver. Ignored in cluster modes. + + spark.r.shell.command + R + + Executable for executing R shell in both client mode and cluster mode. For now sparkr shell only supports + client mode, but this property will be propagated to remote driver in cluster mode and can be used by driver if user want to implement + sparkr shell in cluster mode. + + #### Deploy From 0a24b2d88ebbff3718c1169ea842c0356cf1999d Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Mon, 22 Aug 2016 17:38:50 +0800 Subject: [PATCH 3/6] update doc --- docs/configuration.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 8d78d0a5d810d..adbfc94c974f3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1756,9 +1756,7 @@ showDF(properties, numRows = 200, truncate = FALSE) spark.r.shell.command R - Executable for executing R shell in both client mode and cluster mode. For now sparkr shell only supports - client mode, but this property will be propagated to remote driver in cluster mode and can be used by driver if user want to implement - sparkr shell in cluster mode. + Executable for executing R shell. 
From fa11d2a9ba63543674cee38b171dec31f235ecc7 Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Thu, 25 Aug 2016 07:38:51 +0800 Subject: [PATCH 4/6] update doc --- docs/configuration.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index adbfc94c974f3..728044f61f148 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1756,7 +1756,8 @@ showDF(properties, numRows = 200, truncate = FALSE) spark.r.shell.command R - Executable for executing R shell. + Executable for executing sparkR shell in client modes for driver. Ignored in cluster modes. It is the same as environment variable SPARKR_DRIVER_R, but take precedence over it. + spark.r.shell.command is used for interactive mode of sparkR (sparkR shell) while spark.r.driver.command is used for the batch mode (running sparkR script). @@ -1825,7 +1826,8 @@ The following variables can be set in `spark-env.sh`: SPARKR_DRIVER_R - R binary executable to use for SparkR shell (default is R). + R binary executable to use for SparkR shell (default is R). 
+ Property spark.r.shell.command takes precedence if it is set. SPARK_LOCAL_IP From 3303c5330df051f99dfff53c2993ce38ed67a2a6 Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Thu, 25 Aug 2016 10:43:23 +0800 Subject: [PATCH 5/6] address comments --- .../org/apache/spark/launcher/SparkSubmitCommandBuilder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 8d486bd0bc7f9..29c6d82cdbf19 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -336,8 +336,8 @@ private List buildSparkRCommand(Map env) throws IOExcept join(File.separator, sparkHome, "R", "lib", "SparkR", "profile", "shell.R")); List args = new ArrayList<>(); - args.add(firstNonEmpty(firstNonEmpty(conf.get(SparkLauncher.SPARKR_R_SHELL), - System.getenv("SPARKR_DRIVER_R"), "R"))); + args.add(firstNonEmpty(conf.get(SparkLauncher.SPARKR_R_SHELL), + System.getenv("SPARKR_DRIVER_R"), "R")); return args; } From bb751907ea0a04af1e6fbf3943ce57aa6c21552b Mon Sep 17 00:00:00 2001 From: Jeff Zhang Date: Thu, 25 Aug 2016 16:59:29 +0800 Subject: [PATCH 6/6] update doc --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 728044f61f148..2a366e5d69c2f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1757,7 +1757,7 @@ showDF(properties, numRows = 200, truncate = FALSE) R Executable for executing sparkR shell in client modes for driver. Ignored in cluster modes. It is the same as environment variable SPARKR_DRIVER_R, but take precedence over it. - spark.r.shell.command is used for interactive mode of sparkR (sparkR shell) while spark.r.driver.command is used for the batch mode (running sparkR script). 
+ spark.r.shell.command is used for the sparkR shell, while spark.r.driver.command is used for running R scripts.