Commit 0e152f4

Merging master
2 parents: 27a81c9 + a42af81

73 files changed (+2237, -434 lines)

bin/docker-image-tool.sh

Lines changed: 29 additions & 1 deletion
@@ -19,6 +19,8 @@
 # This script builds and pushes docker images when run from a release of Spark
 # with Kubernetes support.
 
+set -x
+
 function error {
   echo "$@" 1>&2
   exit 1
@@ -172,13 +174,19 @@ function build {
   local BASEDOCKERFILE=${BASEDOCKERFILE:-"kubernetes/dockerfiles/spark/Dockerfile"}
   local PYDOCKERFILE=${PYDOCKERFILE:-false}
   local RDOCKERFILE=${RDOCKERFILE:-false}
+  local ARCHS=${ARCHS:-"--platform linux/amd64,linux/arm64"}
 
   (cd $(img_ctx_dir base) && docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
     -f "$BASEDOCKERFILE" .)
   if [ $? -ne 0 ]; then
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
   fi
+  if [ "${CROSS_BUILD}" != "false" ]; then
+    (cd $(img_ctx_dir base) && docker buildx build $ARCHS $NOCACHEARG "${BUILD_ARGS[@]}" \
+      -t $(image_ref spark) \
+      -f "$BASEDOCKERFILE" .)
+  fi
 
   if [ "${PYDOCKERFILE}" != "false" ]; then
     (cd $(img_ctx_dir pyspark) && docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
@@ -187,6 +195,11 @@ function build {
     if [ $? -ne 0 ]; then
       error "Failed to build PySpark Docker image, please refer to Docker build output for details."
     fi
+    if [ "${CROSS_BUILD}" != "false" ]; then
+      (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+        -t $(image_ref spark-py) \
+        -f "$PYDOCKERFILE" .)
+    fi
   fi
 
   if [ "${RDOCKERFILE}" != "false" ]; then
@@ -196,6 +209,11 @@ function build {
     if [ $? -ne 0 ]; then
       error "Failed to build SparkR Docker image, please refer to Docker build output for details."
     fi
+    if [ "${CROSS_BUILD}" != "false" ]; then
+      (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+        -t $(image_ref spark-r) \
+        -f "$RDOCKERFILE" .)
+    fi
   fi
 }
 
@@ -227,6 +245,8 @@ Options:
   -n          Build docker image with --no-cache
   -u uid      UID to use in the USER directive to set the user the main Spark process runs as inside the
               resulting container
+  -X          Use docker buildx to cross build. Automatically pushes.
+              See https://docs.docker.com/buildx/working-with-buildx/ for steps to setup buildx.
   -b arg      Build arg to build or push the image. For multiple build args, this option needs to
               be used separately for each build arg.
@@ -252,6 +272,12 @@ Examples:
   - Build and push JDK11-based image with tag "v3.0.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build
     $0 -r docker.io/myrepo -t v3.0.0 push
+
+  - Build and push JDK11-based image for multiple archs to docker.io/myrepo
+    $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build
+    # Note: buildx, which does cross building, needs to do the push during build
+    # So there is no seperate push step with -X
+
 EOF
 }
 
@@ -268,7 +294,8 @@ RDOCKERFILE=
 NOCACHEARG=
 BUILD_PARAMS=
 SPARK_UID=
-while getopts f:p:R:mr:t:nb:u: option
+CROSS_BUILD="false"
+while getopts f:p:R:mr:t:Xnb:u: option
 do
   case "${option}"
   in
@@ -279,6 +306,7 @@ do
     t) TAG=${OPTARG};;
     n) NOCACHEARG="--no-cache";;
     b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};;
+    X) CROSS_BUILD=1;;
     m)
       if ! which minikube 1>/dev/null; then
        error "Cannot find minikube."

core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js

Lines changed: 7 additions & 8 deletions
@@ -173,9 +173,11 @@ function renderDagViz(forJob) {
   });
 
   metadataContainer().selectAll(".barrier-rdd").each(function() {
-    var rddId = d3.select(this).text().trim();
-    var clusterId = VizConstants.clusterPrefix + rddId;
-    svg.selectAll("g." + clusterId).classed("barrier", true)
+    var opId = d3.select(this).text().trim();
+    var opClusterId = VizConstants.clusterPrefix + opId;
+    var stageId = $(this).parents(".stage-metadata").attr("stage-id");
+    var stageClusterId = VizConstants.graphPrefix + stageId;
+    svg.selectAll("g[id=" + stageClusterId + "] g." + opClusterId).classed("barrier", true)
   });
 
   resizeSvg(svg);
@@ -216,7 +218,7 @@ function renderDagVizForJob(svgContainer) {
     var dot = metadata.select(".dot-file").text();
     var stageId = metadata.attr("stage-id");
     var containerId = VizConstants.graphPrefix + stageId;
-    var isSkipped = metadata.attr("skipped") == "true";
+    var isSkipped = metadata.attr("skipped") === "true";
     var container;
     if (isSkipped) {
       container = svgContainer
@@ -225,11 +227,8 @@ function renderDagVizForJob(svgContainer) {
         .attr("skipped", "true");
     } else {
       // Link each graph to the corresponding stage page (TODO: handle stage attempts)
-      // Use the link from the stage table so it also works for the history server
       var attemptId = 0;
-      var stageLink = d3.select("#stage-" + stageId + "-" + attemptId)
-        .select("a.name-link")
-        .attr("href");
+      var stageLink = uiRoot + appBasePath + "/stages/stage/?id=" + stageId + "&attempt=" + attemptId;
       container = svgContainer
         .append("a")
         .attr("xlink:href", stageLink)

core/src/main/resources/org/apache/spark/ui/static/webui.js

Lines changed: 6 additions & 1 deletion
@@ -16,11 +16,16 @@
  */
 
 var uiRoot = "";
+var appBasePath = "";
 
 function setUIRoot(val) {
   uiRoot = val;
 }
 
+function setAppBasePath(path) {
+  appBasePath = path;
+}
+
 function collapseTablePageLoad(name, table){
   if (window.localStorage.getItem(name) == "true") {
     // Set it to false so that the click function can revert it
@@ -33,7 +38,7 @@ function collapseTable(thisName, table){
   var status = window.localStorage.getItem(thisName) == "true";
   status = !status;
 
-  var thisClass = '.' + thisName
+  var thisClass = '.' + thisName;
 
   // Expand the list of additional metrics.
   var tableDiv = $(thisClass).parent().find('.' + table);

core/src/main/scala/org/apache/spark/ui/UIUtils.scala

Lines changed: 1 addition & 0 deletions
@@ -292,6 +292,7 @@ private[spark] object UIUtils extends Logging {
     <html>
       <head>
         {commonHeaderNodes(request)}
+        <script>setAppBasePath('{activeTab.basePath}')</script>
         {if (showVisualization) vizHeaderNodes(request) else Seq.empty}
         {if (useDataTables) dataTablesHeaderNodes(request) else Seq.empty}
         <link rel="shortcut icon"

core/src/test/scala/org/apache/spark/ui/RealBrowserUISeleniumSuite.scala

Lines changed: 50 additions & 0 deletions
@@ -78,6 +78,56 @@ abstract class RealBrowserUISeleniumSuite(val driverProp: String)
     }
   }
 
+  test("SPARK-31882: Link URL for Stage DAGs should not depend on paged table.") {
+    withSpark(newSparkContext()) { sc =>
+      sc.parallelize(1 to 100).map(v => (v, v)).repartition(10).reduceByKey(_ + _).collect
+
+      eventually(timeout(10.seconds), interval(50.microseconds)) {
+        val pathWithPagedTable =
+          "/jobs/job/?id=0&completedStage.page=2&completedStage.sort=Stage+Id&" +
+            "completedStage.desc=true&completedStage.pageSize=1#completed"
+        goToUi(sc, pathWithPagedTable)
+
+        // Open DAG Viz.
+        webDriver.findElement(By.id("job-dag-viz")).click()
+        val stages = webDriver.findElements(By.cssSelector("svg[class='job'] > a"))
+        stages.size() should be (3)
+
+        stages.get(0).getAttribute("href") should include ("/stages/stage/?id=0&attempt=0")
+        stages.get(1).getAttribute("href") should include ("/stages/stage/?id=1&attempt=0")
+        stages.get(2).getAttribute("href") should include ("/stages/stage/?id=2&attempt=0")
+      }
+    }
+  }
+
+  test("SPARK-31886: Color barrier execution mode RDD correctly") {
+    withSpark(newSparkContext()) { sc =>
+      sc.parallelize(1 to 10).barrier.mapPartitions(identity).repartition(1).collect()
+
+      eventually(timeout(10.seconds), interval(50.milliseconds)) {
+        goToUi(sc, "/jobs/job/?id=0")
+        webDriver.findElement(By.id("job-dag-viz")).click()
+
+        val stage0 = webDriver.findElement(By.cssSelector("g[id='graph_0']"))
+        val stage1 = webDriver.findElement(By.cssSelector("g[id='graph_1']"))
+        val barrieredOps = webDriver.findElements(By.className("barrier-rdd")).iterator()
+
+        while (barrieredOps.hasNext) {
+          val barrieredOpId = barrieredOps.next().getAttribute("innerHTML")
+          val foundInStage0 =
+            stage0.findElements(
+              By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId))
+          assert(foundInStage0.size === 1)
+
+          val foundInStage1 =
+            stage1.findElements(
+              By.cssSelector("g.barrier.cluster.cluster_" + barrieredOpId))
+          assert(foundInStage1.size === 0)
+        }
+      }
+    }
+  }
+
 /**
  * Create a test SparkContext with the SparkUI enabled.
  * It is safe to `get` the SparkUI directly from the SparkContext returned here.
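
As a usage note, these new cases live in the real-browser suite rather than the HtmlUnit-based one, so they need a concrete WebDriver. Assuming the existing Chrome-backed subclass (ChromeUISeleniumSuite) and its driver property are unchanged by this commit, a local run would look roughly like the sketch below; the chromedriver path is illustrative and the exact sbt flags may differ by branch:

# Assumed invocation; adjust the driver path and excluded-tags handling for your environment.
build/sbt -Dspark.test.webdriver.chrome.driver=/path/to/chromedriver \
  -Dtest.default.exclude.tags="" \
  "core/testOnly org.apache.spark.ui.ChromeUISeleniumSuite"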

dev/create-release/do-release.sh

Lines changed: 11 additions & 3 deletions
@@ -17,6 +17,8 @@
 # limitations under the License.
 #
 
+set -e
+
 SELF=$(cd $(dirname $0) && pwd)
 . "$SELF/release-util.sh"
 
@@ -52,9 +54,6 @@ function should_build {
 if should_build "tag" && [ $SKIP_TAG = 0 ]; then
   run_silent "Creating release tag $RELEASE_TAG..." "tag.log" \
     "$SELF/release-tag.sh"
-  echo "It may take some time for the tag to be synchronized to github."
-  echo "Press enter when you've verified that the new tag ($RELEASE_TAG) is available."
-  read
 else
   echo "Skipping tag creation for $RELEASE_TAG."
 fi
@@ -79,3 +78,12 @@ if should_build "publish"; then
 else
   echo "Skipping publish step."
 fi
+
+if should_build "tag" && [ $SKIP_TAG = 0 ]; then
+  git push origin $RELEASE_TAG
+  if [[ $RELEASE_TAG != *"preview"* ]]; then
+    git push origin HEAD:$GIT_BRANCH
+  else
+    echo "It's preview release. We only push $RELEASE_TAG to remote."
+  fi
+fi

dev/create-release/release-build.sh

Lines changed: 5 additions & 2 deletions
@@ -92,9 +92,12 @@ BASE_DIR=$(pwd)
 init_java
 init_maven_sbt
 
-rm -rf spark
-git clone "$ASF_REPO"
+# Only clone repo fresh if not present, otherwise use checkout from the tag step
+if [ ! -d spark ]; then
+  git clone "$ASF_REPO"
+fi
 cd spark
+git fetch
 git checkout $GIT_REF
 git_hash=`git rev-parse --short HEAD`
 echo "Checked out Spark git hash $git_hash"
dev/create-release/release-tag.sh

Lines changed: 3 additions & 13 deletions
@@ -25,6 +25,7 @@ function exit_with_usage {
   cat << EOF
 usage: $NAME
 Tags a Spark release on a particular branch.
+You must push the tags after.
 
 Inputs are specified with the following environment variables:
 ASF_USERNAME - Apache Username
@@ -105,19 +106,8 @@ sed -i".tmp7" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$R_NEXT_VERSION"
 
 git commit -a -m "Preparing development version $NEXT_VERSION"
 
-if ! is_dry_run; then
-  # Push changes
-  git push origin $RELEASE_TAG
-  if [[ $RELEASE_VERSION != *"preview"* ]]; then
-    git push origin HEAD:$GIT_BRANCH
-  else
-    echo "It's preview release. We only push $RELEASE_TAG to remote."
-  fi
-
-  cd ..
-  rm -rf spark
-else
-  cd ..
+cd ..
+if is_dry_run; then
   mv spark spark.tag
   echo "Clone with version changes and tag available as spark.tag in the output directory."
 fi

dev/deps/spark-deps-hadoop-2.7-hive-1.2

Lines changed: 1 addition & 1 deletion
@@ -208,4 +208,4 @@ xmlenc/0.52//xmlenc-0.52.jar
 xz/1.5//xz-1.5.jar
 zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar
 zookeeper/3.4.14//zookeeper-3.4.14.jar
-zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar
+zstd-jni/1.4.5-2//zstd-jni-1.4.5-2.jar

dev/deps/spark-deps-hadoop-2.7-hive-2.3

Lines changed: 1 addition & 1 deletion
@@ -222,4 +222,4 @@ xmlenc/0.52//xmlenc-0.52.jar
 xz/1.5//xz-1.5.jar
 zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar
 zookeeper/3.4.14//zookeeper-3.4.14.jar
-zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar
+zstd-jni/1.4.5-2//zstd-jni-1.4.5-2.jar
