appveyor.yml (1 addition, 1 deletion)
@@ -53,7 +53,7 @@ install:
build_script:
# '-Djna.nosys=true' is required to avoid kernel32.dll load failure.
# See SPARK-28759.
- cmd: mvn -DskipTests -Psparkr -Phive -Phive-1.2 -Djna.nosys=true package
- cmd: mvn -DskipTests -Psparkr -Phive -Djna.nosys=true package

environment:
NOT_CRAN: true
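With -Phive-1.2 dropped here, the AppVeyor build presumably falls back to the default Hive 2.3 execution profile, consistent with the new hive2.3 default in dev/run-tests.py below; the old combination should still be reproducible locally by passing the profile explicitly (e.g. mvn -DskipTests -Psparkr -Phive -Phive-1.2 -Djna.nosys=true package).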
dev/run-tests-jenkins.py (5 additions, 0 deletions)
@@ -182,6 +182,11 @@ def main():
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"
if "test-hadoop3.2" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
# Switch the Hive profile based on the PR title:
if "test-hive1.2" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
if "test-hive2.3" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"
Member Author
cc @shaneknapp, maybe we can use this environment variable later when we consider setting up the Jenkins jobs for Hive 1.2/2.3 + Hadoop 2.7/3.2 + JDK 8/11 combinations.

The env name looks a bit odd given AMPLAB_JENKINS_BUILD_PROFILE. We might have to rename it later.

Contributor
sorry, i missed this over the november holiday. yes, i've had a desire for years to rename the AMPLAB_* variables as the amplab project ended in december 2016. :)

i'll do this after the 3.0 cut. no time like the present!


build_display_name = os.environ["BUILD_DISPLAY_NAME"]
build_url = os.environ["BUILD_URL"]
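As an illustration of how the new switch is meant to be used, here is a minimal sketch of the title-based selection; the PR title below is hypothetical, and ghprb_pull_title is shown as a plain variable rather than read from the Jenkins environment:

import os

# Hypothetical PR title; the bracketed test-* tags are what the script scans for.
ghprb_pull_title = "[SPARK-XXXXX][SQL][test-hadoop3.2][test-hive2.3] Some change"

if "test-hive1.2" in ghprb_pull_title:
    os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
if "test-hive2.3" in ghprb_pull_title:
    os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"

# dev/run-tests.py later reads this variable and falls back to "hive2.3" when it is unset.
print(os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3"))  # prints: hive2.3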
dev/run-tests.py (50 additions, 28 deletions)
@@ -179,7 +179,8 @@ def run_apache_rat_checks():
run_cmd([os.path.join(SPARK_HOME, "dev", "check-license")])


def run_scala_style_checks(build_profiles):
def run_scala_style_checks(extra_profiles):
build_profiles = extra_profiles + modules.root.build_profile_flags
set_title_and_block("Running Scala style checks", "BLOCK_SCALA_STYLE")
profiles = " ".join(build_profiles)
print("[info] Checking Scala style using SBT with these profiles: ", profiles)
@@ -283,8 +284,8 @@ def get_hadoop_profiles(hadoop_version):
"""

sbt_maven_hadoop_profiles = {
"hadoop2.7": ["-Phadoop-2.7", "-Phive-1.2"],
"hadoop3.2": ["-Phadoop-3.2", "-Phive-2.3"],
"hadoop2.7": ["-Phadoop-2.7"],
"hadoop3.2": ["-Phadoop-3.2"],
}

if hadoop_version in sbt_maven_hadoop_profiles:
@@ -295,9 +296,28 @@ def get_hadoop_profiles(hadoop_version):
sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))


def build_spark_maven(hadoop_version):
def get_hive_profiles(hive_version):
"""
For the given Hive version tag, return a list of Maven/SBT profile flags for
building and testing against that Hive version.
"""

sbt_maven_hive_profiles = {
"hive1.2": ["-Phive-1.2"],
"hive2.3": ["-Phive-2.3"],
}

if hive_version in sbt_maven_hive_profiles:
return sbt_maven_hive_profiles[hive_version]
else:
print("[error] Could not find", hive_version, "in the list. Valid options are",
sbt_maven_hive_profiles.keys())
sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))


def build_spark_maven(extra_profiles):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
build_profiles = extra_profiles + modules.root.build_profile_flags
mvn_goals = ["clean", "package", "-DskipTests"]
profiles_and_goals = build_profiles + mvn_goals

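A quick usage sketch of the new helper; the return values follow directly from the mapping above:

get_hive_profiles("hive1.2")  # returns ["-Phive-1.2"]
get_hive_profiles("hive2.3")  # returns ["-Phive-2.3"]
get_hive_profiles("hive3.0")  # prints the "[error] Could not find ..." message and exits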
@@ -306,9 +326,9 @@ def build_spark_maven(hadoop_version):
exec_maven(profiles_and_goals)


def build_spark_sbt(hadoop_version):
def build_spark_sbt(extra_profiles):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
build_profiles = extra_profiles + modules.root.build_profile_flags
sbt_goals = ["test:package", # Build test jars as some tests depend on them
"streaming-kinesis-asl-assembly/assembly"]
profiles_and_goals = build_profiles + sbt_goals
@@ -318,10 +338,10 @@ def build_spark_sbt(hadoop_version):
exec_sbt(profiles_and_goals)


def build_spark_unidoc_sbt(hadoop_version):
def build_spark_unidoc_sbt(extra_profiles):
set_title_and_block("Building Unidoc API Documentation", "BLOCK_DOCUMENTATION")
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
build_profiles = extra_profiles + modules.root.build_profile_flags
sbt_goals = ["unidoc"]
profiles_and_goals = build_profiles + sbt_goals

@@ -331,9 +351,9 @@ def build_spark_sbt(hadoop_version):
exec_sbt(profiles_and_goals)


def build_spark_assembly_sbt(hadoop_version, checkstyle=False):
def build_spark_assembly_sbt(extra_profiles, checkstyle=False):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
build_profiles = extra_profiles + modules.root.build_profile_flags
sbt_goals = ["assembly/package"]
profiles_and_goals = build_profiles + sbt_goals
print("[info] Building Spark assembly using SBT with these arguments: ",
@@ -343,25 +363,25 @@ def build_spark_assembly_sbt(hadoop_version, checkstyle=False):
if checkstyle:
run_java_style_checks(build_profiles)

build_spark_unidoc_sbt(hadoop_version)
build_spark_unidoc_sbt(extra_profiles)


def build_apache_spark(build_tool, hadoop_version):
"""Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
`maven`). Defaults to using `sbt`."""
def build_apache_spark(build_tool, extra_profiles):
"""Will build Spark with the extra profiles and the passed in build tool
(either `sbt` or `maven`). Defaults to using `sbt`."""

set_title_and_block("Building Spark", "BLOCK_BUILD")

rm_r("lib_managed")

if build_tool == "maven":
build_spark_maven(hadoop_version)
build_spark_maven(extra_profiles)
else:
build_spark_sbt(hadoop_version)
build_spark_sbt(extra_profiles)


def detect_binary_inop_with_mima(hadoop_version):
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
def detect_binary_inop_with_mima(extra_profiles):
build_profiles = extra_profiles + modules.root.build_profile_flags
set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA")
profiles = " ".join(build_profiles)
print("[info] Detecting binary incompatibilities with MiMa using SBT with these profiles: ",
@@ -395,14 +415,14 @@ def run_scala_tests_sbt(test_modules, test_profiles):
exec_sbt(profiles_and_goals)


def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags):
def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags):
"""Function to properly execute all tests passed in as a set from the
`determine_test_suites` function"""
set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")

test_modules = set(test_modules)

test_profiles = get_hadoop_profiles(hadoop_version) + \
test_profiles = extra_profiles + \
list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules)))

if excluded_tags:
@@ -555,6 +575,7 @@ def main():
# to reflect the environment settings
build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7")
hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
test_env = "amplab_jenkins"
# add path for Python3 in Jenkins if we're calling from a Jenkins machine
# TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be:
@@ -564,10 +585,12 @@
# else we're running locally and can use local settings
build_tool = "sbt"
hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
test_env = "local"

print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
"under environment", test_env)
"and Hive profile", hive_version, "under environment", test_env)
extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)

changed_modules = None
changed_files = None
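For concreteness, the extra_profiles list computed above resolves as follows for the two Jenkins Hadoop profiles, assuming the default hive2.3:

get_hadoop_profiles("hadoop2.7") + get_hive_profiles("hive2.3")  # ["-Phadoop-2.7", "-Phive-2.3"]
get_hadoop_profiles("hadoop3.2") + get_hive_profiles("hive2.3")  # ["-Phadoop-3.2", "-Phive-2.3"]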
@@ -601,8 +624,7 @@ def main():
if not changed_files or any(f.endswith(".scala")
or f.endswith("scalastyle-config.xml")
for f in changed_files):
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
run_scala_style_checks(build_profiles)
run_scala_style_checks(extra_profiles)
should_run_java_style_checks = False
if not changed_files or any(f.endswith(".java")
or f.endswith("checkstyle.xml")
Expand Down Expand Up @@ -630,18 +652,18 @@ def main():
run_build_tests()

# spark build
build_apache_spark(build_tool, hadoop_version)
build_apache_spark(build_tool, extra_profiles)

# backwards compatibility checks
if build_tool == "sbt":
# Note: compatibility tests only supported in sbt for now
detect_binary_inop_with_mima(hadoop_version)
detect_binary_inop_with_mima(extra_profiles)
# Since we did not build assembly/package before running dev/mima, we need to
# do it here because the tests still rely on it; see SPARK-13294 for details.
build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks)
build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

# run the test suites
run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags)

modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
if modules_with_python_tests:
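For local runs the Hive profile is now selected the same way as the Hadoop profile, via an environment variable: invoking dev/run-tests.py with HIVE_PROFILE=hive1.2 set should build and test against Hive 1.2, while leaving it unset keeps the hive2.3 default (this usage follows from the HIVE_PROFILE lookup above).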
project/SparkBuild.scala (2 additions, 1 deletion)
@@ -476,7 +476,8 @@ object SparkParallelTestGrouping {
"org.apache.spark.ml.classification.LinearSVCSuite",
"org.apache.spark.sql.SQLQueryTestSuite",
"org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite",
"org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite"
"org.apache.spark.sql.hive.thriftserver.SparkSQLEnvSuite",
"org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite"
Member Author
This fixes the test failure against Hive 2.3 (tested in #26706); however, I have no explicit evidence. It was just a guess on my part, and it worked.

Member
I'm still seeing failures like in https://amplab.cs.berkeley.edu/jenkins/job/NewSparkPullRequestBuilder/4953/testReport/ - should we make all the thriftserver tests single-threaded?

Member Author
Ah yeah. Maybe that's a better idea. I'll monitor a bit more and make a PR soon.

)

private val DEFAULT_TEST_GROUP = "default_test_group"