
Commit e0180ba

chore: Cleanup assembly and shading (#617)
* Try new method for shading
* Fix tests
* Add exportJars
* Just use target dir
* Add comment
* Don't shade anything, just rename
* Don't actually need the extra project
1 parent ff243de commit e0180ba

4 files changed (+15, -32 lines)

build.sbt

Lines changed: 9 additions & 27 deletions
@@ -108,6 +108,8 @@ lazy val root = (project in file("."))
     commonSetting,
     name := "graphframes",
     moduleName := s"${name.value}-spark$sparkMajorVer",
+    // Export the JAR so that this can be excluded from shading in connect
+    exportJars := true,

     // Global settings
     Global / concurrentRestrictions := Seq(Tags.limitAll(1)),
@@ -116,16 +118,6 @@ lazy val root = (project in file("."))

     Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer",

-    // Assembly settings
-    assembly / test := {}, // No tests in assembly
-    assemblyPackageScala / assembleArtifact := false,
-    assembly / assemblyMergeStrategy := {
-      case PathList("META-INF", xs @ _*) => MergeStrategy.discard
-      case x if x.endsWith("module-info.class") => MergeStrategy.discard
-      case x =>
-        val oldStrategy = (assembly / assemblyMergeStrategy).value
-        oldStrategy(x)
-    },
     Test / packageBin / publishArtifact := false,
     Test / packageDoc / publishArtifact := false,
     Test / packageSrc / publishArtifact := false,
@@ -136,39 +128,29 @@ lazy val root = (project in file("."))
 lazy val connect = (project in file("graphframes-connect"))
   .dependsOn(root)
   .settings(
-    commonSetting,
     name := s"graphframes-connect",
     moduleName := s"${name.value}-spark${sparkMajorVer}",
+    commonSetting,
     Compile / unmanagedSourceDirectories += (Compile / baseDirectory).value / "src" / "main" / s"scala-spark-$sparkMajorVer",
     Compile / PB.targets := Seq(PB.gens.java -> (Compile / sourceManaged).value),
     Compile / PB.includePaths ++= Seq(file("src/main/protobuf")),
     PB.protocVersion := protocVersion,
+    PB.additionalDependencies := Nil,
     libraryDependencies ++= Seq(
       "org.apache.spark" %% "spark-connect" % sparkVer % "provided" cross CrossVersion.for3Use2_13),

     // Assembly and shading
+    assembly / assemblyJarName := s"${moduleName.value}_${(scalaBinaryVersion).value}-${version.value}.jar",
     assembly / test := {},
-    assemblyPackageScala / assembleArtifact := false,
     assembly / assemblyShadeRules := Seq(
       ShadeRule.rename("com.google.protobuf.**" -> protobufShadingPattern).inAll),
-    assembly / assemblyMergeStrategy := {
-      case PathList("google", "protobuf", xs @ _*) => MergeStrategy.discard
-      case PathList("META-INF", xs @ _*) => MergeStrategy.discard
-      case x if x.endsWith("module-info.class") => MergeStrategy.discard
-      case x => MergeStrategy.first
-    },
-    assembly / assemblyExcludedJars := (Compile / fullClasspath).value.filter { className =>
-      className.data
-        .getName()
-        .contains("scala-library-") || className.data
-        .getName()
-        .contains("slf4j-api-")
-    },
-    publish / skip := false,
+    // Don't actually shade anything, we just need to rename the protobuf packages to what's bundled with Spark
+    assembly / assemblyExcludedJars := (assembly / fullClasspath).value,
     Compile / packageBin := assembly.value,
     Test / packageBin / publishArtifact := false,
     Test / packageDoc / publishArtifact := false,
     Test / packageSrc / publishArtifact := false,
     Compile / packageBin / publishArtifact := true,
     Compile / packageDoc / publishArtifact := false,
-    Compile / packageSrc / publishArtifact := false)
+    Compile / packageSrc / publishArtifact := false
+  )
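Taken together, the new approach is: the root project sets exportJars := true so its packaged JAR (rather than a classes directory) lands on the connect module's classpath, where it can be excluded; the connect module then excludes every JAR on its assembly classpath, so the produced artifact is effectively just connect's own classes with protobuf references renamed to the copy Spark bundles. A minimal sketch of the resulting connect settings, assuming sbt-assembly; protobufShadingPattern is defined elsewhere in the real build, and the value shown below is only an illustrative assumption:

// Sketch only, not the full build definition.
val protobufShadingPattern = "org.sparkproject.connect.protobuf.@1" // assumed example value

lazy val connect = (project in file("graphframes-connect"))
  .dependsOn(root) // root has exportJars := true, so it shows up as a JAR that can be excluded
  .settings(
    // Exclude every JAR on the assembly classpath: nothing external is bundled,
    // so the "assembly" contains only this module's own classes.
    assembly / assemblyExcludedJars := (assembly / fullClasspath).value,
    // Rewrite references to com.google.protobuf to the package bundled with Spark.
    assembly / assemblyShadeRules := Seq(
      ShadeRule.rename("com.google.protobuf.**" -> protobufShadingPattern).inAll),
    // A plain `connect/package` now yields that renamed jar.
    Compile / packageBin := assembly.value)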

python/dev/build_jar.py

Lines changed: 2 additions & 2 deletions
@@ -16,9 +16,9 @@ def build(spark_versions: Sequence[str] = ["3.5.5"]):
         sbt_executable,
         f"-Dspark.version={spark_version}",
         "clean",
-        "assembly",
+        "package",
         "connect/clean",
-        "connect/assembly"
+        "connect/package"
     ]
     sbt_build = subprocess.Popen(
         sbt_build_command,

python/tests/conftest.py

Lines changed: 2 additions & 2 deletions
@@ -36,7 +36,7 @@ def get_gf_jar_locations() -> Tuple[str, str]:
     core_jar: Optional[str] = None
     connect_jar: Optional[str] = None

-    for pp in core_dir.glob("graphframes-assembly-*.jar"):
+    for pp in core_dir.glob(f"graphframes-spark{spark_major_version}*.jar"):
         assert isinstance(pp, pathlib.PosixPath)  # type checking
         core_jar = str(pp.absolute())

@@ -45,7 +45,7 @@ def get_gf_jar_locations() -> Tuple[str, str]:
            f"Failed to find graphframes jar for Spark {spark_major_version} in {core_dir}"
        )

-    for pp in connect_dir.glob("graphframes-connect-assembly-*.jar"):
+    for pp in connect_dir.glob(f"graphframes-connect-spark{spark_major_version}*.jar"):
         assert isinstance(pp, pathlib.PosixPath)  # type checking
         connect_jar = str(pp.absolute())
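The new globs line up with the jar names the build now produces: the core jar follows moduleName (graphframes-spark<major>) and the connect jar follows the assemblyJarName pattern above. A rough worked example of how that name expands, where the Scala binary version and build version are assumptions, not values taken from this repository:

// Illustrative expansion of the assemblyJarName setting above.
val connectModuleName = "graphframes-connect-spark3" // s"${name.value}-spark${sparkMajorVer}"
val scalaBinVersion = "2.12"                          // assumed
val buildVersion = "0.9.0"                            // assumed
val jarName = s"${connectModuleName}_${scalaBinVersion}-${buildVersion}.jar"
// => "graphframes-connect-spark3_2.12-0.9.0.jar", which the glob
// f"graphframes-connect-spark{spark_major_version}*.jar" above matches.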

src/test/scala/org/graphframes/ldbc/TestLDBCCases.scala

Lines changed: 2 additions & 1 deletion
@@ -11,11 +11,12 @@ import org.graphframes.GraphFrameTestSparkContext
 import org.graphframes.SparkFunSuite
 import org.graphframes.examples.LDBCUtils

+import java.io.File
 import java.nio.file._
 import java.util.Properties

 class TestLDBCCases extends SparkFunSuite with GraphFrameTestSparkContext {
-  private val resourcesPath = Paths.get(getClass().getResource("/").toURI())
+  private val resourcesPath = Path.of(new File("target").toURI())
   private val unreachableID = 9223372036854775807L

   private def readUndirectedUnweighted(pathPrefix: String): GraphFrame = {
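The test previously anchored its paths at the classpath resource root; per the commit notes it now simply uses the build's target directory, which presumably avoids depending on how the test classes are packaged. A small standalone sketch of the two resolutions (the printed path is illustrative; Path.of requires Java 11+):

import java.io.File
import java.nio.file._

// Old style: resolve against the test classpath root (depends on packaging/layout).
// val resourcesPath = Paths.get(getClass().getResource("/").toURI())

// New style: resolve against the project's target directory. File#toURI makes the
// relative path absolute against the current working directory before conversion.
val resourcesPath: Path = Path.of(new File("target").toURI())
println(resourcesPath) // e.g. /home/user/graphframes/target (illustrative)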
