From f56b73223fbf765e408d9aef6565a2318f4836e3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 21 Sep 2018 00:04:30 +0800 Subject: [PATCH 1/4] Refactor WideSchemaBenchmark --- .../WideSchemaBenchmark-results.txt | 223 ++++++++------ .../benchmark/WideSchemaBenchmark.scala | 272 +++++++++--------- 2 files changed, 263 insertions(+), 232 deletions(-) diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt index 0b9f791ac85e..0224f0d73a7d 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt @@ -1,117 +1,152 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +================================================================================================ +parsing large select expressions +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz parsing large select: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 select expressions 2 / 4 0.0 2050147.0 1.0X -100 select expressions 6 / 7 0.0 6123412.0 0.3X -2500 select expressions 135 / 141 0.0 134623148.0 0.0X +1 select expressions 6 / 22 0.0 5645637.0 1.0X +100 select expressions 6 / 13 0.0 6046103.0 0.9X +2500 select expressions 172 / 271 0.0 171929312.0 0.0X + + +================================================================================================ +many column field read and write +================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 
+Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 16 / 18 6.3 158.6 1.0X -1 cols x 100000 rows (exec in-mem) 17 / 19 6.0 166.7 1.0X -1 cols x 100000 rows (read parquet) 24 / 26 4.3 235.1 0.7X -1 cols x 100000 rows (write parquet) 81 / 85 1.2 811.3 0.2X -100 cols x 1000 rows (read in-mem) 17 / 19 6.0 166.2 1.0X -100 cols x 1000 rows (exec in-mem) 25 / 27 4.0 249.2 0.6X -100 cols x 1000 rows (read parquet) 23 / 25 4.4 226.0 0.7X -100 cols x 1000 rows (write parquet) 83 / 87 1.2 831.0 0.2X -2500 cols x 40 rows (read in-mem) 132 / 137 0.8 1322.9 0.1X -2500 cols x 40 rows (exec in-mem) 326 / 330 0.3 3260.6 0.0X -2500 cols x 40 rows (read parquet) 831 / 839 0.1 8305.8 0.0X -2500 cols x 40 rows (write parquet) 237 / 245 0.4 2372.6 0.1X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +1 cols x 100000 rows (read in-mem) 27 / 47 3.7 267.8 1.0X +1 cols x 100000 rows (exec in-mem) 24 / 31 4.3 235.0 1.1X +1 cols x 100000 rows (read parquet) 312 / 385 0.3 3123.8 0.1X +1 cols x 100000 rows (write parquet) 195 / 217 0.5 1947.3 0.1X +100 cols x 1000 rows (read in-mem) 25 / 31 3.9 254.4 1.1X +100 cols x 1000 rows (exec in-mem) 32 / 38 3.1 318.5 0.8X +100 cols x 1000 rows (read parquet) 304 / 421 0.3 3043.0 0.1X +100 cols x 1000 rows (write parquet) 211 / 391 0.5 2111.9 0.1X +2500 cols x 40 rows (read in-mem) 338 / 542 0.3 3382.1 0.1X +2500 cols x 40 rows (exec in-mem) 573 / 680 0.2 5733.2 0.0X +2500 cols x 40 rows (read parquet) 1297 / 1509 0.1 12967.6 0.0X +2500 cols x 40 rows (write parquet) 407 / 452 0.2 4074.0 0.1X + + +================================================================================================ +wide shallowly nested struct field read and write 
+================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz wide shallowly nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 15 / 17 6.6 151.0 1.0X -1 wide x 100000 rows (exec in-mem) 20 / 22 5.1 196.6 0.8X -1 wide x 100000 rows (read parquet) 59 / 63 1.7 592.8 0.3X -1 wide x 100000 rows (write parquet) 81 / 87 1.2 814.6 0.2X -100 wide x 1000 rows (read in-mem) 21 / 25 4.8 208.7 0.7X -100 wide x 1000 rows (exec in-mem) 72 / 81 1.4 718.5 0.2X -100 wide x 1000 rows (read parquet) 75 / 85 1.3 752.6 0.2X -100 wide x 1000 rows (write parquet) 88 / 95 1.1 876.7 0.2X -2500 wide x 40 rows (read in-mem) 28 / 34 3.5 282.2 0.5X -2500 wide x 40 rows (exec in-mem) 1269 / 1284 0.1 12688.1 0.0X -2500 wide x 40 rows (read parquet) 549 / 578 0.2 5493.4 0.0X -2500 wide x 40 rows (write parquet) 96 / 104 1.0 959.1 0.2X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +1 wide x 100000 rows (read in-mem) 24 / 30 4.1 241.9 1.0X +1 wide x 100000 rows (exec in-mem) 28 / 39 3.6 280.3 0.9X +1 wide x 100000 rows (read parquet) 339 / 438 0.3 3392.7 0.1X +1 wide x 100000 rows (write parquet) 202 / 261 0.5 2021.5 0.1X +100 wide x 1000 rows (read in-mem) 44 / 108 2.3 438.1 0.6X +100 wide x 1000 rows (exec in-mem) 59 / 77 1.7 585.6 0.4X +100 wide x 1000 rows (read parquet) 578 / 740 0.2 5776.7 0.0X +100 wide x 1000 rows (write parquet) 216 / 259 0.5 2157.9 0.1X +2500 wide x 40 rows (read in-mem) 49 / 56 2.0 494.1 0.5X +2500 wide x 40 rows (exec in-mem) 291 / 304 0.3 2907.1 0.1X +2500 wide x 40 rows (read parquet) 2203 / 2269 0.0 22032.7 0.0X +2500 wide x 40 rows (write parquet) 212 / 237 0.5 2115.6 0.1X + + 
+================================================================================================ +deeply nested struct field read and write +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz deeply nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 14 / 16 7.0 143.8 1.0X -1 deep x 100000 rows (exec in-mem) 17 / 19 5.9 169.7 0.8X -1 deep x 100000 rows (read parquet) 33 / 35 3.1 327.0 0.4X -1 deep x 100000 rows (write parquet) 79 / 84 1.3 786.9 0.2X -100 deep x 1000 rows (read in-mem) 21 / 24 4.7 211.3 0.7X -100 deep x 1000 rows (exec in-mem) 221 / 235 0.5 2214.5 0.1X -100 deep x 1000 rows (read parquet) 1928 / 1952 0.1 19277.1 0.0X -100 deep x 1000 rows (write parquet) 91 / 96 1.1 909.5 0.2X -250 deep x 400 rows (read in-mem) 57 / 61 1.8 567.1 0.3X -250 deep x 400 rows (exec in-mem) 1329 / 1385 0.1 13291.8 0.0X -250 deep x 400 rows (read parquet) 36563 / 36750 0.0 365630.2 0.0X -250 deep x 400 rows (write parquet) 126 / 130 0.8 1262.0 0.1X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +1 deep x 100000 rows (read in-mem) 20 / 24 5.0 200.2 1.0X +1 deep x 100000 rows (exec in-mem) 23 / 25 4.4 226.9 0.9X +1 deep x 100000 rows (read parquet) 232 / 250 0.4 2324.0 0.1X +1 deep x 100000 rows (write parquet) 181 / 194 0.6 1811.6 0.1X +100 deep x 1000 rows (read in-mem) 41 / 47 2.4 412.0 0.5X +100 deep x 1000 rows (exec in-mem) 454 / 506 0.2 4541.5 0.0X +100 deep x 1000 rows (read parquet) 8463 / 8497 0.0 84630.4 0.0X +100 deep x 1000 rows (write parquet) 205 / 234 0.5 2049.7 0.1X +250 deep x 400 rows (read in-mem) 145 / 159 0.7 1451.1 0.1X +250 deep x 400 rows (exec in-mem) 2714 / 2757 0.0 27135.5 
0.0X +250 deep x 400 rows (read parquet) 116363 / 116465 0.0 1163631.4 0.0X +250 deep x 400 rows (write parquet) 322 / 389 0.3 3221.4 0.1X + + +================================================================================================ +bushy struct field read and write +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 x 1 deep x 100000 rows (read in-mem) 13 / 15 7.8 127.7 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 15 / 17 6.6 151.5 0.8X -1 x 1 deep x 100000 rows (read parquet) 20 / 23 5.0 198.3 0.6X -1 x 1 deep x 100000 rows (write parquet) 77 / 82 1.3 770.4 0.2X -128 x 8 deep x 1000 rows (read in-mem) 12 / 14 8.2 122.5 1.0X -128 x 8 deep x 1000 rows (exec in-mem) 124 / 140 0.8 1241.2 0.1X -128 x 8 deep x 1000 rows (read parquet) 69 / 74 1.4 693.9 0.2X -128 x 8 deep x 1000 rows (write parquet) 78 / 83 1.3 777.7 0.2X -1024 x 11 deep x 100 rows (read in-mem) 25 / 29 4.1 246.1 0.5X -1024 x 11 deep x 100 rows (exec in-mem) 1197 / 1223 0.1 11974.6 0.0X -1024 x 11 deep x 100 rows (read parquet) 426 / 433 0.2 4263.7 0.0X -1024 x 11 deep x 100 rows (write parquet) 91 / 98 1.1 913.5 0.1X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +1 x 1 deep x 100000 rows (read in-mem) 22 / 27 4.5 224.4 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 26 / 35 3.8 261.6 0.9X +1 x 1 deep x 100000 rows (read parquet) 225 / 256 0.4 2251.7 0.1X +1 x 1 deep x 100000 rows (write parquet) 190 / 208 0.5 1902.3 0.1X +128 x 8 deep x 1000 rows (read in-mem) 18 / 21 5.7 176.7 1.3X +128 x 8 deep x 1000 rows (exec in-mem) 58 / 72 1.7 576.7 0.4X +128 x 8 deep x 1000 rows (read parquet) 464 / 470 0.2 
4641.9 0.0X +128 x 8 deep x 1000 rows (write parquet) 178 / 198 0.6 1776.8 0.1X +1024 x 11 deep x 100 rows (read in-mem) 46 / 53 2.2 456.8 0.5X +1024 x 11 deep x 100 rows (exec in-mem) 236 / 247 0.4 2356.5 0.1X +1024 x 11 deep x 100 rows (read parquet) 1853 / 1862 0.1 18534.2 0.0X +1024 x 11 deep x 100 rows (write parquet) 218 / 229 0.5 2177.8 0.1X + + +================================================================================================ +wide array field read and write +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 14 / 16 7.0 143.2 1.0X -1 wide x 100000 rows (exec in-mem) 17 / 19 5.9 170.9 0.8X -1 wide x 100000 rows (read parquet) 43 / 46 2.3 434.1 0.3X -1 wide x 100000 rows (write parquet) 78 / 83 1.3 777.6 0.2X -100 wide x 1000 rows (read in-mem) 11 / 13 9.0 111.5 1.3X -100 wide x 1000 rows (exec in-mem) 13 / 15 7.8 128.3 1.1X -100 wide x 1000 rows (read parquet) 24 / 27 4.1 245.0 0.6X -100 wide x 1000 rows (write parquet) 74 / 80 1.4 740.5 0.2X -2500 wide x 40 rows (read in-mem) 11 / 13 9.1 109.5 1.3X -2500 wide x 40 rows (exec in-mem) 13 / 15 7.7 129.4 1.1X -2500 wide x 40 rows (read parquet) 24 / 26 4.1 241.3 0.6X -2500 wide x 40 rows (write parquet) 75 / 81 1.3 751.8 0.2X - -Java HotSpot(TM) 64-Bit Server VM 1.8.0_92-b14 on Mac OS X 10.11.6 -Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz +1 wide x 100000 rows (read in-mem) 20 / 24 5.0 200.7 1.0X +1 wide x 100000 rows (exec in-mem) 23 / 25 4.3 232.9 0.9X +1 wide x 100000 rows (read parquet) 243 / 258 0.4 2432.1 0.1X +1 wide x 100000 rows (write parquet) 182 / 200 0.5 1824.7 0.1X +100 wide x 1000 rows (read in-mem) 16 / 18 6.3 158.6 1.3X 
+100 wide x 1000 rows (exec in-mem) 18 / 20 5.4 184.9 1.1X +100 wide x 1000 rows (read parquet) 218 / 237 0.5 2184.5 0.1X +100 wide x 1000 rows (write parquet) 176 / 193 0.6 1763.7 0.1X +2500 wide x 40 rows (read in-mem) 16 / 19 6.4 157.5 1.3X +2500 wide x 40 rows (exec in-mem) 18 / 22 5.5 182.1 1.1X +2500 wide x 40 rows (read parquet) 229 / 321 0.4 2286.3 0.1X +2500 wide x 40 rows (write parquet) 178 / 219 0.6 1784.8 0.1X + + +================================================================================================ +wide map field read and write +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz wide map field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 16 / 18 6.2 162.6 1.0X -1 wide x 100000 rows (exec in-mem) 21 / 23 4.8 208.2 0.8X -1 wide x 100000 rows (read parquet) 54 / 59 1.8 543.6 0.3X -1 wide x 100000 rows (write parquet) 80 / 86 1.2 804.5 0.2X -100 wide x 1000 rows (read in-mem) 11 / 13 8.7 114.5 1.4X -100 wide x 1000 rows (exec in-mem) 14 / 16 7.0 143.5 1.1X -100 wide x 1000 rows (read parquet) 30 / 32 3.3 300.4 0.5X -100 wide x 1000 rows (write parquet) 75 / 80 1.3 749.9 0.2X -2500 wide x 40 rows (read in-mem) 13 / 15 7.8 128.1 1.3X -2500 wide x 40 rows (exec in-mem) 15 / 18 6.5 153.6 1.1X -2500 wide x 40 rows (read parquet) 30 / 33 3.3 304.4 0.5X -2500 wide x 40 rows (write parquet) 77 / 83 1.3 768.5 0.2X +1 wide x 100000 rows (read in-mem) 16 / 18 6.2 160.4 1.0X +1 wide x 100000 rows (exec in-mem) 21 / 22 4.8 207.0 0.8X +1 wide x 100000 rows (read parquet) 292 / 303 0.3 2921.9 0.1X +1 wide x 100000 rows (write parquet) 180 / 194 0.6 1796.7 0.1X +100 wide x 1000 rows (read in-mem) 11 / 13 8.9 111.9 1.4X +100 wide x 1000 rows (exec in-mem) 14 / 15 
7.1 141.3 1.1X +100 wide x 1000 rows (read parquet) 254 / 420 0.4 2540.8 0.1X +100 wide x 1000 rows (write parquet) 176 / 234 0.6 1764.8 0.1X +2500 wide x 40 rows (read in-mem) 13 / 15 7.6 132.1 1.2X +2500 wide x 40 rows (exec in-mem) 16 / 18 6.2 162.2 1.0X +2500 wide x 40 rows (read parquet) 238 / 257 0.4 2380.4 0.1X +2500 wide x 40 rows (write parquet) 174 / 184 0.6 1737.0 0.1X + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala index c368f17a8436..ffde2fecc1ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala @@ -17,22 +17,19 @@ package org.apache.spark.sql -import java.io.{File, FileOutputStream, OutputStream} +import java.io.File -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.functions._ -import org.apache.spark.util.{Benchmark, Utils} +import org.apache.spark.util.{Benchmark, BenchmarkBase => FileBenchmarkBase, Utils} /** * Benchmark for performance with very wide and nested DataFrames. - * To run this: - * build/sbt "sql/test-only *WideSchemaBenchmark" - * - * Results will be written to "sql/core/benchmarks/WideSchemaBenchmark-results.txt". + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/WideSchemaBenchmark-results.txt". 
*/ -class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { +object WideSchemaBenchmark extends FileBenchmarkBase { private val scaleFactor = 100000 private val widthsToTest = Seq(1, 100, 2500) private val depthsToTest = Seq(1, 100, 250) @@ -45,40 +42,19 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { import sparkSession.implicits._ - private var tmpFiles: List[File] = Nil - private var out: OutputStream = null - - override def beforeAll() { - super.beforeAll() - out = new FileOutputStream(new File("benchmarks/WideSchemaBenchmark-results.txt")) - } - - override def afterAll() { - try { - out.close() - } finally { - super.afterAll() - } - } - - override def afterEach() { - super.afterEach() - for (tmpFile <- tmpFiles) { - Utils.deleteRecursively(tmpFile) - } + def withTempDir(f: File => Unit): Unit = { + val dir = Utils.createTempDir().getCanonicalFile + try f(dir) finally Utils.deleteRecursively(dir) } /** * Writes the given DataFrame to parquet at a temporary location, and returns a DataFrame * backed by the written parquet files. 
*/ - private def saveAsParquet(df: DataFrame): DataFrame = { - val tmpFile = File.createTempFile("WideSchemaBenchmark", "tmp") - tmpFiles ::= tmpFile - tmpFile.delete() - df.write.parquet(tmpFile.getAbsolutePath) - assert(tmpFile.isDirectory()) - sparkSession.read.parquet(tmpFile.getAbsolutePath) + private def saveAsParquet(dir: File, df: DataFrame): DataFrame = { + df.write.mode(SaveMode.Overwrite).parquet(dir.getAbsolutePath) + assert(dir.isDirectory()) + sparkSession.read.parquet(dir.getAbsolutePath) } /** @@ -86,139 +62,159 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { */ private def addCases( benchmark: Benchmark, + dir: File, df: DataFrame, desc: String, selector: String): Unit = { + val readDir = File.createTempFile("read", "", dir) + val writeDir = File.createTempFile("write", "", dir) + benchmark.addCase(desc + " (read in-mem)") { iter => df.selectExpr(s"sum($selector)").collect() } benchmark.addCase(desc + " (exec in-mem)") { iter => df.selectExpr("*", s"hash($selector) as f").selectExpr(s"sum($selector)", "sum(f)").collect() } - val parquet = saveAsParquet(df) benchmark.addCase(desc + " (read parquet)") { iter => - parquet.selectExpr(s"sum($selector) as f").collect() + saveAsParquet(readDir, df).selectExpr(s"sum($selector) as f").collect() } benchmark.addCase(desc + " (write parquet)") { iter => - saveAsParquet(df.selectExpr(s"sum($selector) as f")) + saveAsParquet(writeDir, df.selectExpr(s"sum($selector) as f")) } } - ignore("parsing large select expressions") { - val benchmark = new Benchmark("parsing large select", 1, output = Some(out)) - for (width <- widthsToTest) { - val selectExpr = (1 to width).map(i => s"id as a_$i") - benchmark.addCase(s"$width select expressions") { iter => - sparkSession.range(1).toDF.selectExpr(selectExpr: _*) + override def benchmark(): Unit = { + runBenchmark("parsing large select expressions") { + withTempDir { dir => + val benchmark = new Benchmark("parsing large select", 1, output = 
output) + for (width <- widthsToTest) { + val selectExpr = (1 to width).map(i => s"id as a_$i") + benchmark.addCase(s"$width select expressions") { iter => + sparkSession.range(1).toDF.selectExpr(selectExpr: _*) + } + } + benchmark.run() } } - benchmark.run() - } - ignore("many column field read and write") { - val benchmark = new Benchmark("many column field r/w", scaleFactor, output = Some(out)) - for (width <- widthsToTest) { - // normalize by width to keep constant data size - val numRows = scaleFactor / width - val selectExpr = (1 to width).map(i => s"id as a_$i") - val df = sparkSession.range(numRows).toDF.selectExpr(selectExpr: _*).cache() - df.count() // force caching - addCases(benchmark, df, s"$width cols x $numRows rows", "a_1") + runBenchmark("many column field read and write") { + withTempDir { dir => + val benchmark = new Benchmark("many column field r/w", scaleFactor, output = output) + for (width <- widthsToTest) { + // normalize by width to keep constant data size + val numRows = scaleFactor / width + val selectExpr = (1 to width).map(i => s"id as a_$i") + val df = sparkSession.range(numRows).toDF.selectExpr(selectExpr: _*).cache() + df.count() // force caching + addCases(benchmark, dir, df, s"$width cols x $numRows rows", "a_1") + } + benchmark.run() + } } - benchmark.run() - } - ignore("wide shallowly nested struct field read and write") { - val benchmark = new Benchmark( - "wide shallowly nested struct field r/w", scaleFactor, output = Some(out)) - for (width <- widthsToTest) { - val numRows = scaleFactor / width - var datum: String = "{" - for (i <- 1 to width) { - if (i == 1) { - datum += s""""value_$i": 1""" - } else { - datum += s""", "value_$i": 1""" + runBenchmark("wide shallowly nested struct field read and write") { + withTempDir { dir => + val benchmark = new Benchmark( + "wide shallowly nested struct field r/w", scaleFactor, output = output) + for (width <- widthsToTest) { + val numRows = scaleFactor / width + var datum: String = "{" + 
for (i <- 1 to width) { + if (i == 1) { + datum += s""""value_$i": 1""" + } else { + datum += s""", "value_$i": 1""" + } + } + datum += "}" + datum = s"""{"a": {"b": {"c": $datum, "d": $datum}, "e": $datum}}""" + val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + df.count() // force caching + addCases(benchmark, dir, df, s"$width wide x $numRows rows", "a.b.c.value_1") } + benchmark.run() } - datum += "}" - datum = s"""{"a": {"b": {"c": $datum, "d": $datum}, "e": $datum}}""" - val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() - df.count() // force caching - addCases(benchmark, df, s"$width wide x $numRows rows", "a.b.c.value_1") } - benchmark.run() - } - ignore("deeply nested struct field read and write") { - val benchmark = new Benchmark("deeply nested struct field r/w", scaleFactor, output = Some(out)) - for (depth <- depthsToTest) { - val numRows = scaleFactor / depth - var datum: String = "{\"value\": 1}" - var selector: String = "value" - for (i <- 1 to depth) { - datum = "{\"value\": " + datum + "}" - selector = selector + ".value" + runBenchmark("deeply nested struct field read and write") { + withTempDir { dir => + val benchmark = + new Benchmark("deeply nested struct field r/w", scaleFactor, output = output) + for (depth <- depthsToTest) { + val numRows = scaleFactor / depth + var datum: String = "{\"value\": 1}" + var selector: String = "value" + for (i <- 1 to depth) { + datum = "{\"value\": " + datum + "}" + selector = selector + ".value" + } + val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + df.count() // force caching + addCases(benchmark, dir, df, s"$depth deep x $numRows rows", selector) + } + benchmark.run() } - val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() - df.count() // force caching - addCases(benchmark, df, s"$depth deep x $numRows rows", selector) } - benchmark.run() - } - ignore("bushy struct 
field read and write") { - val benchmark = new Benchmark("bushy struct field r/w", scaleFactor, output = Some(out)) - for (width <- Seq(1, 100, 1000)) { - val numRows = scaleFactor / width - var numNodes = 1 - var datum: String = "{\"value\": 1}" - var selector: String = "value" - var depth = 1 - while (numNodes < width) { - numNodes *= 2 - datum = s"""{"left_$depth": $datum, "right_$depth": $datum}""" - selector = s"left_$depth." + selector - depth += 1 + runBenchmark("bushy struct field read and write") { + withTempDir { dir => + val benchmark = new Benchmark("bushy struct field r/w", scaleFactor, output = output) + for (width <- Seq(1, 100, 1000)) { + val numRows = scaleFactor / width + var numNodes = 1 + var datum: String = "{\"value\": 1}" + var selector: String = "value" + var depth = 1 + while (numNodes < width) { + numNodes *= 2 + datum = s"""{"left_$depth": $datum, "right_$depth": $datum}""" + selector = s"left_$depth." + selector + depth += 1 + } + // TODO(ekl) seems like the json parsing is actually the majority of the time, perhaps + // we should benchmark that too separately. + val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + df.count() // force caching + addCases(benchmark, dir, df, s"$numNodes x $depth deep x $numRows rows", selector) + } + benchmark.run() } - // TODO(ekl) seems like the json parsing is actually the majority of the time, perhaps - // we should benchmark that too separately. 
- val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() - df.count() // force caching - addCases(benchmark, df, s"$numNodes x $depth deep x $numRows rows", selector) } - benchmark.run() - } - ignore("wide array field read and write") { - val benchmark = new Benchmark("wide array field r/w", scaleFactor, output = Some(out)) - for (width <- widthsToTest) { - val numRows = scaleFactor / width - var datum: String = "{\"value\": [" - for (i <- 1 to width) { - if (i == 1) { - datum += "1" - } else { - datum += ", 1" + runBenchmark("wide array field read and write") { + withTempDir { dir => + val benchmark = new Benchmark("wide array field r/w", scaleFactor, output = output) + for (width <- widthsToTest) { + val numRows = scaleFactor / width + var datum: String = "{\"value\": [" + for (i <- 1 to width) { + if (i == 1) { + datum += "1" + } else { + datum += ", 1" + } + } + datum += "]}" + val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + df.count() // force caching + addCases(benchmark, dir, df, s"$width wide x $numRows rows", "value[0]") } + benchmark.run() } - datum += "]}" - val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() - df.count() // force caching - addCases(benchmark, df, s"$width wide x $numRows rows", "value[0]") } - benchmark.run() - } - ignore("wide map field read and write") { - val benchmark = new Benchmark("wide map field r/w", scaleFactor, output = Some(out)) - for (width <- widthsToTest) { - val numRows = scaleFactor / width - val datum = Tuple1((1 to width).map(i => ("value_" + i -> 1)).toMap) - val df = sparkSession.range(numRows).map(_ => datum).toDF.cache() - df.count() // force caching - addCases(benchmark, df, s"$width wide x $numRows rows", "_1[\"value_1\"]") + runBenchmark("wide map field read and write") { + withTempDir { dir => + val benchmark = new Benchmark("wide map field r/w", scaleFactor, output = output) + for (width <- widthsToTest) { + 
val numRows = scaleFactor / width + val datum = Tuple1((1 to width).map(i => ("value_" + i -> 1)).toMap) + val df = sparkSession.range(numRows).map(_ => datum).toDF.cache() + df.count() // force caching + addCases(benchmark, dir, df, s"$width wide x $numRows rows", "_1[\"value_1\"]") + } + benchmark.run() + } } - benchmark.run() } } From e6f39f36b5d806f1afcea980ba43d544dadbe35f Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 6 Oct 2018 17:26:34 +0100 Subject: [PATCH 2/4] merge master --- .../spark/benchmark/BenchmarkBase.scala | 8 +- .../WideSchemaBenchmark-results.txt | 150 +++++++++--------- .../benchmark/WideSchemaBenchmark.scala | 133 +++++++++------- 3 files changed, 151 insertions(+), 140 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index 89e927e5784d..4a8a5d552a67 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -48,15 +48,11 @@ abstract class BenchmarkBase { if (!file.exists()) { file.createNewFile() } - output = Some(new FileOutputStream(file)) + output = Option(new FileOutputStream(file)) } runBenchmarkSuite() - output.foreach { o => - if (o != null) { - o.close() - } - } + output.foreach(_.close()) } } diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt index 0224f0d73a7d..52039d08bb34 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt @@ -7,9 +7,9 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz parsing large select: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 select expressions 6 / 22 0.0 5645637.0 1.0X -100 select expressions 6 / 13 0.0 6046103.0 0.9X -2500 select expressions 172 / 271 0.0 
171929312.0 0.0X +1 select expressions 2 / 5 0.0 2183931.0 1.0X +100 select expressions 4 / 5 0.0 3614440.0 0.6X +2500 select expressions 67 / 74 0.0 66814202.0 0.0X ================================================================================================ @@ -21,18 +21,18 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 27 / 47 3.7 267.8 1.0X -1 cols x 100000 rows (exec in-mem) 24 / 31 4.3 235.0 1.1X -1 cols x 100000 rows (read parquet) 312 / 385 0.3 3123.8 0.1X -1 cols x 100000 rows (write parquet) 195 / 217 0.5 1947.3 0.1X -100 cols x 1000 rows (read in-mem) 25 / 31 3.9 254.4 1.1X -100 cols x 1000 rows (exec in-mem) 32 / 38 3.1 318.5 0.8X -100 cols x 1000 rows (read parquet) 304 / 421 0.3 3043.0 0.1X -100 cols x 1000 rows (write parquet) 211 / 391 0.5 2111.9 0.1X -2500 cols x 40 rows (read in-mem) 338 / 542 0.3 3382.1 0.1X -2500 cols x 40 rows (exec in-mem) 573 / 680 0.2 5733.2 0.0X -2500 cols x 40 rows (read parquet) 1297 / 1509 0.1 12967.6 0.0X -2500 cols x 40 rows (write parquet) 407 / 452 0.2 4074.0 0.1X +1 cols x 100000 rows (read in-mem) 33 / 52 3.1 325.2 1.0X +1 cols x 100000 rows (exec in-mem) 35 / 76 2.9 348.2 0.9X +1 cols x 100000 rows (read parquet) 82 / 188 1.2 824.2 0.4X +1 cols x 100000 rows (write parquet) 407 / 571 0.2 4066.9 0.1X +100 cols x 1000 rows (read in-mem) 33 / 63 3.0 331.2 1.0X +100 cols x 1000 rows (exec in-mem) 39 / 75 2.6 390.9 0.8X +100 cols x 1000 rows (read parquet) 65 / 103 1.5 651.9 0.5X +100 cols x 1000 rows (write parquet) 237 / 271 0.4 2368.0 0.1X +2500 cols x 40 rows (read in-mem) 209 / 225 0.5 2090.8 0.2X +2500 cols x 40 rows (exec in-mem) 340 / 401 0.3 3400.2 0.1X +2500 cols x 40 rows (read parquet) 86 / 99 1.2 856.2 0.4X +2500 cols x 40 rows (write parquet) 315 / 329 0.3 3150.8 0.1X 
================================================================================================ @@ -44,18 +44,18 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz wide shallowly nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 24 / 30 4.1 241.9 1.0X -1 wide x 100000 rows (exec in-mem) 28 / 39 3.6 280.3 0.9X -1 wide x 100000 rows (read parquet) 339 / 438 0.3 3392.7 0.1X -1 wide x 100000 rows (write parquet) 202 / 261 0.5 2021.5 0.1X -100 wide x 1000 rows (read in-mem) 44 / 108 2.3 438.1 0.6X -100 wide x 1000 rows (exec in-mem) 59 / 77 1.7 585.6 0.4X -100 wide x 1000 rows (read parquet) 578 / 740 0.2 5776.7 0.0X -100 wide x 1000 rows (write parquet) 216 / 259 0.5 2157.9 0.1X -2500 wide x 40 rows (read in-mem) 49 / 56 2.0 494.1 0.5X -2500 wide x 40 rows (exec in-mem) 291 / 304 0.3 2907.1 0.1X -2500 wide x 40 rows (read parquet) 2203 / 2269 0.0 22032.7 0.0X -2500 wide x 40 rows (write parquet) 212 / 237 0.5 2115.6 0.1X +1 wide x 100000 rows (read in-mem) 23 / 28 4.3 235.0 1.0X +1 wide x 100000 rows (exec in-mem) 29 / 32 3.5 285.2 0.8X +1 wide x 100000 rows (read parquet) 97 / 103 1.0 968.3 0.2X +1 wide x 100000 rows (write parquet) 209 / 338 0.5 2087.1 0.1X +100 wide x 1000 rows (read in-mem) 44 / 82 2.3 435.5 0.5X +100 wide x 1000 rows (exec in-mem) 54 / 81 1.9 537.3 0.4X +100 wide x 1000 rows (read parquet) 138 / 282 0.7 1376.5 0.2X +100 wide x 1000 rows (write parquet) 247 / 378 0.4 2469.7 0.1X +2500 wide x 40 rows (read in-mem) 53 / 92 1.9 532.7 0.4X +2500 wide x 40 rows (exec in-mem) 240 / 252 0.4 2398.1 0.1X +2500 wide x 40 rows (read parquet) 1166 / 1171 0.1 11664.2 0.0X +2500 wide x 40 rows (write parquet) 227 / 291 0.4 2269.5 0.1X ================================================================================================ @@ -67,18 +67,18 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz deeply nested struct 
field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 20 / 24 5.0 200.2 1.0X -1 deep x 100000 rows (exec in-mem) 23 / 25 4.4 226.9 0.9X -1 deep x 100000 rows (read parquet) 232 / 250 0.4 2324.0 0.1X -1 deep x 100000 rows (write parquet) 181 / 194 0.6 1811.6 0.1X -100 deep x 1000 rows (read in-mem) 41 / 47 2.4 412.0 0.5X -100 deep x 1000 rows (exec in-mem) 454 / 506 0.2 4541.5 0.0X -100 deep x 1000 rows (read parquet) 8463 / 8497 0.0 84630.4 0.0X -100 deep x 1000 rows (write parquet) 205 / 234 0.5 2049.7 0.1X -250 deep x 400 rows (read in-mem) 145 / 159 0.7 1451.1 0.1X -250 deep x 400 rows (exec in-mem) 2714 / 2757 0.0 27135.5 0.0X -250 deep x 400 rows (read parquet) 116363 / 116465 0.0 1163631.4 0.0X -250 deep x 400 rows (write parquet) 322 / 389 0.3 3221.4 0.1X +1 deep x 100000 rows (read in-mem) 21 / 32 4.8 209.1 1.0X +1 deep x 100000 rows (exec in-mem) 24 / 33 4.2 235.9 0.9X +1 deep x 100000 rows (read parquet) 50 / 77 2.0 496.9 0.4X +1 deep x 100000 rows (write parquet) 248 / 310 0.4 2475.4 0.1X +100 deep x 1000 rows (read in-mem) 44 / 82 2.3 443.6 0.5X +100 deep x 1000 rows (exec in-mem) 543 / 793 0.2 5433.8 0.0X +100 deep x 1000 rows (read parquet) 8755 / 8936 0.0 87553.5 0.0X +100 deep x 1000 rows (write parquet) 216 / 365 0.5 2163.1 0.1X +250 deep x 400 rows (read in-mem) 154 / 168 0.6 1544.8 0.1X +250 deep x 400 rows (exec in-mem) 2617 / 2728 0.0 26172.1 0.0X +250 deep x 400 rows (read parquet) 113432 / 114016 0.0 1134316.8 0.0X +250 deep x 400 rows (write parquet) 329 / 361 0.3 3291.9 0.1X ================================================================================================ @@ -90,18 +90,18 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 x 1 
deep x 100000 rows (read in-mem) 22 / 27 4.5 224.4 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 26 / 35 3.8 261.6 0.9X -1 x 1 deep x 100000 rows (read parquet) 225 / 256 0.4 2251.7 0.1X -1 x 1 deep x 100000 rows (write parquet) 190 / 208 0.5 1902.3 0.1X -128 x 8 deep x 1000 rows (read in-mem) 18 / 21 5.7 176.7 1.3X -128 x 8 deep x 1000 rows (exec in-mem) 58 / 72 1.7 576.7 0.4X -128 x 8 deep x 1000 rows (read parquet) 464 / 470 0.2 4641.9 0.0X -128 x 8 deep x 1000 rows (write parquet) 178 / 198 0.6 1776.8 0.1X -1024 x 11 deep x 100 rows (read in-mem) 46 / 53 2.2 456.8 0.5X -1024 x 11 deep x 100 rows (exec in-mem) 236 / 247 0.4 2356.5 0.1X -1024 x 11 deep x 100 rows (read parquet) 1853 / 1862 0.1 18534.2 0.0X -1024 x 11 deep x 100 rows (write parquet) 218 / 229 0.5 2177.8 0.1X +1 x 1 deep x 100000 rows (read in-mem) 23 / 27 4.3 233.2 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 26 / 29 3.9 258.4 0.9X +1 x 1 deep x 100000 rows (read parquet) 36 / 40 2.8 359.1 0.6X +1 x 1 deep x 100000 rows (write parquet) 199 / 213 0.5 1987.1 0.1X +128 x 8 deep x 1000 rows (read in-mem) 19 / 22 5.4 186.9 1.2X +128 x 8 deep x 1000 rows (exec in-mem) 56 / 61 1.8 558.9 0.4X +128 x 8 deep x 1000 rows (read parquet) 200 / 209 0.5 2001.6 0.1X +128 x 8 deep x 1000 rows (write parquet) 193 / 207 0.5 1928.5 0.1X +1024 x 11 deep x 100 rows (read in-mem) 50 / 54 2.0 502.4 0.5X +1024 x 11 deep x 100 rows (exec in-mem) 214 / 220 0.5 2139.0 0.1X +1024 x 11 deep x 100 rows (read parquet) 1371 / 1378 0.1 13706.6 0.0X +1024 x 11 deep x 100 rows (write parquet) 230 / 245 0.4 2297.8 0.1X ================================================================================================ @@ -113,18 +113,18 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 20 / 24 5.0 200.7 1.0X -1 wide x 100000 rows (exec in-mem) 
23 / 25 4.3 232.9 0.9X -1 wide x 100000 rows (read parquet) 243 / 258 0.4 2432.1 0.1X -1 wide x 100000 rows (write parquet) 182 / 200 0.5 1824.7 0.1X -100 wide x 1000 rows (read in-mem) 16 / 18 6.3 158.6 1.3X -100 wide x 1000 rows (exec in-mem) 18 / 20 5.4 184.9 1.1X -100 wide x 1000 rows (read parquet) 218 / 237 0.5 2184.5 0.1X -100 wide x 1000 rows (write parquet) 176 / 193 0.6 1763.7 0.1X -2500 wide x 40 rows (read in-mem) 16 / 19 6.4 157.5 1.3X -2500 wide x 40 rows (exec in-mem) 18 / 22 5.5 182.1 1.1X -2500 wide x 40 rows (read parquet) 229 / 321 0.4 2286.3 0.1X -2500 wide x 40 rows (write parquet) 178 / 219 0.6 1784.8 0.1X +1 wide x 100000 rows (read in-mem) 20 / 42 4.9 203.4 1.0X +1 wide x 100000 rows (exec in-mem) 23 / 27 4.3 231.7 0.9X +1 wide x 100000 rows (read parquet) 55 / 66 1.8 554.9 0.4X +1 wide x 100000 rows (write parquet) 194 / 228 0.5 1942.9 0.1X +100 wide x 1000 rows (read in-mem) 16 / 25 6.1 164.0 1.2X +100 wide x 1000 rows (exec in-mem) 19 / 23 5.2 191.3 1.1X +100 wide x 1000 rows (read parquet) 40 / 49 2.5 403.7 0.5X +100 wide x 1000 rows (write parquet) 190 / 224 0.5 1902.7 0.1X +2500 wide x 40 rows (read in-mem) 17 / 22 5.8 171.1 1.2X +2500 wide x 40 rows (exec in-mem) 19 / 23 5.2 192.1 1.1X +2500 wide x 40 rows (read parquet) 39 / 44 2.6 389.2 0.5X +2500 wide x 40 rows (write parquet) 195 / 210 0.5 1953.7 0.1X ================================================================================================ @@ -136,17 +136,17 @@ Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz wide map field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 16 / 18 6.2 160.4 1.0X -1 wide x 100000 rows (exec in-mem) 21 / 22 4.8 207.0 0.8X -1 wide x 100000 rows (read parquet) 292 / 303 0.3 2921.9 0.1X -1 wide x 100000 rows (write parquet) 180 / 194 0.6 1796.7 0.1X -100 wide x 1000 rows (read in-mem) 11 / 13 8.9 111.9 1.4X -100 wide 
x 1000 rows (exec in-mem) 14 / 15 7.1 141.3 1.1X -100 wide x 1000 rows (read parquet) 254 / 420 0.4 2540.8 0.1X -100 wide x 1000 rows (write parquet) 176 / 234 0.6 1764.8 0.1X -2500 wide x 40 rows (read in-mem) 13 / 15 7.6 132.1 1.2X -2500 wide x 40 rows (exec in-mem) 16 / 18 6.2 162.2 1.0X -2500 wide x 40 rows (read parquet) 238 / 257 0.4 2380.4 0.1X -2500 wide x 40 rows (write parquet) 174 / 184 0.6 1737.0 0.1X +1 wide x 100000 rows (read in-mem) 17 / 18 6.0 165.3 1.0X +1 wide x 100000 rows (exec in-mem) 21 / 25 4.7 212.8 0.8X +1 wide x 100000 rows (read parquet) 80 / 85 1.3 798.4 0.2X +1 wide x 100000 rows (write parquet) 187 / 204 0.5 1867.8 0.1X +100 wide x 1000 rows (read in-mem) 12 / 14 8.3 120.7 1.4X +100 wide x 1000 rows (exec in-mem) 15 / 16 6.9 145.9 1.1X +100 wide x 1000 rows (read parquet) 46 / 51 2.2 461.2 0.4X +100 wide x 1000 rows (write parquet) 186 / 197 0.5 1862.2 0.1X +2500 wide x 40 rows (read in-mem) 14 / 15 7.4 135.7 1.2X +2500 wide x 40 rows (exec in-mem) 17 / 19 6.0 167.4 1.0X +2500 wide x 40 rows (read parquet) 46 / 51 2.2 462.9 0.4X +2500 wide x 40 rows (write parquet) 181 / 197 0.6 1807.6 0.1X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala index 81017a6d244f..50d9dcea7ddf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala @@ -15,59 +15,35 @@ * limitations under the License. 
*/ -package org.apache.spark.sql +package org.apache.spark.sql.execution.benchmark -import java.io.{File, FileOutputStream, OutputStream} +import java.io.File -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.SparkFunSuite import org.apache.spark.benchmark.Benchmark -import org.apache.spark.sql.functions._ +import org.apache.spark.sql.DataFrame import org.apache.spark.util.Utils /** * Benchmark for performance with very wide and nested DataFrames. - * To run this: - * build/sbt "sql/test-only *WideSchemaBenchmark" - * - * Results will be written to "sql/core/benchmarks/WideSchemaBenchmark-results.txt". + * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/WideSchemaBenchmark-results.txt". + * }}} */ -class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { +object WideSchemaBenchmark extends SqlBasedBenchmark { private val scaleFactor = 100000 private val widthsToTest = Seq(1, 100, 2500) private val depthsToTest = Seq(1, 100, 250) assert(scaleFactor > widthsToTest.max) - private lazy val sparkSession = SparkSession.builder - .master("local[1]") - .appName("microbenchmark") - .getOrCreate() - - import sparkSession.implicits._ + import spark.implicits._ private var tmpFiles: List[File] = Nil - private var out: OutputStream = null - - override def beforeAll() { - super.beforeAll() - out = new FileOutputStream(new File("benchmarks/WideSchemaBenchmark-results.txt")) - } - override def afterAll() { - try { - out.close() - } finally { - super.afterAll() - } - } - - override def afterEach() { - super.afterEach() - for (tmpFile <- tmpFiles) { - Utils.deleteRecursively(tmpFile) - } - } + private def deleteTmpFiles(): Unit = tmpFiles.foreach(Utils.deleteRecursively) /** * Writes the given DataFrame to parquet at a temporary location, and returns a 
DataFrame @@ -79,7 +55,7 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { tmpFile.delete() df.write.parquet(tmpFile.getAbsolutePath) assert(tmpFile.isDirectory()) - sparkSession.read.parquet(tmpFile.getAbsolutePath) + spark.read.parquet(tmpFile.getAbsolutePath) } /** @@ -105,33 +81,33 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { } } - ignore("parsing large select expressions") { - val benchmark = new Benchmark("parsing large select", 1, output = Some(out)) + def parsingLargeSelectExpressions(): Unit = { + val benchmark = new Benchmark("parsing large select", 1, output = output) for (width <- widthsToTest) { val selectExpr = (1 to width).map(i => s"id as a_$i") benchmark.addCase(s"$width select expressions") { iter => - sparkSession.range(1).toDF.selectExpr(selectExpr: _*) + spark.range(1).toDF.selectExpr(selectExpr: _*) } } benchmark.run() } - ignore("many column field read and write") { - val benchmark = new Benchmark("many column field r/w", scaleFactor, output = Some(out)) + def manyColumnFieldReadAndWrite(): Unit = { + val benchmark = new Benchmark("many column field r/w", scaleFactor, output = output) for (width <- widthsToTest) { // normalize by width to keep constant data size val numRows = scaleFactor / width val selectExpr = (1 to width).map(i => s"id as a_$i") - val df = sparkSession.range(numRows).toDF.selectExpr(selectExpr: _*).cache() + val df = spark.range(numRows).toDF.selectExpr(selectExpr: _*).cache() df.count() // force caching addCases(benchmark, df, s"$width cols x $numRows rows", "a_1") } benchmark.run() } - ignore("wide shallowly nested struct field read and write") { + def wideShallowlyNestedStructFieldReadAndWrite(): Unit = { val benchmark = new Benchmark( - "wide shallowly nested struct field r/w", scaleFactor, output = Some(out)) + "wide shallowly nested struct field r/w", scaleFactor, output = output) for (width <- widthsToTest) { val numRows = scaleFactor / width var datum: 
String = "{" @@ -144,15 +120,15 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { } datum += "}" datum = s"""{"a": {"b": {"c": $datum, "d": $datum}, "e": $datum}}""" - val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + val df = spark.read.json(spark.range(numRows).map(_ => datum)).cache() df.count() // force caching addCases(benchmark, df, s"$width wide x $numRows rows", "a.b.c.value_1") } benchmark.run() } - ignore("deeply nested struct field read and write") { - val benchmark = new Benchmark("deeply nested struct field r/w", scaleFactor, output = Some(out)) + def deeplyNestedStructFieldReadAndWrite(): Unit = { + val benchmark = new Benchmark("deeply nested struct field r/w", scaleFactor, output = output) for (depth <- depthsToTest) { val numRows = scaleFactor / depth var datum: String = "{\"value\": 1}" @@ -161,15 +137,15 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { datum = "{\"value\": " + datum + "}" selector = selector + ".value" } - val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + val df = spark.read.json(spark.range(numRows).map(_ => datum)).cache() df.count() // force caching addCases(benchmark, df, s"$depth deep x $numRows rows", selector) } benchmark.run() } - ignore("bushy struct field read and write") { - val benchmark = new Benchmark("bushy struct field r/w", scaleFactor, output = Some(out)) + def bushyStructFieldReadAndWrite(): Unit = { + val benchmark = new Benchmark("bushy struct field r/w", scaleFactor, output = output) for (width <- Seq(1, 100, 1000)) { val numRows = scaleFactor / width var numNodes = 1 @@ -184,15 +160,16 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { } // TODO(ekl) seems like the json parsing is actually the majority of the time, perhaps // we should benchmark that too separately. 
- val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + val df = spark.read.json(spark.range(numRows).map(_ => datum)).cache() df.count() // force caching addCases(benchmark, df, s"$numNodes x $depth deep x $numRows rows", selector) } benchmark.run() } - ignore("wide array field read and write") { - val benchmark = new Benchmark("wide array field r/w", scaleFactor, output = Some(out)) + + def wideArrayFieldReadAndWrite(): Unit = { + val benchmark = new Benchmark("wide array field r/w", scaleFactor, output = output) for (width <- widthsToTest) { val numRows = scaleFactor / width var datum: String = "{\"value\": [" @@ -204,22 +181,60 @@ class WideSchemaBenchmark extends SparkFunSuite with BeforeAndAfterEach { } } datum += "]}" - val df = sparkSession.read.json(sparkSession.range(numRows).map(_ => datum)).cache() + val df = spark.read.json(spark.range(numRows).map(_ => datum)).cache() df.count() // force caching addCases(benchmark, df, s"$width wide x $numRows rows", "value[0]") } benchmark.run() } - ignore("wide map field read and write") { - val benchmark = new Benchmark("wide map field r/w", scaleFactor, output = Some(out)) + def wideMapFieldReadAndWrite(): Unit = { + val benchmark = new Benchmark("wide map field r/w", scaleFactor, output = output) for (width <- widthsToTest) { val numRows = scaleFactor / width val datum = Tuple1((1 to width).map(i => ("value_" + i -> 1)).toMap) - val df = sparkSession.range(numRows).map(_ => datum).toDF.cache() + val df = spark.range(numRows).map(_ => datum).toDF.cache() df.count() // force caching addCases(benchmark, df, s"$width wide x $numRows rows", "_1[\"value_1\"]") } benchmark.run() } + + def runBenchmarkWithDeleteTmpFiles(benchmarkName: String)(func: => Any): Unit = { + runBenchmark(benchmarkName) { + func + } + deleteTmpFiles() + } + + override def runBenchmarkSuite(): Unit = { + + runBenchmarkWithDeleteTmpFiles("parsing large select expressions") { + parsingLargeSelectExpressions() + } + 
+ runBenchmarkWithDeleteTmpFiles("many column field read and write") { + manyColumnFieldReadAndWrite() + } + + runBenchmarkWithDeleteTmpFiles("wide shallowly nested struct field read and write") { + wideShallowlyNestedStructFieldReadAndWrite() + } + + runBenchmarkWithDeleteTmpFiles("deeply nested struct field read and write") { + deeplyNestedStructFieldReadAndWrite() + } + + runBenchmarkWithDeleteTmpFiles("bushy struct field read and write") { + bushyStructFieldReadAndWrite() + } + + runBenchmarkWithDeleteTmpFiles("wide array field read and write") { + wideArrayFieldReadAndWrite() + } + + runBenchmarkWithDeleteTmpFiles("wide map field read and write") { + wideMapFieldReadAndWrite() + } + } } From 82e2367a203ffc03dea9bf826a5085059e1391ed Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 18 Oct 2018 20:32:39 +0800 Subject: [PATCH 3/4] revert BenchmarkBase --- .../spark/benchmark/BenchmarkBase.scala | 8 +- .../WideSchemaBenchmark-results.txt | 157 +++++++++--------- .../benchmark/WideSchemaBenchmark.scala | 3 +- 3 files changed, 83 insertions(+), 85 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index 4a8a5d552a67..89e927e5784d 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -48,11 +48,15 @@ abstract class BenchmarkBase { if (!file.exists()) { file.createNewFile() } - output = Option(new FileOutputStream(file)) + output = Some(new FileOutputStream(file)) } runBenchmarkSuite() - output.foreach(_.close()) + output.foreach { o => + if (o != null) { + o.close() + } + } } } diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt index 52039d08bb34..0bc2c503f1fc 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt @@ 
-4,12 +4,11 @@ parsing large select expressions Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - parsing large select: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 select expressions 2 / 5 0.0 2183931.0 1.0X -100 select expressions 4 / 5 0.0 3614440.0 0.6X -2500 select expressions 67 / 74 0.0 66814202.0 0.0X +1 select expressions 2 / 4 0.0 1934953.0 1.0X +100 select expressions 4 / 5 0.0 3659399.0 0.5X +2500 select expressions 68 / 76 0.0 68278937.0 0.0X ================================================================================================ @@ -18,21 +17,20 @@ many column field read and write Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 33 / 52 3.1 325.2 1.0X -1 cols x 100000 rows (exec in-mem) 35 / 76 2.9 348.2 0.9X -1 cols x 100000 rows (read parquet) 82 / 188 1.2 824.2 0.4X -1 cols x 100000 rows (write parquet) 407 / 571 0.2 4066.9 0.1X -100 cols x 1000 rows (read in-mem) 33 / 63 3.0 331.2 1.0X -100 cols x 1000 rows (exec in-mem) 39 / 75 2.6 390.9 0.8X -100 cols x 1000 rows (read parquet) 65 / 103 1.5 651.9 0.5X -100 cols x 1000 rows (write parquet) 237 / 271 0.4 2368.0 0.1X -2500 cols x 40 rows (read in-mem) 209 / 225 0.5 2090.8 0.2X -2500 cols x 40 rows (exec in-mem) 340 / 401 0.3 3400.2 0.1X -2500 cols x 40 rows (read parquet) 86 / 99 1.2 856.2 0.4X -2500 cols x 40 rows (write parquet) 315 / 329 0.3 3150.8 0.1X +1 cols x 100000 rows (read in-mem) 22 / 25 4.6 219.4 1.0X +1 cols x 100000 rows (exec in-mem) 22 / 28 4.5 223.8 1.0X +1 cols x 100000 rows (read parquet) 45 / 49 2.2 449.6 0.5X +1 cols x 100000 rows (write 
parquet) 204 / 223 0.5 2044.4 0.1X +100 cols x 1000 rows (read in-mem) 26 / 28 3.9 255.8 0.9X +100 cols x 1000 rows (exec in-mem) 32 / 35 3.1 319.3 0.7X +100 cols x 1000 rows (read parquet) 45 / 52 2.2 445.9 0.5X +100 cols x 1000 rows (write parquet) 275 / 536 0.4 2746.1 0.1X +2500 cols x 40 rows (read in-mem) 261 / 434 0.4 2607.3 0.1X +2500 cols x 40 rows (exec in-mem) 624 / 701 0.2 6240.5 0.0X +2500 cols x 40 rows (read parquet) 196 / 301 0.5 1963.4 0.1X +2500 cols x 40 rows (write parquet) 687 / 1049 0.1 6870.6 0.0X ================================================================================================ @@ -41,21 +39,20 @@ wide shallowly nested struct field read and write Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - wide shallowly nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 23 / 28 4.3 235.0 1.0X -1 wide x 100000 rows (exec in-mem) 29 / 32 3.5 285.2 0.8X -1 wide x 100000 rows (read parquet) 97 / 103 1.0 968.3 0.2X -1 wide x 100000 rows (write parquet) 209 / 338 0.5 2087.1 0.1X -100 wide x 1000 rows (read in-mem) 44 / 82 2.3 435.5 0.5X -100 wide x 1000 rows (exec in-mem) 54 / 81 1.9 537.3 0.4X -100 wide x 1000 rows (read parquet) 138 / 282 0.7 1376.5 0.2X -100 wide x 1000 rows (write parquet) 247 / 378 0.4 2469.7 0.1X -2500 wide x 40 rows (read in-mem) 53 / 92 1.9 532.7 0.4X -2500 wide x 40 rows (exec in-mem) 240 / 252 0.4 2398.1 0.1X -2500 wide x 40 rows (read parquet) 1166 / 1171 0.1 11664.2 0.0X -2500 wide x 40 rows (write parquet) 227 / 291 0.4 2269.5 0.1X +1 wide x 100000 rows (read in-mem) 23 / 42 4.4 226.2 1.0X +1 wide x 100000 rows (exec in-mem) 29 / 53 3.5 288.5 0.8X +1 wide x 100000 rows (read parquet) 93 / 102 1.1 928.2 0.2X +1 wide x 100000 rows (write parquet) 201 / 222 0.5 2009.6 0.1X +100 wide x 1000 rows (read 
in-mem) 42 / 55 2.4 421.8 0.5X +100 wide x 1000 rows (exec in-mem) 55 / 113 1.8 547.0 0.4X +100 wide x 1000 rows (read parquet) 139 / 263 0.7 1390.6 0.2X +100 wide x 1000 rows (write parquet) 245 / 338 0.4 2450.9 0.1X +2500 wide x 40 rows (read in-mem) 51 / 72 2.0 511.7 0.4X +2500 wide x 40 rows (exec in-mem) 265 / 303 0.4 2654.8 0.1X +2500 wide x 40 rows (read parquet) 1285 / 1339 0.1 12845.1 0.0X +2500 wide x 40 rows (write parquet) 238 / 262 0.4 2378.8 0.1X ================================================================================================ @@ -64,21 +61,20 @@ deeply nested struct field read and write Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - deeply nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 21 / 32 4.8 209.1 1.0X -1 deep x 100000 rows (exec in-mem) 24 / 33 4.2 235.9 0.9X -1 deep x 100000 rows (read parquet) 50 / 77 2.0 496.9 0.4X -1 deep x 100000 rows (write parquet) 248 / 310 0.4 2475.4 0.1X -100 deep x 1000 rows (read in-mem) 44 / 82 2.3 443.6 0.5X -100 deep x 1000 rows (exec in-mem) 543 / 793 0.2 5433.8 0.0X -100 deep x 1000 rows (read parquet) 8755 / 8936 0.0 87553.5 0.0X -100 deep x 1000 rows (write parquet) 216 / 365 0.5 2163.1 0.1X -250 deep x 400 rows (read in-mem) 154 / 168 0.6 1544.8 0.1X -250 deep x 400 rows (exec in-mem) 2617 / 2728 0.0 26172.1 0.0X -250 deep x 400 rows (read parquet) 113432 / 114016 0.0 1134316.8 0.0X -250 deep x 400 rows (write parquet) 329 / 361 0.3 3291.9 0.1X +1 deep x 100000 rows (read in-mem) 20 / 24 5.1 197.9 1.0X +1 deep x 100000 rows (exec in-mem) 23 / 28 4.4 227.8 0.9X +1 deep x 100000 rows (read parquet) 50 / 58 2.0 500.1 0.4X +1 deep x 100000 rows (write parquet) 195 / 219 0.5 1945.1 0.1X +100 deep x 1000 rows (read in-mem) 39 / 57 2.5 393.1 0.5X +100 deep x 1000 rows (exec 
in-mem) 480 / 556 0.2 4795.7 0.0X +100 deep x 1000 rows (read parquet) 7943 / 7950 0.0 79427.5 0.0X +100 deep x 1000 rows (write parquet) 227 / 245 0.4 2267.6 0.1X +250 deep x 400 rows (read in-mem) 150 / 168 0.7 1500.1 0.1X +250 deep x 400 rows (exec in-mem) 2925 / 2979 0.0 29247.3 0.0X +250 deep x 400 rows (read parquet) 121815 / 128302 0.0 1218145.9 0.0X +250 deep x 400 rows (write parquet) 335 / 362 0.3 3351.9 0.1X ================================================================================================ @@ -87,21 +83,20 @@ bushy struct field read and write Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 x 1 deep x 100000 rows (read in-mem) 23 / 27 4.3 233.2 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 26 / 29 3.9 258.4 0.9X -1 x 1 deep x 100000 rows (read parquet) 36 / 40 2.8 359.1 0.6X -1 x 1 deep x 100000 rows (write parquet) 199 / 213 0.5 1987.1 0.1X -128 x 8 deep x 1000 rows (read in-mem) 19 / 22 5.4 186.9 1.2X -128 x 8 deep x 1000 rows (exec in-mem) 56 / 61 1.8 558.9 0.4X -128 x 8 deep x 1000 rows (read parquet) 200 / 209 0.5 2001.6 0.1X -128 x 8 deep x 1000 rows (write parquet) 193 / 207 0.5 1928.5 0.1X -1024 x 11 deep x 100 rows (read in-mem) 50 / 54 2.0 502.4 0.5X -1024 x 11 deep x 100 rows (exec in-mem) 214 / 220 0.5 2139.0 0.1X -1024 x 11 deep x 100 rows (read parquet) 1371 / 1378 0.1 13706.6 0.0X -1024 x 11 deep x 100 rows (write parquet) 230 / 245 0.4 2297.8 0.1X +1 x 1 deep x 100000 rows (read in-mem) 23 / 27 4.4 229.0 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 25 / 30 4.0 249.3 0.9X +1 x 1 deep x 100000 rows (read parquet) 35 / 40 2.8 351.1 0.7X +1 x 1 deep x 100000 rows (write parquet) 193 / 213 0.5 1929.8 0.1X +128 x 8 deep x 1000 rows (read in-mem) 18 / 21 5.6 179.2 1.3X +128 x 8 deep x 1000 rows (exec 
in-mem) 54 / 61 1.8 544.4 0.4X +128 x 8 deep x 1000 rows (read parquet) 195 / 212 0.5 1950.2 0.1X +128 x 8 deep x 1000 rows (write parquet) 195 / 203 0.5 1952.2 0.1X +1024 x 11 deep x 100 rows (read in-mem) 47 / 51 2.1 468.4 0.5X +1024 x 11 deep x 100 rows (exec in-mem) 210 / 219 0.5 2102.0 0.1X +1024 x 11 deep x 100 rows (read parquet) 1332 / 1367 0.1 13323.4 0.0X +1024 x 11 deep x 100 rows (write parquet) 223 / 241 0.4 2230.3 0.1X ================================================================================================ @@ -110,21 +105,20 @@ wide array field read and write Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 20 / 42 4.9 203.4 1.0X -1 wide x 100000 rows (exec in-mem) 23 / 27 4.3 231.7 0.9X -1 wide x 100000 rows (read parquet) 55 / 66 1.8 554.9 0.4X -1 wide x 100000 rows (write parquet) 194 / 228 0.5 1942.9 0.1X -100 wide x 1000 rows (read in-mem) 16 / 25 6.1 164.0 1.2X -100 wide x 1000 rows (exec in-mem) 19 / 23 5.2 191.3 1.1X -100 wide x 1000 rows (read parquet) 40 / 49 2.5 403.7 0.5X -100 wide x 1000 rows (write parquet) 190 / 224 0.5 1902.7 0.1X -2500 wide x 40 rows (read in-mem) 17 / 22 5.8 171.1 1.2X -2500 wide x 40 rows (exec in-mem) 19 / 23 5.2 192.1 1.1X -2500 wide x 40 rows (read parquet) 39 / 44 2.6 389.2 0.5X -2500 wide x 40 rows (write parquet) 195 / 210 0.5 1953.7 0.1X +1 wide x 100000 rows (read in-mem) 19 / 21 5.3 188.9 1.0X +1 wide x 100000 rows (exec in-mem) 23 / 29 4.3 232.0 0.8X +1 wide x 100000 rows (read parquet) 59 / 65 1.7 588.8 0.3X +1 wide x 100000 rows (write parquet) 200 / 217 0.5 1998.0 0.1X +100 wide x 1000 rows (read in-mem) 16 / 18 6.2 162.5 1.2X +100 wide x 1000 rows (exec in-mem) 19 / 21 5.4 185.2 1.0X +100 wide x 1000 rows (read parquet) 42 / 45 2.4 
415.6 0.5X +100 wide x 1000 rows (write parquet) 193 / 216 0.5 1928.5 0.1X +2500 wide x 40 rows (read in-mem) 16 / 19 6.2 162.4 1.2X +2500 wide x 40 rows (exec in-mem) 18 / 21 5.4 184.0 1.0X +2500 wide x 40 rows (read parquet) 40 / 44 2.5 398.7 0.5X +2500 wide x 40 rows (write parquet) 194 / 211 0.5 1943.6 0.1X ================================================================================================ @@ -133,20 +127,19 @@ wide map field read and write Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz - wide map field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 17 / 18 6.0 165.3 1.0X -1 wide x 100000 rows (exec in-mem) 21 / 25 4.7 212.8 0.8X -1 wide x 100000 rows (read parquet) 80 / 85 1.3 798.4 0.2X -1 wide x 100000 rows (write parquet) 187 / 204 0.5 1867.8 0.1X -100 wide x 1000 rows (read in-mem) 12 / 14 8.3 120.7 1.4X -100 wide x 1000 rows (exec in-mem) 15 / 16 6.9 145.9 1.1X -100 wide x 1000 rows (read parquet) 46 / 51 2.2 461.2 0.4X -100 wide x 1000 rows (write parquet) 186 / 197 0.5 1862.2 0.1X -2500 wide x 40 rows (read in-mem) 14 / 15 7.4 135.7 1.2X -2500 wide x 40 rows (exec in-mem) 17 / 19 6.0 167.4 1.0X -2500 wide x 40 rows (read parquet) 46 / 51 2.2 462.9 0.4X -2500 wide x 40 rows (write parquet) 181 / 197 0.6 1807.6 0.1X +1 wide x 100000 rows (read in-mem) 17 / 20 6.0 165.5 1.0X +1 wide x 100000 rows (exec in-mem) 21 / 25 4.7 214.3 0.8X +1 wide x 100000 rows (read parquet) 79 / 105 1.3 785.8 0.2X +1 wide x 100000 rows (write parquet) 196 / 240 0.5 1957.0 0.1X +100 wide x 1000 rows (read in-mem) 12 / 13 8.6 115.7 1.4X +100 wide x 1000 rows (exec in-mem) 15 / 17 6.8 147.8 1.1X +100 wide x 1000 rows (read parquet) 46 / 52 2.2 460.9 0.4X +100 wide x 1000 rows (write parquet) 184 / 202 0.5 1843.1 0.1X +2500 wide x 40 rows (read in-mem) 13 / 15 7.4 134.7 
1.2X +2500 wide x 40 rows (exec in-mem) 17 / 19 6.0 167.5 1.0X +2500 wide x 40 rows (read parquet) 46 / 51 2.2 461.0 0.4X +2500 wide x 40 rows (write parquet) 189 / 206 0.5 1887.0 0.1X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala index 50d9dcea7ddf..124661986ca0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala @@ -27,7 +27,8 @@ import org.apache.spark.util.Utils * Benchmark for performance with very wide and nested DataFrames. * To run this benchmark: * {{{ - * 1. without sbt: bin/spark-submit --class + * 1. without sbt: + * bin/spark-submit --class --jars * 2. build/sbt "sql/test:runMain " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " * Results will be written to "benchmarks/WideSchemaBenchmark-results.txt". 
From 64e5ede51fcc900d51256d421d86939b202f3d75 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 19 Oct 2018 18:22:28 -0700 Subject: [PATCH 4/4] Update result (#19) --- .../WideSchemaBenchmark-results.txt | 178 +++++++++--------- 1 file changed, 89 insertions(+), 89 deletions(-) diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt index 0bc2c503f1fc..6347a6ac6b67 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt @@ -2,144 +2,144 @@ parsing large select expressions ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz parsing large select: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 select expressions 2 / 4 0.0 1934953.0 1.0X -100 select expressions 4 / 5 0.0 3659399.0 0.5X -2500 select expressions 68 / 76 0.0 68278937.0 0.0X +1 select expressions 6 / 13 0.0 5997373.0 1.0X +100 select expressions 7 / 10 0.0 7204596.0 0.8X +2500 select expressions 103 / 107 0.0 102962705.0 0.1X ================================================================================================ many column field read and write ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz many column field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 22 / 25 4.6 219.4 1.0X -1 cols x 100000 rows (exec in-mem) 22 / 28 4.5 223.8 1.0X -1 cols x 100000 rows (read parquet) 45 / 49 2.2 449.6 0.5X -1 cols x 100000 rows (write parquet) 204 / 223 0.5 2044.4 0.1X -100 cols x 1000 rows (read in-mem) 26 / 28 3.9 255.8 0.9X -100 cols x 1000 rows (exec in-mem) 32 / 35 3.1 319.3 0.7X -100 cols x 1000 rows (read parquet) 45 / 52 2.2 445.9 0.5X -100 cols x 1000 rows (write parquet) 275 / 536 0.4 2746.1 0.1X -2500 cols x 40 rows (read in-mem) 261 / 434 0.4 2607.3 0.1X -2500 cols x 40 rows (exec in-mem) 624 / 701 0.2 6240.5 0.0X -2500 cols x 40 rows (read parquet) 196 / 301 0.5 1963.4 0.1X -2500 cols x 40 rows (write parquet) 687 / 1049 0.1 6870.6 0.0X +1 cols x 100000 rows (read in-mem) 40 / 51 2.5 396.5 1.0X +1 cols x 100000 rows (exec in-mem) 41 / 48 2.4 414.4 1.0X +1 cols x 100000 rows (read parquet) 61 / 70 1.6 610.2 0.6X +1 cols x 100000 rows (write parquet) 209 / 233 0.5 2086.1 0.2X +100 cols x 1000 rows (read in-mem) 43 / 49 2.3 433.8 0.9X +100 cols x 1000 rows (exec in-mem) 57 / 66 1.8 568.4 0.7X +100 cols x 1000 rows (read parquet) 60 / 66 1.7 599.0 0.7X +100 cols x 1000 rows (write parquet) 212 / 224 0.5 2120.6 0.2X +2500 cols x 40 rows (read in-mem) 268 / 275 0.4 2676.5 0.1X +2500 cols x 40 rows (exec in-mem) 494 / 504 0.2 4936.9 0.1X +2500 cols x 40 rows (read parquet) 132 / 139 0.8 1319.7 0.3X +2500 cols x 40 rows (write parquet) 371 / 381 0.3 3710.1 0.1X ================================================================================================ wide shallowly nested struct field read and write ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz wide shallowly nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 23 / 42 4.4 226.2 1.0X -1 wide x 100000 rows (exec in-mem) 29 / 53 3.5 288.5 0.8X -1 wide x 100000 rows (read parquet) 93 / 102 1.1 928.2 0.2X -1 wide x 100000 rows (write parquet) 201 / 222 0.5 2009.6 0.1X -100 wide x 1000 rows (read in-mem) 42 / 55 2.4 421.8 0.5X -100 wide x 1000 rows (exec in-mem) 55 / 113 1.8 547.0 0.4X -100 wide x 1000 rows (read parquet) 139 / 263 0.7 1390.6 0.2X -100 wide x 1000 rows (write parquet) 245 / 338 0.4 2450.9 0.1X -2500 wide x 40 rows (read in-mem) 51 / 72 2.0 511.7 0.4X -2500 wide x 40 rows (exec in-mem) 265 / 303 0.4 2654.8 0.1X -2500 wide x 40 rows (read parquet) 1285 / 1339 0.1 12845.1 0.0X -2500 wide x 40 rows (write parquet) 238 / 262 0.4 2378.8 0.1X +1 wide x 100000 rows (read in-mem) 37 / 43 2.7 373.6 1.0X +1 wide x 100000 rows (exec in-mem) 47 / 54 2.1 472.7 0.8X +1 wide x 100000 rows (read parquet) 132 / 145 0.8 1316.5 0.3X +1 wide x 100000 rows (write parquet) 205 / 232 0.5 2046.3 0.2X +100 wide x 1000 rows (read in-mem) 68 / 79 1.5 676.3 0.6X +100 wide x 1000 rows (exec in-mem) 88 / 97 1.1 882.2 0.4X +100 wide x 1000 rows (read parquet) 197 / 234 0.5 1971.8 0.2X +100 wide x 1000 rows (write parquet) 236 / 249 0.4 2359.6 0.2X +2500 wide x 40 rows (read in-mem) 77 / 85 1.3 768.0 0.5X +2500 wide x 40 rows (exec in-mem) 386 / 393 0.3 3855.2 0.1X +2500 wide x 40 rows (read parquet) 1741 / 1765 0.1 17408.3 0.0X +2500 wide x 40 rows (write parquet) 243 / 256 0.4 2425.2 0.2X ================================================================================================ deeply nested struct field read and write ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 
10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz deeply nested struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 20 / 24 5.1 197.9 1.0X -1 deep x 100000 rows (exec in-mem) 23 / 28 4.4 227.8 0.9X -1 deep x 100000 rows (read parquet) 50 / 58 2.0 500.1 0.4X -1 deep x 100000 rows (write parquet) 195 / 219 0.5 1945.1 0.1X -100 deep x 1000 rows (read in-mem) 39 / 57 2.5 393.1 0.5X -100 deep x 1000 rows (exec in-mem) 480 / 556 0.2 4795.7 0.0X -100 deep x 1000 rows (read parquet) 7943 / 7950 0.0 79427.5 0.0X -100 deep x 1000 rows (write parquet) 227 / 245 0.4 2267.6 0.1X -250 deep x 400 rows (read in-mem) 150 / 168 0.7 1500.1 0.1X -250 deep x 400 rows (exec in-mem) 2925 / 2979 0.0 29247.3 0.0X -250 deep x 400 rows (read parquet) 121815 / 128302 0.0 1218145.9 0.0X -250 deep x 400 rows (write parquet) 335 / 362 0.3 3351.9 0.1X +1 deep x 100000 rows (read in-mem) 35 / 42 2.9 350.2 1.0X +1 deep x 100000 rows (exec in-mem) 40 / 43 2.5 399.5 0.9X +1 deep x 100000 rows (read parquet) 69 / 73 1.4 691.6 0.5X +1 deep x 100000 rows (write parquet) 203 / 224 0.5 2025.9 0.2X +100 deep x 1000 rows (read in-mem) 70 / 75 1.4 703.7 0.5X +100 deep x 1000 rows (exec in-mem) 654 / 684 0.2 6539.9 0.1X +100 deep x 1000 rows (read parquet) 10503 / 10550 0.0 105030.5 0.0X +100 deep x 1000 rows (write parquet) 235 / 243 0.4 2353.2 0.1X +250 deep x 400 rows (read in-mem) 249 / 259 0.4 2492.6 0.1X +250 deep x 400 rows (exec in-mem) 3842 / 3854 0.0 38424.8 0.0X +250 deep x 400 rows (read parquet) 153080 / 153444 0.0 1530796.1 0.0X +250 deep x 400 rows (write parquet) 434 / 441 0.2 4344.6 0.1X ================================================================================================ bushy struct field read and write 
================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz bushy struct field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 x 1 deep x 100000 rows (read in-mem) 23 / 27 4.4 229.0 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 25 / 30 4.0 249.3 0.9X -1 x 1 deep x 100000 rows (read parquet) 35 / 40 2.8 351.1 0.7X -1 x 1 deep x 100000 rows (write parquet) 193 / 213 0.5 1929.8 0.1X -128 x 8 deep x 1000 rows (read in-mem) 18 / 21 5.6 179.2 1.3X -128 x 8 deep x 1000 rows (exec in-mem) 54 / 61 1.8 544.4 0.4X -128 x 8 deep x 1000 rows (read parquet) 195 / 212 0.5 1950.2 0.1X -128 x 8 deep x 1000 rows (write parquet) 195 / 203 0.5 1952.2 0.1X -1024 x 11 deep x 100 rows (read in-mem) 47 / 51 2.1 468.4 0.5X -1024 x 11 deep x 100 rows (exec in-mem) 210 / 219 0.5 2102.0 0.1X -1024 x 11 deep x 100 rows (read parquet) 1332 / 1367 0.1 13323.4 0.0X -1024 x 11 deep x 100 rows (write parquet) 223 / 241 0.4 2230.3 0.1X +1 x 1 deep x 100000 rows (read in-mem) 37 / 42 2.7 370.2 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 43 / 47 2.4 425.3 0.9X +1 x 1 deep x 100000 rows (read parquet) 48 / 51 2.1 478.7 0.8X +1 x 1 deep x 100000 rows (write parquet) 204 / 215 0.5 2042.0 0.2X +128 x 8 deep x 1000 rows (read in-mem) 32 / 37 3.1 318.6 1.2X +128 x 8 deep x 1000 rows (exec in-mem) 91 / 96 1.1 906.6 0.4X +128 x 8 deep x 1000 rows (read parquet) 351 / 379 0.3 3510.3 0.1X +128 x 8 deep x 1000 rows (write parquet) 199 / 203 0.5 1988.3 0.2X +1024 x 11 deep x 100 rows (read in-mem) 73 / 76 1.4 730.4 0.5X +1024 x 11 deep x 100 rows (exec in-mem) 327 / 334 0.3 3267.2 0.1X +1024 x 11 deep x 100 rows (read parquet) 2063 / 2078 0.0 
20629.2 0.0X +1024 x 11 deep x 100 rows (write parquet) 248 / 266 0.4 2475.1 0.1X ================================================================================================ wide array field read and write ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz wide array field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 19 / 21 5.3 188.9 1.0X -1 wide x 100000 rows (exec in-mem) 23 / 29 4.3 232.0 0.8X -1 wide x 100000 rows (read parquet) 59 / 65 1.7 588.8 0.3X -1 wide x 100000 rows (write parquet) 200 / 217 0.5 1998.0 0.1X -100 wide x 1000 rows (read in-mem) 16 / 18 6.2 162.5 1.2X -100 wide x 1000 rows (exec in-mem) 19 / 21 5.4 185.2 1.0X -100 wide x 1000 rows (read parquet) 42 / 45 2.4 415.6 0.5X -100 wide x 1000 rows (write parquet) 193 / 216 0.5 1928.5 0.1X -2500 wide x 40 rows (read in-mem) 16 / 19 6.2 162.4 1.2X -2500 wide x 40 rows (exec in-mem) 18 / 21 5.4 184.0 1.0X -2500 wide x 40 rows (read parquet) 40 / 44 2.5 398.7 0.5X -2500 wide x 40 rows (write parquet) 194 / 211 0.5 1943.6 0.1X +1 wide x 100000 rows (read in-mem) 33 / 38 3.0 328.4 1.0X +1 wide x 100000 rows (exec in-mem) 40 / 44 2.5 402.7 0.8X +1 wide x 100000 rows (read parquet) 83 / 91 1.2 826.6 0.4X +1 wide x 100000 rows (write parquet) 204 / 218 0.5 2039.1 0.2X +100 wide x 1000 rows (read in-mem) 28 / 31 3.6 277.2 1.2X +100 wide x 1000 rows (exec in-mem) 34 / 37 2.9 343.2 1.0X +100 wide x 1000 rows (read parquet) 56 / 61 1.8 556.4 0.6X +100 wide x 1000 rows (write parquet) 202 / 206 0.5 2017.3 0.2X +2500 wide x 40 rows (read in-mem) 29 / 30 3.5 286.4 1.1X +2500 wide x 40 rows (exec 
in-mem) 33 / 39 3.0 330.2 1.0X +2500 wide x 40 rows (read parquet) 54 / 66 1.8 544.0 0.6X +2500 wide x 40 rows (write parquet) 196 / 208 0.5 1959.2 0.2X ================================================================================================ wide map field read and write ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_151-b12 on Mac OS X 10.12.6 -Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz wide map field r/w: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 17 / 20 6.0 165.5 1.0X -1 wide x 100000 rows (exec in-mem) 21 / 25 4.7 214.3 0.8X -1 wide x 100000 rows (read parquet) 79 / 105 1.3 785.8 0.2X -1 wide x 100000 rows (write parquet) 196 / 240 0.5 1957.0 0.1X -100 wide x 1000 rows (read in-mem) 12 / 13 8.6 115.7 1.4X -100 wide x 1000 rows (exec in-mem) 15 / 17 6.8 147.8 1.1X -100 wide x 1000 rows (read parquet) 46 / 52 2.2 460.9 0.4X -100 wide x 1000 rows (write parquet) 184 / 202 0.5 1843.1 0.1X -2500 wide x 40 rows (read in-mem) 13 / 15 7.4 134.7 1.2X -2500 wide x 40 rows (exec in-mem) 17 / 19 6.0 167.5 1.0X -2500 wide x 40 rows (read parquet) 46 / 51 2.2 461.0 0.4X -2500 wide x 40 rows (write parquet) 189 / 206 0.5 1887.0 0.1X +1 wide x 100000 rows (read in-mem) 31 / 34 3.3 305.7 1.0X +1 wide x 100000 rows (exec in-mem) 39 / 44 2.6 390.0 0.8X +1 wide x 100000 rows (read parquet) 125 / 132 0.8 1250.5 0.2X +1 wide x 100000 rows (write parquet) 198 / 213 0.5 1979.9 0.2X +100 wide x 1000 rows (read in-mem) 21 / 23 4.7 212.7 1.4X +100 wide x 1000 rows (exec in-mem) 28 / 32 3.5 283.3 1.1X +100 wide x 1000 rows (read parquet) 68 / 73 1.5 683.0 0.4X +100 wide x 1000 rows (write parquet) 188 / 206 0.5 1882.1 0.2X +2500 wide x 40 
rows (read in-mem) 25 / 28 4.0 252.2 1.2X +2500 wide x 40 rows (exec in-mem) 32 / 34 3.1 318.5 1.0X +2500 wide x 40 rows (read parquet) 69 / 73 1.4 691.5 0.4X +2500 wide x 40 rows (write parquet) 193 / 202 0.5 1932.8 0.2X