From 7b569ae1318316129d4b0d46969b02324b18b0aa Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 27 Jul 2018 13:59:39 +0200 Subject: [PATCH 1/2] Bumping version of uniVocity parser up to 2.7.2 --- dev/deps/spark-deps-hadoop-2.6 | 2 +- dev/deps/spark-deps-hadoop-2.7 | 2 +- dev/deps/spark-deps-hadoop-3.1 | 2 +- sql/core/pom.xml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index ff6d5c30c1eb..852c69112a1c 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -191,7 +191,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-2.6.3.jar +univocity-parsers-2.7.2.jar validation-api-1.1.0.Final.jar xbean-asm6-shaded-4.8.jar xercesImpl-2.9.1.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 72a94f8953c6..05e0f4645533 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -192,7 +192,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-2.6.3.jar +univocity-parsers-2.7.2.jar validation-api-1.1.0.Final.jar xbean-asm6-shaded-4.8.jar xercesImpl-2.9.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index 3409dc461332..206724a01785 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -212,7 +212,7 @@ stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar token-provider-1.0.1.jar -univocity-parsers-2.6.3.jar +univocity-parsers-2.7.2.jar validation-api-1.1.0.Final.jar woodstox-core-5.0.3.jar xbean-asm6-shaded-4.8.jar diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 8873b00e7117..618c0a0021c8 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -38,7 +38,7 @@ com.univocity univocity-parsers - 2.6.3 + 2.7.2 jar From b116987d9a0adb887201177d41c1b94e6f5aeb63 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 27 Jul 2018 
15:25:11 +0200 Subject: [PATCH 2/2] Call uniVocity even if the set of selected columns is empty --- .../execution/datasources/csv/UnivocityParser.scala | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala index 79143cce4a38..beed9063b97a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala @@ -203,19 +203,11 @@ class UnivocityParser( } } - private val doParse = if (requiredSchema.nonEmpty) { - (input: String) => convert(tokenizer.parseLine(input)) - } else { - // If `columnPruning` enabled and partition attributes scanned only, - // `schema` gets empty. - (_: String) => InternalRow.empty - } - /** * Parses a single CSV string and turns it into either one resulting row or no row (if the * the record is malformed). */ - def parse(input: String): InternalRow = doParse(input) + def parse(input: String): InternalRow = convert(tokenizer.parseLine(input)) private val getToken = if (options.columnPruning) { (tokens: Array[String], index: Int) => tokens(index)