Skip to content

Commit 4ec5d93

Browse files
HyukjinKwon and rxin
authored and committed
[SPARK-15148][SQL] Upgrade Univocity library from 2.0.2 to 2.1.0
## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-15148 Mainly, it improves performance by roughly 30%-40% according to the [release note](https://github.com/uniVocity/univocity-parsers/releases/tag/v2.1.0). The details of the purpose are described in the JIRA. This PR upgrades the Univocity library from 2.0.2 to 2.1.0. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwon <[email protected]> Closes #12923 from HyukjinKwon/SPARK-15148. (cherry picked from commit ac12b35) Signed-off-by: Reynold Xin <[email protected]>
1 parent 3468111 commit 4ec5d93

File tree

7 files changed

+8
-8
lines changed

7 files changed

+8
-8
lines changed

dev/deps/spark-deps-hadoop-2.2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ stax-api-1.0.1.jar
158158
stream-2.7.0.jar
159159
stringtemplate-3.2.1.jar
160160
super-csv-2.2.0.jar
161-
univocity-parsers-2.0.2.jar
161+
univocity-parsers-2.1.0.jar
162162
validation-api-1.1.0.Final.jar
163163
xbean-asm5-shaded-4.4.jar
164164
xmlenc-0.52.jar

dev/deps/spark-deps-hadoop-2.3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ stax-api-1.0.1.jar
166166
stream-2.7.0.jar
167167
stringtemplate-3.2.1.jar
168168
super-csv-2.2.0.jar
169-
univocity-parsers-2.0.2.jar
169+
univocity-parsers-2.1.0.jar
170170
validation-api-1.1.0.Final.jar
171171
xbean-asm5-shaded-4.4.jar
172172
xmlenc-0.52.jar

dev/deps/spark-deps-hadoop-2.4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ stax-api-1.0.1.jar
166166
stream-2.7.0.jar
167167
stringtemplate-3.2.1.jar
168168
super-csv-2.2.0.jar
169-
univocity-parsers-2.0.2.jar
169+
univocity-parsers-2.1.0.jar
170170
validation-api-1.1.0.Final.jar
171171
xbean-asm5-shaded-4.4.jar
172172
xmlenc-0.52.jar

dev/deps/spark-deps-hadoop-2.6

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ stax-api-1.0.1.jar
174174
stream-2.7.0.jar
175175
stringtemplate-3.2.1.jar
176176
super-csv-2.2.0.jar
177-
univocity-parsers-2.0.2.jar
177+
univocity-parsers-2.1.0.jar
178178
validation-api-1.1.0.Final.jar
179179
xbean-asm5-shaded-4.4.jar
180180
xercesImpl-2.9.1.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ stax-api-1.0.1.jar
175175
stream-2.7.0.jar
176176
stringtemplate-3.2.1.jar
177177
super-csv-2.2.0.jar
178-
univocity-parsers-2.0.2.jar
178+
univocity-parsers-2.1.0.jar
179179
validation-api-1.1.0.Final.jar
180180
xbean-asm5-shaded-4.4.jar
181181
xercesImpl-2.9.1.jar

sql/core/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
<dependency>
4040
<groupId>com.univocity</groupId>
4141
<artifactId>univocity-parsers</artifactId>
42-
<version>2.0.2</version>
42+
<version>2.1.0</version>
4343
<type>jar</type>
4444
</dependency>
4545
<dependency>

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.csv
2020
import java.io.{ByteArrayOutputStream, OutputStreamWriter, StringReader}
2121
import java.nio.charset.StandardCharsets
2222

23-
import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWriterSettings}
23+
import com.univocity.parsers.csv._
2424

2525
import org.apache.spark.internal.Logging
2626

@@ -47,7 +47,7 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
4747
settings.setMaxColumns(params.maxColumns)
4848
settings.setNullValue(params.nullValue)
4949
settings.setMaxCharsPerColumn(params.maxCharsPerColumn)
50-
settings.setParseUnescapedQuotesUntilDelimiter(true)
50+
settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER)
5151
if (headers != null) settings.setHeaders(headers: _*)
5252

5353
new CsvParser(settings)

0 commit comments

Comments
 (0)