@@ -20,6 +20,7 @@ package org.apache.spark.io
2020import java .io ._
2121import java .util .Locale
2222
23+ import com .github .luben .zstd .{ZstdInputStream , ZstdOutputStream }
2324import com .ning .compress .lzf .{LZFInputStream , LZFOutputStream }
2425import net .jpountz .lz4 .LZ4BlockOutputStream
2526import org .xerial .snappy .{Snappy , SnappyInputStream , SnappyOutputStream }
@@ -50,13 +51,14 @@ private[spark] object CompressionCodec {
5051
5152 private [spark] def supportsConcatenationOfSerializedStreams (codec : CompressionCodec ): Boolean = {
5253 (codec.isInstanceOf [SnappyCompressionCodec ] || codec.isInstanceOf [LZFCompressionCodec ]
53- || codec.isInstanceOf [LZ4CompressionCodec ])
54+ || codec.isInstanceOf [LZ4CompressionCodec ] || codec. isInstanceOf [ ZStandardCompressionCodec ] )
5455 }
5556
5657 private val shortCompressionCodecNames = Map (
5758 " lz4" -> classOf [LZ4CompressionCodec ].getName,
5859 " lzf" -> classOf [LZFCompressionCodec ].getName,
59- " snappy" -> classOf [SnappyCompressionCodec ].getName)
60+ " snappy" -> classOf [SnappyCompressionCodec ].getName,
61+ " zstd" -> classOf [SnappyCompressionCodec ].getName)
6062
6163 def getCodecName (conf : SparkConf ): String = {
6264 conf.get(configKey, DEFAULT_COMPRESSION_CODEC )
@@ -216,3 +218,30 @@ private final class SnappyOutputStreamWrapper(os: SnappyOutputStream) extends Ou
216218 }
217219 }
218220}
221+
/**
 * :: DeveloperApi ::
 * ZStandard implementation of [[org.apache.spark.io.CompressionCodec]].
 *
 * @note The wire protocol for this codec is not guaranteed to be compatible across versions
 * of Spark. This is intended for use as an internal compression utility within a single Spark
 * application.
 */
@DeveloperApi
class ZStandardCompressionCodec(conf: SparkConf) extends CompressionCodec {

  // Zstd compression level (default 1, the fastest). A level is a plain integer, not a
  // byte quantity, so read it with getInt rather than getSizeAsBytes.
  private val level = conf.getInt("spark.io.compression.zstd.level", 1)

  // Buffer size used on both the compression and decompression side. The original code
  // read the lz4 key "spark.io.compression.lz4.blockSize" here by copy-paste mistake;
  // use a zstd-specific key so the two codecs can be tuned independently.
  private val bufferSize =
    conf.getSizeAsBytes("spark.io.compression.zstd.bufferSize", "32k").toInt

  override def compressedOutputStream(s: OutputStream): OutputStream = {
    // Wrap the zstd output stream in a buffered output stream, so that we can
    // avoid the excessive overhead of a JNI call per small write.
    new BufferedOutputStream(new ZstdOutputStream(s, level), bufferSize)
  }

  override def compressedInputStream(s: InputStream): InputStream = {
    // Wrap the zstd input stream in a buffered input stream so that we can
    // avoid the excessive overhead of a JNI call per small read.
    new BufferedInputStream(new ZstdInputStream(s), bufferSize)
  }
}
0 commit comments