Skip to content

Commit 8f60a90

Browse files
viirya authored and steveloughran committed
HADOOP-17125. Use snappy-java in SnappyCodec (#2297)
This switches the SnappyCodec to use the java-snappy codec, rather than the native one. To use the codec, snappy-java.jar (from org.xerial.snappy) needs to be on the classpath. This comes in as an avro dependency, so it is already on the hadoop-common classpath, as well as in hadoop-common/lib. The version used is now managed in the hadoop-project POM; initially 1.1.7.7. Contributed by DB Tsai and Liang-Chi Hsieh Change-Id: Id52a404a0005480e68917cd17f0a27b7744aea4e
1 parent 3ae78e4 commit 8f60a90

File tree

22 files changed

+109
-561
lines changed

22 files changed

+109
-561
lines changed

BUILDING.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Installing required packages for clean install of Ubuntu 14.04 LTS Desktop:
7575

7676
Optional packages:
7777

78-
* Snappy compression
78+
* Snappy compression (only used for hadoop-mapreduce-client-nativetask)
7979
$ sudo apt-get install snappy libsnappy-dev
8080
* Intel ISA-L library for erasure coding
8181
Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
@@ -161,7 +161,8 @@ Maven build goals:
161161

162162
Snappy is a compression library that can be utilized by the native code.
163163
It is currently an optional component, meaning that Hadoop can be built with
164-
or without this dependency.
164+
or without this dependency. Snappy library as optional dependency is only
165+
used for hadoop-mapreduce-client-nativetask.
165166

166167
* Use -Drequire.snappy to fail the build if libsnappy.so is not found.
167168
If this option is not specified and the snappy library is missing,

dev-support/bin/dist-copynativelibs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,6 @@ for i in "$@"; do
111111
--openssllibbundle=*)
112112
OPENSSLLIBBUNDLE=${i#*=}
113113
;;
114-
--snappybinbundle=*)
115-
SNAPPYBINBUNDLE=${i#*=}
116-
;;
117114
--snappylib=*)
118115
SNAPPYLIB=${i#*=}
119116
;;
@@ -176,8 +173,6 @@ if [[ -d "${BIN_DIR}" ]] ; then
176173
exit 1
177174
fi
178175

179-
bundle_native_bin "${SNAPPYBINBUNDLE}" "${SNAPPYLIBBUNDLE}" "snappy.lib" "snappy" "${SNAPPYLIB}"
180-
181176
bundle_native_bin "${ZSTDBINBUNDLE}" "${ZSTDLIBBUNDLE}" "zstd.lib" "zstd" "${ZSTDLIB}"
182177

183178
bundle_native_bin "${OPENSSLBINBUNDLE}" "${OPENSSLLIBBUNDLE}" "openssl.lib" "crypto" "${OPENSSLLIB}"

hadoop-common-project/hadoop-common/pom.xml

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,11 @@
362362
<artifactId>wildfly-openssl-java</artifactId>
363363
<scope>provided</scope>
364364
</dependency>
365+
<dependency>
366+
<groupId>org.xerial.snappy</groupId>
367+
<artifactId>snappy-java</artifactId>
368+
<scope>compile</scope>
369+
</dependency>
365370
</dependencies>
366371

367372
<build>
@@ -641,10 +646,6 @@
641646
</activation>
642647
<properties>
643648
<require.bzip2>false</require.bzip2>
644-
<snappy.prefix></snappy.prefix>
645-
<snappy.lib></snappy.lib>
646-
<snappy.include></snappy.include>
647-
<require.snappy>false</require.snappy>
648649
<zstd.prefix></zstd.prefix>
649650
<zstd.lib></zstd.lib>
650651
<zstd.include></zstd.include>
@@ -698,11 +699,7 @@
698699
<GENERATED_JAVAH>${project.build.directory}/native/javah</GENERATED_JAVAH>
699700
<JVM_ARCH_DATA_MODEL>${sun.arch.data.model}</JVM_ARCH_DATA_MODEL>
700701
<REQUIRE_BZIP2>${require.bzip2}</REQUIRE_BZIP2>
701-
<REQUIRE_SNAPPY>${require.snappy}</REQUIRE_SNAPPY>
702702
<REQUIRE_ZSTD>${require.zstd}</REQUIRE_ZSTD>
703-
<CUSTOM_SNAPPY_PREFIX>${snappy.prefix}</CUSTOM_SNAPPY_PREFIX>
704-
<CUSTOM_SNAPPY_LIB>${snappy.lib} </CUSTOM_SNAPPY_LIB>
705-
<CUSTOM_SNAPPY_INCLUDE>${snappy.include} </CUSTOM_SNAPPY_INCLUDE>
706703
<CUSTOM_ZSTD_PREFIX>${zstd.prefix}</CUSTOM_ZSTD_PREFIX>
707704
<CUSTOM_ZSTD_LIB>${zstd.lib} </CUSTOM_ZSTD_LIB>
708705
<CUSTOM_ZSTD_INCLUDE>${zstd.include} </CUSTOM_ZSTD_INCLUDE>
@@ -757,14 +754,9 @@
757754
</os>
758755
</activation>
759756
<properties>
760-
<snappy.prefix></snappy.prefix>
761-
<snappy.lib></snappy.lib>
762-
<snappy.include></snappy.include>
763757
<require.isal>false</require.isal>
764758
<isal.prefix></isal.prefix>
765759
<isal.lib></isal.lib>
766-
<require.snappy>false</require.snappy>
767-
<bundle.snappy.in.bin>true</bundle.snappy.in.bin>
768760
<zstd.prefix></zstd.prefix>
769761
<zstd.lib></zstd.lib>
770762
<zstd.include></zstd.include>
@@ -864,10 +856,6 @@
864856
<argument>/nologo</argument>
865857
<argument>/p:Configuration=Release</argument>
866858
<argument>/p:OutDir=${project.build.directory}/bin/</argument>
867-
<argument>/p:CustomSnappyPrefix=${snappy.prefix}</argument>
868-
<argument>/p:CustomSnappyLib=${snappy.lib}</argument>
869-
<argument>/p:CustomSnappyInclude=${snappy.include}</argument>
870-
<argument>/p:RequireSnappy=${require.snappy}</argument>
871859
<argument>/p:CustomZstdPrefix=${zstd.prefix}</argument>
872860
<argument>/p:CustomZstdLib=${zstd.lib}</argument>
873861
<argument>/p:CustomZstdInclude=${zstd.include}</argument>

hadoop-common-project/hadoop-common/src/CMakeLists.txt

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -67,33 +67,6 @@ else()
6767
endif()
6868
set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES})
6969

70-
# Require snappy.
71-
set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
72-
hadoop_set_find_shared_library_version("1")
73-
find_library(SNAPPY_LIBRARY
74-
NAMES snappy
75-
PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/lib
76-
${CUSTOM_SNAPPY_PREFIX}/lib64 ${CUSTOM_SNAPPY_LIB})
77-
set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES})
78-
find_path(SNAPPY_INCLUDE_DIR
79-
NAMES snappy.h
80-
PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/include
81-
${CUSTOM_SNAPPY_INCLUDE})
82-
if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR)
83-
get_filename_component(HADOOP_SNAPPY_LIBRARY ${SNAPPY_LIBRARY} NAME)
84-
set(SNAPPY_SOURCE_FILES
85-
"${SRC}/io/compress/snappy/SnappyCompressor.c"
86-
"${SRC}/io/compress/snappy/SnappyDecompressor.c")
87-
set(REQUIRE_SNAPPY ${REQUIRE_SNAPPY}) # Stop warning about unused variable.
88-
message(STATUS "Found Snappy: ${SNAPPY_LIBRARY}")
89-
else()
90-
set(SNAPPY_INCLUDE_DIR "")
91-
set(SNAPPY_SOURCE_FILES "")
92-
if(REQUIRE_SNAPPY)
93-
message(FATAL_ERROR "Required snappy library could not be found. SNAPPY_LIBRARY=${SNAPPY_LIBRARY}, SNAPPY_INCLUDE_DIR=${SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_INCLUDE_DIR=${CUSTOM_SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_PREFIX=${CUSTOM_SNAPPY_PREFIX}, CUSTOM_SNAPPY_INCLUDE=${CUSTOM_SNAPPY_INCLUDE}")
94-
endif()
95-
endif()
96-
9770
# Require zstandard
9871
SET(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
9972
hadoop_set_find_shared_library_version("1")
@@ -253,7 +226,6 @@ include_directories(
253226
${JNI_INCLUDE_DIRS}
254227
${ZLIB_INCLUDE_DIRS}
255228
${BZIP2_INCLUDE_DIR}
256-
${SNAPPY_INCLUDE_DIR}
257229
${ISAL_INCLUDE_DIR}
258230
${ZSTD_INCLUDE_DIR}
259231
${OPENSSL_INCLUDE_DIR}
@@ -269,7 +241,6 @@ hadoop_add_dual_library(hadoop
269241
${SRC}/io/compress/lz4/lz4.c
270242
${SRC}/io/compress/lz4/lz4hc.c
271243
${ISAL_SOURCE_FILES}
272-
${SNAPPY_SOURCE_FILES}
273244
${ZSTD_SOURCE_FILES}
274245
${OPENSSL_SOURCE_FILES}
275246
${SRC}/io/compress/zlib/ZlibCompressor.c

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public interface Decompressor {
9292
* {@link #needsInput()} should be called in order to determine if more
9393
* input data is required.
9494
*
95-
* @param b Buffer for the compressed data
95+
* @param b Buffer for the uncompressed data
9696
* @param off Start offset of the data
9797
* @param len Size of the buffer
9898
* @return The actual number of bytes of uncompressed data.

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import org.apache.hadoop.io.compress.snappy.SnappyDecompressor;
2929
import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor;
3030
import org.apache.hadoop.fs.CommonConfigurationKeys;
31-
import org.apache.hadoop.util.NativeCodeLoader;
3231

3332
/**
3433
* This class creates snappy compressors/decompressors.
@@ -56,37 +55,6 @@ public Configuration getConf() {
5655
return conf;
5756
}
5857

59-
/**
60-
* Are the native snappy libraries loaded &amp; initialized?
61-
*/
62-
public static void checkNativeCodeLoaded() {
63-
if (!NativeCodeLoader.buildSupportsSnappy()) {
64-
throw new RuntimeException("native snappy library not available: " +
65-
"this version of libhadoop was built without " +
66-
"snappy support.");
67-
}
68-
if (!NativeCodeLoader.isNativeCodeLoaded()) {
69-
throw new RuntimeException("Failed to load libhadoop.");
70-
}
71-
if (!SnappyCompressor.isNativeCodeLoaded()) {
72-
throw new RuntimeException("native snappy library not available: " +
73-
"SnappyCompressor has not been loaded.");
74-
}
75-
if (!SnappyDecompressor.isNativeCodeLoaded()) {
76-
throw new RuntimeException("native snappy library not available: " +
77-
"SnappyDecompressor has not been loaded.");
78-
}
79-
}
80-
81-
public static boolean isNativeCodeLoaded() {
82-
return SnappyCompressor.isNativeCodeLoaded() &&
83-
SnappyDecompressor.isNativeCodeLoaded();
84-
}
85-
86-
public static String getLibraryName() {
87-
return SnappyCompressor.getLibraryName();
88-
}
89-
9058
/**
9159
* Create a {@link CompressionOutputStream} that will write to the given
9260
* {@link OutputStream}.
@@ -115,7 +83,6 @@ public CompressionOutputStream createOutputStream(OutputStream out)
11583
public CompressionOutputStream createOutputStream(OutputStream out,
11684
Compressor compressor)
11785
throws IOException {
118-
checkNativeCodeLoaded();
11986
int bufferSize = conf.getInt(
12087
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY,
12188
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT);
@@ -133,7 +100,6 @@ public CompressionOutputStream createOutputStream(OutputStream out,
133100
*/
134101
@Override
135102
public Class<? extends Compressor> getCompressorType() {
136-
checkNativeCodeLoaded();
137103
return SnappyCompressor.class;
138104
}
139105

@@ -144,7 +110,6 @@ public Class<? extends Compressor> getCompressorType() {
144110
*/
145111
@Override
146112
public Compressor createCompressor() {
147-
checkNativeCodeLoaded();
148113
int bufferSize = conf.getInt(
149114
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY,
150115
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT);
@@ -179,7 +144,6 @@ public CompressionInputStream createInputStream(InputStream in)
179144
public CompressionInputStream createInputStream(InputStream in,
180145
Decompressor decompressor)
181146
throws IOException {
182-
checkNativeCodeLoaded();
183147
return new BlockDecompressorStream(in, decompressor, conf.getInt(
184148
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY,
185149
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT));
@@ -192,7 +156,6 @@ public CompressionInputStream createInputStream(InputStream in,
192156
*/
193157
@Override
194158
public Class<? extends Decompressor> getDecompressorType() {
195-
checkNativeCodeLoaded();
196159
return SnappyDecompressor.class;
197160
}
198161

@@ -203,7 +166,6 @@ public Class<? extends Decompressor> getDecompressorType() {
203166
*/
204167
@Override
205168
public Decompressor createDecompressor() {
206-
checkNativeCodeLoaded();
207169
int bufferSize = conf.getInt(
208170
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY,
209171
CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT);
@@ -215,7 +177,7 @@ public Decompressor createDecompressor() {
215177
*/
216178
@Override
217179
public DirectDecompressor createDirectDecompressor() {
218-
return isNativeCodeLoaded() ? new SnappyDirectDecompressor() : null;
180+
return new SnappyDirectDecompressor();
219181
}
220182

221183
/**

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java

Lines changed: 14 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424

2525
import org.apache.hadoop.conf.Configuration;
2626
import org.apache.hadoop.io.compress.Compressor;
27-
import org.apache.hadoop.util.NativeCodeLoader;
2827
import org.slf4j.Logger;
2928
import org.slf4j.LoggerFactory;
29+
import org.xerial.snappy.Snappy;
3030

3131
/**
3232
* A {@link Compressor} based on the snappy compression algorithm.
@@ -48,24 +48,6 @@ public class SnappyCompressor implements Compressor {
4848
private long bytesRead = 0L;
4949
private long bytesWritten = 0L;
5050

51-
private static boolean nativeSnappyLoaded = false;
52-
53-
static {
54-
if (NativeCodeLoader.isNativeCodeLoaded() &&
55-
NativeCodeLoader.buildSupportsSnappy()) {
56-
try {
57-
initIDs();
58-
nativeSnappyLoaded = true;
59-
} catch (Throwable t) {
60-
LOG.error("failed to load SnappyCompressor", t);
61-
}
62-
}
63-
}
64-
65-
public static boolean isNativeCodeLoaded() {
66-
return nativeSnappyLoaded;
67-
}
68-
6951
/**
7052
* Creates a new compressor.
7153
*
@@ -225,7 +207,7 @@ public int compress(byte[] b, int off, int len)
225207
}
226208

227209
// Compress data
228-
n = compressBytesDirect();
210+
n = compressDirectBuf();
229211
compressedDirectBuf.limit(n);
230212
uncompressedDirectBuf.clear(); // snappy consumes all buffer input
231213

@@ -291,9 +273,16 @@ public long getBytesWritten() {
291273
public void end() {
292274
}
293275

294-
private native static void initIDs();
295-
296-
private native int compressBytesDirect();
297-
298-
public native static String getLibraryName();
276+
private int compressDirectBuf() throws IOException {
277+
if (uncompressedDirectBufLen == 0) {
278+
return 0;
279+
} else {
280+
// Set the position and limit of `uncompressedDirectBuf` for reading
281+
uncompressedDirectBuf.limit(uncompressedDirectBufLen).position(0);
282+
int size = Snappy.compress((ByteBuffer) uncompressedDirectBuf,
283+
(ByteBuffer) compressedDirectBuf);
284+
uncompressedDirectBufLen = 0;
285+
return size;
286+
}
287+
}
299288
}

0 commit comments

Comments
 (0)