Skip to content

Commit 3695db2

Browse files
HADOOP-19654. Upgrade AWS SDK to 2.35.4 (#7882)
AWS SDK upgraded to 2.35.4. This SDK has changed checksum/checksum header handling significantly, causing problems with third party stores, and, in some combinations, AWS S3 itself. The S3A connector has retained old behavior; options to change these settings are now available. The default settings are chosen for maximum compatibility and performance. fs.s3a.request.md5.header: true fs.s3a.checksum.generation: false fs.s3a.create.checksum.algorithm: "" Consult the documentation for more details. Contributed by Steve Loughran
1 parent 004d2a5 commit 3695db2

File tree

65 files changed

+1298
-198
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+1298
-198
lines changed

LICENSE-binary

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ io.reactivex:rxnetty:0.4.20
352352
io.swagger:swagger-annotations:1.5.4
353353
javax.inject:javax.inject:1
354354
net.java.dev.jna:jna:5.2.0
355-
net.minidev:accessors-smart:1.2
355+
net.minidev:accessors-smart:1.21
356356
org.apache.avro:avro:1.11.4
357357
org.apache.commons:commons-compress:1.26.1
358358
org.apache.commons:commons-configuration2:2.10.1
@@ -419,7 +419,7 @@ org.xerial.snappy:snappy-java:1.1.10.4
419419
org.yaml:snakeyaml:2.0
420420
org.wildfly.openssl:wildfly-openssl:2.2.5.Final
421421
ro.isdc.wro4j:wro4j-maven-plugin:1.8.0
422-
software.amazon.awssdk:bundle:2.29.52
422+
software.amazon.awssdk:bundle:2.35.4
423423
software.amazon.s3.analyticsaccelerator:analyticsaccelerator-s3:1.3.0
424424
net.jodah:failsafe:2.4.4
425425

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractUnbufferTest.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.fs.contract;
2020

21+
import org.assertj.core.api.Assertions;
2122
import org.junit.jupiter.api.BeforeEach;
2223
import org.junit.jupiter.api.Test;
2324

@@ -30,6 +31,7 @@
3031

3132
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
3233
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
34+
import static org.apache.hadoop.fs.contract.ContractTestUtils.readNBytes;
3335

3436
/**
3537
* Contract tests for {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer}.
@@ -145,10 +147,12 @@ protected void validateFileContents(FSDataInputStream stream, int length,
145147
int startIndex)
146148
throws IOException {
147149
byte[] streamData = new byte[length];
148-
assertEquals(length, stream.read(streamData),
149-
"failed to read expected number of bytes from "
150-
+ "stream. This may be transient");
150+
final int read = readNBytes(stream, streamData, 0, length);
151+
Assertions.assertThat(read)
152+
.describedAs("failed to read expected number of bytes from stream. %s", stream)
153+
.isEqualTo(length);
151154
byte[] validateFileBytes;
155+
152156
if (startIndex == 0 && length == fileBytes.length) {
153157
validateFileBytes = fileBytes;
154158
} else {

hadoop-project/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@
209209
<make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
210210
<surefire.fork.timeout>900</surefire.fork.timeout>
211211
<aws-java-sdk.version>1.12.720</aws-java-sdk.version>
212-
<aws-java-sdk-v2.version>2.29.52</aws-java-sdk-v2.version>
212+
<aws-java-sdk-v2.version>2.35.4</aws-java-sdk-v2.version>
213213
<amazon-s3-encryption-client-java.version>3.1.1</amazon-s3-encryption-client-java.version>
214214
<amazon-s3-analyticsaccelerator-s3.version>1.3.0</amazon-s3-analyticsaccelerator-s3.version>
215215
<aws.eventstream.version>1.0.1</aws.eventstream.version>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientIOException.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,8 @@ public String getMessage() {
5454
public boolean retryable() {
5555
return getCause().retryable();
5656
}
57+
58+
public String getOperation() {
59+
return operation;
60+
}
5761
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSNoResponseException.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
* Status code 443, no response from server. This is considered idempotent.
2525
*/
2626
public class AWSNoResponseException extends AWSServiceIOException {
27+
28+
/**
29+
* Constructor.
30+
* @param operation operation in progress.
31+
* @param cause inner cause
32+
*/
2733
public AWSNoResponseException(String operation,
2834
AwsServiceException cause) {
2935
super(operation, cause);

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.hadoop.classification.InterfaceAudience;
2222
import org.apache.hadoop.classification.InterfaceStability;
2323
import org.apache.hadoop.fs.Options;
24+
import org.apache.hadoop.fs.s3a.impl.ChecksumSupport;
2425
import org.apache.hadoop.fs.s3a.impl.streams.StreamIntegration;
2526
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
2627

@@ -1836,15 +1837,53 @@ private Constants() {
18361837
*/
18371838
public static final boolean CHECKSUM_VALIDATION_DEFAULT = false;
18381839

1840+
/**
1841+
* Should checksums always be generated?
1842+
* Not all third-party stores like this being enabled for every request.
1843+
* Value: {@value}.
1844+
*/
1845+
public static final String CHECKSUM_GENERATION =
1846+
"fs.s3a.checksum.generation";
1847+
1848+
/**
1849+
* Default value of {@link #CHECKSUM_GENERATION}.
1850+
* Value: {@value}.
1851+
*/
1852+
public static final boolean DEFAULT_CHECKSUM_GENERATION = false;
1853+
18391854
/**
18401855
* Indicates the algorithm used to create the checksum for the object
18411856
* to be uploaded to S3. Unset by default. It supports the following values:
1842-
* 'CRC32', 'CRC32C', 'SHA1', and 'SHA256'
1857+
* 'CRC32', 'CRC32C', 'SHA1', 'SHA256', 'CRC64_NVME', 'NONE', ''.
1858+
* When checksum calculation is enabled this MUST be set to a valid algorithm.
18431859
* value:{@value}
18441860
*/
18451861
public static final String CHECKSUM_ALGORITHM =
18461862
"fs.s3a.create.checksum.algorithm";
18471863

1864+
/**
1865+
* Default checksum algorithm: {@code "NONE"}.
1866+
*/
1867+
public static final String DEFAULT_CHECKSUM_ALGORITHM =
1868+
ChecksumSupport.NONE;
1869+
1870+
/**
1871+
* Send a {@code Content-MD5} header with every request.
1872+
* This is required when performing some operations with third party stores
1873+
* (for example: bulk delete).
1874+
* It is supported by AWS S3, though has unexpected behavior with AWS S3 Express storage.
1875+
* See https://github.com/aws/aws-sdk-java-v2/issues/6459 for details.
1876+
*/
1877+
public static final String REQUEST_MD5_HEADER =
1878+
"fs.s3a.request.md5.header";
1879+
1880+
/**
1881+
* Default value of {@link #REQUEST_MD5_HEADER}.
1882+
* Value: {@value}.
1883+
*/
1884+
public static final boolean DEFAULT_REQUEST_MD5_HEADER = true;
1885+
1886+
18481887
/**
18491888
* Are extensions classes, such as {@code fs.s3a.aws.credentials.provider},
18501889
* going to be loaded from the same classloader that loaded

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import org.slf4j.LoggerFactory;
3131

3232
import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
33+
import software.amazon.awssdk.core.checksums.RequestChecksumCalculation;
34+
import software.amazon.awssdk.core.checksums.ResponseChecksumValidation;
3335
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
3436
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
3537
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
@@ -41,6 +43,7 @@
4143
import software.amazon.awssdk.metrics.LoggingMetricPublisher;
4244
import software.amazon.awssdk.regions.Region;
4345
import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin;
46+
import software.amazon.awssdk.services.s3.LegacyMd5Plugin;
4447
import software.amazon.awssdk.services.s3.S3AsyncClient;
4548
import software.amazon.awssdk.services.s3.S3AsyncClientBuilder;
4649
import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
@@ -202,11 +205,34 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build
202205

203206
configureEndpointAndRegion(builder, parameters, conf);
204207

208+
// add a plugin to add a Content-MD5 header.
209+
// this is required when performing some operations with third party stores
210+
// (for example: bulk delete), and is somewhat harmless when working with AWS S3.
211+
if (parameters.isMd5HeaderEnabled()) {
212+
LOG.debug("MD5 header enabled");
213+
builder.addPlugin(LegacyMd5Plugin.create());
214+
}
215+
216+
//when to calculate request checksums.
217+
final RequestChecksumCalculation checksumCalculation =
218+
parameters.isChecksumCalculationEnabled()
219+
? RequestChecksumCalculation.WHEN_SUPPORTED
220+
: RequestChecksumCalculation.WHEN_REQUIRED;
221+
LOG.debug("Using checksum calculation policy: {}", checksumCalculation);
222+
builder.requestChecksumCalculation(checksumCalculation);
223+
224+
// response checksum validation. Slow, even with CRC32 checksums.
225+
final ResponseChecksumValidation checksumValidation;
226+
checksumValidation = parameters.isChecksumValidationEnabled()
227+
? ResponseChecksumValidation.WHEN_SUPPORTED
228+
: ResponseChecksumValidation.WHEN_REQUIRED;
229+
LOG.debug("Using checksum validation policy: {}", checksumValidation);
230+
builder.responseChecksumValidation(checksumValidation);
231+
205232
maybeApplyS3AccessGrantsConfigurations(builder, conf);
206233

207234
S3Configuration serviceConfiguration = S3Configuration.builder()
208235
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
209-
.checksumValidationEnabled(parameters.isChecksumValidationEnabled())
210236
.build();
211237

212238
final ClientOverrideConfiguration.Builder override =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,10 +1173,15 @@ private ClientManager createClientManager(URI fsURI, boolean dtEnabled) throws I
11731173
.withTransferManagerExecutor(unboundedThreadPool)
11741174
.withRegion(configuredRegion)
11751175
.withFipsEnabled(fipsEnabled)
1176+
.withS3ExpressStore(s3ExpressStore)
11761177
.withExpressCreateSession(
11771178
conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT))
11781179
.withChecksumValidationEnabled(
11791180
conf.getBoolean(CHECKSUM_VALIDATION, CHECKSUM_VALIDATION_DEFAULT))
1181+
.withChecksumCalculationEnabled(
1182+
conf.getBoolean(CHECKSUM_GENERATION, DEFAULT_CHECKSUM_GENERATION))
1183+
.withMd5HeaderEnabled(conf.getBoolean(REQUEST_MD5_HEADER,
1184+
DEFAULT_REQUEST_MD5_HEADER))
11801185
.withClientSideEncryptionEnabled(isCSEEnabled)
11811186
.withClientSideEncryptionMaterials(cseMaterials)
11821187
.withAnalyticsAcceleratorEnabled(isAnalyticsAcceleratorEnabled)

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hadoop.fs.s3a;
2020

21+
import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
2122
import software.amazon.awssdk.awscore.exception.AwsServiceException;
2223
import software.amazon.awssdk.core.exception.AbortedException;
2324
import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException;
@@ -240,8 +241,13 @@ public static IOException translateException(@Nullable String operation,
240241
? (S3Exception) ase
241242
: null;
242243
int status = ase.statusCode();
243-
if (ase.awsErrorDetails() != null) {
244-
message = message + ":" + ase.awsErrorDetails().errorCode();
244+
// error details, may be null
245+
final AwsErrorDetails errorDetails = ase.awsErrorDetails();
246+
// error code; stays as the empty string if errorDetails is null
247+
String errorCode = "";
248+
if (errorDetails != null) {
249+
errorCode = errorDetails.errorCode();
250+
message = message + ":" + errorCode;
245251
}
246252

247253
// big switch on the HTTP status code.
@@ -308,6 +314,8 @@ public static IOException translateException(@Nullable String operation,
308314
// precondition failure: the object is there, but the precondition
309315
// (e.g. etag) didn't match. Assume remote file change during
310316
// rename or status passed in to openfile had an etag which didn't match.
317+
// See the SC_200 handler for the treatment of the S3 Express failure
318+
// variant.
311319
case SC_412_PRECONDITION_FAILED:
312320
ioe = new RemoteFileChangedException(path, message, "", ase);
313321
break;
@@ -352,6 +360,16 @@ public static IOException translateException(@Nullable String operation,
352360
return ((MultiObjectDeleteException) exception)
353361
.translateException(message);
354362
}
363+
if (PRECONDITION_FAILED.equals(errorCode)) {
364+
// S3 Express stores report conflict in conditional writes
365+
// as a 200 + an error code of "PreconditionFailed".
366+
// This is mapped to RemoteFileChangedException for consistency
367+
// with SC_412_PRECONDITION_FAILED handling.
368+
return new RemoteFileChangedException(path,
369+
operation,
370+
exception.getMessage(),
371+
exception);
372+
}
355373
// other 200: FALL THROUGH
356374

357375
default:

0 commit comments

Comments
 (0)