From 3a5e6321bb745c3cc4e305b08a585b49462ab07b Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Tue, 24 May 2022 11:09:19 +0100 Subject: [PATCH 01/24] adds in partial configuration for s3 v2 client --- hadoop-project/pom.xml | 12 ++ hadoop-tools/hadoop-aws/pom.xml | 5 + .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 57 ++++++++ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 7 + .../org/apache/hadoop/fs/s3a/S3AUtils.java | 130 ++++++++++++++++++ .../apache/hadoop/fs/s3a/S3ClientFactory.java | 13 ++ 6 files changed, 224 insertions(+) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index ec07369977b32..acd2964ff66e8 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -188,6 +188,7 @@ 1.0-beta-1 900 1.12.132 + 2.17.196 2.3.4 1.11.2 2.1 @@ -1063,6 +1064,17 @@ + + software.amazon.awssdk + bundle + ${aws-java-sdk-v2.version} + + + io.netty + * + + + org.apache.mina mina-core diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 5583bb7ad05ec..11139050f204a 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -435,6 +435,11 @@ aws-java-sdk-bundle compile + + software.amazon.awssdk + bundle + compile + org.assertj assertj-core diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index c374ef7397c97..fd4262c467599 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; import com.amazonaws.ClientConfiguration; import com.amazonaws.SdkClientException; @@ -45,6 +46,13 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3ClientBuilder; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -160,6 +168,55 @@ public AmazonS3 createS3Client( } } + /** + * Creates a new {@link S3Client} + * + * @param uri S3A file system URI + * @param parameters parameter object + * @return S3 client + * @throws IOException + */ + @Override + public S3Client createS3ClientV2( + final URI uri, + final S3ClientCreationParameters parameters) throws IOException { + + Configuration conf = getConf(); + bucket = uri.getHost(); + + final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = + S3AUtils.createClientConfigBuilder(conf); + + final ApacheHttpClient.Builder httpClientBuilder = S3AUtils.createHttpClientBuilder(conf); + + final RetryPolicy.Builder retryPolicyBuilder = S3AUtils.createRetryPolicyBuilder(conf); + + final ProxyConfiguration.Builder proxyConfigBuilder = + S3AUtils.createProxyConfigurationBuilder(conf, bucket); + + S3ClientBuilder s3ClientBuilder = S3Client.builder(); + + // add any headers + parameters.getHeaders().forEach((h, v) -> + clientOverrideConfigBuilder.putHeader(h, v)); + 
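Note on the method above: createS3ClientV2() assembles the v2 client from four independent builders (ClientOverrideConfiguration, ApacheHttpClient, RetryPolicy, ProxyConfiguration); the remainder of the method (requester-pays header, user-agent suffix, final build) follows below. A minimal sketch of that composition, with illustrative constants standing in for the S3AUtils helpers, is shown here. One thing worth double-checking in this patch: the v2 setters all take java.time.Duration, and Hadoop's ESTABLISH_TIMEOUT and SOCKET_TIMEOUT options are millisecond-valued, so the Duration.ofSeconds(...) calls in createHttpClientBuilder look like a unit mismatch; the sketch uses Duration.ofMillis(...).

```java
// Sketch only: how the builders compose into a v2 S3Client. Constants are
// illustrative, not the Hadoop configuration defaults; region and credentials
// are resolved from the environment.
import java.time.Duration;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.retry.RetryPolicy;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.services.s3.S3Client;

public class S3ClientCompositionSketch {
  public static S3Client build() {
    ClientOverrideConfiguration overrides = ClientOverrideConfiguration.builder()
        .apiCallAttemptTimeout(Duration.ofMillis(60_000)) // per-attempt timeout
        .retryPolicy(RetryPolicy.builder().numRetries(10).build())
        .build();
    return S3Client.builder()
        .overrideConfiguration(overrides)
        .httpClientBuilder(ApacheHttpClient.builder()
            .maxConnections(96)
            // NB: these Hadoop options are milliseconds, so convert with
            // ofMillis, not ofSeconds.
            .connectionTimeout(Duration.ofMillis(5_000))
            .socketTimeout(Duration.ofMillis(200_000)))
        .build();
  }
}
```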
+ if (parameters.isRequesterPays()) { + // All calls must acknowledge requester will pay via header. + clientOverrideConfigBuilder.putHeader(REQUESTER_PAYS_HEADER, REQUESTER_PAYS_HEADER_VALUE); + } + + if (!StringUtils.isEmpty(parameters.getUserAgentSuffix())) { + clientOverrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_SUFFIX, + parameters.getUserAgentSuffix()); + } + + return s3ClientBuilder.overrideConfiguration( + clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()).build()) + .httpClientBuilder(httpClientBuilder.proxyConfiguration(proxyConfigBuilder.build())) + .build(); + } + + /** * Create an {@link AmazonS3} client of type * {@link AmazonS3EncryptionV2} if CSE is enabled. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 15e240f901865..bd6f02eff2773 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -78,6 +78,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3Client; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; @@ -260,6 +261,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private Path workingDir; private String username; private AmazonS3 s3; + private S3Client s3V2; // initial callback policy is fail-once; it's there just to assist // some mock tests and other codepaths trying to call the low level // APIs on an uninitialized filesystem. @@ -869,6 +871,11 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) .createS3Client(getUri(), parameters); + + s3V2 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf) + .createS3ClientV2(getUri(), + parameters); + } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index d644d3f47667c..246546dbe9153 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -54,6 +54,11 @@ import org.apache.hadoop.util.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; import javax.annotation.Nullable; import java.io.Closeable; @@ -69,6 +74,7 @@ import java.net.SocketTimeoutException; import java.net.URI; import java.nio.file.AccessDeniedException; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -1236,6 +1242,119 @@ public static ClientConfiguration createAwsConf(Configuration conf, return awsConf; } + /*** + * + * + * @param conf + * @return + */ + public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { + ClientOverrideConfiguration.Builder overrideConfigBuilder = + ClientOverrideConfiguration.builder(); + + long requestTimeoutMillis = 
conf.getTimeDuration(REQUEST_TIMEOUT, + DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + + if (requestTimeoutMillis > Integer.MAX_VALUE) { + LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead", + requestTimeoutMillis, Integer.MAX_VALUE); + requestTimeoutMillis = Integer.MAX_VALUE; + } + + if(requestTimeoutMillis > 0) { + overrideConfigBuilder.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); + } + + initUserAgentV2(conf, overrideConfigBuilder); + + // TODO: Look at signers. See issue https://github.com/aws/aws-sdk-java-v2/issues/1024 + // String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); + // if (!signerOverride.isEmpty()) { + // LOG.debug("Signer override = {}", signerOverride); + // overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER) + // } + + return overrideConfigBuilder; + } + + public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) { + ApacheHttpClient.Builder httpClientBuilder = + ApacheHttpClient.builder(); + + httpClientBuilder.maxConnections(intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + + httpClientBuilder.connectionTimeout(Duration.ofSeconds(intOption(conf, ESTABLISH_TIMEOUT, + DEFAULT_ESTABLISH_TIMEOUT, 0))); + + httpClientBuilder.socketTimeout(Duration.ofSeconds(intOption(conf, SOCKET_TIMEOUT, + DEFAULT_SOCKET_TIMEOUT, 0))); + + // NOT_SUPPORTED: The protocol is now HTTPS by default, + // and can only be modified by setting an HTTP endpoint on the client builder. + //TODO: See where this logic should go now + //initProtocolSettings(conf, awsConf); + + return httpClientBuilder; + } + + public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { + + RetryPolicy.Builder retryPolicyBuilder = + RetryPolicy.builder(); + + retryPolicyBuilder.numRetries(intOption(conf, MAX_ERROR_RETRIES, + DEFAULT_MAX_ERROR_RETRIES, 0)); + + return retryPolicyBuilder; + } + + public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configuration conf, + String bucket) throws IOException { + + ProxyConfiguration.Builder proxyConfigBuilder = ProxyConfiguration.builder(); + + String proxyHost = conf.getTrimmed(PROXY_HOST, ""); + int proxyPort = conf.getInt(PROXY_PORT, -1); + + if (!proxyHost.isEmpty()) { + if (proxyPort >= 0) { + // TODO: Check how URI should be created + proxyConfigBuilder.endpoint(URI.create(proxyHost + ":" + proxyPort)); + } else { + if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { + LOG.warn("Proxy host set without port. Using HTTPS default 443"); + proxyConfigBuilder.endpoint(URI.create(proxyHost + ":" + 443)); + } else { + LOG.warn("Proxy host set without port. 
Using HTTP default 80"); + proxyConfigBuilder.endpoint(URI.create(proxyHost + ":" + 80)); + } + } + final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME, + null, null); + final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD, + null, null); + if ((proxyUsername == null) != (proxyPassword == null)) { + String msg = "Proxy error: " + PROXY_USERNAME + " or " + + PROXY_PASSWORD + " set without the other."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + proxyConfigBuilder.username(proxyUsername); + proxyConfigBuilder.password(proxyPassword); + proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN)); + proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION)); + } else if (proxyPort >= 0) { + String msg = + "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + + return proxyConfigBuilder; + } + + /** * Initializes all AWS SDK settings related to connection management. * @@ -1381,6 +1500,17 @@ private static void initUserAgent(Configuration conf, awsConf.setUserAgentPrefix(userAgent); } + private static void initUserAgentV2(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + String userAgent = "Hadoop " + VersionInfo.getVersion(); + String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); + if (!userAgentPrefix.isEmpty()) { + userAgent = userAgentPrefix + ", " + userAgent; + } + LOG.debug("Using User-Agent: {}", userAgent); + clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); + } + /** * Convert the data of an iterator of {@link S3AFileStatus} to * an array. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 34674c788901f..151a7e671cb06 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -29,6 +29,7 @@ import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.monitoring.MonitoringListener; import com.amazonaws.services.s3.AmazonS3; +import software.amazon.awssdk.services.s3.S3Client; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -60,6 +61,18 @@ public interface S3ClientFactory { AmazonS3 createS3Client(URI uri, S3ClientCreationParameters parameters) throws IOException; + /** + * Creates a new {@link S3Client} + * + * @param uri S3A file system URI + * @param parameters parameter object + * @return S3 client + * @throws IOException + */ + S3Client createS3ClientV2(URI uri, + S3ClientCreationParameters parameters) throws IOException; + + /** * Settings for the S3 Client. 
* Implemented as a class to pass in so that adding From a1fa20decbf99935149610bab3c05006203ac5d9 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Wed, 25 May 2022 13:38:29 +0100 Subject: [PATCH 02/24] updates listing operation --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 1 - .../org/apache/hadoop/fs/s3a/Listing.java | 31 +++++------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 39 +++++++++----- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 23 ++++---- .../apache/hadoop/fs/s3a/S3ListRequest.java | 16 +++--- .../apache/hadoop/fs/s3a/S3ListResult.java | 48 +++++++++-------- .../hadoop/fs/s3a/api/RequestFactory.java | 8 ++- .../fs/s3a/impl/RequestFactoryImpl.java | 52 ++++++++++++++----- .../hadoop/fs/s3a/MockS3ClientFactory.java | 10 +++- .../fs/s3a/impl/TestRequestFactory.java | 5 +- 10 files changed, 141 insertions(+), 92 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index fd4262c467599..05d29e4bd5f83 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; import com.amazonaws.ClientConfiguration; import com.amazonaws.SdkClientException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index a1dd4d8df0247..dd715e22f3484 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.S3ObjectSummary; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.VisibleForTesting; @@ -37,6 +36,8 @@ import org.apache.hadoop.util.functional.RemoteIterators; import org.slf4j.Logger; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.S3Object; import java.io.Closeable; import java.io.IOException; @@ -287,7 +288,7 @@ interface FileStatusAcceptor { * @return true if the entry is accepted (i.e. that a status entry * should be generated. */ - boolean accept(Path keyPath, S3ObjectSummary summary); + boolean accept(Path keyPath, S3Object summary); /** * Predicate to decide whether or not to accept a prefix. 
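The mechanical core of this second patch is the model swap from the v1 S3ObjectSummary to the v2 S3Object: getters become fluent accessors, and the last-modified timestamp becomes a java.time.Instant instead of a java.util.Date. A small sketch of the mapping relied on throughout Listing and S3AUtils (key, size and etag values are illustrative):

```java
import java.time.Instant;
import java.util.Date;
import software.amazon.awssdk.services.s3.model.S3Object;

public class S3ObjectMappingSketch {
  public static void main(String[] args) {
    // v1 S3ObjectSummary          ->  v2 S3Object
    // summary.getKey()            ->  s3Object.key()
    // summary.getSize()           ->  s3Object.size()
    // summary.getETag()           ->  s3Object.eTag()
    // summary.getLastModified()   ->  s3Object.lastModified() (Instant, not Date)
    S3Object s3Object = S3Object.builder()
        .key("data/part-0000")
        .size(1024L)
        .eTag("\"abc123\"")
        .lastModified(Instant.now())
        .build();
    // the bridge used in createFileStatus(): v2 exposes Instant,
    // the existing status code wants a Date
    Date legacy = Date.from(s3Object.lastModified());
    System.out.println(s3Object.key() + " size=" + s3Object.size() + " " + legacy);
  }
}
```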
@@ -444,8 +445,8 @@ private boolean buildNextStatusBatch(S3ListResult objects) { objects.getObjectSummaries().size() + objects.getCommonPrefixes().size()); // objects - for (S3ObjectSummary summary : objects.getObjectSummaries()) { - String key = summary.getKey(); + for (S3Object summary : objects.getObjectSummaries()) { + String key = summary.key(); Path keyPath = getStoreContext().getContextAccessors().keyToPath(key); if (LOG.isDebugEnabled()) { LOG.debug("{}: {}", keyPath, stringify(summary)); @@ -455,7 +456,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) { S3AFileStatus status = createFileStatus(keyPath, summary, listingOperationCallbacks.getDefaultBlockSize(keyPath), getStoreContext().getUsername(), - summary.getETag(), null, isCSEEnabled); + summary.eTag(), null, isCSEEnabled); LOG.debug("Adding: {}", status); stats.add(status); added++; @@ -466,11 +467,11 @@ private boolean buildNextStatusBatch(S3ListResult objects) { } // prefixes: always directories - for (String prefix : objects.getCommonPrefixes()) { + for (CommonPrefix prefix : objects.getCommonPrefixes()) { Path keyPath = getStoreContext() .getContextAccessors() - .keyToPath(prefix); - if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) { + .keyToPath(prefix.prefix()); + if (acceptor.accept(keyPath, prefix.prefix()) && filter.accept(keyPath)) { S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath, getStoreContext().getUsername()); LOG.debug("Adding directory: {}", status); @@ -721,10 +722,10 @@ public AcceptFilesOnly(Path qualifiedPath) { * should be generated. */ @Override - public boolean accept(Path keyPath, S3ObjectSummary summary) { + public boolean accept(Path keyPath, S3Object summary) { return !keyPath.equals(qualifiedPath) - && !summary.getKey().endsWith(S3N_FOLDER_SUFFIX) - && !objectRepresentsDirectory(summary.getKey()); + && !summary.key().endsWith(S3N_FOLDER_SUFFIX) + && !objectRepresentsDirectory(summary.key()); } /** @@ -749,8 +750,8 @@ public boolean accept(FileStatus status) { */ static class AcceptAllButS3nDirs implements FileStatusAcceptor { - public boolean accept(Path keyPath, S3ObjectSummary summary) { - return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX); + public boolean accept(Path keyPath, S3Object summary) { + return !summary.key().endsWith(S3N_FOLDER_SUFFIX); } public boolean accept(Path keyPath, String prefix) { @@ -789,9 +790,9 @@ public AcceptAllButSelfAndS3nDirs(Path qualifiedPath) { * should be generated.) 
*/ @Override - public boolean accept(Path keyPath, S3ObjectSummary summary) { + public boolean accept(Path keyPath, S3Object summary) { return !keyPath.equals(qualifiedPath) && - !summary.getKey().endsWith(S3N_FOLDER_SUFFIX); + !summary.key().endsWith(S3N_FOLDER_SUFFIX); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index bd6f02eff2773..7092b51f0e214 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -58,8 +58,6 @@ import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.MultiObjectDeleteException; import com.amazonaws.services.s3.model.MultipartUpload; import com.amazonaws.services.s3.model.ObjectMetadata; @@ -76,6 +74,10 @@ import com.amazonaws.services.s3.transfer.model.UploadResult; import com.amazonaws.event.ProgressListener; + +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.s3.S3Client; @@ -2451,9 +2453,9 @@ protected S3ListResult listObjects(S3ListRequest request, OBJECT_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1(s3.listObjects(request.getV1())); + return S3ListResult.v1(s3V2.listObjects(request.getV1())); } else { - return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + return S3ListResult.v2(s3V2.listObjectsV2(request.getV2())); } })); } @@ -2496,15 +2498,28 @@ protected S3ListResult continueListObjects(S3ListRequest request, OBJECT_CONTINUE_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1( - s3.listNextBatchOfObjects( - getRequestFactory() - .newListNextBatchOfObjectsRequest( - prevResult.getV1()))); +// return S3ListResult.v1( +// s3.listNextBatchOfObjects( +// getRequestFactory() +// .newListNextBatchOfObjectsRequest( +// prevResult.getV1()))) + + //TODO: V1 does not seem to have a way to paginate? + return S3ListResult.v1(s3V2.listObjects(request.getV1())); } else { - request.getV2().setContinuationToken(prevResult.getV2() - .getNextContinuationToken()); - return S3ListResult.v2(s3.listObjectsV2(request.getV2())); + + System.out.println("CONTINUING REQUEST HERE"); + + //TODO: SDKV2 now supports automatic pagination, can we use that here instead? 
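On the pagination TODO above: the v2 SDK does provide response paginators. S3Client.listObjectsV2Paginator(...) returns a ListObjectsV2Iterable that lazily issues the continuation requests itself, so the manual continuationToken plumbing could in principle collapse into a plain iteration; folding that into the S3ListRequest/S3ListResult abstraction is the refactoring the TODO alludes to. Note also that v2 requests are immutable, which is why the continuation path has to rebuild the request rather than mutate it. A minimal sketch, with placeholder bucket and prefix:

```java
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.S3Object;
import software.amazon.awssdk.services.s3.paginators.ListObjectsV2Iterable;

public class PaginatorSketch {
  public static void main(String[] args) {
    S3Client s3 = S3Client.create();
    ListObjectsV2Request request = ListObjectsV2Request.builder()
        .bucket("example-bucket")   // placeholder
        .prefix("dir/")             // placeholder
        .maxKeys(1000)
        .build();
    // The iterable fetches the next page, continuation token included,
    // each time the previous page is exhausted.
    ListObjectsV2Iterable pages = s3.listObjectsV2Paginator(request);
    for (S3Object object : pages.contents()) {
      System.out.println(object.key());
    }
  }
}
```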
+ ListObjectsV2Request prevRequest = request.getV2(); + + ListObjectsV2Request.Builder newRequestBuilder = + getRequestFactory().newListObjectsV2RequestBuilder(prevRequest.prefix(), + prevRequest.delimiter(), prevRequest.maxKeys()); + + newRequestBuilder.continuationToken(prevResult.getV2().nextContinuationToken()); + + return S3ListResult.v2(s3V2.listObjectsV2(newRequestBuilder.build())); } })); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 246546dbe9153..dd77bbe03ecd1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -59,6 +59,7 @@ import software.amazon.awssdk.core.retry.RetryPolicy; import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.services.s3.model.S3Object; import javax.annotation.Nullable; import java.io.Closeable; @@ -479,20 +480,20 @@ public static String stringify(AmazonS3Exception e) { * @return a status entry */ public static S3AFileStatus createFileStatus(Path keyPath, - S3ObjectSummary summary, + S3Object summary, long blockSize, String owner, String eTag, String versionId, boolean isCSEEnabled) { - long size = summary.getSize(); + long size = summary.size(); // check if cse is enabled; strip out constant padding length. if (isCSEEnabled && size >= CSE_PADDING_LENGTH) { size -= CSE_PADDING_LENGTH; } return createFileStatus(keyPath, - objectRepresentsDirectory(summary.getKey()), - size, summary.getLastModified(), blockSize, owner, eTag, versionId); + objectRepresentsDirectory(summary.key()), + size, Date.from(summary.lastModified()), blockSize, owner, eTag, versionId); } /** @@ -937,10 +938,10 @@ static String lookupPassword(Configuration conf, String key, String defVal) * @param summary summary object * @return string value */ - public static String stringify(S3ObjectSummary summary) { - StringBuilder builder = new StringBuilder(summary.getKey().length() + 100); - builder.append(summary.getKey()).append(' '); - builder.append("size=").append(summary.getSize()); + public static String stringify(S3Object summary) { + StringBuilder builder = new StringBuilder(summary.key().length() + 100); + builder.append(summary.key()).append(' '); + builder.append("size=").append(summary.size()); return builder.toString(); } @@ -1242,12 +1243,6 @@ public static ClientConfiguration createAwsConf(Configuration conf, return awsConf; } - /*** - * - * - * @param conf - * @return - */ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { ClientOverrideConfiguration.Builder overrideConfigBuilder = ClientOverrideConfiguration.builder(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java index d51211516f251..c729f3de15f08 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListRequest.java @@ -18,8 +18,8 @@ package org.apache.hadoop.fs.s3a; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import 
software.amazon.awssdk.services.s3.model.ListObjectsV2Request; /** * API version-independent container for S3 List requests. @@ -78,14 +78,14 @@ public ListObjectsV2Request getV2() { public String toString() { if (isV1()) { return String.format(DESCRIPTION, - v1Request.getBucketName(), v1Request.getPrefix(), - v1Request.getDelimiter(), v1Request.getMaxKeys(), - v1Request.isRequesterPays()); + v1Request.bucket(), v1Request.prefix(), + v1Request.delimiter(), v1Request.maxKeys(), + v1Request.requestPayerAsString()); } else { return String.format(DESCRIPTION, - v2Request.getBucketName(), v2Request.getPrefix(), - v2Request.getDelimiter(), v2Request.getMaxKeys(), - v2Request.isRequesterPays()); + v2Request.bucket(), v2Request.prefix(), + v2Request.delimiter(), v2Request.maxKeys(), + v2Request.requestPayerAsString()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java index 69c42bfe1471a..cbbcff0c69b69 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java @@ -22,19 +22,21 @@ import java.util.List; import java.util.stream.Collectors; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectListing; -import com.amazonaws.services.s3.model.S3ObjectSummary; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.S3Object; + import org.slf4j.Logger; /** * API version-independent container for S3 List responses. 
*/ public class S3ListResult { - private ObjectListing v1Result; - private ListObjectsV2Result v2Result; + private ListObjectsResponse v1Result; + private ListObjectsV2Response v2Result; - protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) { + protected S3ListResult(ListObjectsResponse v1, ListObjectsV2Response v2) { v1Result = v1; v2Result = v2; } @@ -44,7 +46,7 @@ protected S3ListResult(ObjectListing v1, ListObjectsV2Result v2) { * @param result v1 result * @return new list result container */ - public static S3ListResult v1(ObjectListing result) { + public static S3ListResult v1(ListObjectsResponse result) { return new S3ListResult(result, null); } @@ -53,7 +55,7 @@ public static S3ListResult v1(ObjectListing result) { * @param result v2 result * @return new list result container */ - public static S3ListResult v2(ListObjectsV2Result result) { + public static S3ListResult v2(ListObjectsV2Response result) { return new S3ListResult(null, result); } @@ -65,19 +67,19 @@ public boolean isV1() { return v1Result != null; } - public ObjectListing getV1() { + public ListObjectsResponse getV1() { return v1Result; } - public ListObjectsV2Result getV2() { + public ListObjectsV2Response getV2() { return v2Result; } - public List getObjectSummaries() { + public List getObjectSummaries() { if (isV1()) { - return v1Result.getObjectSummaries(); + return v1Result.contents(); } else { - return v2Result.getObjectSummaries(); + return v2Result.contents(); } } @@ -89,11 +91,11 @@ public boolean isTruncated() { } } - public List getCommonPrefixes() { + public List getCommonPrefixes() { if (isV1()) { - return v1Result.getCommonPrefixes(); + return v1Result.commonPrefixes(); } else { - return v2Result.getCommonPrefixes(); + return v2Result.commonPrefixes(); } } @@ -103,7 +105,7 @@ public List getCommonPrefixes() { */ private List objectSummaryKeys() { return getObjectSummaries().stream() - .map(S3ObjectSummary::getKey) + .map(s3Object -> s3Object.key()) .collect(Collectors.toList()); } @@ -138,15 +140,15 @@ public boolean representsEmptyDirectory( * @param log log to use */ public void logAtDebug(Logger log) { - Collection prefixes = getCommonPrefixes(); - Collection summaries = getObjectSummaries(); + Collection prefixes = getCommonPrefixes(); + Collection summaries = getObjectSummaries(); log.debug("Prefix count = {}; object count={}", prefixes.size(), summaries.size()); - for (S3ObjectSummary summary : summaries) { - log.debug("Summary: {} {}", summary.getKey(), summary.getSize()); + for (S3Object summary : summaries) { + log.debug("Summary: {} {}", summary.key(), summary.size()); } - for (String prefix : prefixes) { - log.debug("Prefix: {}", prefix); + for (CommonPrefix prefix : prefixes) { + log.debug("Prefix: {}", prefix.prefix()); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 97a15d95132f4..f5b65f5c70929 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -35,8 +35,8 @@ import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; 
+import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PartETag; @@ -281,6 +281,10 @@ ListObjectsV2Request newListObjectsV2Request(String key, String delimiter, int maxKeys); + ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(String key, + String delimiter, + int maxKeys); + /** * Create a request to delete a single object. * @param key object to delete diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index a73e7199380fc..19713b73aa16c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -37,8 +37,6 @@ import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; import com.amazonaws.services.s3.model.ListNextBatchOfObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.PartETag; @@ -50,6 +48,8 @@ import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.Retries; @@ -529,14 +529,18 @@ public ListObjectsRequest newListObjectsV1Request( final String key, final String delimiter, final int maxKeys) { - ListObjectsRequest request = new ListObjectsRequest() - .withBucketName(bucket) - .withMaxKeys(maxKeys) - .withPrefix(key); + + ListObjectsRequest.Builder requestBuilder = + ListObjectsRequest.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); + if (delimiter != null) { - request.setDelimiter(delimiter); + requestBuilder.delimiter(delimiter); } - return prepareRequest(request); + + //TODO: add call to prepareRequest + return requestBuilder.build(); + + // return prepareRequest(request); } @Override @@ -550,17 +554,37 @@ public ListObjectsV2Request newListObjectsV2Request( final String key, final String delimiter, final int maxKeys) { - final ListObjectsV2Request request = new ListObjectsV2Request() - .withBucketName(bucket) - .withMaxKeys(maxKeys) - .withPrefix(key); + + final ListObjectsV2Request.Builder requestBuilder = + newListObjectsV2RequestBuilder(key, delimiter, maxKeys); + if (delimiter != null) { - request.setDelimiter(delimiter); + requestBuilder.delimiter(delimiter); } - return prepareRequest(request); + + //TODO: add call to prepareRequest + return requestBuilder.build(); + + // return prepareRequest(request); } @Override + public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder( + final String key, + final String delimiter, + final int maxKeys) { + + final ListObjectsV2Request.Builder requestBuilder = + ListObjectsV2Request.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); + + if (delimiter != null) { + requestBuilder.delimiter(delimiter); + } + + return 
requestBuilder; + } + + @Override public DeleteObjectRequest newDeleteObjectRequest(String key) { return prepareRequest(new DeleteObjectRequest(bucket, key)); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index bd121ba2728eb..5b53b3d417161 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -26,6 +26,7 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.MultipartUploadListing; import com.amazonaws.services.s3.model.Region; +import software.amazon.awssdk.services.s3.S3Client; /** * An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3} @@ -50,4 +51,11 @@ public AmazonS3 createS3Client(URI uri, .thenReturn(Region.US_West.toString()); return s3; } -} + + //TODO: This is incomplete, add in mocks as we update operations + @Override + public S3Client createS3ClientV2(URI uri, final S3ClientCreationParameters parameters) { + S3Client s3 = mock(S3Client.class); + return s3; + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index 9bc3aef83aacb..bd9f29b7f6d67 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -169,9 +169,10 @@ private void createFactoryObjects(RequestFactory factory) { a(factory.newGetObjectRequest(path)); a(factory.newGetObjectMetadataRequest(path)); a(factory.newListMultipartUploadsRequest(path)); - a(factory.newListObjectsV1Request(path, "/", 1)); + //TODO: Commenting out for now, new request extends AwsRequest, this can be updated once all request factory operations are updated. 
+ //a(factory.newListObjectsV1Request(path, "/", 1)); a(factory.newListNextBatchOfObjectsRequest(new ObjectListing())); - a(factory.newListObjectsV2Request(path, "/", 1)); + // a(factory.newListObjectsV2Request(path, "/", 1)); a(factory.newMultipartUploadRequest(path)); File srcfile = new File("/tmp/a"); a(factory.newPutObjectRequest(path, From 226d4b44baba5d4bac8ffe01d795d8073aeff52a Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Wed, 25 May 2022 17:36:51 +0100 Subject: [PATCH 03/24] use nextMarker to paginate listV1 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 30 +++++++------------ .../hadoop/fs/s3a/api/RequestFactory.java | 4 --- .../fs/s3a/impl/RequestFactoryImpl.java | 28 ++++------------- 3 files changed, 15 insertions(+), 47 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 7092b51f0e214..ad9de4e996537 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2498,28 +2498,18 @@ protected S3ListResult continueListObjects(S3ListRequest request, OBJECT_CONTINUE_LIST_REQUEST, () -> { if (useListV1) { -// return S3ListResult.v1( -// s3.listNextBatchOfObjects( -// getRequestFactory() -// .newListNextBatchOfObjectsRequest( -// prevResult.getV1()))) + List + prevListResult = prevResult.getV1().contents(); - //TODO: V1 does not seem to have a way to paginate? - return S3ListResult.v1(s3V2.listObjects(request.getV1())); - } else { - - System.out.println("CONTINUING REQUEST HERE"); - - //TODO: SDKV2 now supports automatic pagination, can we use that here instead? - ListObjectsV2Request prevRequest = request.getV2(); + String nextMarker = prevListResult.get(prevListResult.size() - 1).key(); - ListObjectsV2Request.Builder newRequestBuilder = - getRequestFactory().newListObjectsV2RequestBuilder(prevRequest.prefix(), - prevRequest.delimiter(), prevRequest.maxKeys()); - - newRequestBuilder.continuationToken(prevResult.getV2().nextContinuationToken()); - - return S3ListResult.v2(s3V2.listObjectsV2(newRequestBuilder.build())); + return S3ListResult.v1(s3V2.listObjects( + request.getV1().toBuilder().marker(nextMarker).build())); + } else { + //TODO: SDKV2 now supports automatic pagination, using that here requires + // significant refactoring. Investigate performance benefits and if this is something we want to do. + return S3ListResult.v2(s3V2.listObjectsV2(request.getV2().toBuilder() + .continuationToken(prevResult.getV2().nextContinuationToken()).build())); } })); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index f5b65f5c70929..213759f044f6e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -281,10 +281,6 @@ ListObjectsV2Request newListObjectsV2Request(String key, String delimiter, int maxKeys); - ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(String key, - String delimiter, - int maxKeys); - /** * Create a request to delete a single object. 
* @param key object to delete diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 19713b73aa16c..ca9af2b4fe916 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -49,6 +49,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import org.apache.hadoop.fs.PathIOException; @@ -536,10 +537,7 @@ public ListObjectsRequest newListObjectsV1Request( if (delimiter != null) { requestBuilder.delimiter(delimiter); } - - //TODO: add call to prepareRequest return requestBuilder.build(); - // return prepareRequest(request); } @@ -556,35 +554,19 @@ public ListObjectsV2Request newListObjectsV2Request( final int maxKeys) { final ListObjectsV2Request.Builder requestBuilder = - newListObjectsV2RequestBuilder(key, delimiter, maxKeys); + ListObjectsV2Request.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); if (delimiter != null) { requestBuilder.delimiter(delimiter); } - //TODO: add call to prepareRequest + //TODO: add call to prepareRequest, not added for now as PrepareRequest is a functional interface, + // uses AmazonWebServiceRequest, SDKV2 uses AwsRequest. What will prepare request actually do? + // Requests are no longer mutable return requestBuilder.build(); - - // return prepareRequest(request); } @Override - public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder( - final String key, - final String delimiter, - final int maxKeys) { - - final ListObjectsV2Request.Builder requestBuilder = - ListObjectsV2Request.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); - - if (delimiter != null) { - requestBuilder.delimiter(delimiter); - } - - return requestBuilder; - } - - @Override public DeleteObjectRequest newDeleteObjectRequest(String key) { return prepareRequest(new DeleteObjectRequest(bucket, key)); } From 504b58711bb423492bc9a79267ffeb5a285ff871 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 26 May 2022 10:05:29 +0100 Subject: [PATCH 04/24] renames objectSummary to s3Object --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 1 + .../org/apache/hadoop/fs/s3a/Listing.java | 44 +++++++++---------- .../apache/hadoop/fs/s3a/S3ListResult.java | 23 +++++----- .../fs/s3a/impl/RequestFactoryImpl.java | 3 +- 4 files changed, 36 insertions(+), 35 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 05d29e4bd5f83..11a81ac689d42 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -45,6 +45,7 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; import software.amazon.awssdk.core.retry.RetryPolicy; diff --git 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index dd715e22f3484..d32828d70f97e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -276,19 +276,19 @@ public S3ListRequest createListObjectsRequest(String key, } /** - * Interface to implement by the logic deciding whether to accept a summary + * Interface to implement by the logic deciding whether to accept a s3Object * entry or path as a valid file or directory. */ interface FileStatusAcceptor { /** - * Predicate to decide whether or not to accept a summary entry. + * Predicate to decide whether or not to accept a s3Object entry. * @param keyPath qualified path to the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated. */ - boolean accept(Path keyPath, S3Object summary); + boolean accept(Path keyPath, S3Object s3Object); /** * Predicate to decide whether or not to accept a prefix. @@ -442,21 +442,21 @@ private boolean buildNextStatusBatch(S3ListResult objects) { int added = 0, ignored = 0; // list to fill in with results. Initial size will be list maximum. List stats = new ArrayList<>( - objects.getObjectSummaries().size() + + objects.getS3Objects().size() + objects.getCommonPrefixes().size()); // objects - for (S3Object summary : objects.getObjectSummaries()) { - String key = summary.key(); + for (S3Object s3Object : objects.getS3Objects()) { + String key = s3Object.key(); Path keyPath = getStoreContext().getContextAccessors().keyToPath(key); if (LOG.isDebugEnabled()) { - LOG.debug("{}: {}", keyPath, stringify(summary)); + LOG.debug("{}: {}", keyPath, stringify(s3Object)); } // Skip over keys that are ourselves and old S3N _$folder$ files - if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) { - S3AFileStatus status = createFileStatus(keyPath, summary, + if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) { + S3AFileStatus status = createFileStatus(keyPath, s3Object, listingOperationCallbacks.getDefaultBlockSize(keyPath), getStoreContext().getUsername(), - summary.eTag(), null, isCSEEnabled); + s3Object.eTag(), null, isCSEEnabled); LOG.debug("Adding: {}", status); stats.add(status); added++; @@ -714,18 +714,18 @@ public AcceptFilesOnly(Path qualifiedPath) { } /** - * Reject a summary entry if the key path is the qualified Path, or + * Reject a s3Object entry if the key path is the qualified Path, or * it ends with {@code "_$folder$"}. * @param keyPath key path of the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated. 
*/ @Override - public boolean accept(Path keyPath, S3Object summary) { + public boolean accept(Path keyPath, S3Object s3Object) { return !keyPath.equals(qualifiedPath) - && !summary.key().endsWith(S3N_FOLDER_SUFFIX) - && !objectRepresentsDirectory(summary.key()); + && !s3Object.key().endsWith(S3N_FOLDER_SUFFIX) + && !objectRepresentsDirectory(s3Object.key()); } /** @@ -750,8 +750,8 @@ public boolean accept(FileStatus status) { */ static class AcceptAllButS3nDirs implements FileStatusAcceptor { - public boolean accept(Path keyPath, S3Object summary) { - return !summary.key().endsWith(S3N_FOLDER_SUFFIX); + public boolean accept(Path keyPath, S3Object s3Object) { + return !s3Object.key().endsWith(S3N_FOLDER_SUFFIX); } public boolean accept(Path keyPath, String prefix) { @@ -782,17 +782,17 @@ public AcceptAllButSelfAndS3nDirs(Path qualifiedPath) { } /** - * Reject a summary entry if the key path is the qualified Path, or + * Reject a s3Object entry if the key path is the qualified Path, or * it ends with {@code "_$folder$"}. * @param keyPath key path of the entry - * @param summary summary entry + * @param s3Object s3Object entry * @return true if the entry is accepted (i.e. that a status entry * should be generated.) */ @Override - public boolean accept(Path keyPath, S3Object summary) { + public boolean accept(Path keyPath, S3Object s3Object) { return !keyPath.equals(qualifiedPath) && - !summary.key().endsWith(S3N_FOLDER_SUFFIX); + !s3Object.key().endsWith(S3N_FOLDER_SUFFIX); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java index cbbcff0c69b69..c77311211abcb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ListResult.java @@ -75,7 +75,7 @@ public ListObjectsV2Response getV2() { return v2Result; } - public List getObjectSummaries() { + public List getS3Objects() { if (isV1()) { return v1Result.contents(); } else { @@ -100,12 +100,12 @@ public List getCommonPrefixes() { } /** - * Get the list of keys in the object summary. + * Get the list of keys in the list result. * @return a possibly empty list */ - private List objectSummaryKeys() { - return getObjectSummaries().stream() - .map(s3Object -> s3Object.key()) + private List objectKeys() { + return getS3Objects().stream() + .map(S3Object::key) .collect(Collectors.toList()); } @@ -114,9 +114,8 @@ private List objectSummaryKeys() { * @return true if the result is non-empty */ public boolean hasPrefixesOrObjects() { - return !(getCommonPrefixes()).isEmpty() - || !getObjectSummaries().isEmpty(); + || !getS3Objects().isEmpty(); } /** @@ -130,7 +129,7 @@ public boolean representsEmptyDirectory( // no children. 
// So the listing must contain the marker entry only as an object, // and prefixes is null - List keys = objectSummaryKeys(); + List keys = objectKeys(); return keys.size() == 1 && keys.contains(dirKey) && getCommonPrefixes().isEmpty(); } @@ -141,11 +140,11 @@ public boolean representsEmptyDirectory( */ public void logAtDebug(Logger log) { Collection prefixes = getCommonPrefixes(); - Collection summaries = getObjectSummaries(); + Collection s3Objects = getS3Objects(); log.debug("Prefix count = {}; object count={}", - prefixes.size(), summaries.size()); - for (S3Object summary : summaries) { - log.debug("Summary: {} {}", summary.key(), summary.size()); + prefixes.size(), s3Objects.size()); + for (S3Object s3Object : s3Objects) { + log.debug("Summary: {} {}", s3Object.key(), s3Object.size()); } for (CommonPrefix prefix : prefixes) { log.debug("Prefix: {}", prefix.prefix()); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index ca9af2b4fe916..1593c6649ff70 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -537,8 +537,9 @@ public ListObjectsRequest newListObjectsV1Request( if (delimiter != null) { requestBuilder.delimiter(delimiter); } + + //TODO: add call to prepareRequest return requestBuilder.build(); - // return prepareRequest(request); } @Override From 0c39c242d6c9f41a9642000b9a189681e770bb06 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 26 May 2022 10:26:10 +0100 Subject: [PATCH 05/24] formatting changes --- .../org/apache/hadoop/fs/s3a/Listing.java | 2 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 8 ++++---- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 20 +++++++++---------- .../fs/s3a/impl/RequestFactoryImpl.java | 3 +-- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index d32828d70f97e..a396e26355cd4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -276,7 +276,7 @@ public S3ListRequest createListObjectsRequest(String key, } /** - * Interface to implement by the logic deciding whether to accept a s3Object + * Interface to implement the logic deciding whether to accept a s3Object * entry or path as a valid file or directory. 
*/ interface FileStatusAcceptor { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index ad9de4e996537..84b05c483e2fc 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -74,13 +74,12 @@ import com.amazonaws.services.s3.transfer.model.UploadResult; import com.amazonaws.event.ProgressListener; - -import software.amazon.awssdk.services.s3.model.ListObjectsRequest; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; @@ -2498,6 +2497,7 @@ protected S3ListResult continueListObjects(S3ListRequest request, OBJECT_CONTINUE_LIST_REQUEST, () -> { if (useListV1) { + //TODO: Update to List once we can get rid of the other S3Object import List prevListResult = prevResult.getV1().contents(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index dd77bbe03ecd1..89edec6207cd8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -471,7 +471,7 @@ public static String stringify(AmazonS3Exception e) { /** * Create a files status instance from a listing. * @param keyPath path to entry - * @param summary summary from AWS + * @param s3Object s3Object entry * @param blockSize block size to declare. * @param owner owner of the file * @param eTag S3 object eTag or null if unavailable @@ -480,20 +480,20 @@ public static String stringify(AmazonS3Exception e) { * @return a status entry */ public static S3AFileStatus createFileStatus(Path keyPath, - S3Object summary, + S3Object s3Object, long blockSize, String owner, String eTag, String versionId, boolean isCSEEnabled) { - long size = summary.size(); + long size = s3Object.size(); // check if cse is enabled; strip out constant padding length. if (isCSEEnabled && size >= CSE_PADDING_LENGTH) { size -= CSE_PADDING_LENGTH; } return createFileStatus(keyPath, - objectRepresentsDirectory(summary.key()), - size, Date.from(summary.lastModified()), blockSize, owner, eTag, versionId); + objectRepresentsDirectory(s3Object.key()), + size, Date.from(s3Object.lastModified()), blockSize, owner, eTag, versionId); } /** @@ -935,13 +935,13 @@ static String lookupPassword(Configuration conf, String key, String defVal) /** * String information about a summary entry for debug messages. 
- * @param summary summary object + * @param s3Object s3Object entry * @return string value */ - public static String stringify(S3Object summary) { - StringBuilder builder = new StringBuilder(summary.key().length() + 100); - builder.append(summary.key()).append(' '); - builder.append("size=").append(summary.size()); + public static String stringify(S3Object s3Object) { + StringBuilder builder = new StringBuilder(s3Object.key().length() + 100); + builder.append(s3Object.key()).append(' '); + builder.append("size=").append(s3Object.size()); return builder.toString(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 1593c6649ff70..362fc9d35eeee 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -562,8 +562,7 @@ public ListObjectsV2Request newListObjectsV2Request( } //TODO: add call to prepareRequest, not added for now as PrepareRequest is a functional interface, - // uses AmazonWebServiceRequest, SDKV2 uses AwsRequest. What will prepare request actually do? - // Requests are no longer mutable + // uses AmazonWebServiceRequest, SDKV2 uses AwsRequest. return requestBuilder.build(); } From 9ce7399f0127900b5a2784e2ac9b048e9a4a6660 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Fri, 27 May 2022 15:02:37 +0100 Subject: [PATCH 06/24] fixes some failing tests --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 8 ++++++- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 21 +++++++++++++++---- .../hadoop/fs/s3a/impl/ChangeTracker.java | 5 ++++- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 4 ++++ 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 84b05c483e2fc..ed6c3bb5f61b9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2501,7 +2501,13 @@ protected S3ListResult continueListObjects(S3ListRequest request, List prevListResult = prevResult.getV1().contents(); - String nextMarker = prevListResult.get(prevListResult.size() - 1).key(); + // Next markers are only present when a delimiter is specified. 
String nextMarker; + if (prevResult.getV1().nextMarker() != null) { + nextMarker = prevResult.getV1().nextMarker(); + } else { + nextMarker = prevListResult.get(prevListResult.size() - 1).key(); + } return S3ListResult.v1(s3V2.listObjects( request.getV1().toBuilder().marker(nextMarker).build())); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 89edec6207cd8..eb408d8303f1b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -52,6 +52,8 @@ import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.Lists; +import org.apache.http.client.utils.URIBuilder; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; @@ -74,6 +76,7 @@ import java.lang.reflect.Modifier; import java.net.SocketTimeoutException; import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.AccessDeniedException; import java.time.Duration; import java.util.ArrayList; @@ -1314,15 +1317,14 @@ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configu if (!proxyHost.isEmpty()) { if (proxyPort >= 0) { - // TODO: Check how URI should be created - proxyConfigBuilder.endpoint(URI.create(proxyHost + ":" + proxyPort)); + proxyConfigBuilder.endpoint(buildURI(proxyHost, proxyPort)); } else { if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) { LOG.warn("Proxy host set without port. Using HTTPS default 443"); - proxyConfigBuilder.endpoint(URI.create(proxyHost + ":" + 443)); + proxyConfigBuilder.endpoint(buildURI(proxyHost, 443)); } else { LOG.warn("Proxy host set without port. Using HTTP default 80"); - proxyConfigBuilder.endpoint(URI.create(proxyHost + ":" + 80)); + proxyConfigBuilder.endpoint(buildURI(proxyHost, 80)); } } final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME, @@ -1349,6 +1351,17 @@ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configu return proxyConfigBuilder; } + private static URI buildURI(String host, int port) { + try { + return new URIBuilder().setHost(host).setPort(port).build(); + } catch (URISyntaxException e) { + String msg = + "Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + } + /** * Initializes all AWS SDK settings related to connection management. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index e7dd75c581131..8b8cd0a9588c9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -281,7 +281,10 @@ private void processNewRevision(final String newRevisionId, LOG.debug("Setting revision ID for object at {}: {}", uri, newRevisionId); revisionId = newRevisionId; - } else if (!revisionId.equals(newRevisionId)) { + //TODO: Remove this. This is a temporary fix to prevent tests from failing. Needed because + // SDKV2 returns the etag with quotation marks while V1 stripped them, so this equality + // check fails. The regex removes the quotation marks.
+ } else if (!revisionId.replaceAll("^\"|\"$", "").equals(newRevisionId)) { LOG.debug("Revision ID changed from {} to {}", revisionId, newRevisionId); ImmutablePair pair = diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 61d12747c0a58..89951177076c1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -127,6 +127,8 @@ public void testEndpoint() throws Exception { @Test public void testProxyConnection() throws Exception { + // FIXME Fails because SDKV2 throws SdkException, + // will work once errors are handled and translated properly. useFailFastConfiguration(); conf.set(Constants.PROXY_HOST, "127.0.0.1"); conf.setInt(Constants.PROXY_PORT, 1); @@ -182,6 +184,8 @@ public void testProxyPortWithoutHost() throws Exception { @Test public void testAutomaticProxyPortSelection() throws Exception { + // FIXME Fails because SDKV2 throws SdkException, + // will work once errors are handled and translated properly. useFailFastConfiguration(); conf.unset(Constants.PROXY_PORT); conf.set(Constants.PROXY_HOST, "127.0.0.1"); From 34b77e3d939ff9086596d9ca4a5d646d6f36486a Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Sat, 28 May 2022 11:07:10 +0100 Subject: [PATCH 07/24] fixes unit test --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 6 +++ .../hadoop/fs/s3a/AbstractS3AMockTest.java | 3 ++ .../hadoop/fs/s3a/TestS3AGetFileStatus.java | 51 ++++++++++--------- .../org.mockito.plugins.MockMaker | 1 + 4 files changed, 36 insertions(+), 25 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index ed6c3bb5f61b9..c1b290045d3a4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1153,6 +1153,12 @@ public AmazonS3 getAmazonS3ClientForTesting(String reason) { return s3; } + @VisibleForTesting + public S3Client getAmazonS3V2ClientForTesting(String reason) { + LOG.warn("Access to S3A client requested, reason {}", reason); + return s3V2; + } + /** * Set the client -used in mocking tests to force in a different client. * @param client client. 
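Note: the accessor added above is what lets the unit tests swap in a stubbed V2 client. A minimal sketch of the intended usage, illustrative only (plain Mockito calls, relying on the inline mock maker this patch enables; not part of the diff below):

    // Fetch the mocked V2 client wired in by MockS3ClientFactory and stub a
    // list call so it returns an empty page.
    S3Client s3V2 = fs.getAmazonS3V2ClientForTesting("mocking");
    when(s3V2.listObjectsV2(any(ListObjectsV2Request.class)))
        .thenReturn(ListObjectsV2Response.builder().build());

The test changes below follow exactly this pattern, via AbstractS3AMockTest.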
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index a80a24881fa74..1269564c8fa04 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -31,6 +31,7 @@ import org.junit.Before; import org.junit.Rule; import org.junit.rules.ExpectedException; +import software.amazon.awssdk.services.s3.S3Client; /** * Abstract base class for S3A unit tests using a mock S3 client and a null @@ -50,6 +51,7 @@ public abstract class AbstractS3AMockTest { protected S3AFileSystem fs; protected AmazonS3 s3; + protected S3Client s3V2; @Before public void setup() throws Exception { @@ -60,6 +62,7 @@ public void setup() throws Exception { conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); fs.initialize(uri, conf); s3 = fs.getAmazonS3ClientForTesting("mocking"); + s3V2 = fs.getAmazonS3V2ClientForTesting("mocking"); } public Configuration createConfiguration() { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 34a275b580f25..97928eb5f8bf4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -25,17 +25,13 @@ import static org.mockito.Mockito.when; import java.io.FileNotFoundException; +import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.List; import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.ListObjectsRequest; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3ObjectSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -43,6 +39,12 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Test; import org.mockito.ArgumentMatcher; +import software.amazon.awssdk.services.s3.model.CommonPrefix; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Response; +import software.amazon.awssdk.services.s3.model.S3Object; /** * S3A tests for getFileStatus using mock S3 client. 
@@ -77,14 +79,12 @@ public void testFakeDirectory() throws Exception { when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); String keyDir = key + "/"; - ListObjectsV2Result listResult = new ListObjectsV2Result(); - S3ObjectSummary objectSummary = new S3ObjectSummary(); - objectSummary.setKey(keyDir); - objectSummary.setSize(0L); - listResult.getObjectSummaries().add(objectSummary); - when(s3.listObjectsV2(argThat( + List s3Objects = new ArrayList<>(1); + s3Objects.add(S3Object.builder().key(keyDir).size(0L).build()); + ListObjectsV2Response listObjectsV2Response = ListObjectsV2Response.builder().contents(s3Objects).build(); + when(s3V2.listObjectsV2(argThat( matchListV2Request(BUCKET, keyDir)) - )).thenReturn(listResult); + )).thenReturn(listObjectsV2Response); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -100,7 +100,8 @@ public void testImplicitDirectory() throws Exception { when(s3.getObjectMetadata(argThat( correctGetMetadataRequest(BUCKET, key + "/")) )).thenThrow(NOT_FOUND); - setupListMocks(Collections.singletonList("dir/"), Collections.emptyList()); + setupListMocks(Collections.singletonList(CommonPrefix.builder().prefix("dir/").build()), + Collections.emptyList()); FileStatus stat = fs.getFileStatus(path); assertNotNull(stat); assertEquals(fs.makeQualified(path), stat.getPath()); @@ -142,20 +143,20 @@ public void testNotFound() throws Exception { fs.getFileStatus(path); } - private void setupListMocks(List prefixes, - List summaries) { + private void setupListMocks(List prefixes, + List s3Objects) { // V1 list API mock - ObjectListing objects = mock(ObjectListing.class); - when(objects.getCommonPrefixes()).thenReturn(prefixes); - when(objects.getObjectSummaries()).thenReturn(summaries); - when(s3.listObjects(any(ListObjectsRequest.class))).thenReturn(objects); + ListObjectsResponse v1Response = mock(ListObjectsResponse.class); + when(v1Response.commonPrefixes()).thenReturn(prefixes); + when(v1Response.contents()).thenReturn(s3Objects); + when(s3V2.listObjects(any(ListObjectsRequest.class))).thenReturn(v1Response); // V2 list API mock - ListObjectsV2Result v2Result = mock(ListObjectsV2Result.class); - when(v2Result.getCommonPrefixes()).thenReturn(prefixes); - when(v2Result.getObjectSummaries()).thenReturn(summaries); - when(s3.listObjectsV2(any(ListObjectsV2Request.class))) + ListObjectsV2Response v2Result = mock(ListObjectsV2Response.class); + when(v2Result.commonPrefixes()).thenReturn(prefixes); + when(v2Result.contents()).thenReturn(s3Objects); + when(s3V2.listObjectsV2(any(software.amazon.awssdk.services.s3.model.ListObjectsV2Request.class))) .thenReturn(v2Result); } @@ -170,8 +171,8 @@ private ArgumentMatcher matchListV2Request( String bucket, String key) { return (ListObjectsV2Request request) -> { return request != null - && request.getBucketName().equals(bucket) - && request.getPrefix().equals(key); + && request.bucket().equals(bucket) + && request.prefix().equals(key); }; } diff --git a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 0000000000000..ca6ee9cea8ec1 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1 @@ +mock-maker-inline \ No newline at end of file From add26930faa86e7fcfa440551a386209fd27c08c Mon Sep 17 
00:00:00 2001 From: Ahmar Suhail Date: Mon, 30 May 2022 09:53:39 +0100 Subject: [PATCH 08/24] adds in javadocs --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 8 ++++ .../org/apache/hadoop/fs/s3a/S3AUtils.java | 42 ++++++++++++++++++- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 4 -- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c1b290045d3a4..e7ed157e86a90 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1147,12 +1147,20 @@ AmazonS3 getAmazonS3Client() { * @param reason a justification for requesting access. * @return AmazonS3Client */ + // TODO: Remove when we remove S3V1 client @VisibleForTesting public AmazonS3 getAmazonS3ClientForTesting(String reason) { LOG.warn("Access to S3A client requested, reason {}", reason); return s3; } + /** + * Returns the S3 client used by this filesystem. + * Warning: this must only be used for testing, as it bypasses core + * S3A operations. + * @param reason a justification for requesting access. + * @return S3Client + */ @VisibleForTesting public S3Client getAmazonS3V2ClientForTesting(String reason) { LOG.warn("Access to S3A client requested, reason {}", reason); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index eb408d8303f1b..3bfc2cdea40c8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -1274,7 +1274,13 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf return overrideConfigBuilder; } - + + /** + * Configures the http client + * + * @param conf The Hadoop configuration + * @return Http client builder + */ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) { ApacheHttpClient.Builder httpClientBuilder = ApacheHttpClient.builder(); @@ -1288,7 +1294,7 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con httpClientBuilder.socketTimeout(Duration.ofSeconds(intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0))); - // NOT_SUPPORTED: The protocol is now HTTPS by default, + // The protocol is now HTTPS by default, // and can only be modified by setting an HTTP endpoint on the client builder. 
//TODO: See where this logic should go now //initProtocolSettings(conf, awsConf); @@ -1296,6 +1302,12 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con return httpClientBuilder; } + /** + * Configures the retry policy + * + * @param conf The Hadoop configuration + * @return Retry policy builder + */ public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { RetryPolicy.Builder retryPolicyBuilder = @@ -1307,6 +1319,14 @@ public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { return retryPolicyBuilder; } + /** + * Configures the proxy + * + * @param conf The Hadoop configuration + * @param bucket Optional bucket to use to look up per-bucket proxy secrets + * @return Proxy configuration builder + * @throws IOException + */ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configuration conf, String bucket) throws IOException { @@ -1351,6 +1371,13 @@ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configu return proxyConfigBuilder; } + /*** + * Builds a URI, throws an IllegalArgumentException in case of errors. + * + * @param host proxy host + * @param port proxy port + * @return + */ private static URI buildURI(String host, int port) { try { return new URIBuilder().setHost(host).setPort(port).build(); @@ -1497,6 +1524,7 @@ public static void initProxySupport(Configuration conf, * @param conf Hadoop configuration * @param awsConf AWS SDK configuration to update */ + // TODO: Remove when we remove S3V1 client private static void initUserAgent(Configuration conf, ClientConfiguration awsConf) { String userAgent = "Hadoop " + VersionInfo.getVersion(); @@ -1508,6 +1536,16 @@ private static void initUserAgent(Configuration conf, awsConf.setUserAgentPrefix(userAgent); } + /** + * Initializes the User-Agent header to send in HTTP requests to AWS + * services. We always include the Hadoop version number. The user also + * may set an optional custom prefix to put in front of the Hadoop version + * number. The AWS SDK internally appends its own information, which seems + * to include the AWS SDK version, OS and JVM version. + * + * @param conf Hadoop configuration + * @param awsConf AWS SDK configuration to update + */ private static void initUserAgentV2(Configuration conf, ClientOverrideConfiguration.Builder clientConfig) { String userAgent = "Hadoop " + VersionInfo.getVersion(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 89951177076c1..61d12747c0a58 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -127,8 +127,6 @@ public void testEndpoint() throws Exception { @Test public void testProxyConnection() throws Exception { - // FIXME Fails because SDKV2 throws SdkException, - // will work once errors are handled and translated properly. useFailFastConfiguration(); conf.set(Constants.PROXY_HOST, "127.0.0.1"); conf.setInt(Constants.PROXY_PORT, 1); @@ -184,8 +182,6 @@ public void testProxyPortWithoutHost() throws Exception { @Test public void testAutomaticProxyPortSelection() throws Exception { - // FIXME Fails because SDKV2 throws SdkException, - // will work once errors are handled and translated properly. 
useFailFastConfiguration(); conf.unset(Constants.PROXY_PORT); conf.set(Constants.PROXY_HOST, "127.0.0.1"); From 4378a3d1510f6e1fe6ead561cac01259ae26eeeb Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Mon, 30 May 2022 09:55:12 +0100 Subject: [PATCH 09/24] fixes typo --- .../src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 3bfc2cdea40c8..d935e74d1b311 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -1544,7 +1544,7 @@ private static void initUserAgent(Configuration conf, * to include the AWS SDK version, OS and JVM version. * * @param conf Hadoop configuration - * @param awsConf AWS SDK configuration to update + * @param clientConfig AWS SDK configuration to update */ private static void initUserAgentV2(Configuration conf, ClientOverrideConfiguration.Builder clientConfig) { From 7f9ca6ce8bc84b21f49b39f65eada8c259921014 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 9 Jun 2022 10:12:02 +0100 Subject: [PATCH 10/24] formatting changes --- .../apache/hadoop/fs/s3a/AWSClientConfig.java | 213 ++++++++++++++++++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 23 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 166 -------------- .../hadoop/fs/s3a/MockS3ClientFactory.java | 11 +- 5 files changed, 232 insertions(+), 185 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java new file mode 100644 index 0000000000000..d5a0795c54c42 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -0,0 +1,213 @@ +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; +import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; +import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.apache.ProxyConfiguration; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.VersionInfo; +import org.apache.http.client.utils.URIBuilder; + +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ESTABLISH_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAXIMUM_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_MAX_ERROR_RETRIES; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SOCKET_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.MAX_ERROR_RETRIES; +import static 
org.apache.hadoop.fs.s3a.Constants.PROXY_DOMAIN; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_HOST; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_PASSWORD; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_PORT; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_USERNAME; +import static org.apache.hadoop.fs.s3a.Constants.PROXY_WORKSTATION; +import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; +import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; + +public final class AWSClientConfig { + private static final Logger LOG = LoggerFactory.getLogger(AWSClientConfig.class); + + + public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { + ClientOverrideConfiguration.Builder overrideConfigBuilder = + ClientOverrideConfiguration.builder(); + + long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, + DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + + if (requestTimeoutMillis > Integer.MAX_VALUE) { + LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead", + requestTimeoutMillis, Integer.MAX_VALUE); + requestTimeoutMillis = Integer.MAX_VALUE; + } + + if(requestTimeoutMillis > 0) { + overrideConfigBuilder.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); + } + + initUserAgent(conf, overrideConfigBuilder); + + // TODO: Look at signers. See issue https://github.com/aws/aws-sdk-java-v2/issues/1024 + // String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); + // if (!signerOverride.isEmpty()) { + // LOG.debug("Signer override = {}", signerOverride); + // overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER) + // } + + return overrideConfigBuilder; + } + + /** + * Configures the http client + * + * @param conf The Hadoop configuration + * @return Http client builder + */ + public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) { + ApacheHttpClient.Builder httpClientBuilder = + ApacheHttpClient.builder(); + + httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, + DEFAULT_MAXIMUM_CONNECTIONS, 1)); + + httpClientBuilder.connectionTimeout(Duration.ofSeconds(S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, + DEFAULT_ESTABLISH_TIMEOUT, 0))); + + httpClientBuilder.socketTimeout(Duration.ofSeconds(S3AUtils.intOption(conf, SOCKET_TIMEOUT, + DEFAULT_SOCKET_TIMEOUT, 0))); + + // The protocol is now HTTPS by default, + // and can only be modified by setting an HTTP endpoint on the client builder. 
+ //TODO: See where this logic should go now
+ //initProtocolSettings(conf, awsConf);
+
+ return httpClientBuilder;
+ }
+
+ /**
+ * Configures the retry policy
+ *
+ * @param conf The Hadoop configuration
+ * @return Retry policy builder
+ */
+ public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) {
+
+ RetryPolicy.Builder retryPolicyBuilder = RetryPolicy.builder();
+
+ retryPolicyBuilder.numRetries(S3AUtils.intOption(conf, MAX_ERROR_RETRIES,
+ DEFAULT_MAX_ERROR_RETRIES, 0));
+
+ return retryPolicyBuilder;
+ }
+
+ /**
+ * Configures the proxy
+ *
+ * @param conf The Hadoop configuration
+ * @param bucket Optional bucket to use to look up per-bucket proxy secrets
+ * @return Proxy configuration builder
+ * @throws IOException
+ */
+ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configuration conf,
+ String bucket) throws IOException {
+
+ ProxyConfiguration.Builder proxyConfigBuilder = ProxyConfiguration.builder();
+
+ String proxyHost = conf.getTrimmed(PROXY_HOST, "");
+ int proxyPort = conf.getInt(PROXY_PORT, -1);
+
+ if (!proxyHost.isEmpty()) {
+ if (proxyPort >= 0) {
+ proxyConfigBuilder.endpoint(buildURI(proxyHost, proxyPort));
+ } else {
+ if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
+ LOG.warn("Proxy host set without port. Using HTTPS default 443");
+ proxyConfigBuilder.endpoint(buildURI(proxyHost, 443));
+ } else {
+ LOG.warn("Proxy host set without port. Using HTTP default 80");
+ proxyConfigBuilder.endpoint(buildURI(proxyHost, 80));
+ }
+ }
+ final String proxyUsername = S3AUtils.lookupPassword(bucket, conf, PROXY_USERNAME,
+ null, null);
+ final String proxyPassword = S3AUtils.lookupPassword(bucket, conf, PROXY_PASSWORD,
+ null, null);
+ if ((proxyUsername == null) != (proxyPassword == null)) {
+ String msg = "Proxy error: " + PROXY_USERNAME + " or " +
+ PROXY_PASSWORD + " set without the other.";
+ LOG.error(msg);
+ throw new IllegalArgumentException(msg);
+ }
+ proxyConfigBuilder.username(proxyUsername);
+ proxyConfigBuilder.password(proxyPassword);
+ proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN));
+ proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Using proxy server {}:{} as user {} with password {} on "
+ + "domain {} as workstation {}", proxyHost, proxyPort, proxyUsername, proxyPassword,
+ conf.getTrimmed(PROXY_DOMAIN), conf.getTrimmed(PROXY_WORKSTATION));
+ }
+ } else if (proxyPort >= 0) {
+ String msg =
+ "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
+ LOG.error(msg);
+ throw new IllegalArgumentException(msg);
+ }
+
+ return proxyConfigBuilder;
+ }
+
+ /**
+ * Builds a URI, throws an IllegalArgumentException in case of errors.
+ *
+ * @param host proxy host
+ * @param port proxy port
+ * @return uri with host & port
+ */
+ private static URI buildURI(String host, int port) {
+ try {
+ return new URIBuilder().setHost(host).setPort(port).build();
+ } catch (URISyntaxException e) {
+ String msg =
+ "Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT;
+ LOG.error(msg);
+ throw new IllegalArgumentException(msg);
+ }
+ }
+
+ /**
+ * Initializes the User-Agent header to send in HTTP requests to AWS
+ * services. We always include the Hadoop version number. The user also
+ * may set an optional custom prefix to put in front of the Hadoop version
+ * number. The AWS SDK internally appends its own information, which seems
+ * to include the AWS SDK version, OS and JVM version.
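+ * A resulting header might look like (illustrative example only):
+ * "MyPrefix, Hadoop 3.3.9, aws-sdk-java/2.17.196 Linux/5.4 OpenJDK_64-Bit_Server_VM/11".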
+ * + * @param conf Hadoop configuration + * @param clientConfig AWS SDK configuration to update + */ + private static void initUserAgent(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + String userAgent = "Hadoop " + VersionInfo.getVersion(); + String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); + if (!userAgentPrefix.isEmpty()) { + userAgent = userAgentPrefix + ", " + userAgent; + } + LOG.debug("Using User-Agent: {}", userAgent); + clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 11a81ac689d42..4e70de9b59354 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -185,20 +185,20 @@ public S3Client createS3ClientV2( bucket = uri.getHost(); final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = - S3AUtils.createClientConfigBuilder(conf); + AWSClientConfig.createClientConfigBuilder(conf); - final ApacheHttpClient.Builder httpClientBuilder = S3AUtils.createHttpClientBuilder(conf); + final ApacheHttpClient.Builder httpClientBuilder = + AWSClientConfig.createHttpClientBuilder(conf); - final RetryPolicy.Builder retryPolicyBuilder = S3AUtils.createRetryPolicyBuilder(conf); + final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); final ProxyConfiguration.Builder proxyConfigBuilder = - S3AUtils.createProxyConfigurationBuilder(conf, bucket); + AWSClientConfig.createProxyConfigurationBuilder(conf, bucket); S3ClientBuilder s3ClientBuilder = S3Client.builder(); // add any headers - parameters.getHeaders().forEach((h, v) -> - clientOverrideConfigBuilder.putHeader(h, v)); + parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); if (parameters.isRequesterPays()) { // All calls must acknowledge requester will pay via header. @@ -210,10 +210,13 @@ public S3Client createS3ClientV2( parameters.getUserAgentSuffix()); } - return s3ClientBuilder.overrideConfiguration( - clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()).build()) - .httpClientBuilder(httpClientBuilder.proxyConfiguration(proxyConfigBuilder.build())) - .build(); + clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); + httpClientBuilder.proxyConfiguration(proxyConfigBuilder.build()); + + s3ClientBuilder.httpClientBuilder(httpClientBuilder) + .overrideConfiguration(clientOverrideConfigBuilder.build()); + + return s3ClientBuilder.build(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e7ed157e86a90..83d021c53d3f7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2517,10 +2517,10 @@ protected S3ListResult continueListObjects(S3ListRequest request, // Next markers are only present when a delimiter is specified. 
String nextMarker; - if(prevResult.getV1().nextMarker() != null) { + if (prevResult.getV1().nextMarker() != null) { nextMarker = prevResult.getV1().nextMarker(); } else { - nextMarker = prevListResult.get(prevListResult.size() - 1).key(); + nextMarker = prevListResult.get(prevListResult.size() - 1).key(); } return S3ListResult.v1(s3V2.listObjects( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index d935e74d1b311..ffee5b5c67e0b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -1246,150 +1246,6 @@ public static ClientConfiguration createAwsConf(Configuration conf, return awsConf; } - public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { - ClientOverrideConfiguration.Builder overrideConfigBuilder = - ClientOverrideConfiguration.builder(); - - long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, - DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); - - if (requestTimeoutMillis > Integer.MAX_VALUE) { - LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead", - requestTimeoutMillis, Integer.MAX_VALUE); - requestTimeoutMillis = Integer.MAX_VALUE; - } - - if(requestTimeoutMillis > 0) { - overrideConfigBuilder.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); - } - - initUserAgentV2(conf, overrideConfigBuilder); - - // TODO: Look at signers. See issue https://github.com/aws/aws-sdk-java-v2/issues/1024 - // String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, ""); - // if (!signerOverride.isEmpty()) { - // LOG.debug("Signer override = {}", signerOverride); - // overrideConfigBuilder.putAdvancedOption(SdkAdvancedClientOption.SIGNER) - // } - - return overrideConfigBuilder; - } - - /** - * Configures the http client - * - * @param conf The Hadoop configuration - * @return Http client builder - */ - public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration conf) { - ApacheHttpClient.Builder httpClientBuilder = - ApacheHttpClient.builder(); - - httpClientBuilder.maxConnections(intOption(conf, MAXIMUM_CONNECTIONS, - DEFAULT_MAXIMUM_CONNECTIONS, 1)); - - httpClientBuilder.connectionTimeout(Duration.ofSeconds(intOption(conf, ESTABLISH_TIMEOUT, - DEFAULT_ESTABLISH_TIMEOUT, 0))); - - httpClientBuilder.socketTimeout(Duration.ofSeconds(intOption(conf, SOCKET_TIMEOUT, - DEFAULT_SOCKET_TIMEOUT, 0))); - - // The protocol is now HTTPS by default, - // and can only be modified by setting an HTTP endpoint on the client builder. 
- //TODO: See where this logic should go now
- //initProtocolSettings(conf, awsConf);
-
- return httpClientBuilder;
- }
-
- /**
- * Configures the retry policy
- *
- * @param conf The Hadoop configuration
- * @return Retry policy builder
- */
- public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) {
-
- RetryPolicy.Builder retryPolicyBuilder =
- RetryPolicy.builder();
-
- retryPolicyBuilder.numRetries(intOption(conf, MAX_ERROR_RETRIES,
- DEFAULT_MAX_ERROR_RETRIES, 0));
-
- return retryPolicyBuilder;
- }
-
- /**
- * Configures the proxy
- *
- * @param conf The Hadoop configuration
- * @param bucket Optional bucket to use to look up per-bucket proxy secrets
- * @return Proxy configuration builder
- * @throws IOException
- */
- public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configuration conf,
- String bucket) throws IOException {
-
- ProxyConfiguration.Builder proxyConfigBuilder = ProxyConfiguration.builder();
-
- String proxyHost = conf.getTrimmed(PROXY_HOST, "");
- int proxyPort = conf.getInt(PROXY_PORT, -1);
-
- if (!proxyHost.isEmpty()) {
- if (proxyPort >= 0) {
- proxyConfigBuilder.endpoint(buildURI(proxyHost, proxyPort));
- } else {
- if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
- LOG.warn("Proxy host set without port. Using HTTPS default 443");
- proxyConfigBuilder.endpoint(buildURI(proxyHost, 443));
- } else {
- LOG.warn("Proxy host set without port. Using HTTP default 80");
- proxyConfigBuilder.endpoint(buildURI(proxyHost, 80));
- }
- }
- final String proxyUsername = lookupPassword(bucket, conf, PROXY_USERNAME,
- null, null);
- final String proxyPassword = lookupPassword(bucket, conf, PROXY_PASSWORD,
- null, null);
- if ((proxyUsername == null) != (proxyPassword == null)) {
- String msg = "Proxy error: " + PROXY_USERNAME + " or " +
- PROXY_PASSWORD + " set without the other.";
- LOG.error(msg);
- throw new IllegalArgumentException(msg);
- }
- proxyConfigBuilder.username(proxyUsername);
- proxyConfigBuilder.password(proxyPassword);
- proxyConfigBuilder.ntlmDomain(conf.getTrimmed(PROXY_DOMAIN));
- proxyConfigBuilder.ntlmWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
- } else if (proxyPort >= 0) {
- String msg =
- "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
- LOG.error(msg);
- throw new IllegalArgumentException(msg);
- }
-
- return proxyConfigBuilder;
- }
-
- /***
- * Builds a URI, throws an IllegalArgumentException in case of errors.
- *
- * @param host proxy host
- * @param port proxy port
- * @return
- */
- private static URI buildURI(String host, int port) {
- try {
- return new URIBuilder().setHost(host).setPort(port).build();
- } catch (URISyntaxException e) {
- String msg =
- "Proxy error: incorrect " + PROXY_HOST + " or " + PROXY_PORT;
- LOG.error(msg);
- throw new IllegalArgumentException(msg);
- }
- }
-
 /**
 * Initializes all AWS SDK settings related to connection management.
 *
@@ -1524,7 +1380,6 @@ public static void initProxySupport(Configuration conf,
 * @param conf Hadoop configuration
 * @param awsConf AWS SDK configuration to update
 */
- // TODO: Remove when we remove S3V1 client
 private static void initUserAgent(Configuration conf,
 ClientConfiguration awsConf) {
 String userAgent = "Hadoop " + VersionInfo.getVersion();
@@ -1536,27 +1391,6 @@ private static void initUserAgent(Configuration conf,
 awsConf.setUserAgentPrefix(userAgent);
 }

- /**
- * Initializes the User-Agent header to send in HTTP requests to AWS
- * services. We always include the Hadoop version number.
The user also - * may set an optional custom prefix to put in front of the Hadoop version - * number. The AWS SDK internally appends its own information, which seems - * to include the AWS SDK version, OS and JVM version. - * - * @param conf Hadoop configuration - * @param clientConfig AWS SDK configuration to update - */ - private static void initUserAgentV2(Configuration conf, - ClientOverrideConfiguration.Builder clientConfig) { - String userAgent = "Hadoop " + VersionInfo.getVersion(); - String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, ""); - if (!userAgentPrefix.isEmpty()) { - userAgent = userAgentPrefix + ", " + userAgent; - } - LOG.debug("Using User-Agent: {}", userAgent); - clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); - } - /** * Convert the data of an iterator of {@link S3AFileStatus} to * an array. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index 5b53b3d417161..81284d9265a56 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -35,8 +35,7 @@ public class MockS3ClientFactory implements S3ClientFactory { @Override - public AmazonS3 createS3Client(URI uri, - final S3ClientCreationParameters parameters) { + public AmazonS3 createS3Client(URI uri, final S3ClientCreationParameters parameters) { AmazonS3 s3 = mock(AmazonS3.class); String bucket = uri.getHost(); when(s3.doesBucketExist(bucket)).thenReturn(true); @@ -45,10 +44,8 @@ public AmazonS3 createS3Client(URI uri, // return a stub value MultipartUploadListing noUploads = new MultipartUploadListing(); noUploads.setMultipartUploads(new ArrayList<>(0)); - when(s3.listMultipartUploads(any())) - .thenReturn(noUploads); - when(s3.getBucketLocation(anyString())) - .thenReturn(Region.US_West.toString()); + when(s3.listMultipartUploads(any())).thenReturn(noUploads); + when(s3.getBucketLocation(anyString())).thenReturn(Region.US_West.toString()); return s3; } @@ -58,4 +55,4 @@ public S3Client createS3ClientV2(URI uri, final S3ClientCreationParameters param S3Client s3 = mock(S3Client.class); return s3; } - } +} From 5df4cc55249ad18cc87a952496f8f2355dfa87d5 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 9 Jun 2022 10:52:14 +0100 Subject: [PATCH 11/24] removes unused imports --- .../java/org/apache/hadoop/fs/s3a/AWSClientConfig.java | 2 +- .../src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java | 9 --------- .../apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java | 1 - 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java index d5a0795c54c42..d6d4eef0929c5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -176,7 +176,7 @@ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configu * * @param host proxy host * @param port proxy port - * @return uri with host & port + * @return uri with host and port */ private static URI buildURI(String host, int port) { try { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index ffee5b5c67e0b..a2a1f881fa156 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -29,7 +29,6 @@ import com.amazonaws.retry.RetryUtils; import com.amazonaws.services.s3.model.AmazonS3Exception; import com.amazonaws.services.s3.model.MultiObjectDeleteException; -import com.amazonaws.services.s3.model.S3ObjectSummary; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.Preconditions; @@ -52,15 +51,9 @@ import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.Lists; -import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; -import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; -import software.amazon.awssdk.core.retry.RetryPolicy; -import software.amazon.awssdk.http.apache.ApacheHttpClient; -import software.amazon.awssdk.http.apache.ProxyConfiguration; import software.amazon.awssdk.services.s3.model.S3Object; import javax.annotation.Nullable; @@ -76,9 +69,7 @@ import java.lang.reflect.Modifier; import java.net.SocketTimeoutException; import java.net.URI; -import java.net.URISyntaxException; import java.nio.file.AccessDeniedException; -import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 362fc9d35eeee..fdef74ea62b36 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -49,7 +49,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; -import software.amazon.awssdk.services.s3.model.ListObjectsResponse; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import org.apache.hadoop.fs.PathIOException; From 961b01b869d57567f9dcdd9d330fd79233af0f95 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Mon, 1 Aug 2022 14:27:56 +0100 Subject: [PATCH 12/24] adds in V1ToV2 adapter --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 5 ++ .../V1ToV2AwsCredentialProviderAdapter.java | 70 +++++++++++++++++++ .../V1V2AwsCredentialProviderAdapter.java | 36 ++++++++++ 3 files changed, 111 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 4e70de9b59354..ad8e89326c854 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -41,6 +41,8 @@ import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider; import com.amazonaws.util.AwsHostNameUtils; import 
com.amazonaws.util.RuntimeHttpUtils; + +import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.VisibleForTesting; import org.slf4j.Logger; @@ -216,6 +218,9 @@ public S3Client createS3ClientV2( s3ClientBuilder.httpClientBuilder(httpClientBuilder) .overrideConfiguration(clientOverrideConfigBuilder.build()); + s3ClientBuilder.credentialsProvider( + V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet())); + return s3ClientBuilder.build(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java new file mode 100644 index 0000000000000..efea6540ef9ac --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.adapter; + +import com.amazonaws.auth.AWSCredentials; +import com.amazonaws.auth.AWSCredentialsProvider; +import com.amazonaws.auth.AWSSessionCredentials; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; + +/** + * Adapts a V1 {@link AWSCredentialsProvider} to the V2 {@link AwsCredentialsProvider} interface. + * Implements both interfaces so can be used with either the V1 or V2 AWS SDK. + */ +final class V1ToV2AwsCredentialProviderAdapter implements V1V2AwsCredentialProviderAdapter { + + private final AWSCredentialsProvider v1CredentialsProvider; + + private V1ToV2AwsCredentialProviderAdapter(AWSCredentialsProvider v1CredentialsProvider) { + this.v1CredentialsProvider = v1CredentialsProvider; + } + + @Override + public AwsCredentials resolveCredentials() { + AWSCredentials toAdapt = v1CredentialsProvider.getCredentials(); + if (toAdapt instanceof AWSSessionCredentials) { + return AwsSessionCredentials.create(toAdapt.getAWSAccessKeyId(), + toAdapt.getAWSSecretKey(), + ((AWSSessionCredentials) toAdapt).getSessionToken()); + } else { + return AwsBasicCredentials.create(toAdapt.getAWSAccessKeyId(), toAdapt.getAWSSecretKey()); + } + } + + @Override + public AWSCredentials getCredentials() { + return v1CredentialsProvider.getCredentials(); + } + + @Override + public void refresh() { + v1CredentialsProvider.refresh(); + } + + /** + * @param v1CredentialsProvider V1 credential provider to adapt. + * @return A new instance of the credentials provider adapter. 
+ */ + static V1ToV2AwsCredentialProviderAdapter create(AWSCredentialsProvider v1CredentialsProvider) { + return new V1ToV2AwsCredentialProviderAdapter(v1CredentialsProvider); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java new file mode 100644 index 0000000000000..f27166a9ef91d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.adapter; + +import com.amazonaws.auth.AWSCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + +public interface V1V2AwsCredentialProviderAdapter extends AWSCredentialsProvider, + AwsCredentialsProvider { + + /** + * Creates a two-way adapter from a V1 {@link AWSCredentialsProvider} interface. + * + * @param v1CredentialsProvider V1 credentials provider. + * @return Two-way credential provider adapter. + */ + static V1V2AwsCredentialProviderAdapter adapt(AWSCredentialsProvider v1CredentialsProvider) { + return V1ToV2AwsCredentialProviderAdapter.create(v1CredentialsProvider); + } +} From a580ea701cd2e0053b0310a6ed0374a4700f7241 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Mon, 8 Aug 2022 15:49:04 +0100 Subject: [PATCH 13/24] sets endpoint and region --- .../apache/hadoop/fs/s3a/AWSClientConfig.java | 5 +- .../org/apache/hadoop/fs/s3a/Constants.java | 4 + .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 95 +++++++++++++++++++ 3 files changed, 100 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java index d6d4eef0929c5..91fd3362385df 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -90,10 +90,7 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con httpClientBuilder.socketTimeout(Duration.ofSeconds(S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0))); - // The protocol is now HTTPS by default, - // and can only be modified by setting an HTTP endpoint on the client builder. 
- //TODO: See where this logic should go now - //initProtocolSettings(conf, awsConf); + // TODO: Need to set ssl socket factory, as done in NetworkBinding.bindSSLChannelMode(conf, awsConf); return httpClientBuilder; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 764a6adaca27d..2db2960d0ad21 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1203,4 +1203,8 @@ private Constants() { * Default maximum read size in bytes during vectored reads : {@value}. */ public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE = 1253376; //1M + + public static final String HTTP = "http"; + + public static final String HTTPS = "https"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index ad8e89326c854..795345eb0b3ea 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; import com.amazonaws.ClientConfiguration; import com.amazonaws.SdkClientException; @@ -53,8 +54,12 @@ import software.amazon.awssdk.core.retry.RetryPolicy; import software.amazon.awssdk.http.apache.ApacheHttpClient; import software.amazon.awssdk.http.apache.ProxyConfiguration; +import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3ClientBuilder; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; +import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -67,9 +72,14 @@ import static com.amazonaws.services.s3.Headers.REQUESTER_PAYS_HEADER; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; +import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; +import static org.apache.hadoop.fs.s3a.Constants.HTTP; +import static org.apache.hadoop.fs.s3a.Constants.HTTPS; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; +import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm; import static org.apache.hadoop.fs.s3a.S3AUtils.getS3EncryptionKey; import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; @@ -199,6 +209,11 @@ public S3Client createS3ClientV2( S3ClientBuilder s3ClientBuilder = S3Client.builder(); + // build a s3 client with region us-east-2 that can be used to get the region of the bucket as + // getBucketLocation does not work with us-east-1. + // See https://github.com/aws/aws-sdk-java/issues/1338. 
+ S3Client defaultS3Client = S3Client.builder().region(Region.US_EAST_2).build();
+
 // add any headers
 parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v));
@@ -221,6 +236,17 @@ public S3Client createS3ClientV2(
 s3ClientBuilder.credentialsProvider(
 V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet()));

+ URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
+
+ Region region =
+ getS3Region(conf.getTrimmed(AWS_REGION), defaultS3Client);
+
+ LOG.debug("Using endpoint {}; and region {}", endpoint, region);
+
+ s3ClientBuilder.endpointOverride(endpoint).region(region);
+
+ configureBasicParamsV2(s3ClientBuilder, clientOverrideConfigBuilder, parameters);
+
 return s3ClientBuilder.build();
 }

@@ -334,6 +360,18 @@ private void configureBasicParams(AmazonS3Builder builder,

 }

+ private void configureBasicParamsV2(S3ClientBuilder s3ClientBuilder,
+ ClientOverrideConfiguration.Builder overrideConfigBuilder, S3ClientCreationParameters parameters) {
+
+ s3ClientBuilder.serviceConfiguration(S3Configuration.builder()
+ .pathStyleAccessEnabled(parameters.isPathStyleAccess())
+ .build());
+
+ // TODO: set request handlers, will be done as part of auditor work
+
+ // TODO: Metrics collection, not sure if this can be done with the SDK V2 yet.
+ }
+
 /**
 * A method to configure endpoint and Region for an AmazonS3Builder.
 *
@@ -455,4 +493,62 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
 endpoint, epr, region);
 return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
 }
+
+ /**
+ * Given an endpoint string, create the endpoint URI.
+ *
+ * @param endpoint possibly null endpoint.
+ * @param conf config to build the URI from.
+ * @return an endpoint uri
+ */
+ private static URI getS3Endpoint(
+ String endpoint, final Configuration conf) {
+
+ boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
+ DEFAULT_SECURE_CONNECTIONS);
+
+ String protocol = secureConnections ? HTTPS : HTTP;
+
+ if (endpoint == null || endpoint.isEmpty()) {
+ // the default endpoint
+ endpoint = CENTRAL_ENDPOINT;
+ }
+
+ if (!endpoint.contains("://")) {
+ endpoint = String.format("%s://%s", protocol, endpoint);
+ }
+
+ try {
+ return new URI(endpoint);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Get the bucket region.
+ *
+ * @param region AWS S3 Region set in the config. This property may not be set, in which case
+ * S3 is asked for the bucket's region.
+ * @param s3Client An S3 client with default config, used for getting the region of a bucket.
+ * @return region of the bucket.
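+ * A null or absent location constraint in the response denotes us-east-1.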
+ */
+ private Region getS3Region(String region, S3Client s3Client) {
+
+ if(!StringUtils.isBlank(region)) {
+ return Region.of(region);
+ }
+
+ // No region specified so ask S3 for the bucket region
+ GetBucketLocationResponse bucketLocationResponse =
+ s3Client.getBucketLocation(GetBucketLocationRequest.builder().bucket(bucket).build());
+
+ // buckets in us-east-1 have a location constraint of null
+ if(bucketLocationResponse.locationConstraintAsString() != null) {
+ return Region.of(bucketLocationResponse.locationConstraintAsString());
+ } else {
+ return Region.US_EAST_1;
+ }
+ }
 }
From 61bac01f393f793ea8a082b72ab5a797ee50d13a Mon Sep 17 00:00:00 2001
From: Ahmar Suhail
Date: Wed, 10 Aug 2022 16:27:20 +0100
Subject: [PATCH 14/24] set landsat bucket region to fix failing tests

---
 .../org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java | 5 +++++
 .../org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java | 6 +++++-
 .../java/org/apache/hadoop/fs/s3a/S3ATestConstants.java | 5 +++++
 .../hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java | 1 +
 .../apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java | 2 ++
 5 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java
index c0f6a4b23226b..9627a08cb143b 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java
@@ -40,6 +40,7 @@ import java.nio.file.AccessDeniedException;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.createFiles;
 import static org.apache.hadoop.fs.s3a.test.ExtraAssertions.failIf;
 import static org.apache.hadoop.test.LambdaTestUtils.*;
@@ -151,6 +152,8 @@ private Path maybeGetCsvPath() {
 public void testMultiObjectDeleteNoPermissions() throws Throwable {
 describe("Delete the landsat CSV file and expect it to fail");
 Path csvPath = maybeGetCsvPath();
+ Configuration conf = getConfiguration();
+ conf.set(AWS_REGION, LANDSAT_BUCKET_REGION);
 S3AFileSystem fs = (S3AFileSystem) csvPath.getFileSystem(
 getConfiguration());
 // create a span, expect it to be activated.
@@ -172,6 +175,8 @@ public void testMultiObjectDeleteNoPermissions() throws Throwable { public void testSingleObjectDeleteNoPermissionsTranslated() throws Throwable { describe("Delete the landsat CSV file and expect it to fail"); Path csvPath = maybeGetCsvPath(); + Configuration conf = getConfiguration(); + conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); S3AFileSystem fs = (S3AFileSystem) csvPath.getFileSystem( getConfiguration()); AccessDeniedException aex = intercept(AccessDeniedException.class, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java index 9a818d037e4c0..15259567deb4f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.statistics.StreamStatisticNames; import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; +import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -49,7 +50,8 @@ protected Configuration createConfiguration() { requesterPaysBucketName, conf, ALLOW_REQUESTER_PAYS, - S3A_BUCKET_PROBE); + S3A_BUCKET_PROBE, + AWS_REGION); return conf; } @@ -62,6 +64,7 @@ public void testRequesterPaysOptionSuccess() throws Throwable { conf.setBoolean(ALLOW_REQUESTER_PAYS, true); // Enable bucket exists check, the first failure point people may encounter conf.setInt(S3A_BUCKET_PROBE, 2); + conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); Path requesterPaysPath = getRequesterPaysPath(conf); @@ -95,6 +98,7 @@ public void testRequesterPaysDisabledFails() throws Throwable { Configuration conf = this.createConfiguration(); conf.setBoolean(ALLOW_REQUESTER_PAYS, false); + conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); Path requesterPaysPath = getRequesterPaysPath(conf); try (FileSystem fs = requesterPaysPath.getFileSystem(conf)) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index a6269c437665a..389e898f8ef32 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -105,6 +105,11 @@ public interface S3ATestConstants { */ String DEFAULT_CSVTEST_FILE = LANDSAT_BUCKET + "scene_list.gz"; + /** + * The landsat bucket region: {@value}. + */ + String LANDSAT_BUCKET_REGION = "us-west-2"; + /** * Configuration key for an existing object in a requester pays bucket: {@value}. 
* diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java index b8195cb9964fa..53e690b1c0a70 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java @@ -103,6 +103,7 @@ public void openFS() throws IOException { Configuration conf = getConf(); conf.setInt(SOCKET_SEND_BUFFER, 16 * 1024); conf.setInt(SOCKET_RECV_BUFFER, 16 * 1024); + conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); String testFile = conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE); if (testFile.isEmpty()) { assumptionMessage = "Empty test property: " + KEY_CSVTEST_FILE; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java index bf5d96e73b3b2..6fe270ecc9109 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java @@ -60,6 +60,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.util.DurationInfo; +import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getLandsatCSVPath; import static org.apache.hadoop.fs.s3a.select.CsvFile.ALL_QUOTES; import static org.apache.hadoop.fs.s3a.select.SelectConstants.*; @@ -290,6 +291,7 @@ public void setup() throws Exception { + getFileSystem().getUri(), isSelectAvailable(getFileSystem())); Configuration conf = getConfiguration(); + conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); landsatGZ = getLandsatCSVPath(conf); landsatFS = (S3AFileSystem) landsatGZ.getFileSystem(conf); Assume.assumeTrue("S3 Select is not enabled on " + landsatFS.getUri(), From 9f18e37bcbad1230dcad021deece22acc0d736b8 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 11 Aug 2022 16:07:47 +0100 Subject: [PATCH 15/24] use us-west-2 for default client --- .../org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +- .../org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java | 5 ----- .../hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java | 1 - .../apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java | 2 -- 4 files changed, 1 insertion(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 795345eb0b3ea..3152556f27c4d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -212,7 +212,7 @@ public S3Client createS3ClientV2( // build a s3 client with region us-east-2 that can be used to get the region of the bucket as // getBucketLocation does not work with us-east-1. // See https://github.com/aws/aws-sdk-java/issues/1338. 
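The test-side changes in [PATCH 14/24] above all apply one recipe: pin the region of the public landsat bucket up front so the filesystem never has to ask S3 for it; the [PATCH 15/24] hunk resuming below then begins moving that fix into the client factory instead. The recipe, as a sketch (the bucket path is only an example, and AWS_REGION is assumed to be the "fs.s3a.endpoint.region" key):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    Configuration conf = new Configuration();
    // landsat-pds lives in us-west-2, so skip the region probe entirely
    conf.set("fs.s3a.endpoint.region", "us-west-2");
    FileSystem fs = new Path("s3a://landsat-pds/scene_list.gz").getFileSystem(conf);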
- S3Client defaultS3Client = S3Client.builder().region(Region.US_EAST_2).build(); + S3Client defaultS3Client = S3Client.builder().region(Region.US_WEST_2).build(); // add any headers parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java index 9627a08cb143b..c0f6a4b23226b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFailureHandling.java @@ -40,7 +40,6 @@ import java.nio.file.AccessDeniedException; import static org.apache.hadoop.fs.contract.ContractTestUtils.*; -import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.S3ATestUtils.createFiles; import static org.apache.hadoop.fs.s3a.test.ExtraAssertions.failIf; import static org.apache.hadoop.test.LambdaTestUtils.*; @@ -152,8 +151,6 @@ private Path maybeGetCsvPath() { public void testMultiObjectDeleteNoPermissions() throws Throwable { describe("Delete the landsat CSV file and expect it to fail"); Path csvPath = maybeGetCsvPath(); - Configuration conf = getConfiguration(); - conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); S3AFileSystem fs = (S3AFileSystem) csvPath.getFileSystem( getConfiguration()); // create a span, expect it to be activated. @@ -175,8 +172,6 @@ public void testMultiObjectDeleteNoPermissions() throws Throwable { public void testSingleObjectDeleteNoPermissionsTranslated() throws Throwable { describe("Delete the landsat CSV file and expect it to fail"); Path csvPath = maybeGetCsvPath(); - Configuration conf = getConfiguration(); - conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); S3AFileSystem fs = (S3AFileSystem) csvPath.getFileSystem( getConfiguration()); AccessDeniedException aex = intercept(AccessDeniedException.class, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java index 53e690b1c0a70..b8195cb9964fa 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java @@ -103,7 +103,6 @@ public void openFS() throws IOException { Configuration conf = getConf(); conf.setInt(SOCKET_SEND_BUFFER, 16 * 1024); conf.setInt(SOCKET_RECV_BUFFER, 16 * 1024); - conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); String testFile = conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE); if (testFile.isEmpty()) { assumptionMessage = "Empty test property: " + KEY_CSVTEST_FILE; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java index 6fe270ecc9109..bf5d96e73b3b2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java @@ -60,7 +60,6 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.util.DurationInfo; -import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import 
static org.apache.hadoop.fs.s3a.S3ATestUtils.getLandsatCSVPath; import static org.apache.hadoop.fs.s3a.select.CsvFile.ALL_QUOTES; import static org.apache.hadoop.fs.s3a.select.SelectConstants.*; @@ -291,7 +290,6 @@ public void setup() throws Exception { + getFileSystem().getUri(), isSelectAvailable(getFileSystem())); Configuration conf = getConfiguration(); - conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); landsatGZ = getLandsatCSVPath(conf); landsatFS = (S3AFileSystem) landsatGZ.getFileSystem(conf); Assume.assumeTrue("S3 Select is not enabled on " + landsatFS.getUri(), From 4e30fdca3a5e2d2a859d957c55858b5f98e29205 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Sun, 21 Aug 2022 14:03:00 +0100 Subject: [PATCH 16/24] updates region logic --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 32 ++++++++++++------- .../hadoop/fs/s3a/ITestS3ARequesterPays.java | 6 +--- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 3152556f27c4d..6eb3806175392 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.List; import com.amazonaws.ClientConfiguration; import com.amazonaws.SdkClientException; @@ -60,6 +61,9 @@ import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse; +import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; +import software.amazon.awssdk.services.s3.model.S3Exception; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; @@ -209,10 +213,10 @@ public S3Client createS3ClientV2( S3ClientBuilder s3ClientBuilder = S3Client.builder(); - // build a s3 client with region us-east-2 that can be used to get the region of the bucket as + // build a s3 client with region eu-west-2 that can be used to get the region of the bucket as // getBucketLocation does not work with us-east-1. // See https://github.com/aws/aws-sdk-java/issues/1338. - S3Client defaultS3Client = S3Client.builder().region(Region.US_WEST_2).build(); + S3Client defaultS3Client = S3Client.builder().region(Region.EU_WEST_2).build(); // add any headers parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); @@ -528,7 +532,7 @@ private static URI getS3Endpoint( /** * Get the bucket region. * - * @param region AWS S3 Region set in the config. This property may not be set, in which case this + * @param region AWS S3 Region set in the config. This property may not be set, in which case * ask S3 for the region. * @param s3Client A S3 Client with default config, used for getting the region of a bucket. * @return region of the bucket. 
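The hunk that follows swaps getBucketLocation() for a HeadBucket probe and reads the region straight off the HTTP response. In isolation, the header-access pattern it relies on looks like this (a sketch; the bucket name is a placeholder):

    import software.amazon.awssdk.regions.Region;
    import software.amazon.awssdk.services.s3.S3Client;
    import software.amazon.awssdk.services.s3.model.HeadBucketRequest;

    S3Client probe = S3Client.builder().region(Region.EU_WEST_2).build();
    // a successful HeadBucket reports the bucket's actual region in this header
    Region bucketRegion = Region.of(
        probe.headBucket(HeadBucketRequest.builder().bucket("example-bucket").build())
            .sdkHttpResponse().headers().get("x-amz-bucket-region").get(0));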
@@ -539,15 +543,19 @@ private Region getS3Region(String region, S3Client s3Client) { return Region.of(region); } - //No region specified so ask s3 for bucket region - GetBucketLocationResponse bucketLocationResponse = - s3Client.getBucketLocation(GetBucketLocationRequest.builder().bucket(bucket).build()); - - // buckets in eu-east-1 have a location constraint of null - if(bucketLocationResponse.locationConstraintAsString() != null) { - return Region.of(bucketLocationResponse.locationConstraintAsString()); - } else { - return Region.US_EAST_1; + try { + HeadBucketResponse headBucketResponse = + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + return Region.of( + headBucketResponse.sdkHttpResponse().headers().get("x-amz-bucket-region").get(0)); + } catch (S3Exception exception) { + if (exception.statusCode() == 301) { + List bucketRegion = + exception.awsErrorDetails().sdkHttpResponse().headers().get("x-amz-bucket-region"); + return Region.of(bucketRegion.get(0)); + } } + + return Region.US_EAST_1; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java index 15259567deb4f..9a818d037e4c0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java @@ -31,7 +31,6 @@ import org.apache.hadoop.fs.statistics.StreamStatisticNames; import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS; -import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -50,8 +49,7 @@ protected Configuration createConfiguration() { requesterPaysBucketName, conf, ALLOW_REQUESTER_PAYS, - S3A_BUCKET_PROBE, - AWS_REGION); + S3A_BUCKET_PROBE); return conf; } @@ -64,7 +62,6 @@ public void testRequesterPaysOptionSuccess() throws Throwable { conf.setBoolean(ALLOW_REQUESTER_PAYS, true); // Enable bucket exists check, the first failure point people may encounter conf.setInt(S3A_BUCKET_PROBE, 2); - conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); Path requesterPaysPath = getRequesterPaysPath(conf); @@ -98,7 +95,6 @@ public void testRequesterPaysDisabledFails() throws Throwable { Configuration conf = this.createConfiguration(); conf.setBoolean(ALLOW_REQUESTER_PAYS, false); - conf.set(AWS_REGION, LANDSAT_BUCKET_REGION); Path requesterPaysPath = getRequesterPaysPath(conf); try (FileSystem fs = requesterPaysPath.getFileSystem(conf)) { From 0e2da75f9502e1ef2998ff33ec4faea38e2d8ce3 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Sun, 21 Aug 2022 15:21:46 +0100 Subject: [PATCH 17/24] fixes yetus errors --- .../apache/hadoop/fs/s3a/AWSClientConfig.java | 33 +++++++++++++++---- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 33 +++++++------------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 -- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 2 +- .../fs/s3a/impl/RequestFactoryImpl.java | 4 +-- .../hadoop/fs/s3a/TestS3AGetFileStatus.java | 8 +++-- .../fs/s3a/impl/TestRequestFactory.java | 3 +- .../org.mockito.plugins.MockMaker | 18 ++++++++++ 8 files changed, 67 insertions(+), 36 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java index 
91fd3362385df..a50caa6a1dfe7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.fs.s3a; import java.io.IOException; @@ -41,6 +59,8 @@ public final class AWSClientConfig { private static final Logger LOG = LoggerFactory.getLogger(AWSClientConfig.class); + private AWSClientConfig() { + } public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf) { ClientOverrideConfiguration.Builder overrideConfigBuilder = @@ -72,7 +92,7 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf } /** - * Configures the http client + * Configures the http client. * * @param conf The Hadoop configuration * @return Http client builder @@ -84,19 +104,20 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, DEFAULT_MAXIMUM_CONNECTIONS, 1)); - httpClientBuilder.connectionTimeout(Duration.ofSeconds(S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, - DEFAULT_ESTABLISH_TIMEOUT, 0))); + httpClientBuilder.connectionTimeout(Duration.ofSeconds( + S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0))); httpClientBuilder.socketTimeout(Duration.ofSeconds(S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0))); - // TODO: Need to set ssl socket factory, as done in NetworkBinding.bindSSLChannelMode(conf, awsConf); + // TODO: Need to set ssl socket factory, as done in + // NetworkBinding.bindSSLChannelMode(conf, awsConf); return httpClientBuilder; } /** - * Configures the retry policy + * Configures the retry policy. * * @param conf The Hadoop configuration * @return Retry policy builder @@ -112,7 +133,7 @@ public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { } /** - * Configures the proxy + * Configures the proxy. * * @param conf The Hadoop configuration * @param bucket Optional bucket to use to look up per-bucket proxy secrets diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 6eb3806175392..3686520674460 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -185,7 +185,7 @@ public AmazonS3 createS3Client( } /** - * Creates a new {@link S3Client} + * Creates a new {@link S3Client}. 
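The new AWSClientConfig class above gathers the client tuning knobs in one place. Pulled together, its builders yield a client along these lines (a sketch with illustrative values; the real values come from the fs.s3a.* options named in the comments):

    import java.time.Duration;
    import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
    import software.amazon.awssdk.core.retry.RetryPolicy;
    import software.amazon.awssdk.http.apache.ApacheHttpClient;
    import software.amazon.awssdk.services.s3.S3Client;

    S3Client s3 = S3Client.builder()
        .httpClientBuilder(ApacheHttpClient.builder()
            .maxConnections(96)                        // fs.s3a.connection.maximum
            .connectionTimeout(Duration.ofSeconds(5))  // fs.s3a.connection.establish.timeout
            .socketTimeout(Duration.ofSeconds(200)))   // fs.s3a.connection.timeout
        .overrideConfiguration(ClientOverrideConfiguration.builder()
            .retryPolicy(RetryPolicy.builder().numRetries(20).build())  // fs.s3a.attempts.maximum
            .build())
        .build();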
* * @param uri S3A file system URI * @param parameters parameter object @@ -249,7 +249,12 @@ public S3Client createS3ClientV2( s3ClientBuilder.endpointOverride(endpoint).region(region); - configureBasicParamsV2(s3ClientBuilder, clientOverrideConfigBuilder, parameters); + s3ClientBuilder.serviceConfiguration( + S3Configuration.builder().pathStyleAccessEnabled(parameters.isPathStyleAccess()).build()); + + // TODO: Some configuration done in configureBasicParams is not done yet. + // Request handlers will be added during auditor work. Need to verify how metrics collection + // can be done, as SDK V2 only seems to have a metrics publisher. return s3ClientBuilder.build(); } @@ -364,18 +369,6 @@ private void configureBasicParams(AmazonS3Builder builder, } - private void configureBasicParamsV2(S3ClientBuilder s3ClientBuilder, - ClientOverrideConfiguration.Builder overrideConfigBuilder, S3ClientCreationParameters parameters) { - - S3Configuration s3Configuration = S3Configuration.builder() - .pathStyleAccessEnabled(parameters.isPathStyleAccess()) - .build(); - - // TODO: set request handlers, will be done as part of auditor work - - // TODO: Metrics collection, not sure if this can be done with the SDK V2 yet. - } - /** * A method to configure endpoint and Region for an AmazonS3Builder. * @@ -505,13 +498,11 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, * @param conf config to build the URI from. * @return an endpoint uri */ - private static URI getS3Endpoint( - String endpoint, final Configuration conf) { + private static URI getS3Endpoint(String endpoint, final Configuration conf) { - boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, - DEFAULT_SECURE_CONNECTIONS); + boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS); - String protocol = secureConnections ? HTTPS : HTTP; + String protocol = secureConnections ? HTTPS : HTTP; if (endpoint == null || endpoint.isEmpty()) { // the default endpoint @@ -539,7 +530,7 @@ private static URI getS3Endpoint( */ private Region getS3Region(String region, S3Client s3Client) { - if(!StringUtils.isBlank(region)) { + if (!StringUtils.isBlank(region)) { return Region.of(region); } @@ -552,7 +543,7 @@ private Region getS3Region(String region, S3Client s3Client) { if (exception.statusCode() == 301) { List bucketRegion = exception.awsErrorDetails().sdkHttpResponse().headers().get("x-amz-bucket-region"); - return Region.of(bucketRegion.get(0)); + return Region.of(bucketRegion.get(0)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c27cdb520537e..192519e2f9274 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -2638,8 +2638,6 @@ protected S3ListResult continueListObjects(S3ListRequest request, return S3ListResult.v1(s3V2.listObjects( request.getV1().toBuilder().marker(nextMarker).build())); } else { - //TODO: SDKV2 now supports automatic pagination, using that here requires - // significant refactoring. Investigate performance benefits and if this is something we want to do. 
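For reference, the "automatic pagination" the deleted TODO mentions: SDK v2 paginators hide the continuation-token loop behind a single iterable, along these lines (a sketch assuming an S3Client named s3; bucket and prefix are placeholders):

    import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
    import software.amazon.awssdk.services.s3.model.S3Object;

    ListObjectsV2Request request = ListObjectsV2Request.builder()
        .bucket("example-bucket")
        .prefix("dir/")
        .build();
    // contents() flattens every page into one iterable; the paginator
    // re-issues the request with the continuation token as needed
    for (S3Object object : s3.listObjectsV2Paginator(request).contents()) {
      System.out.println(object.key() + " " + object.size());
    }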
return S3ListResult.v2(s3V2.listObjectsV2(request.getV2().toBuilder() .continuationToken(prevResult.getV2().nextContinuationToken()).build())); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index a7620392b4f63..44de0517f12f6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -62,7 +62,7 @@ AmazonS3 createS3Client(URI uri, S3ClientCreationParameters parameters) throws IOException; /** - * Creates a new {@link S3Client} + * Creates a new {@link S3Client}. * * @param uri S3A file system URI * @param parameters parameter object diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 892fd88ab6d65..be7fc0716e0bd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -594,8 +594,8 @@ public ListObjectsV2Request newListObjectsV2Request( requestBuilder.delimiter(delimiter); } - //TODO: add call to prepareRequest, not added for now as PrepareRequest is a functional interface, - // uses AmazonWebServiceRequest, SDKV2 uses AwsRequest. + //TODO: add call to prepareRequest, not added for now as PrepareRequest is a functional + // interface, uses AmazonWebServiceRequest, SDKV2 uses AwsRequest. return requestBuilder.build(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 97928eb5f8bf4..2f85ea825bbd1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -81,7 +81,8 @@ public void testFakeDirectory() throws Exception { String keyDir = key + "/"; List s3Objects = new ArrayList<>(1); s3Objects.add(S3Object.builder().key(keyDir).size(0L).build()); - ListObjectsV2Response listObjectsV2Response = ListObjectsV2Response.builder().contents(s3Objects).build(); + ListObjectsV2Response listObjectsV2Response = + ListObjectsV2Response.builder().contents(s3Objects).build(); when(s3V2.listObjectsV2(argThat( matchListV2Request(BUCKET, keyDir)) )).thenReturn(listObjectsV2Response); @@ -156,8 +157,9 @@ private void setupListMocks(List prefixes, ListObjectsV2Response v2Result = mock(ListObjectsV2Response.class); when(v2Result.commonPrefixes()).thenReturn(prefixes); when(v2Result.contents()).thenReturn(s3Objects); - when(s3V2.listObjectsV2(any(software.amazon.awssdk.services.s3.model.ListObjectsV2Request.class))) - .thenReturn(v2Result); + when(s3V2.listObjectsV2( + any(software.amazon.awssdk.services.s3.model.ListObjectsV2Request.class))).thenReturn( + v2Result); } private ArgumentMatcher correctGetMetadataRequest( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index a9ab40ab85fef..03f03c46f97b8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -170,7 +170,8 @@ private void createFactoryObjects(RequestFactory factory) { a(factory.newGetObjectRequest(path)); a(factory.newGetObjectMetadataRequest(path)); a(factory.newListMultipartUploadsRequest(path)); - //TODO: Commenting out for now, new request extends AwsRequest, this can be updated once all request factory operations are updated. + //TODO: Commenting out for now, new request extends AwsRequest, this can be updated once all + // request factory operations are updated. //a(factory.newListObjectsV1Request(path, "/", 1)); a(factory.newListNextBatchOfObjectsRequest(new ObjectListing())); // a(factory.newListObjectsV2Request(path, "/", 1)); diff --git a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker index ca6ee9cea8ec1..4ef8cd4ecb58d 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker +++ b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -1 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + mock-maker-inline \ No newline at end of file From da7a6d3b027fad631760798e1b2f55051d231ab9 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Sun, 21 Aug 2022 17:31:31 +0100 Subject: [PATCH 18/24] removes unused imports --- .../apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 3686520674460..50319836a564b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -59,8 +59,6 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3ClientBuilder; import software.amazon.awssdk.services.s3.S3Configuration; -import software.amazon.awssdk.services.s3.model.GetBucketLocationRequest; -import software.amazon.awssdk.services.s3.model.GetBucketLocationResponse; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import software.amazon.awssdk.services.s3.model.S3Exception; @@ -213,10 +211,10 @@ public S3Client createS3ClientV2( S3ClientBuilder s3ClientBuilder = S3Client.builder(); - // build a s3 client with region eu-west-2 that can be used to get the region of the bucket as - // getBucketLocation does not work with us-east-1. - // See https://github.com/aws/aws-sdk-java/issues/1338. - S3Client defaultS3Client = S3Client.builder().region(Region.EU_WEST_2).build(); + // build a s3 client with region eu-west-2 that can be used to get the region of the bucket. + S3Client defaultS3Client = S3Client.builder().region(Region.EU_WEST_2) + .credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet())) + .build(); // add any headers parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); @@ -237,6 +235,8 @@ public S3Client createS3ClientV2( s3ClientBuilder.httpClientBuilder(httpClientBuilder) .overrideConfiguration(clientOverrideConfigBuilder.build()); + // use adapter classes so V1 credential providers continue to work. This will be moved to + // AWSCredentialProviderList.add() when that class is updated. 
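The adapter mentioned in the comment above bridges the two credential interfaces; conceptually it is no more than the following (a hand-rolled illustration, not the patch's V1V2AwsCredentialProviderAdapter, and it ignores session credentials):

    import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
    import software.amazon.awssdk.auth.credentials.AwsCredentials;
    import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;

    final class V1ToV2CredentialAdapter implements AwsCredentialsProvider {
      private final com.amazonaws.auth.AWSCredentialsProvider v1;

      V1ToV2CredentialAdapter(com.amazonaws.auth.AWSCredentialsProvider v1) {
        this.v1 = v1;
      }

      @Override
      public AwsCredentials resolveCredentials() {
        com.amazonaws.auth.AWSCredentials c = v1.getCredentials();
        // map the v1 access/secret key pair onto the v2 value type
        return AwsBasicCredentials.create(c.getAWSAccessKeyId(), c.getAWSSecretKey());
      }
    }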
s3ClientBuilder.credentialsProvider( V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet())); From 20cab7e937147ca0731345960b4da8460d07d594 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Mon, 22 Aug 2022 10:24:52 +0100 Subject: [PATCH 19/24] fixes yetus errors --- .../apache/hadoop/fs/s3a/AWSClientConfig.java | 2 +- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 2 +- .../hadoop/fs/s3a/adapter/package-info.java | 27 ++++++++++++++++++ .../hadoop/fs/s3a/AbstractS3AMockTest.java | 6 +++- .../hadoop/fs/s3a/TestS3AGetFileStatus.java | 4 ++- .../org.mockito.plugins.MockMaker | 28 ++++++++----------- 7 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java index a50caa6a1dfe7..e60346c19ce95 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -138,7 +138,7 @@ public static RetryPolicy.Builder createRetryPolicyBuilder(Configuration conf) { * @param conf The Hadoop configuration * @param bucket Optional bucket to use to look up per-bucket proxy secrets * @return Proxy configuration builder - * @throws IOException + * @throws IOException on any IO problem */ public static ProxyConfiguration.Builder createProxyConfigurationBuilder(Configuration conf, String bucket) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 50319836a564b..bf20ec3e1215a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -188,7 +188,7 @@ public AmazonS3 createS3Client( * @param uri S3A file system URI * @param parameters parameter object * @return S3 client - * @throws IOException + * @throws IOException on any IO problem */ @Override public S3Client createS3ClientV2( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 44de0517f12f6..1272db1fd958d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -67,7 +67,7 @@ AmazonS3 createS3Client(URI uri, * @param uri S3A file system URI * @param parameters parameter object * @return S3 client - * @throws IOException + * @throws IOException on any IO problem */ S3Client createS3ClientV2(URI uri, S3ClientCreationParameters parameters) throws IOException; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java new file mode 100644 index 0000000000000..8d03c915e171a --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Adapter classes for allowing V1 credential providers to be used with SDKV2. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +package org.apache.hadoop.fs.s3a.adapter; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index 1269564c8fa04..1f0bf8608b481 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -51,7 +51,7 @@ public abstract class AbstractS3AMockTest { protected S3AFileSystem fs; protected AmazonS3 s3; - protected S3Client s3V2; + private S3Client s3V2; @Before public void setup() throws Exception { @@ -80,6 +80,10 @@ public Configuration createConfiguration() { return conf; } + public S3Client getS3Client() { + return s3V2; + } + @After public void teardown() throws Exception { if (fs != null) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java index 2f85ea825bbd1..cc61ef1fe2ab7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AGetFileStatus.java @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Test; import org.mockito.ArgumentMatcher; +import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CommonPrefix; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsResponse; @@ -76,6 +77,7 @@ public void testFile() throws Exception { public void testFakeDirectory() throws Exception { Path path = new Path("/dir"); String key = path.toUri().getPath().substring(1); + S3Client s3V2 = getS3Client(); when(s3.getObjectMetadata(argThat(correctGetMetadataRequest(BUCKET, key)))) .thenThrow(NOT_FOUND); String keyDir = key + "/"; @@ -146,7 +148,7 @@ public void testNotFound() throws Exception { private void setupListMocks(List prefixes, List s3Objects) { - + S3Client s3V2 = getS3Client(); // V1 list API mock ListObjectsResponse v1Response = mock(ListObjectsResponse.class); when(v1Response.commonPrefixes()).thenReturn(prefixes); diff --git a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker 
b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker index 4ef8cd4ecb58d..3b308f19255c3 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker +++ b/hadoop-tools/hadoop-aws/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -1,19 +1,13 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. mock-maker-inline \ No newline at end of file From d89f11edf29b763695965654f177ad5a15a3371c Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Mon, 22 Aug 2022 10:56:25 +0100 Subject: [PATCH 20/24] removes unused constant --- .../java/org/apache/hadoop/fs/s3a/S3ATestConstants.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index 389e898f8ef32..5218d58b49071 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -104,12 +104,7 @@ public interface S3ATestConstants { * Default path for the multi MB test file: {@value}. */ String DEFAULT_CSVTEST_FILE = LANDSAT_BUCKET + "scene_list.gz"; - - /** - * The landsat bucket region: {@value}. - */ - String LANDSAT_BUCKET_REGION = "us-west-2"; - + /** * Configuration key for an existing object in a requester pays bucket: {@value}. 
* From a1e8bedcbcc61d493bcb7dc025af05677144e19a Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Tue, 23 Aug 2022 16:35:15 +0100 Subject: [PATCH 21/24] changes as per @dannycjones review --- .../apache/hadoop/fs/s3a/AWSClientConfig.java | 48 ++++++++++++------- .../org/apache/hadoop/fs/s3a/Constants.java | 3 -- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 28 ++++++----- .../org/apache/hadoop/fs/s3a/Listing.java | 2 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../fs/s3a/impl/RequestFactoryImpl.java | 25 +++++++--- .../hadoop/fs/s3a/MockS3ClientFactory.java | 3 +- .../hadoop/fs/s3a/S3ATestConstants.java | 2 +- 8 files changed, 70 insertions(+), 43 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java index e60346c19ce95..40ee14d86daa2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSClientConfig.java @@ -56,6 +56,11 @@ import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; +/** + * Methods for configuring the S3 client. + * These methods are used when creating and configuring + * {@link software.amazon.awssdk.services.s3.S3Client} which communicates with the S3 service. + */ public final class AWSClientConfig { private static final Logger LOG = LoggerFactory.getLogger(AWSClientConfig.class); @@ -66,18 +71,7 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf ClientOverrideConfiguration.Builder overrideConfigBuilder = ClientOverrideConfiguration.builder(); - long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, - DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); - - if (requestTimeoutMillis > Integer.MAX_VALUE) { - LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead", - requestTimeoutMillis, Integer.MAX_VALUE); - requestTimeoutMillis = Integer.MAX_VALUE; - } - - if(requestTimeoutMillis > 0) { - overrideConfigBuilder.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); - } + initRequestTimeout(conf, overrideConfigBuilder); initUserAgent(conf, overrideConfigBuilder); @@ -104,11 +98,12 @@ public static ApacheHttpClient.Builder createHttpClientBuilder(Configuration con httpClientBuilder.maxConnections(S3AUtils.intOption(conf, MAXIMUM_CONNECTIONS, DEFAULT_MAXIMUM_CONNECTIONS, 1)); - httpClientBuilder.connectionTimeout(Duration.ofSeconds( - S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0))); + int connectionEstablishTimeout = + S3AUtils.intOption(conf, ESTABLISH_TIMEOUT, DEFAULT_ESTABLISH_TIMEOUT, 0); + int socketTimeout = S3AUtils.intOption(conf, SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, 0); - httpClientBuilder.socketTimeout(Duration.ofSeconds(S3AUtils.intOption(conf, SOCKET_TIMEOUT, - DEFAULT_SOCKET_TIMEOUT, 0))); + httpClientBuilder.connectionTimeout(Duration.ofSeconds(connectionEstablishTimeout)); + httpClientBuilder.socketTimeout(Duration.ofSeconds(socketTimeout)); // TODO: Need to set ssl socket factory, as done in // NetworkBinding.bindSSLChannelMode(conf, awsConf); @@ -228,4 +223,25 @@ private static void initUserAgent(Configuration conf, clientConfig.putAdvancedOption(SdkAdvancedClientOption.USER_AGENT_PREFIX, userAgent); } + /** + * Configures request timeout. 
+ * + * @param conf Hadoop configuration + * @param clientConfig AWS SDK configuration to update + */ + private static void initRequestTimeout(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig) { + long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT, + DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS); + + if (requestTimeoutMillis > Integer.MAX_VALUE) { + LOG.debug("Request timeout is too high({} ms). Setting to {} ms instead", + requestTimeoutMillis, Integer.MAX_VALUE); + requestTimeoutMillis = Integer.MAX_VALUE; + } + + if(requestTimeoutMillis > 0) { + clientConfig.apiCallAttemptTimeout(Duration.ofMillis(requestTimeoutMillis)); + } + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 2db2960d0ad21..ea2011f6b64c1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1204,7 +1204,4 @@ private Constants() { */ public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE = 1253376; //1M - public static final String HTTP = "http"; - - public static final String HTTPS = "https"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index bf20ec3e1215a..55a4a55ee6ca2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -25,6 +25,7 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.regions.RegionUtils; @@ -78,8 +79,6 @@ import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; -import static org.apache.hadoop.fs.s3a.Constants.HTTP; -import static org.apache.hadoop.fs.s3a.Constants.HTTPS; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm; @@ -211,11 +210,6 @@ public S3Client createS3ClientV2( S3ClientBuilder s3ClientBuilder = S3Client.builder(); - // build a s3 client with region eu-west-2 that can be used to get the region of the bucket. 
- S3Client defaultS3Client = S3Client.builder().region(Region.EU_WEST_2) - .credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(parameters.getCredentialSet())) - .build(); - // add any headers parameters.getHeaders().forEach((h, v) -> clientOverrideConfigBuilder.putHeader(h, v)); @@ -243,14 +237,17 @@ public S3Client createS3ClientV2( URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); Region region = - getS3Region(conf.getTrimmed(AWS_REGION), defaultS3Client); + getS3Region(conf.getTrimmed(AWS_REGION), parameters.getCredentialSet()); LOG.debug("Using endpoint {}; and region {}", endpoint, region); s3ClientBuilder.endpointOverride(endpoint).region(region); - s3ClientBuilder.serviceConfiguration( - S3Configuration.builder().pathStyleAccessEnabled(parameters.isPathStyleAccess()).build()); + S3Configuration s3Configuration = S3Configuration.builder() + .pathStyleAccessEnabled(parameters.isPathStyleAccess()) + .build(); + + s3ClientBuilder.serviceConfiguration(s3Configuration); // TODO: Some configuration done in configureBasicParams is not done yet. // Request handlers will be added during auditor work. Need to verify how metrics collection @@ -502,7 +499,7 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS); - String protocol = secureConnections ? HTTPS : HTTP; + String protocol = secureConnections ? "https" : "http"; if (endpoint == null || endpoint.isEmpty()) { // the default endpoint @@ -525,16 +522,21 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { * * @param region AWS S3 Region set in the config. This property may not be set, in which case * ask S3 for the region. - * @param s3Client A S3 Client with default config, used for getting the region of a bucket. + * @param credentialsProvider Credentials provider to be used with the default s3 client. * @return region of the bucket. */ - private Region getS3Region(String region, S3Client s3Client) { + private Region getS3Region(String region, AWSCredentialsProvider credentialsProvider) { if (!StringUtils.isBlank(region)) { return Region.of(region); } try { + // build a s3 client with region eu-west-2 that can be used to get the region of the bucket. 
+ S3Client s3Client = S3Client.builder().region(Region.EU_WEST_2) + .credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(credentialsProvider)) + .build(); + HeadBucketResponse headBucketResponse = s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); return Region.of( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index 976caf3310816..b4674159ea473 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -466,7 +466,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) { S3AFileStatus status = createFileStatus(keyPath, s3Object, listingOperationCallbacks.getDefaultBlockSize(keyPath), getStoreContext().getUsername(), - s3Object.eTag(), null, isCSEEnabled); + s3Object.eTag(), null, isCSEEnabled); LOG.debug("Adding: {}", status); stats.add(status); added++; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 192519e2f9274..62eaf60a92d6d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1207,7 +1207,7 @@ public AmazonS3 getAmazonS3ClientForTesting(String reason) { */ @VisibleForTesting public S3Client getAmazonS3V2ClientForTesting(String reason) { - LOG.warn("Access to S3A client requested, reason {}", reason); + LOG.warn("Access to S3 client requested, reason {}", reason); return s3V2; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index be7fc0716e0bd..14e9e19db4383 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -51,6 +51,7 @@ import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import software.amazon.awssdk.awscore.AwsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsRequest; import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; @@ -151,6 +152,17 @@ private T prepareRequest(T t) { : t; } + /** + * Preflight preparation of V2 AWS request. + * @param web service request + * @return prepared entry. + */ + // TODO: Currently this is a NOOP, will be completed separately as part of auditor work. + @Retries.OnceRaw + private T prepareV2Request(T t) { + return t; + } + /** * Get the canned ACL of this FS. 
* @return an ACL, if any @@ -571,8 +583,7 @@ public ListObjectsRequest newListObjectsV1Request( requestBuilder.delimiter(delimiter); } - //TODO: add call to prepareRequest - return requestBuilder.build(); + return prepareV2Request(requestBuilder.build()); } @Override @@ -587,16 +598,16 @@ public ListObjectsV2Request newListObjectsV2Request( final String delimiter, final int maxKeys) { - final ListObjectsV2Request.Builder requestBuilder = - ListObjectsV2Request.builder().bucket(bucket).maxKeys(maxKeys).prefix(key); + final ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder() + .bucket(bucket) + .maxKeys(maxKeys) + .prefix(key); if (delimiter != null) { requestBuilder.delimiter(delimiter); } - //TODO: add call to prepareRequest, not added for now as PrepareRequest is a functional - // interface, uses AmazonWebServiceRequest, SDKV2 uses AwsRequest. - return requestBuilder.build(); + return prepareV2Request(requestBuilder.build()); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java index 81284d9265a56..14dfaed7483bc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3ClientFactory.java @@ -35,7 +35,8 @@ public class MockS3ClientFactory implements S3ClientFactory { @Override - public AmazonS3 createS3Client(URI uri, final S3ClientCreationParameters parameters) { + public AmazonS3 createS3Client(URI uri, + final S3ClientCreationParameters parameters) { AmazonS3 s3 = mock(AmazonS3.class); String bucket = uri.getHost(); when(s3.doesBucketExist(bucket)).thenReturn(true); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index 5218d58b49071..a6269c437665a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -104,7 +104,7 @@ public interface S3ATestConstants { * Default path for the multi MB test file: {@value}. */ String DEFAULT_CSVTEST_FILE = LANDSAT_BUCKET + "scene_list.gz"; - + /** * Configuration key for an existing object in a requester pays bucket: {@value}. * From 4dd228d40d0b7c28ccf2e52212dd20fa2dd8238e Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 25 Aug 2022 11:52:49 +0100 Subject: [PATCH 22/24] adds some constants --- .../main/java/org/apache/hadoop/fs/s3a/Constants.java | 9 +++++++++ .../org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index ea2011f6b64c1..33a1ca28bf040 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1204,4 +1204,13 @@ private Constants() { */ public static final int DEFAULT_AWS_S3_VECTOR_READS_MAX_MERGED_READ_SIZE = 1253376; //1M + /** + * The bucket region header. + */ + public static final String BUCKET_REGION_HEADER = "x-amz-bucket-region"; + + /** + * Status code for moved permanently. 
+ */ + public static final int HTTP_STATUS_CODE_MOVED_PERMANENTLY = 301; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 55a4a55ee6ca2..1dd4fc2efe18c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -75,10 +75,12 @@ import static com.amazonaws.services.s3.Headers.REQUESTER_PAYS_HEADER; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; +import static org.apache.hadoop.fs.s3a.Constants.BUCKET_REGION_HEADER; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_STATUS_CODE_MOVED_PERMANENTLY; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.S3AUtils.getEncryptionAlgorithm; @@ -540,9 +542,9 @@ private Region getS3Region(String region, AWSCredentialsProvider credentialsProv HeadBucketResponse headBucketResponse = s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); return Region.of( - headBucketResponse.sdkHttpResponse().headers().get("x-amz-bucket-region").get(0)); + headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0)); } catch (S3Exception exception) { - if (exception.statusCode() == 301) { + if (exception.statusCode() == HTTP_STATUS_CODE_MOVED_PERMANENTLY) { List bucketRegion = exception.awsErrorDetails().sdkHttpResponse().headers().get("x-amz-bucket-region"); return Region.of(bucketRegion.get(0)); From 2ad9c81486e334ea644041e2735dd3c030e49d69 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 25 Aug 2022 14:02:39 +0100 Subject: [PATCH 23/24] use eu-west-1 for default s3 client --- .../org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 1dd4fc2efe18c..7c09fd0782708 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -534,8 +534,12 @@ private Region getS3Region(String region, AWSCredentialsProvider credentialsProv } try { - // build a s3 client with region eu-west-2 that can be used to get the region of the bucket. - S3Client s3Client = S3Client.builder().region(Region.EU_WEST_2) + // build a s3 client with region eu-west-1 that can be used to get the region of the bucket. + // Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because + // us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com to + // the actual region the bucket is in. As the request is signed with us-east-1 and not the + // bucket's region, it fails. 
+ S3Client s3Client = S3Client.builder().region(Region.EU_WEST_1) .credentialsProvider(V1V2AwsCredentialProviderAdapter.adapt(credentialsProvider)) .build(); From 5a2bb9244bf18a3846f5331f7e8eefa6b186b911 Mon Sep 17 00:00:00 2001 From: Ahmar Suhail Date: Thu, 25 Aug 2022 14:07:22 +0100 Subject: [PATCH 24/24] use region header constant --- .../java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 7c09fd0782708..9b14b6ce4e3a6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -550,7 +550,7 @@ private Region getS3Region(String region, AWSCredentialsProvider credentialsProv } catch (S3Exception exception) { if (exception.statusCode() == HTTP_STATUS_CODE_MOVED_PERMANENTLY) { List bucketRegion = - exception.awsErrorDetails().sdkHttpResponse().headers().get("x-amz-bucket-region"); + exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER); return Region.of(bucketRegion.get(0)); } }
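With the final two patches applied, region resolution in the v2 client factory settles into one flow: use fs.s3a.endpoint.region when set; otherwise issue a HeadBucket from the fixed eu-west-1 probe client and read x-amz-bucket-region from the response, falling back to the same header on the 301 error a cross-region HeadBucket raises; failing all that, assume us-east-1. Consolidated into a single sketch (method and variable names are illustrative):

    import java.util.List;
    import org.apache.commons.lang3.StringUtils;
    import software.amazon.awssdk.regions.Region;
    import software.amazon.awssdk.services.s3.S3Client;
    import software.amazon.awssdk.services.s3.model.HeadBucketRequest;
    import software.amazon.awssdk.services.s3.model.S3Exception;

    static Region resolveRegion(String configured, S3Client probeClient, String bucket) {
      if (!StringUtils.isBlank(configured)) {
        return Region.of(configured);   // explicit configuration always wins
      }
      try {
        return Region.of(probeClient
            .headBucket(HeadBucketRequest.builder().bucket(bucket).build())
            .sdkHttpResponse().headers().get("x-amz-bucket-region").get(0));
      } catch (S3Exception e) {
        // a cross-region HeadBucket fails with 301 Moved Permanently, but the
        // error response still carries the bucket's region in the same header
        if (e.statusCode() == 301) {
          List<String> header =
              e.awsErrorDetails().sdkHttpResponse().headers().get("x-amz-bucket-region");
          return Region.of(header.get(0));
        }
        return Region.US_EAST_1;
      }
    }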