Skip to content

Commit c6fe610

Browse files
mehakmeet authored and Simon-3008-Simon committed
HADOOP-17705. S3A to add Config to set AWS region (apache#3020)
The option `fs.s3a.endpoint.region` can be used to explicitly set the AWS region of a bucket. This is needed when using AWS Private Link, as the region cannot be automatically determined. Contributed by Mehakmeet Singh
1 parent a3b9c37 commit c6fe610

File tree

6 files changed

+150
-14
lines changed

6 files changed

+150
-14
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,4 +1081,10 @@ private Constants() {
10811081
*/
10821082
public static final String XA_HEADER_PREFIX = "header.";
10831083

1084+
/**
1085+
* AWS S3 region for the bucket. When set, it bypasses the construction of
1086+
* the region from the endpoint URL.
1087+
*/
1088+
public static final String AWS_REGION = "fs.s3a.endpoint.region";
1089+
10841090
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.apache.hadoop.conf.Configured;
4343
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
4444

45+
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
4546
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
4647
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
4748

@@ -132,7 +133,7 @@ protected AmazonS3 buildAmazonS3Client(
132133
// endpoint set up is a PITA
133134
AwsClientBuilder.EndpointConfiguration epr
134135
= createEndpointConfiguration(parameters.getEndpoint(),
135-
awsConf);
136+
awsConf, getConf().getTrimmed(AWS_REGION));
136137
if (epr != null) {
137138
// an endpoint binding was constructed: use it.
138139
b.withEndpointConfiguration(epr);
@@ -197,12 +198,14 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
197198
*
198199
* @param endpoint possibly null endpoint.
199200
* @param awsConf config to build the URI from.
201+
* @param awsRegion AWS S3 Region if the corresponding config is set.
200202
* @return a configuration for the S3 client builder.
201203
*/
202204
@VisibleForTesting
203205
public static AwsClientBuilder.EndpointConfiguration
204206
createEndpointConfiguration(
205-
final String endpoint, final ClientConfiguration awsConf) {
207+
final String endpoint, final ClientConfiguration awsConf,
208+
String awsRegion) {
206209
LOG.debug("Creating endpoint configuration for {}", endpoint);
207210
if (endpoint == null || endpoint.isEmpty()) {
208211
// the default endpoint...we should be using null at this point.
@@ -212,17 +215,19 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
212215

213216
final URI epr = RuntimeHttpUtils.toUri(endpoint, awsConf);
214217
LOG.debug("Endpoint URI = {}", epr);
215-
216-
String region;
217-
if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) {
218-
LOG.debug("Endpoint {} is not the default; parsing", epr);
219-
region = AwsHostNameUtils.parseRegion(
220-
epr.getHost(),
221-
S3_SERVICE_NAME);
222-
} else {
223-
// US-east, set region == null.
224-
LOG.debug("Endpoint {} is the standard one; declare region as null", epr);
225-
region = null;
218+
String region = awsRegion;
219+
if (StringUtils.isBlank(region)) {
220+
if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) {
221+
LOG.debug("Endpoint {} is not the default; parsing", epr);
222+
region = AwsHostNameUtils.parseRegion(
223+
epr.getHost(),
224+
S3_SERVICE_NAME);
225+
} else {
226+
// US-east, set region == null.
227+
LOG.debug("Endpoint {} is the standard one; declare region as null",
228+
epr);
229+
region = null;
230+
}
226231
}
227232
LOG.debug("Region for endpoint {}, URI {} is determined as {}",
228233
endpoint, epr, region);

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -797,6 +797,14 @@ options are covered in [Testing](./testing.md).
797797
</description>
798798
</property>
799799

800+
<property>
801+
<name>fs.s3a.endpoint.region</name>
802+
<description>AWS S3 region for a bucket, which bypasses the parsing of
803+
fs.s3a.endpoint to determine the region. Setting it explicitly avoids errors
804+
when a PrivateLink URL is used as the endpoint.
805+
</description>
806+
</property>
807+
800808
<property>
801809
<name>fs.s3a.path.style.access</name>
802810
<value>false</value>

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,32 @@ As an example, the endpoint for S3 Frankfurt is `s3.eu-central-1.amazonaws.com`:
247247
<value>s3.eu-central-1.amazonaws.com</value>
248248
</property>
249249
```
250+
### <a name="AuthorizationHeaderMalformed"></a> "Authorization Header is Malformed" (400) exception when a PrivateLink URL is used in "fs.s3a.endpoint"
251+
252+
When a [PrivateLink](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html) URL
253+
is used instead of the standard s3a endpoint, requests fail with an "authorization
254+
header is malformed" exception. For example, if we set
255+
`fs.s3a.endpoint=bucket.vpce-<some_string>.s3.ca-central-1.vpce.amazonaws.com` and make S3 calls, we get:
256+
```
257+
com.amazonaws.services.s3.model.AmazonS3Exception: The authorization header is malformed; the region 'vpce' is wrong; expecting 'ca-central-1'
258+
(Service: Amazon S3; Status Code: 400; Error Code: AuthorizationHeaderMalformed; Request ID: req-id; S3 Extended Request ID: req-id-2), S3 Extended Request ID: req-id-2:AuthorizationHeaderMalformed: The authorization
259+
header is malformed; the region 'vpce' is wrong; expecting 'ca-central-1' (Service: Amazon S3; Status Code: 400; Error Code: AuthorizationHeaderMalformed; Request ID: req-id;
260+
```
261+
Cause:
262+
263+
Endpoint parsing assumes that the AWS S3 region is the second component of the
264+
`fs.s3a.endpoint` URL when it is split on ".". With a PrivateLink URL this
265+
assumption fails: the wrong region is derived and the request is rejected with an
266+
authorization exception. To support PrivateLink URLs, set `fs.s3a.endpoint.region`
267+
to the bucket's region, which bypasses the parsing of `fs.s3a.endpoint`. In the case shown above, set the AWS
268+
S3 region to `ca-central-1`.
269+
270+
```xml
271+
<property>
272+
<name>fs.s3a.endpoint.region</name>
273+
<value>ca-central-1</value>
274+
</property>
275+
```
250276

251277
### `Class does not implement AWSCredentialsProvider`
252278

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.s3a;
20+
21+
import com.amazonaws.ClientConfiguration;
22+
import com.amazonaws.client.builder.AwsClientBuilder;
23+
import com.amazonaws.util.AwsHostNameUtils;
24+
import org.assertj.core.api.Assertions;
25+
import org.junit.Test;
26+
27+
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
28+
29+
/**
30+
* Test to check correctness of S3A endpoint regions in
31+
* {@link DefaultS3ClientFactory}.
32+
*/
33+
public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
34+
35+
private static final String AWS_REGION_TEST = "test-region";
36+
private static final String AWS_ENDPOINT_TEST = "test-endpoint";
37+
private static final String AWS_ENDPOINT_TEST_WITH_REGION =
38+
"test-endpoint.some-region.amazonaws.com";
39+
40+
/**
41+
* Test to verify that setting a region with the config would bypass the
42+
* construction of region from endpoint.
43+
*/
44+
@Test
45+
public void testWithRegionConfig() {
46+
getFileSystem().getConf().set(AWS_REGION, AWS_REGION_TEST);
47+
48+
//Creating an endpoint config with a custom endpoint.
49+
AwsClientBuilder.EndpointConfiguration epr = createEpr(AWS_ENDPOINT_TEST,
50+
getFileSystem().getConf().getTrimmed(AWS_REGION));
51+
//Checking if setting region config bypasses the endpoint region.
52+
Assertions.assertThat(epr.getSigningRegion())
53+
.describedAs("There is a region mismatch")
54+
.isEqualTo(getFileSystem().getConf().get(AWS_REGION));
55+
}
56+
57+
/**
58+
* Test to verify that not setting the region config, would lead to using
59+
* endpoint to construct the region.
60+
*/
61+
@Test
62+
public void testWithoutRegionConfig() {
63+
getFileSystem().getConf().unset(AWS_REGION);
64+
65+
//Creating an endpoint config with a custom endpoint containing a region.
66+
AwsClientBuilder.EndpointConfiguration eprRandom =
67+
createEpr(AWS_ENDPOINT_TEST_WITH_REGION,
68+
getFileSystem().getConf().getTrimmed(AWS_REGION));
69+
String regionFromEndpoint =
70+
AwsHostNameUtils
71+
.parseRegionFromAwsPartitionPattern(AWS_ENDPOINT_TEST_WITH_REGION);
72+
//Checking if not setting region config leads to constructing the region
73+
// from endpoint.
74+
Assertions.assertThat(eprRandom.getSigningRegion())
75+
.describedAs("There is a region mismatch")
76+
.isNotEqualTo(getFileSystem().getConf().get(AWS_REGION))
77+
.isEqualTo(regionFromEndpoint);
78+
}
79+
80+
/**
81+
* Method to create EndpointConfiguration using an endpoint.
82+
*
83+
* @param endpoint the endpoint to be used for EndpointConfiguration creation.
84+
* @return an instance of EndpointConfiguration.
85+
*/
86+
private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint,
87+
String awsRegion) {
88+
return DefaultS3ClientFactory.createEndpointConfiguration(endpoint,
89+
new ClientConfiguration(), awsRegion);
90+
}
91+
}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestNetworkBinding.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public void expectEndpoint(final String src,
8585
final boolean expectNull,
8686
final String expectRegion) {
8787
AwsClientBuilder.EndpointConfiguration epr =
88-
createEndpointConfiguration(src, new ClientConfiguration());
88+
createEndpointConfiguration(src, new ClientConfiguration(), src);
8989
String eprStr = epr == null
9090
? "(empty)"
9191
: ("(" + epr.getServiceEndpoint() + " " + epr.getSigningRegion());

0 commit comments

Comments
 (0)