Skip to content

Commit 41e32a2

Browse files
committed
HADOOP-19740. use region "external" for any third party endpoint
...and no attempt to fall back to central-1 here. You no longer need to declare a region for a third party store; endpoint is enough.
1 parent 6f662bf commit 41e32a2

File tree

3 files changed

+115
-68
lines changed

3 files changed

+115
-68
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -323,18 +323,20 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Regio
323323
DEFAULT_REGION_CHAIN.info(SDK_REGION_CHAIN_IN_USE);
324324
LOG.debug(SDK_REGION_CHAIN_IN_USE);
325325
} else {
326+
327+
// a region has been determined from configuration,
328+
// or it is falling back to central region.
329+
326330
final Region region = resolution.getRegion();
327331
builder.region(requireNonNull(region));
328332
// s3 cross region access
329333
if (resolution.isCrossRegionAccessEnabled()) {
330334
builder.crossRegionAccessEnabled(true);
331335
}
332-
if (!resolution.isUseCentralEndpoint()) {
333-
final URI endpointUri = resolution.getEndpointUri();
334-
if (endpointUri != null) {
335-
builder.endpointOverride(endpointUri);
336-
LOG.debug("Setting endpoint to {}", endpointUri);
337-
}
336+
final URI endpointUri = resolution.getEndpointUri();
337+
if (endpointUri != null && !resolution.isUseCentralEndpoint()) {
338+
LOG.debug("Setting endpoint to {}", endpointUri);
339+
builder.endpointOverride(endpointUri);
338340
}
339341
}
340342
return resolution;

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java

Lines changed: 75 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
import java.io.IOException;
2222
import java.net.URI;
2323
import java.net.URISyntaxException;
24+
import java.util.Locale;
2425
import java.util.Optional;
2526
import java.util.regex.Matcher;
2627
import java.util.regex.Pattern;
27-
2828
import javax.annotation.Nullable;
2929

3030
import org.slf4j.Logger;
@@ -80,56 +80,67 @@ public class RegionResolution {
8080
private static final Pattern VPC_ENDPOINT_PATTERN =
8181
Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$");
8282

83-
/**
84-
* Error message when an endpoint is set with FIPS enabled: {@value}.
85-
*/
86-
@VisibleForTesting
87-
public static final String ERROR_ENDPOINT_WITH_FIPS =
88-
"Only S3 central endpoint cannot be set when " + FIPS_ENDPOINT + " is true";
83+
/**
84+
* Error message when an endpoint is set with FIPS enabled: {@value}.
85+
*/
86+
@VisibleForTesting
87+
public static final String ERROR_ENDPOINT_WITH_FIPS =
88+
"Only S3 central endpoint cannot be set when " + FIPS_ENDPOINT + " is true";
8989

9090
/**
9191
* Virtual hostnames MUST be used when using the FIPS endpoint.
9292
*/
93-
public static final String FIPS_PATH_ACCESS_INCOMPATIBLE =
94-
"Path style access must be disabled when "+ FIPS_ENDPOINT + " is true";
93+
public static final String FIPS_PATH_ACCESS_INCOMPATIBLE =
94+
"Path style access must be disabled when " + FIPS_ENDPOINT + " is true";
95+
96+
/**
97+
* String value for external region: {@value}.
98+
*/
99+
public static final String EXTERNAL = "external";
100+
101+
/**
102+
* External region, used for third party endpoints.
103+
*/
104+
public static final Region EXTERNAL_REGION = Region.of(EXTERNAL);
95105

96106
/**
97107
* How was the region resolved?
98108
*/
99-
public enum RegionResolutionMechanism {
109+
public enum RegionResolutionMechanism {
100110

101-
CalculatedFromEndpoint("Calculated from endpoint"),
102-
FallbackToCentral("Fallback to central endpoint"),
103-
ParseVpceEndpoint("Parse VPCE Endpoint"),
104-
Ec2Metadata("EC2 Metadata"),
105-
Sdk("SDK resolution chain"),
106-
Specified("region specified");
111+
CalculatedFromEndpoint("Calculated from endpoint"),
112+
ExternalEndpoint("External endpoint"),
113+
FallbackToCentral("Fallback to central endpoint"),
114+
ParseVpceEndpoint("Parse VPCE Endpoint"),
115+
Ec2Metadata("EC2 Metadata"),
116+
Sdk("SDK resolution chain"),
117+
Specified("region specified");
107118

108-
/**
109-
* Text of the mechanism.
110-
*/
111-
private final String mechanism;
119+
/**
120+
* Text of the mechanism.
121+
*/
122+
private final String mechanism;
112123

113-
RegionResolutionMechanism(String mechanism) {
114-
this.mechanism = mechanism;
115-
}
124+
RegionResolutionMechanism(String mechanism) {
125+
this.mechanism = mechanism;
126+
}
116127

117128
/**
118129
* String value of the resolution mechanism.
119130
* @return the resolution mechanism.
120131
*/
121-
public String getMechanism() {
122-
return mechanism;
123-
}
124-
125-
@Override
126-
public String toString() {
127-
final StringBuilder sb = new StringBuilder("RegionResolutionMechanism{");
128-
sb.append("mechanism='").append(mechanism).append('\'');
129-
sb.append('}');
130-
return sb.toString();
131-
}
132-
}
132+
public String getMechanism() {
133+
return mechanism;
134+
}
135+
136+
@Override
137+
public String toString() {
138+
final StringBuilder sb = new StringBuilder("RegionResolutionMechanism{");
139+
sb.append("mechanism='").append(mechanism).append('\'');
140+
sb.append('}');
141+
return sb.toString();
142+
}
143+
}
133144

134145
/**
135146
* The resolution of a region and endpoint..
@@ -298,7 +309,6 @@ public String toString() {
298309

299310
/**
300311
* Given a endpoint string, create the endpoint URI.
301-
*
302312
* @param endpoint possibly null endpoint.
303313
* @param secureConnections use secure HTTPS connection?
304314
* @return an endpoint uri or null if the endpoint was passed in was null/empty
@@ -341,7 +351,8 @@ public static Optional<Resolution> getS3RegionFromEndpoint(
341351
Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint);
342352
if (matcher.find()) {
343353
LOG.debug("Mapping to VPCE");
344-
LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1));
354+
LOG.debug("Endpoint {} is VPC endpoint; parsing region as {}",
355+
endpoint, matcher.group(1));
345356
return Optional.of(new Resolution(
346357
Region.of(matcher.group(1)),
347358
RegionResolutionMechanism.ParseVpceEndpoint));
@@ -357,6 +368,20 @@ public static Optional<Resolution> getS3RegionFromEndpoint(
357368
return Optional.empty();
358369
}
359370

371+
/**
372+
* Is this an AWS endpoint, that is: has an endpoint been set which matches
373+
* amazon.
374+
* @param endpoint non-null endpoint URL
375+
* @return true if this is amazonaws or amazonaws china
376+
*/
377+
public static boolean isAwsEndpoint(final String endpoint) {
378+
final String h = endpoint.toLowerCase(Locale.ROOT);
379+
// Common AWS partitions: global (.amazonaws.com) and China (.amazonaws.com.cn).
380+
return h.endsWith(".amazonaws.com")
381+
|| h.endsWith(".amazonaws.com.cn");
382+
}
383+
384+
360385
/**
361386
* Does the region name refer to an SDK region?
362387
* @param configuredRegion region in the configuration
@@ -394,6 +419,7 @@ public static Resolution calculateRegion(
394419

395420
// endpoint; may be null
396421
final String endpointStr = parameters.getEndpoint();
422+
boolean endpointDeclared = endpointStr != null && !endpointStr.isEmpty();
397423
// will be null if endpointStr is null/empty
398424
final URI endpoint = buildEndpointUri(endpointStr,
399425
conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS));
@@ -419,8 +445,7 @@ public static Resolution calculateRegion(
419445

420446
// central endpoint if no endpoint has been set, or it is explicitly
421447
// requested
422-
boolean endpointEndsWithCentral = endpointStr == null
423-
|| endpointStr.isEmpty()
448+
boolean endpointEndsWithCentral = !endpointDeclared
424449
|| endpointStr.endsWith(CENTRAL_ENDPOINT);
425450

426451
if (!resolution.isRegionResolved()) {
@@ -452,10 +477,18 @@ public static Resolution calculateRegion(
452477
FIPS_PATH_ACCESS_INCOMPATIBLE);
453478
}
454479

480+
455481
if (!resolution.isRegionResolved()) {
456-
// still failing to resolve the region
457-
// fall back to central
458-
resolution.withRegion(US_EAST_2, RegionResolutionMechanism.FallbackToCentral);
482+
// still not resolved.
483+
if (!endpointDeclared || isAwsEndpoint(endpointStr)) {
484+
// still failing to resolve the region
485+
// fall back to central
486+
resolution.withRegion(US_EAST_2, RegionResolutionMechanism.FallbackToCentral);
487+
} else {
488+
// we are not resolved and not an aws region.
489+
// set the region to being "external"
490+
resolution.withRegion(EXTERNAL_REGION, RegionResolutionMechanism.ExternalEndpoint);
491+
}
459492
}
460493

461494
// No need to override endpoint with "s3.amazonaws.com".

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRegionResolution.java

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import static org.apache.hadoop.fs.s3a.Constants.SDK_REGION;
3636
import static org.apache.hadoop.fs.s3a.impl.RegionResolution.ERROR_ENDPOINT_WITH_FIPS;
3737
import static org.apache.hadoop.fs.s3a.impl.RegionResolution.calculateRegion;
38+
import static org.apache.hadoop.fs.s3a.impl.RegionResolution.isEc2Region;
3839
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
3940

4041
/**
@@ -140,7 +141,7 @@ public void testWithChinaVPCE() throws IOException {
140141
resolve(getConfiguration(), CN_VPC_ENDPOINT, null, false,
141142
CN_NORTHWEST_1, RegionResolution.RegionResolutionMechanism.ParseVpceEndpoint);
142143
assertEndpoint(r, CN_VPC_ENDPOINT);
143-
assertUseCentral(r, false);
144+
assertUseCentralValue(r, false);
144145
}
145146

146147
@Test
@@ -150,7 +151,7 @@ public void testCentralEndpointNoRegion() throws IOException {
150151
US_EAST_2,
151152
RegionResolution.RegionResolutionMechanism.FallbackToCentral);
152153
assertEndpoint(r, null);
153-
assertUseCentral(r, true);
154+
assertUseCentralValue(r, true);
154155
}
155156

156157
@Test
@@ -159,7 +160,7 @@ public void testCentralEndpointWithRegion() throws IOException {
159160
resolve(getConfiguration(), CENTRAL_ENDPOINT, US_WEST_2, false,
160161
US_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
161162
assertEndpoint(r, null);
162-
assertUseCentral(r, true);
163+
assertUseCentralValue(r, true);
163164
}
164165

165166
@Test
@@ -169,7 +170,7 @@ public void testConfiguredRegion() throws IOException {
169170
EU_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
170171
// this still uses the central endpoint.
171172
assertEndpoint(r, null);
172-
assertUseCentral(r, true);
173+
assertUseCentralValue(r, true);
173174
}
174175

175176
@Test
@@ -179,17 +180,7 @@ public void testSDKRegion() throws IOException {
179180
null, RegionResolution.RegionResolutionMechanism.Sdk);
180181
// SDK handles endpoint logic.
181182
assertEndpoint(r, null);
182-
assertUseCentral(r, true);
183-
}
184-
185-
@Test
186-
public void testEC2UpperCaseRegion() throws IOException {
187-
final RegionResolution.Resolution r =
188-
resolve(getConfiguration(), null, "EC2", false,
189-
null, RegionResolution.RegionResolutionMechanism.Sdk);
190-
// SDK handles endpoint logic.
191-
assertEndpoint(r, null);
192-
assertUseCentral(r, true);
183+
assertUseCentralValue(r, true);
193184
}
194185

195186
@Test
@@ -199,7 +190,7 @@ public void testSDKUpperCaseRegion() throws IOException {
199190
null, RegionResolution.RegionResolutionMechanism.Sdk);
200191
// SDK handles endpoint logic.
201192
assertEndpoint(r, null);
202-
assertUseCentral(r, true);
193+
assertUseCentralValue(r, true);
203194
}
204195

205196
@Test
@@ -209,7 +200,7 @@ public void testEmptyStringRegion() throws IOException {
209200
null, RegionResolution.RegionResolutionMechanism.Sdk);
210201
// SDK handles endpoint logic.
211202
assertEndpoint(r, null);
212-
assertUseCentral(r, true);
203+
assertUseCentralValue(r, true);
213204
}
214205

215206
@Test
@@ -265,7 +256,7 @@ public void testWithChinaEndpoint() throws IOException {
265256
CN_NORTHWEST_1,
266257
RegionResolution.RegionResolutionMechanism.CalculatedFromEndpoint);
267258
assertEndpoint(r, CN_ENDPOINT);
268-
assertUseCentral(r, false);
259+
assertUseCentralValue(r, false);
269260
}
270261

271262
@Test
@@ -276,7 +267,7 @@ public void testWithGovCloudEndpoint() throws IOException {
276267
US_GOV_EAST_1,
277268
RegionResolution.RegionResolutionMechanism.CalculatedFromEndpoint);
278269
assertEndpoint(r, GOV_ENDPOINT);
279-
assertUseCentral(r, false);
270+
assertUseCentralValue(r, false);
280271
}
281272

282273
@Test
@@ -310,7 +301,28 @@ public void testEC2Region() {
310301
// expected on anything except EC2
311302
LOG.info("Expected failure when EC2 IAM is not present", e);
312303
}
304+
}
313305

306+
@Test
307+
public void testEc2RegionCaseLogic() throws Throwable {
308+
Assertions.assertThat(isEc2Region("ec2"))
309+
.describedAs("lower case ec2").isTrue();
310+
Assertions.assertThat(isEc2Region("EC2"))
311+
.describedAs("upper case ec2").isTrue();
312+
}
313+
314+
@Test
315+
public void testGcsRegion() throws Throwable {
316+
resolve(getConfiguration(), "https://storage.googleapis.com", null, false,
317+
RegionResolution.EXTERNAL,
318+
RegionResolution.RegionResolutionMechanism.ExternalEndpoint);
319+
}
320+
321+
@Test
322+
public void testLocalhostRegion() throws Throwable {
323+
resolve(getConfiguration(), "127.0.0.1", null, false,
324+
RegionResolution.EXTERNAL,
325+
RegionResolution.RegionResolutionMechanism.ExternalEndpoint);
314326
}
315327

316328
/**
@@ -331,7 +343,7 @@ private static void assertEndpoint(final RegionResolution.Resolution r,
331343
* @param r resolution
332344
* @param expected expected value.
333345
*/
334-
private static void assertUseCentral(final RegionResolution.Resolution r,
346+
private static void assertUseCentralValue(final RegionResolution.Resolution r,
335347
final boolean expected) {
336348
Assertions.assertThat(r.isUseCentralEndpoint())
337349
.describedAs("Endpoint of %s", r)

0 commit comments

Comments
 (0)