Skip to content

Commit 4ca303a

Browse files
steveloughran authored and deepakdamri committed
HADOOP-17271. S3A connector to support IOStatistics. (apache#2580)
S3A connector to support the IOStatistics API of HADOOP-16830. This is a major rework of the S3A Statistics collection to * Embrace the IOStatistics APIs * Move from direct references of S3AInstrumentation statistics collectors to interface/implementation classes in new packages. * Ubiquitous support of IOStatistics, including: S3AFileSystem, input and output streams, RemoteIterator instances provided in list calls. * Adoption of new statistic names from hadoop-common Regarding statistic collection, as well as all existing statistics, the connector now records min/max/mean durations of HTTP GET and HEAD requests, and those of LIST operations. Contributed by Steve Loughran. Change-Id: I182d34b6ac39e017a8b4a221dad8e930882b39cf
1 parent 1dabf15 commit 4ca303a

File tree

70 files changed

+3393
-443
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+3393
-443
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,13 @@ private Constants() {
728728
public static final String STREAM_READ_GAUGE_INPUT_POLICY =
729729
"stream_read_gauge_input_policy";
730730

731+
/**
732+
* Gauge name for the input policy : {@value}.
733+
* This references an enum currently exclusive to the S3A stream.
734+
*/
735+
public static final String STREAM_READ_GAUGE_INPUT_POLICY =
736+
"stream_read_gauge_input_policy";
737+
731738
@InterfaceAudience.Private
732739
@InterfaceStability.Unstable
733740
public static final String S3_CLIENT_FACTORY_IMPL =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 186 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,26 @@
2323

2424
import com.amazonaws.ClientConfiguration;
2525
import com.amazonaws.auth.AWSCredentialsProvider;
26+
import com.amazonaws.client.builder.AwsClientBuilder;
27+
import com.amazonaws.metrics.RequestMetricCollector;
2628
import com.amazonaws.services.s3.AmazonS3;
2729
import com.amazonaws.services.s3.AmazonS3Client;
30+
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
2831
import com.amazonaws.services.s3.S3ClientOptions;
32+
import com.amazonaws.services.s3.internal.ServiceUtils;
33+
import com.amazonaws.util.AwsHostNameUtils;
34+
import com.amazonaws.util.RuntimeHttpUtils;
35+
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
2936
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
3038

3139
import org.apache.commons.lang3.StringUtils;
3240
import org.apache.hadoop.classification.InterfaceAudience;
3341
import org.apache.hadoop.classification.InterfaceStability;
3442
import org.apache.hadoop.conf.Configuration;
3543
import org.apache.hadoop.conf.Configured;
44+
import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
45+
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
3646

3747
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
3848
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
@@ -41,7 +51,7 @@
4151

4252
/**
4353
* The default {@link S3ClientFactory} implementation.
44-
* This which calls the AWS SDK to configure and create an
54+
* This calls the AWS SDK to configure and create an
4555
* {@link AmazonS3Client} that communicates with the S3 service.
4656
*/
4757
@InterfaceAudience.Private
@@ -50,13 +60,30 @@
5060
public class DefaultS3ClientFactory extends Configured
5161
implements S3ClientFactory {
5262

53-
protected static final Logger LOG = S3AFileSystem.LOG;
63+
private static final String S3_SERVICE_NAME = "s3";
64+
private static final String S3_SIGNER = "S3SignerType";
65+
private static final String S3_V4_SIGNER = "AWSS3V4SignerType";
5466

67+
/**
68+
* Subclasses refer to this.
69+
*/
70+
protected static final Logger LOG =
71+
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
72+
73+
/**
74+
* Create the client.
75+
* <p>
76+
* If the AWS stats are not null then a {@link AwsStatisticsCollector}
77+
* is created to bind the two.
78+
* <i>Important: until this binding works properly across regions,
79+
* the AWS stats argument should be null.</i>
80+
*/
5581
@Override
5682
public AmazonS3 createS3Client(URI name,
5783
final String bucket,
5884
final AWSCredentialsProvider credentials,
59-
final String userAgentSuffix) throws IOException {
85+
final String userAgentSuffix,
86+
final StatisticsFromAwsSdk statisticsFromAwsSdk) throws IOException {
6087
Configuration conf = getConf();
6188
final ClientConfiguration awsConf = S3AUtils
6289
.createAwsConf(conf, bucket, Constants.AWS_SERVICE_IDENTIFIER_S3);
@@ -73,36 +100,124 @@ public AmazonS3 createS3Client(URI name,
73100
if (!StringUtils.isEmpty(userAgentSuffix)) {
74101
awsConf.setUserAgentSuffix(userAgentSuffix);
75102
}
76-
return configureAmazonS3Client(
77-
newAmazonS3Client(credentials, awsConf), conf);
103+
// optional metrics
104+
RequestMetricCollector metrics = statisticsFromAwsSdk != null
105+
? new AwsStatisticsCollector(statisticsFromAwsSdk)
106+
: null;
107+
108+
return newAmazonS3Client(
109+
credentials,
110+
awsConf,
111+
metrics,
112+
conf.getTrimmed(ENDPOINT, ""),
113+
conf.getBoolean(PATH_STYLE_ACCESS, false));
78114
}
79115

80116
/**
81-
* Wrapper around constructor for {@link AmazonS3} client.
117+
* Create an {@link AmazonS3} client.
82118
* Override this to provide an extended version of the client
83119
* @param credentials credentials to use
84120
* @param awsConf AWS configuration
85-
* @return new AmazonS3 client
121+
* @param metrics metrics collector or null
122+
* @param endpoint endpoint string; may be ""
123+
* @param pathStyleAccess enable path style access?
124+
* @return new AmazonS3 client
86125
*/
87126
protected AmazonS3 newAmazonS3Client(
88-
AWSCredentialsProvider credentials, ClientConfiguration awsConf) {
89-
return new AmazonS3Client(credentials, awsConf);
127+
final AWSCredentialsProvider credentials,
128+
final ClientConfiguration awsConf,
129+
final RequestMetricCollector metrics,
130+
final String endpoint,
131+
final boolean pathStyleAccess) {
132+
if (metrics != null) {
133+
LOG.debug("Building S3 client using the SDK builder API");
134+
return buildAmazonS3Client(credentials, awsConf, metrics, endpoint,
135+
pathStyleAccess);
136+
} else {
137+
LOG.debug("Building S3 client using the classic SDK constructor");
138+
return classicAmazonS3Client(credentials, awsConf, endpoint,
139+
pathStyleAccess);
140+
}
90141
}
91142

92143
/**
93-
* Configure S3 client from the Hadoop configuration.
94-
*
144+
* Use the (newer) Builder SDK to create an AWS S3 client.
145+
* <p>
146+
* This has a more complex endpoint configuration, which
146+
* does not yet work in this code without triggering
147+
* regressions. So it is only used
148+
* when SDK metrics are supplied.
150+
* @param credentials credentials to use
151+
* @param awsConf AWS configuration
152+
* @param metrics metrics collector or null
153+
* @param endpoint endpoint string; may be ""
154+
* @param pathStyleAccess enable path style access?
155+
* @return new AmazonS3 client
156+
*/
157+
private AmazonS3 buildAmazonS3Client(
158+
final AWSCredentialsProvider credentials,
159+
final ClientConfiguration awsConf,
160+
final RequestMetricCollector metrics,
161+
final String endpoint,
162+
final boolean pathStyleAccess) {
163+
AmazonS3ClientBuilder b = AmazonS3Client.builder();
164+
b.withCredentials(credentials);
165+
b.withClientConfiguration(awsConf);
166+
b.withPathStyleAccessEnabled(pathStyleAccess);
167+
if (metrics != null) {
168+
b.withMetricsCollector(metrics);
169+
}
170+
171+
// endpoint set up is a PITA
172+
// client.setEndpoint("") is no longer available
173+
AwsClientBuilder.EndpointConfiguration epr
174+
= createEndpointConfiguration(endpoint, awsConf);
175+
if (epr != null) {
176+
// an endpoint binding was constructed: use it.
177+
b.withEndpointConfiguration(epr);
178+
}
179+
final AmazonS3 client = b.build();
180+
return client;
181+
}
182+
183+
/**
184+
* Wrapper around constructor for {@link AmazonS3} client.
185+
* Override this to provide an extended version of the client.
186+
* <p>
187+
* This uses a deprecated constructor -it is currently
188+
* the only one which works for us.
189+
* @param credentials credentials to use
190+
* @param awsConf AWS configuration
191+
* @param endpoint endpoint string; may be ""
192+
* @param pathStyleAccess enable path style access?
193+
* @return new AmazonS3 client
194+
*/
195+
@SuppressWarnings("deprecation")
196+
private AmazonS3 classicAmazonS3Client(
197+
AWSCredentialsProvider credentials,
198+
ClientConfiguration awsConf,
199+
final String endpoint,
200+
final boolean pathStyleAccess) {
201+
final AmazonS3 client = new AmazonS3Client(credentials, awsConf);
202+
return configureAmazonS3Client(client, endpoint, pathStyleAccess);
203+
}
204+
205+
/**
206+
* Configure classic S3 client.
207+
* <p>
95208
* This includes: endpoint, Path Access and possibly other
96209
* options.
97210
*
98-
* @param conf Hadoop configuration
211+
* @param s3 S3 Client.
212+
* @param endPoint s3 endpoint, may be empty
213+
* @param pathStyleAccess enable path style access?
99214
* @return S3 client
100215
* @throws IllegalArgumentException if misconfigured
101216
*/
102-
private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
103-
Configuration conf)
217+
protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
218+
final String endPoint,
219+
final boolean pathStyleAccess)
104220
throws IllegalArgumentException {
105-
String endPoint = conf.getTrimmed(ENDPOINT, "");
106221
if (!endPoint.isEmpty()) {
107222
try {
108223
s3.setEndpoint(endPoint);
@@ -112,31 +227,31 @@ private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
112227
throw new IllegalArgumentException(msg, e);
113228
}
114229
}
115-
return applyS3ClientOptions(s3, conf);
230+
return applyS3ClientOptions(s3, pathStyleAccess);
116231
}
117232

118233
/**
119234
* Perform any tuning of the {@code S3ClientOptions} settings based on
120235
* the Hadoop configuration.
121236
* This is different from the general AWS configuration creation as
122237
* it is unique to S3 connections.
123-
*
238+
* <p>
124239
* The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
125240
* to S3 buckets if configured. By default, the
126241
* behavior is to use virtual hosted-style access with URIs of the form
127242
* {@code http://bucketname.s3.amazonaws.com}
243+
* <p>
128244
* Enabling path-style access and a
129245
* region-specific endpoint switches the behavior to use URIs of the form
130246
* {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
131247
* It is common to use this when connecting to private S3 servers, as it
132248
* avoids the need to play with DNS entries.
133249
* @param s3 S3 client
134-
* @param conf Hadoop configuration
250+
* @param pathStyleAccess enable path style access?
135251
* @return the S3 client
136252
*/
137-
private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
138-
Configuration conf) {
139-
final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
253+
protected static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
254+
final boolean pathStyleAccess) {
140255
if (pathStyleAccess) {
141256
LOG.debug("Enabling path style access!");
142257
s3.setS3ClientOptions(S3ClientOptions.builder()
@@ -145,4 +260,54 @@ private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
145260
}
146261
return s3;
147262
}
263+
264+
/**
265+
* Given an endpoint string, return an endpoint config, or null, if none
266+
* is needed.
267+
* <p>
268+
* This is a pretty painful piece of code. It is trying to replicate
269+
* what AwsClient.setEndpoint() does, because you can't
270+
* call that setter on an AwsClient constructed via
271+
* the builder, and you can't pass a metrics collector
272+
* down except through the builder.
273+
* <p>
274+
* Note also that AWS signing is a mystery which nobody fully
275+
* understands, especially given all problems surface in a
276+
* "400 bad request" response, which, like all security systems,
277+
* provides minimal diagnostics out of fear of leaking
278+
* secrets.
279+
*
280+
* @param endpoint possibly null endpoint.
281+
* @param awsConf config to build the URI from.
282+
* @return a configuration for the S3 client builder, or null if the endpoint is null/empty and the default is to be used.
283+
*/
284+
@VisibleForTesting
285+
public static AwsClientBuilder.EndpointConfiguration
286+
createEndpointConfiguration(
287+
final String endpoint, final ClientConfiguration awsConf) {
288+
LOG.debug("Creating endpoint configuration for {}", endpoint);
289+
if (endpoint == null || endpoint.isEmpty()) {
290+
// the default endpoint...we should be using null at this point.
291+
LOG.debug("Using default endpoint -no need to generate a configuration");
292+
return null;
293+
}
294+
295+
final URI epr = RuntimeHttpUtils.toUri(endpoint, awsConf);
296+
LOG.debug("Endpoint URI = {}", epr);
297+
298+
String region;
299+
if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) {
300+
LOG.debug("Endpoint {} is not the default; parsing", epr);
301+
region = AwsHostNameUtils.parseRegion(
302+
epr.getHost(),
303+
S3_SERVICE_NAME);
304+
} else {
305+
// US-east, set region == null.
306+
LOG.debug("Endpoint {} is the standard one; declare region as null", epr);
307+
region = null;
308+
}
309+
LOG.debug("Region for endpoint {}, URI {} is determined as {}",
310+
endpoint, epr, region);
311+
return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
312+
}
148313
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import com.amazonaws.ClientConfiguration;
2222
import com.amazonaws.auth.AWSCredentialsProvider;
23+
import com.amazonaws.metrics.RequestMetricCollector;
2324
import com.amazonaws.services.s3.AmazonS3;
2425

2526
import org.apache.hadoop.classification.InterfaceAudience;
@@ -40,12 +41,21 @@ public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
4041
* Logs a warning that this is being done.
4142
* @param credentials credentials to use
4243
* @param awsConf AWS configuration
44+
* @param metrics metric collector
45+
* @param endpoint AWS endpoint
46+
* @param pathStyleAccess should path style access be supported?
4347
* @return an inconsistent client.
4448
*/
4549
@Override
4650
protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
47-
ClientConfiguration awsConf) {
51+
ClientConfiguration awsConf,
52+
final RequestMetricCollector metrics,
53+
final String endpoint,
54+
final boolean pathStyleAccess) {
4855
LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
49-
return new InconsistentAmazonS3Client(credentials, awsConf, getConf());
56+
InconsistentAmazonS3Client s3
57+
= new InconsistentAmazonS3Client(credentials, awsConf, getConf());
58+
configureAmazonS3Client(s3, endpoint, pathStyleAccess);
59+
return s3;
5060
}
5161
}

0 commit comments

Comments
 (0)