2323
2424import com .amazonaws .ClientConfiguration ;
2525import com .amazonaws .auth .AWSCredentialsProvider ;
26+ import com .amazonaws .client .builder .AwsClientBuilder ;
27+ import com .amazonaws .metrics .RequestMetricCollector ;
2628import com .amazonaws .services .s3 .AmazonS3 ;
2729import com .amazonaws .services .s3 .AmazonS3Client ;
30+ import com .amazonaws .services .s3 .AmazonS3ClientBuilder ;
2831import com .amazonaws .services .s3 .S3ClientOptions ;
32+ import com .amazonaws .services .s3 .internal .ServiceUtils ;
33+ import com .amazonaws .util .AwsHostNameUtils ;
34+ import com .amazonaws .util .RuntimeHttpUtils ;
35+ import org .apache .hadoop .thirdparty .com .google .common .annotations .VisibleForTesting ;
2936import org .slf4j .Logger ;
37+ import org .slf4j .LoggerFactory ;
3038
3139import org .apache .commons .lang3 .StringUtils ;
3240import org .apache .hadoop .classification .InterfaceAudience ;
3341import org .apache .hadoop .classification .InterfaceStability ;
3442import org .apache .hadoop .conf .Configuration ;
3543import org .apache .hadoop .conf .Configured ;
44+ import org .apache .hadoop .fs .s3a .statistics .StatisticsFromAwsSdk ;
45+ import org .apache .hadoop .fs .s3a .statistics .impl .AwsStatisticsCollector ;
3646
3747import static org .apache .hadoop .fs .s3a .Constants .EXPERIMENTAL_AWS_INTERNAL_THROTTLING ;
3848import static org .apache .hadoop .fs .s3a .Constants .ENDPOINT ;
4151
4252/**
4353 * The default {@link S3ClientFactory} implementation.
44- * This which calls the AWS SDK to configure and create an
54+ * This calls the AWS SDK to configure and create an
4555 * {@link AmazonS3Client} that communicates with the S3 service.
4656 */
4757@ InterfaceAudience .Private
5060public class DefaultS3ClientFactory extends Configured
5161 implements S3ClientFactory {
5262
53- protected static final Logger LOG = S3AFileSystem .LOG ;
63+ private static final String S3_SERVICE_NAME = "s3" ;
64+ private static final String S3_SIGNER = "S3SignerType" ;
65+ private static final String S3_V4_SIGNER = "AWSS3V4SignerType" ;
5466
67+ /**
68+ * Subclasses refer to this.
69+ */
70+ protected static final Logger LOG =
71+ LoggerFactory .getLogger (DefaultS3ClientFactory .class );
72+
73+ /**
74+ * Create the client.
75+ * <p>
76+ * If the AWS stats are not null then an {@link AwsStatisticsCollector}
77+ * is created to bind the two.
78+ * <i>Important: until this binding works properly across regions,
79+ * this should be null.</i>
80+ */
5581 @ Override
5682 public AmazonS3 createS3Client (URI name ,
5783 final String bucket ,
5884 final AWSCredentialsProvider credentials ,
59- final String userAgentSuffix ) throws IOException {
85+ final String userAgentSuffix ,
86+ final StatisticsFromAwsSdk statisticsFromAwsSdk ) throws IOException {
6087 Configuration conf = getConf ();
6188 final ClientConfiguration awsConf = S3AUtils
6289 .createAwsConf (conf , bucket , Constants .AWS_SERVICE_IDENTIFIER_S3 );
@@ -73,36 +100,124 @@ public AmazonS3 createS3Client(URI name,
73100 if (!StringUtils .isEmpty (userAgentSuffix )) {
74101 awsConf .setUserAgentSuffix (userAgentSuffix );
75102 }
76- return configureAmazonS3Client (
77- newAmazonS3Client (credentials , awsConf ), conf );
103+ // optional metrics
104+ RequestMetricCollector metrics = statisticsFromAwsSdk != null
105+ ? new AwsStatisticsCollector (statisticsFromAwsSdk )
106+ : null ;
107+
108+ return newAmazonS3Client (
109+ credentials ,
110+ awsConf ,
111+ metrics ,
112+ conf .getTrimmed (ENDPOINT , "" ),
113+ conf .getBoolean (PATH_STYLE_ACCESS , false ));
78114 }
79115
80116 /**
81- * Wrapper around constructor for {@link AmazonS3} client.
117+ * Create an {@link AmazonS3} client.
82118 * Override this to provide an extended version of the client
83119 * @param credentials credentials to use
84120 * @param awsConf AWS configuration
85- * @return new AmazonS3 client
121+ * @param metrics metrics collector or null
122+ * @param endpoint endpoint string; may be ""
123+ * @param pathStyleAccess enable path style access?
124+ * @return new AmazonS3 client
86125 */
87126 protected AmazonS3 newAmazonS3Client (
88- AWSCredentialsProvider credentials , ClientConfiguration awsConf ) {
89- return new AmazonS3Client (credentials , awsConf );
127+ final AWSCredentialsProvider credentials ,
128+ final ClientConfiguration awsConf ,
129+ final RequestMetricCollector metrics ,
130+ final String endpoint ,
131+ final boolean pathStyleAccess ) {
132+ if (metrics != null ) {
133+ LOG .debug ("Building S3 client using the SDK builder API" );
134+ return buildAmazonS3Client (credentials , awsConf , metrics , endpoint ,
135+ pathStyleAccess );
136+ } else {
137+ LOG .debug ("Building S3 client using the SDK builder API" );
138+ return classicAmazonS3Client (credentials , awsConf , endpoint ,
139+ pathStyleAccess );
140+ }
90141 }
91142
92143 /**
93- * Configure S3 client from the Hadoop configuration.
94- *
144+ * Use the (newer) Builder SDK to create a an AWS S3 client.
145+ * <p>
146+ * This has a more complex endpoint configuration in a
147+ * way which does not yet work in this code in a way
148+ * which doesn't trigger regressions. So it is only used
149+ * when SDK metrics are supplied.
150+ * @param credentials credentials to use
151+ * @param awsConf AWS configuration
152+ * @param metrics metrics collector or null
153+ * @param endpoint endpoint string; may be ""
154+ * @param pathStyleAccess enable path style access?
155+ * @return new AmazonS3 client
156+ */
157+ private AmazonS3 buildAmazonS3Client (
158+ final AWSCredentialsProvider credentials ,
159+ final ClientConfiguration awsConf ,
160+ final RequestMetricCollector metrics ,
161+ final String endpoint ,
162+ final boolean pathStyleAccess ) {
163+ AmazonS3ClientBuilder b = AmazonS3Client .builder ();
164+ b .withCredentials (credentials );
165+ b .withClientConfiguration (awsConf );
166+ b .withPathStyleAccessEnabled (pathStyleAccess );
167+ if (metrics != null ) {
168+ b .withMetricsCollector (metrics );
169+ }
170+
171+ // endpoint set up is a PITA
172+ // client.setEndpoint("") is no longer available
173+ AwsClientBuilder .EndpointConfiguration epr
174+ = createEndpointConfiguration (endpoint , awsConf );
175+ if (epr != null ) {
176+ // an endpoint binding was constructed: use it.
177+ b .withEndpointConfiguration (epr );
178+ }
179+ final AmazonS3 client = b .build ();
180+ return client ;
181+ }
182+
183+ /**
184+ * Wrapper around constructor for {@link AmazonS3} client.
185+ * Override this to provide an extended version of the client.
186+ * <p>
187+ * This uses a deprecated constructor -it is currently
188+ * the only one which works for us.
189+ * @param credentials credentials to use
190+ * @param awsConf AWS configuration
191+ * @param endpoint endpoint string; may be ""
192+ * @param pathStyleAccess enable path style access?
193+ * @return new AmazonS3 client
194+ */
195+ @ SuppressWarnings ("deprecation" )
196+ private AmazonS3 classicAmazonS3Client (
197+ AWSCredentialsProvider credentials ,
198+ ClientConfiguration awsConf ,
199+ final String endpoint ,
200+ final boolean pathStyleAccess ) {
201+ final AmazonS3 client = new AmazonS3Client (credentials , awsConf );
202+ return configureAmazonS3Client (client , endpoint , pathStyleAccess );
203+ }
204+
205+ /**
206+ * Configure classic S3 client.
207+ * <p>
95208 * This includes: endpoint, Path Access and possibly other
96209 * options.
97210 *
98- * @param conf Hadoop configuration
211+ * @param s3 S3 Client.
212+ * @param endPoint s3 endpoint, may be empty
213+ * @param pathStyleAccess enable path style access?
99214 * @return S3 client
100215 * @throws IllegalArgumentException if misconfigured
101216 */
102- private static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
103- Configuration conf )
217+ protected static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
218+ final String endPoint ,
219+ final boolean pathStyleAccess )
104220 throws IllegalArgumentException {
105- String endPoint = conf .getTrimmed (ENDPOINT , "" );
106221 if (!endPoint .isEmpty ()) {
107222 try {
108223 s3 .setEndpoint (endPoint );
@@ -112,31 +227,31 @@ private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
112227 throw new IllegalArgumentException (msg , e );
113228 }
114229 }
115- return applyS3ClientOptions (s3 , conf );
230+ return applyS3ClientOptions (s3 , pathStyleAccess );
116231 }
117232
118233 /**
119234 * Perform any tuning of the {@code S3ClientOptions} settings based on
120235 * the Hadoop configuration.
121236 * This is different from the general AWS configuration creation as
122237 * it is unique to S3 connections.
123- *
238+ * <p>
124239 * The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
125240 * to S3 buckets if configured. By default, the
126241 * behavior is to use virtual hosted-style access with URIs of the form
127242 * {@code http://bucketname.s3.amazonaws.com}
243+ * <p>
128244 * Enabling path-style access and a
129245 * region-specific endpoint switches the behavior to use URIs of the form
130246 * {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
131247 * It is common to use this when connecting to private S3 servers, as it
132248 * avoids the need to play with DNS entries.
133249 * @param s3 S3 client
134- * @param conf Hadoop configuration
250+ * @param pathStyleAccess enable path style access?
135251 * @return the S3 client
136252 */
137- private static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
138- Configuration conf ) {
139- final boolean pathStyleAccess = conf .getBoolean (PATH_STYLE_ACCESS , false );
253+ protected static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
254+ final boolean pathStyleAccess ) {
140255 if (pathStyleAccess ) {
141256 LOG .debug ("Enabling path style access!" );
142257 s3 .setS3ClientOptions (S3ClientOptions .builder ()
@@ -145,4 +260,54 @@ private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
145260 }
146261 return s3 ;
147262 }
263+
264+ /**
265+ * Given an endpoint string, return an endpoint config, or null, if none
266+ * is needed.
267+ * <p>
268+ * This is a pretty painful piece of code. It is trying to replicate
269+ * what AwsClient.setEndpoint() does, because you can't
270+ * call that setter on an AwsClient constructed via
271+ * the builder, and you can't pass a metrics collector
272+ * down except through the builder.
273+ * <p>
274+ * Note also that AWS signing is a mystery which nobody fully
275+ * understands, especially given all problems surface in a
276+ * "400 bad request" response, which, like all security systems,
277+ * provides minimal diagnostics out of fear of leaking
278+ * secrets.
279+ *
280+ * @param endpoint possibly null endpoint.
281+ * @param awsConf config to build the URI from.
282+ * @return a configuration for the S3 client builder.
283+ */
284+ @ VisibleForTesting
285+ public static AwsClientBuilder .EndpointConfiguration
286+ createEndpointConfiguration (
287+ final String endpoint , final ClientConfiguration awsConf ) {
288+ LOG .debug ("Creating endpoint configuration for {}" , endpoint );
289+ if (endpoint == null || endpoint .isEmpty ()) {
290+ // the default endpoint...we should be using null at this point.
291+ LOG .debug ("Using default endpoint -no need to generate a configuration" );
292+ return null ;
293+ }
294+
295+ final URI epr = RuntimeHttpUtils .toUri (endpoint , awsConf );
296+ LOG .debug ("Endpoint URI = {}" , epr );
297+
298+ String region ;
299+ if (!ServiceUtils .isS3USStandardEndpoint (endpoint )) {
300+ LOG .debug ("Endpoint {} is not the default; parsing" , epr );
301+ region = AwsHostNameUtils .parseRegion (
302+ epr .getHost (),
303+ S3_SERVICE_NAME );
304+ } else {
305+ // US-east, set region == null.
306+ LOG .debug ("Endpoint {} is the standard one; declare region as null" , epr );
307+ region = null ;
308+ }
309+ LOG .debug ("Region for endpoint {}, URI {} is determined as {}" ,
310+ endpoint , epr , region );
311+ return new AwsClientBuilder .EndpointConfiguration (endpoint , region );
312+ }
148313}
0 commit comments