2323
2424import com .amazonaws .ClientConfiguration ;
2525import com .amazonaws .auth .AWSCredentialsProvider ;
26+ import com .amazonaws .client .builder .AwsClientBuilder ;
27+ import com .amazonaws .metrics .RequestMetricCollector ;
2628import com .amazonaws .services .s3 .AmazonS3 ;
2729import com .amazonaws .services .s3 .AmazonS3Client ;
30+ import com .amazonaws .services .s3 .AmazonS3ClientBuilder ;
2831import com .amazonaws .services .s3 .S3ClientOptions ;
32+ import com .amazonaws .services .s3 .internal .ServiceUtils ;
33+ import com .amazonaws .util .AwsHostNameUtils ;
34+ import com .amazonaws .util .RuntimeHttpUtils ;
35+ import org .apache .hadoop .thirdparty .com .google .common .annotations .VisibleForTesting ;
2936import org .slf4j .Logger ;
37+ import org .slf4j .LoggerFactory ;
3038
3139import org .apache .commons .lang3 .StringUtils ;
3240import org .apache .hadoop .classification .InterfaceAudience ;
3341import org .apache .hadoop .classification .InterfaceStability ;
3442import org .apache .hadoop .conf .Configuration ;
3543import org .apache .hadoop .conf .Configured ;
44+ import org .apache .hadoop .fs .s3a .statistics .StatisticsFromAwsSdk ;
45+ import org .apache .hadoop .fs .s3a .statistics .impl .AwsStatisticsCollector ;
3646
3747import static org .apache .hadoop .fs .s3a .Constants .EXPERIMENTAL_AWS_INTERNAL_THROTTLING ;
3848import static org .apache .hadoop .fs .s3a .Constants .ENDPOINT ;
4151
4252/**
4353 * The default {@link S3ClientFactory} implementation.
44- * This which calls the AWS SDK to configure and create an
54+ * This calls the AWS SDK to configure and create an
4555 * {@link AmazonS3Client} that communicates with the S3 service.
4656 */
4757@ InterfaceAudience .Private
4858@ InterfaceStability .Unstable
4959public class DefaultS3ClientFactory extends Configured
5060 implements S3ClientFactory {
5161
52- protected static final Logger LOG = S3AFileSystem .LOG ;
62+ private static final String S3_SERVICE_NAME = "s3" ;
63+ private static final String S3_SIGNER = "S3SignerType" ;
64+ private static final String S3_V4_SIGNER = "AWSS3V4SignerType" ;
5365
66+ /**
67+ * Subclasses refer to this.
68+ */
69+ protected static final Logger LOG =
70+ LoggerFactory .getLogger (DefaultS3ClientFactory .class );
71+
72+ /**
73+ * Create the client.
74+ * <p>
75+ * If the AWS stats are not null then a {@link AwsStatisticsCollector}
76+ * is created to bind the two together.
77+ * <i>Important: until this binding works properly across regions,
78+ * this should be null.</i>
79+ */
5480 @ Override
5581 public AmazonS3 createS3Client (URI name ,
5682 final String bucket ,
5783 final AWSCredentialsProvider credentials ,
58- final String userAgentSuffix ) throws IOException {
84+ final String userAgentSuffix ,
85+ final StatisticsFromAwsSdk statisticsFromAwsSdk ) throws IOException {
5986 Configuration conf = getConf ();
6087 final ClientConfiguration awsConf = S3AUtils
6188 .createAwsConf (conf , bucket , Constants .AWS_SERVICE_IDENTIFIER_S3 );
@@ -72,36 +99,123 @@ public AmazonS3 createS3Client(URI name,
7299 if (!StringUtils .isEmpty (userAgentSuffix )) {
73100 awsConf .setUserAgentSuffix (userAgentSuffix );
74101 }
75- return configureAmazonS3Client (
76- newAmazonS3Client (credentials , awsConf ), conf );
102+ // optional metrics
103+ RequestMetricCollector metrics = statisticsFromAwsSdk != null
104+ ? new AwsStatisticsCollector (statisticsFromAwsSdk )
105+ : null ;
106+
107+ return newAmazonS3Client (
108+ credentials ,
109+ awsConf ,
110+ metrics ,
111+ conf .getTrimmed (ENDPOINT , "" ),
112+ conf .getBoolean (PATH_STYLE_ACCESS , false ));
77113 }
78114
79115 /**
80- * Wrapper around constructor for {@link AmazonS3} client.
116+ * Create an {@link AmazonS3} client.
81117 * Override this to provide an extended version of the client
82118 * @param credentials credentials to use
83119 * @param awsConf AWS configuration
84- * @return new AmazonS3 client
120+ * @param metrics metrics collector or null
121+ * @param endpoint endpoint string; may be ""
122+ * @param pathStyleAccess enable path style access?
123+ * @return new AmazonS3 client
85124 */
86125 protected AmazonS3 newAmazonS3Client (
87- AWSCredentialsProvider credentials , ClientConfiguration awsConf ) {
88- return new AmazonS3Client (credentials , awsConf );
126+ final AWSCredentialsProvider credentials ,
127+ final ClientConfiguration awsConf ,
128+ final RequestMetricCollector metrics ,
129+ final String endpoint ,
130+ final boolean pathStyleAccess ) {
131+ if (metrics != null ) {
132+ LOG .debug ("Building S3 client using the SDK builder API" );
133+ return buildAmazonS3Client (credentials , awsConf , metrics , endpoint ,
134+ pathStyleAccess );
135+ } else {
136+ LOG .debug ("Building S3 client using the SDK builder API" );
137+ return classicAmazonS3Client (credentials , awsConf , endpoint ,
138+ pathStyleAccess );
139+ }
89140 }
90141
91142 /**
92- * Configure S3 client from the Hadoop configuration.
93- *
143+ * Use the (newer) Builder SDK to create a an AWS S3 client.
144+ * <p>
145+ * This has a more complex endpoint configuration in a
146+ * way which does not yet work in this code in a way
147+ * which doesn't trigger regressions. So it is only used
148+ * when SDK metrics are supplied.
149+ * @param credentials credentials to use
150+ * @param awsConf AWS configuration
151+ * @param metrics metrics collector or null
152+ * @param endpoint endpoint string; may be ""
153+ * @param pathStyleAccess enable path style access?
154+ * @return new AmazonS3 client
155+ */
156+ private AmazonS3 buildAmazonS3Client (
157+ final AWSCredentialsProvider credentials ,
158+ final ClientConfiguration awsConf ,
159+ final RequestMetricCollector metrics ,
160+ final String endpoint ,
161+ final boolean pathStyleAccess ) {
162+ AmazonS3ClientBuilder b = AmazonS3Client .builder ();
163+ b .withCredentials (credentials );
164+ b .withClientConfiguration (awsConf );
165+ b .withPathStyleAccessEnabled (pathStyleAccess );
166+ if (metrics != null ) {
167+ b .withMetricsCollector (metrics );
168+ }
169+
170+ // endpoint set up is a PITA
171+ // client.setEndpoint("") is no longer available
172+ AwsClientBuilder .EndpointConfiguration epr
173+ = createEndpointConfiguration (endpoint , awsConf );
174+ if (epr != null ) {
175+ // an endpoint binding was constructed: use it.
176+ b .withEndpointConfiguration (epr );
177+ }
178+ final AmazonS3 client = b .build ();
179+ return client ;
180+ }
181+
182+ /**
183+ * Wrapper around constructor for {@link AmazonS3} client.
184+ * Override this to provide an extended version of the client.
185+ * <p>
186+ * This uses a deprecated constructor -it is currently
187+ * the only one which works for us.
188+ * @param credentials credentials to use
189+ * @param awsConf AWS configuration
190+ * @param endpoint endpoint string; may be ""
191+ * @param pathStyleAccess enable path style access?
192+ * @return new AmazonS3 client
193+ */
194+ @ SuppressWarnings ("deprecation" )
195+ private AmazonS3 classicAmazonS3Client (
196+ AWSCredentialsProvider credentials ,
197+ ClientConfiguration awsConf ,
198+ final String endpoint ,
199+ final boolean pathStyleAccess ) {
200+ final AmazonS3 client = new AmazonS3Client (credentials , awsConf );
201+ return configureAmazonS3Client (client , endpoint , pathStyleAccess );
202+ }
203+
204+ /**
205+ * Configure classic S3 client.
206+ * <p>
94207 * This includes: endpoint, Path Access and possibly other
95208 * options.
96209 *
97- * @param conf Hadoop configuration
210+ * @param s3 S3 Client.
211+ * @param pathStyleAccess enable path style access?
98212 * @return S3 client
99213 * @throws IllegalArgumentException if misconfigured
100214 */
101- private static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
102- Configuration conf )
215+ protected static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
216+ final String endPoint ,
217+ final boolean pathStyleAccess )
103218 throws IllegalArgumentException {
104- String endPoint = conf .getTrimmed (ENDPOINT , "" );
105219 if (!endPoint .isEmpty ()) {
106220 try {
107221 s3 .setEndpoint (endPoint );
@@ -111,31 +225,31 @@ private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
111225 throw new IllegalArgumentException (msg , e );
112226 }
113227 }
114- return applyS3ClientOptions (s3 , conf );
228+ return applyS3ClientOptions (s3 , pathStyleAccess );
115229 }
116230
117231 /**
118232 * Perform any tuning of the {@code S3ClientOptions} settings based on
119233 * the Hadoop configuration.
120234 * This is different from the general AWS configuration creation as
121235 * it is unique to S3 connections.
122- *
236+ * <p>
123237 * The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
124238 * to S3 buckets if configured. By default, the
125239 * behavior is to use virtual hosted-style access with URIs of the form
126240 * {@code http://bucketname.s3.amazonaws.com}
241+ * <p>
127242 * Enabling path-style access and a
128243 * region-specific endpoint switches the behavior to use URIs of the form
129244 * {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
130245 * It is common to use this when connecting to private S3 servers, as it
131246 * avoids the need to play with DNS entries.
132247 * @param s3 S3 client
133- * @param conf Hadoop configuration
248+ * @param pathStyleAccess enable path style access?
134249 * @return the S3 client
135250 */
136- private static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
137- Configuration conf ) {
138- final boolean pathStyleAccess = conf .getBoolean (PATH_STYLE_ACCESS , false );
251+ protected static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
252+ final boolean pathStyleAccess ) {
139253 if (pathStyleAccess ) {
140254 LOG .debug ("Enabling path style access!" );
141255 s3 .setS3ClientOptions (S3ClientOptions .builder ()
@@ -144,4 +258,54 @@ private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
144258 }
145259 return s3 ;
146260 }
261+
262+ /**
263+ * Given an endpoint string, return an endpoint config, or null, if none
264+ * is needed.
265+ * <p>
266+ * This is a pretty painful piece of code. It is trying to replicate
267+ * what AwsClient.setEndpoint() does, because you can't
268+ * call that setter on an AwsClient constructed via
269+ * the builder, and you can't pass a metrics collector
270+ * down except through the builder.
271+ * <p>
272+ * Note also that AWS signing is a mystery which nobody fully
273+ * understands, especially given all problems surface in a
274+ * "400 bad request" response, which, like all security systems,
275+ * provides minimal diagnostics out of fear of leaking
276+ * secrets.
277+ *
278+ * @param endpoint possibly null endpoint.
279+ * @param awsConf config to build the URI from.
280+ * @return a configuration for the S3 client builder.
281+ */
282+ @ VisibleForTesting
283+ public static AwsClientBuilder .EndpointConfiguration
284+ createEndpointConfiguration (
285+ final String endpoint , final ClientConfiguration awsConf ) {
286+ LOG .debug ("Creating endpoint configuration for {}" , endpoint );
287+ if (endpoint == null || endpoint .isEmpty ()) {
288+ // the default endpoint...we should be using null at this point.
289+ LOG .debug ("Using default endpoint -no need to generate a configuration" );
290+ return null ;
291+ }
292+
293+ final URI epr = RuntimeHttpUtils .toUri (endpoint , awsConf );
294+ LOG .debug ("Endpoint URI = {}" , epr );
295+
296+ String region ;
297+ if (!ServiceUtils .isS3USStandardEndpoint (endpoint )) {
298+ LOG .debug ("Endpoint {} is not the default; parsing" , epr );
299+ region = AwsHostNameUtils .parseRegion (
300+ epr .getHost (),
301+ S3_SERVICE_NAME );
302+ } else {
303+ // US-east, set region == null.
304+ LOG .debug ("Endpoint {} is the standard one; declare region as null" , epr );
305+ region = null ;
306+ }
307+ LOG .debug ("Region for endpoint {}, URI {} is determined as {}" ,
308+ endpoint , epr , region );
309+ return new AwsClientBuilder .EndpointConfiguration (endpoint , region );
310+ }
147311}
0 commit comments