2323
2424import com .amazonaws .ClientConfiguration ;
2525import com .amazonaws .auth .AWSCredentialsProvider ;
26+ import com .amazonaws .client .builder .AwsClientBuilder ;
27+ import com .amazonaws .metrics .RequestMetricCollector ;
2628import com .amazonaws .services .s3 .AmazonS3 ;
2729import com .amazonaws .services .s3 .AmazonS3Client ;
30+ import com .amazonaws .services .s3 .AmazonS3ClientBuilder ;
2831import com .amazonaws .services .s3 .S3ClientOptions ;
32+ import com .amazonaws .services .s3 .internal .ServiceUtils ;
33+ import com .amazonaws .util .AwsHostNameUtils ;
34+ import com .amazonaws .util .RuntimeHttpUtils ;
35+ import com .google .common .annotations .VisibleForTesting ;
2936import org .slf4j .Logger ;
37+ import org .slf4j .LoggerFactory ;
3038
3139import org .apache .commons .lang3 .StringUtils ;
3240import org .apache .hadoop .classification .InterfaceAudience ;
3341import org .apache .hadoop .classification .InterfaceStability ;
3442import org .apache .hadoop .conf .Configuration ;
3543import org .apache .hadoop .conf .Configured ;
44+ import org .apache .hadoop .fs .s3a .statistics .StatisticsFromAwsSdk ;
45+ import org .apache .hadoop .fs .s3a .statistics .impl .AwsStatisticsCollector ;
3646
3747import static org .apache .hadoop .fs .s3a .Constants .EXPERIMENTAL_AWS_INTERNAL_THROTTLING ;
3848import static org .apache .hadoop .fs .s3a .Constants .ENDPOINT ;
4151
4252/**
4353 * The default {@link S3ClientFactory} implementation.
44- * This which calls the AWS SDK to configure and create an
54+ * This calls the AWS SDK to configure and create an
4555 * {@link AmazonS3Client} that communicates with the S3 service.
4656 */
4757@ InterfaceAudience .Private
4858@ InterfaceStability .Unstable
4959public class DefaultS3ClientFactory extends Configured
5060 implements S3ClientFactory {
5161
52- protected static final Logger LOG = S3AFileSystem .LOG ;
62+ private static final String S3_SERVICE_NAME = "s3" ;
63+ private static final String S3_SIGNER = "S3SignerType" ;
64+ private static final String S3_V4_SIGNER = "AWSS3V4SignerType" ;
5365
66+ /**
67+ * Subclasses refer to this.
68+ */
69+ protected static final Logger LOG =
70+ LoggerFactory .getLogger (DefaultS3ClientFactory .class );
71+
72+ /**
73+ * Create the client.
74+ * <p>
75+ * If the AWS stats are not null then a {@link AwsStatisticsCollector}
76+ * is created to bind the two together.
77+ * <i>Important: until this binding works properly across regions,
78+ * this should be null.</i>
79+ */
5480 @ Override
5581 public AmazonS3 createS3Client (URI name ,
5682 final String bucket ,
5783 final AWSCredentialsProvider credentials ,
58- final String userAgentSuffix ) throws IOException {
84+ final String userAgentSuffix ,
85+ final StatisticsFromAwsSdk statisticsFromAwsSdk ) throws IOException {
5986 Configuration conf = getConf ();
6087 final ClientConfiguration awsConf = S3AUtils
6188 .createAwsConf (conf , bucket , Constants .AWS_SERVICE_IDENTIFIER_S3 );
@@ -72,36 +99,124 @@ public AmazonS3 createS3Client(URI name,
7299 if (!StringUtils .isEmpty (userAgentSuffix )) {
73100 awsConf .setUserAgentSuffix (userAgentSuffix );
74101 }
75- return configureAmazonS3Client (
76- newAmazonS3Client (credentials , awsConf ), conf );
102+ // optional metrics
103+ RequestMetricCollector metrics = statisticsFromAwsSdk != null
104+ ? new AwsStatisticsCollector (statisticsFromAwsSdk )
105+ : null ;
106+
107+ return newAmazonS3Client (
108+ credentials ,
109+ awsConf ,
110+ metrics ,
111+ conf .getTrimmed (ENDPOINT , "" ),
112+ conf .getBoolean (PATH_STYLE_ACCESS , false ));
77113 }
78114
79115 /**
80- * Wrapper around constructor for {@link AmazonS3} client.
116+ * Create an {@link AmazonS3} client.
81117 * Override this to provide an extended version of the client
82118 * @param credentials credentials to use
83119 * @param awsConf AWS configuration
84- * @return new AmazonS3 client
120+ * @param metrics metrics collector or null
121+ * @param endpoint endpoint string; may be ""
122+ * @param pathStyleAccess enable path style access?
123+ * @return new AmazonS3 client
85124 */
86125 protected AmazonS3 newAmazonS3Client (
87- AWSCredentialsProvider credentials , ClientConfiguration awsConf ) {
88- return new AmazonS3Client (credentials , awsConf );
126+ final AWSCredentialsProvider credentials ,
127+ final ClientConfiguration awsConf ,
128+ final RequestMetricCollector metrics ,
129+ final String endpoint ,
130+ final boolean pathStyleAccess ) {
131+ if (metrics != null ) {
132+ LOG .debug ("Building S3 client using the SDK builder API" );
133+ return buildAmazonS3Client (credentials , awsConf , metrics , endpoint ,
134+ pathStyleAccess );
135+ } else {
136+ LOG .debug ("Building S3 client using the SDK builder API" );
137+ return classicAmazonS3Client (credentials , awsConf , endpoint ,
138+ pathStyleAccess );
139+ }
89140 }
90141
91142 /**
92- * Configure S3 client from the Hadoop configuration.
93- *
143+ * Use the (newer) Builder SDK to create a an AWS S3 client.
144+ * <p>
145+ * This has a more complex endpoint configuration in a
146+ * way which does not yet work in this code in a way
147+ * which doesn't trigger regressions. So it is only used
148+ * when SDK metrics are supplied.
149+ * @param credentials credentials to use
150+ * @param awsConf AWS configuration
151+ * @param metrics metrics collector or null
152+ * @param endpoint endpoint string; may be ""
153+ * @param pathStyleAccess enable path style access?
154+ * @return new AmazonS3 client
155+ */
156+ private AmazonS3 buildAmazonS3Client (
157+ final AWSCredentialsProvider credentials ,
158+ final ClientConfiguration awsConf ,
159+ final RequestMetricCollector metrics ,
160+ final String endpoint ,
161+ final boolean pathStyleAccess ) {
162+ AmazonS3ClientBuilder b = AmazonS3Client .builder ();
163+ b .withCredentials (credentials );
164+ b .withClientConfiguration (awsConf );
165+ b .withPathStyleAccessEnabled (pathStyleAccess );
166+ if (metrics != null ) {
167+ b .withMetricsCollector (metrics );
168+ }
169+
170+ // endpoint set up is a PITA
171+ // client.setEndpoint("") is no longer available
172+ AwsClientBuilder .EndpointConfiguration epr
173+ = createEndpointConfiguration (endpoint , awsConf );
174+ if (epr != null ) {
175+ // an endpoint binding was constructed: use it.
176+ b .withEndpointConfiguration (epr );
177+ }
178+ final AmazonS3 client = b .build ();
179+ return client ;
180+ }
181+
182+ /**
183+ * Wrapper around constructor for {@link AmazonS3} client.
184+ * Override this to provide an extended version of the client.
185+ * <p>
186+ * This uses a deprecated constructor -it is currently
187+ * the only one which works for us.
188+ * @param credentials credentials to use
189+ * @param awsConf AWS configuration
190+ * @param endpoint endpoint string; may be ""
191+ * @param pathStyleAccess enable path style access?
192+ * @return new AmazonS3 client
193+ */
194+ @ SuppressWarnings ("deprecation" )
195+ private AmazonS3 classicAmazonS3Client (
196+ AWSCredentialsProvider credentials ,
197+ ClientConfiguration awsConf ,
198+ final String endpoint ,
199+ final boolean pathStyleAccess ) {
200+ final AmazonS3 client = new AmazonS3Client (credentials , awsConf );
201+ return configureAmazonS3Client (client , endpoint , pathStyleAccess );
202+ }
203+
204+ /**
205+ * Configure classic S3 client.
206+ * <p>
94207 * This includes: endpoint, Path Access and possibly other
95208 * options.
96209 *
97- * @param conf Hadoop configuration
210+ * @param s3 S3 Client.
211+ * @param endPoint s3 endpoint, may be empty
212+ * @param pathStyleAccess enable path style access?
98213 * @return S3 client
99214 * @throws IllegalArgumentException if misconfigured
100215 */
101- private static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
102- Configuration conf )
216+ protected static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
217+ final String endPoint ,
218+ final boolean pathStyleAccess )
103219 throws IllegalArgumentException {
104- String endPoint = conf .getTrimmed (ENDPOINT , "" );
105220 if (!endPoint .isEmpty ()) {
106221 try {
107222 s3 .setEndpoint (endPoint );
@@ -111,31 +226,31 @@ private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
111226 throw new IllegalArgumentException (msg , e );
112227 }
113228 }
114- return applyS3ClientOptions (s3 , conf );
229+ return applyS3ClientOptions (s3 , pathStyleAccess );
115230 }
116231
117232 /**
118233 * Perform any tuning of the {@code S3ClientOptions} settings based on
119234 * the Hadoop configuration.
120235 * This is different from the general AWS configuration creation as
121236 * it is unique to S3 connections.
122- *
237+ * <p>
123238 * The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
124239 * to S3 buckets if configured. By default, the
125240 * behavior is to use virtual hosted-style access with URIs of the form
126241 * {@code http://bucketname.s3.amazonaws.com}
242+ * <p>
127243 * Enabling path-style access and a
128244 * region-specific endpoint switches the behavior to use URIs of the form
129245 * {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
130246 * It is common to use this when connecting to private S3 servers, as it
131247 * avoids the need to play with DNS entries.
132248 * @param s3 S3 client
133- * @param conf Hadoop configuration
249+ * @param pathStyleAccess enable path style access?
134250 * @return the S3 client
135251 */
136- private static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
137- Configuration conf ) {
138- final boolean pathStyleAccess = conf .getBoolean (PATH_STYLE_ACCESS , false );
252+ protected static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
253+ final boolean pathStyleAccess ) {
139254 if (pathStyleAccess ) {
140255 LOG .debug ("Enabling path style access!" );
141256 s3 .setS3ClientOptions (S3ClientOptions .builder ()
@@ -144,4 +259,54 @@ private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
144259 }
145260 return s3 ;
146261 }
262+
263+ /**
264+ * Given an endpoint string, return an endpoint config, or null, if none
265+ * is needed.
266+ * <p>
267+ * This is a pretty painful piece of code. It is trying to replicate
268+ * what AwsClient.setEndpoint() does, because you can't
269+ * call that setter on an AwsClient constructed via
270+ * the builder, and you can't pass a metrics collector
271+ * down except through the builder.
272+ * <p>
273+ * Note also that AWS signing is a mystery which nobody fully
274+ * understands, especially given all problems surface in a
275+ * "400 bad request" response, which, like all security systems,
276+ * provides minimal diagnostics out of fear of leaking
277+ * secrets.
278+ *
279+ * @param endpoint possibly null endpoint.
280+ * @param awsConf config to build the URI from.
281+ * @return a configuration for the S3 client builder.
282+ */
283+ @ VisibleForTesting
284+ public static AwsClientBuilder .EndpointConfiguration
285+ createEndpointConfiguration (
286+ final String endpoint , final ClientConfiguration awsConf ) {
287+ LOG .debug ("Creating endpoint configuration for {}" , endpoint );
288+ if (endpoint == null || endpoint .isEmpty ()) {
289+ // the default endpoint...we should be using null at this point.
290+ LOG .debug ("Using default endpoint -no need to generate a configuration" );
291+ return null ;
292+ }
293+
294+ final URI epr = RuntimeHttpUtils .toUri (endpoint , awsConf );
295+ LOG .debug ("Endpoint URI = {}" , epr );
296+
297+ String region ;
298+ if (!ServiceUtils .isS3USStandardEndpoint (endpoint )) {
299+ LOG .debug ("Endpoint {} is not the default; parsing" , epr );
300+ region = AwsHostNameUtils .parseRegion (
301+ epr .getHost (),
302+ S3_SERVICE_NAME );
303+ } else {
304+ // US-east, set region == null.
305+ LOG .debug ("Endpoint {} is the standard one; declare region as null" , epr );
306+ region = null ;
307+ }
308+ LOG .debug ("Region for endpoint {}, URI {} is determined as {}" ,
309+ endpoint , epr , region );
310+ return new AwsClientBuilder .EndpointConfiguration (endpoint , region );
311+ }
147312}
0 commit comments