Skip to content

Commit 74e5018

Browse files
committed
HADOOP-16635. S3A "directories only" scan still does a HEAD.
Contributed by Steve Loughran. Change-Id: I5e41d7f721364c392e1f4344db83dfa8c5aa06ce
1 parent dee9e97 commit 74e5018

File tree

5 files changed

+229
-32
lines changed

5 files changed

+229
-32
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2610,7 +2610,7 @@ public FileStatus getFileStatus(final Path f) throws IOException {
26102610
* @param f The path we want information from
26112611
* @param needEmptyDirectoryFlag if true, implementation will calculate
26122612
* a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
2613-
* @param probes probes to make
2613+
* @param probes probes to make.
26142614
* @return a S3AFileStatus object
26152615
* @throws FileNotFoundException when the path does not exist
26162616
* @throws IOException on other problems.
@@ -2711,58 +2711,101 @@ S3AFileStatus innerGetFileStatus(final Path f,
27112711
// there was no entry in S3Guard
27122712
// retrieve the data and update the metadata store in the process.
27132713
return S3Guard.putAndReturn(metadataStore,
2714-
s3GetFileStatus(path, key, StatusProbeEnum.ALL, tombstones),
2714+
s3GetFileStatus(path, key, probes, tombstones),
27152715
instrumentation,
27162716
ttlTimeProvider);
27172717
}
27182718
}
27192719

27202720
/**
27212721
* Raw {@code getFileStatus} that talks direct to S3.
2722-
* Used to implement {@link #innerGetFileStatus(Path, boolean)},
2722+
* Used to implement {@link #innerGetFileStatus(Path, boolean, Set)},
27232723
* and for direct management of empty directory blobs.
2724+
*
2725+
* Checks made, in order:
2726+
* <ol>
2727+
* <li>
2728+
* Head: look for an object at the given key, provided that
2729+
* the key doesn't end in "/"
2730+
* </li>
2731+
* <li>
2732+
* DirMarker: look for the directory marker -the key with a trailing /
2733+
* if not passed in.
2734+
* If an object was found with size 0 bytes, a directory status entry
2735+
* is returned which declares that the directory is empty.
2736+
* </li>
2737+
* <li>
2738+
* List: issue a LIST on the key (with / if needed), require one
2739+
* entry to be found for the path to be considered a non-empty directory.
2740+
* </li>
2741+
* </ol>
2742+
*
2743+
* Notes:
2744+
* <ul>
2745+
* <li>
2746+
* Objects ending in / which are not 0-bytes long are not treated as
2747+
* directory markers, but instead as files.
2748+
* </li>
2749+
* <li>
2750+
* There's ongoing discussions about whether a dir marker
2751+
* should be interpreted as an empty dir.
2752+
* </li>
2753+
* <li>
2754+
* The HEAD requests require the permissions to read an object,
2755+
* including (we believe) the ability to decrypt the file.
2756+
* At the very least, for SSE-C markers, you need the same key on
2757+
* the client for the HEAD to work.
2758+
* </li>
2759+
* <li>
2760+
* The List probe needs list permission; it is also more prone to
2761+
* inconsistency, even on newly created files.
2762+
* </li>
2763+
* </ul>
2764+
*
27242765
* Retry policy: retry translated.
27252766
* @param path Qualified path
27262767
* @param key Key string for the path
27272768
* @param probes probes to make
27282769
* @param tombstones tombstones to filter
27292770
* @return Status
2730-
* @throws FileNotFoundException when the path does not exist
2771+
* @throws FileNotFoundException the supplied probes failed.
27312772
* @throws IOException on other problems.
27322773
*/
2774+
@VisibleForTesting
27332775
@Retries.RetryTranslated
2734-
private S3AFileStatus s3GetFileStatus(final Path path,
2735-
String key,
2776+
S3AFileStatus s3GetFileStatus(final Path path,
2777+
final String key,
27362778
final Set<StatusProbeEnum> probes,
27372779
final Set<Path> tombstones) throws IOException {
2738-
if (!key.isEmpty() && probes.contains(StatusProbeEnum.Head)) {
2739-
try {
2740-
ObjectMetadata meta = getObjectMetadata(key);
2741-
2742-
if (objectRepresentsDirectory(key, meta.getContentLength())) {
2743-
LOG.debug("Found exact file: fake directory");
2744-
return new S3AFileStatus(Tristate.TRUE, path, username);
2745-
} else {
2746-
LOG.debug("Found exact file: normal file");
2780+
if (!key.isEmpty()) {
2781+
if (probes.contains(StatusProbeEnum.Head) && !key.endsWith("/")) {
2782+
try {
2783+
// look for the simple file
2784+
ObjectMetadata meta = getObjectMetadata(key);
2785+
LOG.debug("Found exact file: normal file {}", key);
27472786
return new S3AFileStatus(meta.getContentLength(),
27482787
dateToLong(meta.getLastModified()),
27492788
path,
27502789
getDefaultBlockSize(path),
27512790
username,
27522791
meta.getETag(),
27532792
meta.getVersionId());
2754-
}
2755-
} catch (AmazonServiceException e) {
2756-
if (e.getStatusCode() != SC_404) {
2793+
} catch (AmazonServiceException e) {
2794+
// if the response is a 404 error, it just means that there is
2795+
// no file at that path...the remaining checks will be needed.
2796+
if (e.getStatusCode() != SC_404) {
2797+
throw translateException("getFileStatus", path, e);
2798+
}
2799+
} catch (AmazonClientException e) {
27572800
throw translateException("getFileStatus", path, e);
27582801
}
2759-
} catch (AmazonClientException e) {
2760-
throw translateException("getFileStatus", path, e);
27612802
}
27622803

2804+
// Either a normal file was not found or the probe was skipped.
2805+
// because the key ended in "/" or it was not in the set of probes.
27632806
// Look for the dir marker
2764-
if (!key.endsWith("/") && probes.contains(StatusProbeEnum.DirMarker)) {
2765-
String newKey = key + "/";
2807+
if (probes.contains(StatusProbeEnum.DirMarker)) {
2808+
String newKey = maybeAddTrailingSlash(key);
27662809
try {
27672810
ObjectMetadata meta = getObjectMetadata(newKey);
27682811

@@ -2794,8 +2837,8 @@ private S3AFileStatus s3GetFileStatus(final Path path,
27942837
// execute the list
27952838
if (probes.contains(StatusProbeEnum.List)) {
27962839
try {
2797-
key = maybeAddTrailingSlash(key);
2798-
S3ListRequest request = createListObjectsRequest(key, "/", 1);
2840+
String dirKey = maybeAddTrailingSlash(key);
2841+
S3ListRequest request = createListObjectsRequest(dirKey, "/", 1);
27992842

28002843
S3ListResult objects = listObjects(request);
28012844

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StatusProbeEnum.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,20 @@ public enum StatusProbeEnum {
4141
public static final Set<StatusProbeEnum> DIRECTORIES =
4242
EnumSet.of(DirMarker, List);
4343

44+
/** We only want the HEAD or dir marker. */
45+
public static final Set<StatusProbeEnum> HEAD_OR_DIR_MARKER =
46+
EnumSet.of(Head, DirMarker);
47+
48+
/** We only want the HEAD. */
49+
public static final Set<StatusProbeEnum> HEAD_ONLY =
50+
EnumSet.of(Head);
51+
52+
/** We only want the dir marker. */
53+
public static final Set<StatusProbeEnum> DIR_MARKER_ONLY =
54+
EnumSet.of(DirMarker);
55+
56+
/** We only want the dir marker. */
57+
public static final Set<StatusProbeEnum> LIST_ONLY =
58+
EnumSet.of(List);
59+
4460
}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestAuthoritativePath.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,18 @@ public void setup() throws Exception {
8484

8585
private void cleanUpFS(S3AFileSystem fs) {
8686
// detach from the (shared) metadata store.
87-
fs.setMetadataStore(new NullMetadataStore());
87+
if (fs != null) {
88+
fs.setMetadataStore(new NullMetadataStore());
89+
}
8890

8991
IOUtils.cleanupWithLogger(LOG, fs);
9092
}
9193

9294
@Override
9395
public void teardown() throws Exception {
94-
fullyAuthFS.delete(testRoot, true);
96+
if (fullyAuthFS != null) {
97+
fullyAuthFS.delete(testRoot, true);
98+
}
9599

96100
cleanUpFS(fullyAuthFS);
97101
cleanUpFS(rawFS);

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java

Lines changed: 134 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,32 @@
1818

1919
package org.apache.hadoop.fs.s3a;
2020

21+
import org.apache.hadoop.conf.Configuration;
22+
import org.apache.hadoop.fs.FSDataOutputStream;
2123
import org.apache.hadoop.fs.FileStatus;
2224
import org.apache.hadoop.fs.FileSystem;
2325
import org.apache.hadoop.fs.Path;
2426
import org.apache.hadoop.fs.contract.ContractTestUtils;
2527
import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum;
2628

29+
import org.assertj.core.api.Assertions;
2730
import org.junit.Test;
31+
import org.junit.runner.RunWith;
32+
import org.junit.runners.Parameterized;
2833
import org.slf4j.Logger;
2934
import org.slf4j.LoggerFactory;
3035

3136
import java.io.File;
3237
import java.io.FileNotFoundException;
3338
import java.net.URI;
39+
import java.util.Arrays;
40+
import java.util.Collection;
41+
import java.util.EnumSet;
3442
import java.util.UUID;
3543
import java.util.concurrent.Callable;
3644

3745
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
46+
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
3847
import static org.apache.hadoop.fs.s3a.Statistic.*;
3948
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
4049
import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
@@ -43,7 +52,9 @@
4352
/**
4453
* Use metrics to assert about the cost of file status queries.
4554
* {@link S3AFileSystem#getFileStatus(Path)}.
55+
* Parameterized on guarded vs raw.
4656
*/
57+
@RunWith(Parameterized.class)
4758
public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
4859

4960
private MetricDiff metadataRequests;
@@ -52,9 +63,48 @@ public class ITestS3AFileOperationCost extends AbstractS3ATestBase {
5263
private static final Logger LOG =
5364
LoggerFactory.getLogger(ITestS3AFileOperationCost.class);
5465

66+
/**
67+
* Parameterization.
68+
*/
69+
@Parameterized.Parameters(name = "{0}")
70+
public static Collection<Object[]> params() {
71+
return Arrays.asList(new Object[][]{
72+
{"raw", false},
73+
{"guarded", true}
74+
});
75+
}
76+
77+
private final String name;
78+
79+
private final boolean s3guard;
80+
81+
public ITestS3AFileOperationCost(final String name, final boolean s3guard) {
82+
this.name = name;
83+
this.s3guard = s3guard;
84+
}
85+
86+
@Override
87+
public Configuration createConfiguration() {
88+
Configuration conf = super.createConfiguration();
89+
String bucketName = getTestBucketName(conf);
90+
removeBucketOverrides(bucketName, conf,
91+
S3_METADATA_STORE_IMPL);
92+
if (!s3guard) {
93+
// in a raw run remove all s3guard settings
94+
removeBaseAndBucketOverrides(bucketName, conf,
95+
S3_METADATA_STORE_IMPL);
96+
}
97+
disableFilesystemCaching(conf);
98+
return conf;
99+
}
55100
@Override
56101
public void setup() throws Exception {
57102
super.setup();
103+
if (s3guard) {
104+
// s3guard is required for those test runs where any of the
105+
// guard options are set
106+
assumeS3GuardState(true, getConfiguration());
107+
}
58108
S3AFileSystem fs = getFileSystem();
59109
metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
60110
listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
@@ -80,6 +130,19 @@ private void resetMetricDiffs() {
80130
reset(metadataRequests, listRequests);
81131
}
82132

133+
/**
134+
* Verify that the head and list calls match expectations,
135+
* then reset the counters ready for the next operation.
136+
* @param head expected HEAD count
137+
* @param list expected LIST count
138+
*/
139+
private void verifyOperationCount(int head, int list) {
140+
metadataRequests.assertDiffEquals(head);
141+
listRequests.assertDiffEquals(list);
142+
metadataRequests.reset();
143+
listRequests.reset();
144+
}
145+
83146
@Test
84147
public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
85148
describe("performing getFileStatus on an empty directory");
@@ -205,12 +268,6 @@ public void testFakeDirectoryDeletion() throws Throwable {
205268
+ "clean up activity");
206269
S3AFileSystem fs = getFileSystem();
207270

208-
// As this test uses the s3 metrics to count the number of fake directory
209-
// operations, it depends on side effects happening internally. With
210-
// metadata store enabled, it is brittle to change. We disable this test
211-
// before the internal behavior w/ or w/o metadata store.
212-
// assumeFalse(fs.hasMetadataStore());
213-
214271
Path srcBaseDir = path("src");
215272
mkdirs(srcBaseDir);
216273
MetricDiff deleteRequests =
@@ -389,4 +446,75 @@ public String toString() {
389446
fs.delete(dest, false);
390447
}
391448
}
449+
450+
@Test
451+
public void testDirProbes() throws Throwable {
452+
describe("Test directory probe cost -raw only");
453+
S3AFileSystem fs = getFileSystem();
454+
assume("Unguarded FS only", !fs.hasMetadataStore());
455+
String dir = "testEmptyDirHeadProbe";
456+
Path emptydir = path(dir);
457+
// Create the empty directory.
458+
fs.mkdirs(emptydir);
459+
460+
// metrics and assertions.
461+
resetMetricDiffs();
462+
463+
intercept(FileNotFoundException.class, () ->
464+
fs.innerGetFileStatus(emptydir, false,
465+
StatusProbeEnum.HEAD_ONLY));
466+
verifyOperationCount(1, 0);
467+
468+
// a LIST will find it -but it doesn't consider it an empty dir.
469+
S3AFileStatus status = fs.innerGetFileStatus(emptydir, true,
470+
StatusProbeEnum.LIST_ONLY);
471+
verifyOperationCount(0, 1);
472+
Assertions.assertThat(status)
473+
.describedAs("LIST output is not considered empty")
474+
.matches(s -> !s.isEmptyDirectory().equals(Tristate.TRUE), "is empty");
475+
476+
// finally, skip all probes and expect no operations toThere are
477+
// take place
478+
intercept(FileNotFoundException.class, () ->
479+
fs.innerGetFileStatus(emptydir, false,
480+
EnumSet.noneOf(StatusProbeEnum.class)));
481+
verifyOperationCount(0, 0);
482+
483+
// now add a trailing slash to the key and use the
484+
// deep internal s3GetFileStatus method call.
485+
String emptyDirTrailingSlash = fs.pathToKey(emptydir.getParent())
486+
+ "/" + dir + "/";
487+
// A HEAD request does not probe for keys with a trailing /
488+
intercept(FileNotFoundException.class, () ->
489+
fs.s3GetFileStatus(emptydir, emptyDirTrailingSlash,
490+
StatusProbeEnum.HEAD_ONLY, null));
491+
verifyOperationCount(0, 0);
492+
493+
// but ask for a directory marker and you get the entry
494+
status = fs.s3GetFileStatus(emptydir,
495+
emptyDirTrailingSlash,
496+
StatusProbeEnum.DIR_MARKER_ONLY, null);
497+
verifyOperationCount(1, 0);
498+
assertEquals(emptydir, status.getPath());
499+
}
500+
501+
@Test
502+
public void testCreateCost() throws Throwable {
503+
describe("Test file creation cost -raw only");
504+
S3AFileSystem fs = getFileSystem();
505+
assume("Unguarded FS only", !fs.hasMetadataStore());
506+
resetMetricDiffs();
507+
Path testFile = path("testCreateCost");
508+
509+
// when overwrite is false, the path is checked for existence.
510+
try (FSDataOutputStream out = fs.create(testFile, false)) {
511+
verifyOperationCount(2, 1);
512+
}
513+
514+
// but when true: only the directory checks take place.
515+
try (FSDataOutputStream out = fs.create(testFile, true)) {
516+
verifyOperationCount(1, 1);
517+
}
518+
519+
}
392520
}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardTtl.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ protected Configuration createConfiguration() {
9696
return configuration;
9797
}
9898

99+
@Override
100+
public void setup() throws Exception {
101+
super.setup();
102+
Assume.assumeTrue(getFileSystem().hasMetadataStore());
103+
}
104+
99105
@Test
100106
public void testDirectoryListingAuthoritativeTtl() throws Exception {
101107
LOG.info("Authoritative mode: {}", authoritative);

0 commit comments

Comments
 (0)