Skip to content

Commit 34747c3

Browse files
authored
HADOOP-16396. Allow authoritative mode on a subdirectory. (apache#1043)
1 parent a2a8be1 commit 34747c3

File tree

10 files changed

+426
-55
lines changed

10 files changed

+426
-55
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,10 @@ private Constants() {
363363

364364
public static final String USER_AGENT_PREFIX = "fs.s3a.user.agent.prefix";
365365

366+
/** Whether or not to allow MetadataStore to be source of truth for a path prefix */
367+
public static final String AUTHORITATIVE_PATH = "fs.s3a.authoritative.path";
368+
public static final String[] DEFAULT_AUTHORITATIVE_PATH = {};
369+
366370
/** Whether or not to allow MetadataStore to be source of truth. */
367371
public static final String METADATASTORE_AUTHORITATIVE =
368372
"fs.s3a.metadatastore.authoritative";

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
237237
private final AtomicBoolean closed = new AtomicBoolean(false);
238238
private volatile boolean isClosed = false;
239239
private MetadataStore metadataStore;
240-
private boolean allowAuthoritative;
240+
private boolean allowAuthoritativeMetadataStore;
241+
private Collection<String> allowAuthoritativePaths;
241242

242243
/** Delegation token integration; non-empty when DT support is enabled. */
243244
private Optional<S3ADelegationTokens> delegationTokens = Optional.empty();
@@ -397,11 +398,13 @@ public void initialize(URI name, Configuration originalConf)
397398
ttlTimeProvider = new S3Guard.TtlTimeProvider(authDirTtl);
398399

399400
setMetadataStore(S3Guard.getMetadataStore(this));
400-
allowAuthoritative = conf.getBoolean(METADATASTORE_AUTHORITATIVE,
401+
allowAuthoritativeMetadataStore = conf.getBoolean(METADATASTORE_AUTHORITATIVE,
401402
DEFAULT_METADATASTORE_AUTHORITATIVE);
403+
allowAuthoritativePaths = S3Guard.getAuthoritativePaths(this);
404+
402405
if (hasMetadataStore()) {
403-
LOG.debug("Using metadata store {}, authoritative={}",
404-
getMetadataStore(), allowAuthoritative);
406+
LOG.debug("Using metadata store {}, authoritative store={}, authoritative path={}",
407+
getMetadataStore(), allowAuthoritativeMetadataStore, allowAuthoritativePaths);
405408
}
406409
initMultipartUploads(conf);
407410
} catch (AmazonClientException e) {
@@ -840,7 +843,8 @@ public String pathToKey(Path path) {
840843
* @param key s3 key or ""
841844
* @return the with a trailing "/", or, if it is the root key, "",
842845
*/
843-
private String maybeAddTrailingSlash(String key) {
846+
@InterfaceAudience.Private
847+
public String maybeAddTrailingSlash(String key) {
844848
if (!key.isEmpty() && !key.endsWith("/")) {
845849
return key + '/';
846850
} else {
@@ -1446,7 +1450,7 @@ public boolean hasMetadataStore() {
14461450
*/
14471451
@VisibleForTesting
14481452
boolean hasAuthoritativeMetadataStore() {
1449-
return hasMetadataStore() && allowAuthoritative;
1453+
return hasMetadataStore() && allowAuthoritativeMetadataStore;
14501454
}
14511455

14521456
/**
@@ -2398,6 +2402,8 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
23982402

23992403
DirListingMetadata dirMeta =
24002404
S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider);
2405+
boolean allowAuthoritative = S3Guard.allowAuthoritative(f, this,
2406+
allowAuthoritativeMetadataStore, allowAuthoritativePaths);
24012407
if (allowAuthoritative && dirMeta != null && dirMeta.isAuthoritative()) {
24022408
return S3Guard.dirMetaToStatuses(dirMeta);
24032409
}
@@ -2415,6 +2421,7 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
24152421
result.add(files.next());
24162422
}
24172423
// merge the results. This will update the store as needed
2424+
24182425
return S3Guard.dirListingUnion(metadataStore, path, result, dirMeta,
24192426
allowAuthoritative, ttlTimeProvider);
24202427
} else {
@@ -2629,6 +2636,8 @@ S3AFileStatus innerGetFileStatus(final Path f,
26292636
// dest is also a directory, there's no difference.
26302637
// TODO After HADOOP-16085 the modification detection can be done with
26312638
// etags or object version instead of modTime
2639+
boolean allowAuthoritative = S3Guard.allowAuthoritative(f, this,
2640+
allowAuthoritativeMetadataStore, allowAuthoritativePaths);
26322641
if (!pm.getFileStatus().isDirectory() &&
26332642
!allowAuthoritative) {
26342643
LOG.debug("Metadata for {} found in the non-auth metastore.", path);
@@ -3554,7 +3563,8 @@ public String toString() {
35543563
sb.append(", blockFactory=").append(blockFactory);
35553564
}
35563565
sb.append(", metastore=").append(metadataStore);
3557-
sb.append(", authoritative=").append(allowAuthoritative);
3566+
sb.append(", authoritativeStore=").append(allowAuthoritativeMetadataStore);
3567+
sb.append(", authoritativePath=").append(allowAuthoritativePaths);
35583568
sb.append(", useListV1=").append(useListV1);
35593569
if (committerIntegration != null) {
35603570
sb.append(", magicCommitter=").append(isMagicCommitEnabled());
@@ -3794,10 +3804,13 @@ private RemoteIterator<S3ALocatedFileStatus> innerListFiles(Path f, boolean
37943804
key, delimiter);
37953805
final RemoteIterator<S3AFileStatus> cachedFilesIterator;
37963806
final Set<Path> tombstones;
3807+
boolean allowAuthoritative = S3Guard.allowAuthoritative(f, this,
3808+
allowAuthoritativeMetadataStore, allowAuthoritativePaths);
37973809
if (recursive) {
37983810
final PathMetadata pm = metadataStore.get(path, true);
37993811
// shouldn't need to check pm.isDeleted() because that will have
38003812
// been caught by getFileStatus above.
3813+
38013814
MetadataStoreListFilesIterator metadataStoreListFilesIterator =
38023815
new MetadataStoreListFilesIterator(metadataStore, pm,
38033816
allowAuthoritative);
@@ -3886,6 +3899,8 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
38863899
final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
38873900
listing.createProvidedFileStatusIterator(
38883901
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
3902+
boolean allowAuthoritative = S3Guard.allowAuthoritative(f, this,
3903+
allowAuthoritativeMetadataStore, allowAuthoritativePaths);
38893904
return (allowAuthoritative && meta != null
38903905
&& meta.isAuthoritative())
38913906
? listing.createLocatedFileStatusIterator(

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitOperations.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ public void createSuccessMarker(Path outputPath,
399399
conf.getTrimmed(S3_METADATA_STORE_IMPL, ""));
400400
successData.addDiagnostic(METADATASTORE_AUTHORITATIVE,
401401
conf.getTrimmed(METADATASTORE_AUTHORITATIVE, "false"));
402+
successData.addDiagnostic(AUTHORITATIVE_PATH,
403+
conf.getTrimmed(AUTHORITATIVE_PATH, ""));
402404
successData.addDiagnostic(MAGIC_COMMITTER_ENABLED,
403405
conf.getTrimmed(MAGIC_COMMITTER_ENABLED, "false"));
404406

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1321,7 +1321,6 @@ public void put(
13211321
final DirListingMetadata meta,
13221322
@Nullable final BulkOperationState operationState) throws IOException {
13231323
LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta);
1324-
13251324
// directory path
13261325
Path path = meta.getPath();
13271326
DDBPathMetadata ddbPathMeta =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
import com.google.common.annotations.VisibleForTesting;
3636
import com.google.common.base.Preconditions;
37+
import org.apache.hadoop.fs.s3a.S3AFileSystem;
3738
import org.slf4j.Logger;
3839
import org.slf4j.LoggerFactory;
3940

@@ -49,9 +50,8 @@
4950
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
5051
import org.apache.hadoop.util.ReflectionUtils;
5152

52-
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_METADATASTORE_METADATA_TTL;
53-
import static org.apache.hadoop.fs.s3a.Constants.METADATASTORE_METADATA_TTL;
54-
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
53+
import static org.apache.hadoop.fs.s3a.Constants.*;
54+
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_AUTHORITATIVE_PATH;
5555
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
5656
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
5757
import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
@@ -772,4 +772,33 @@ public static DirListingMetadata listChildrenWithTtl(MetadataStore ms,
772772
return dlm;
773773
}
774774

775+
public static Collection<String> getAuthoritativePaths(S3AFileSystem fs) {
776+
String[] rawAuthoritativePaths =
777+
fs.getConf().getTrimmedStrings(AUTHORITATIVE_PATH, DEFAULT_AUTHORITATIVE_PATH);
778+
Collection<String> authoritativePaths = new ArrayList<>();
779+
if (rawAuthoritativePaths.length > 0) {
780+
for (int i = 0; i < rawAuthoritativePaths.length; i++) {
781+
Path qualified = fs.qualify(new Path(rawAuthoritativePaths[i]));
782+
authoritativePaths.add(fs.maybeAddTrailingSlash(qualified.toString()));
783+
}
784+
}
785+
return authoritativePaths;
786+
}
787+
788+
public static boolean allowAuthoritative(Path p, S3AFileSystem fs,
789+
boolean authMetadataStore, Collection<String> authPaths) {
790+
String haystack = fs.maybeAddTrailingSlash(p.toString());
791+
if (authMetadataStore) {
792+
return true;
793+
}
794+
if (!authPaths.isEmpty()) {
795+
for (String needle : authPaths) {
796+
797+
if (haystack.startsWith(needle)) {
798+
return true;
799+
}
800+
}
801+
}
802+
return false;
803+
}
775804
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1177,8 +1177,10 @@ public int run(String[] args, PrintStream out)
11771177
if (usingS3Guard) {
11781178
out.printf("Filesystem %s is using S3Guard with store %s%n",
11791179
fsUri, store.toString());
1180-
printOption(out, "Authoritative S3Guard",
1180+
printOption(out, "Authoritative Metadata Store",
11811181
METADATASTORE_AUTHORITATIVE, "false");
1182+
printOption(out, "Authoritative Path",
1183+
AUTHORITATIVE_PATH, "");
11821184
authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false);
11831185
printStoreDiagnostics(out, store);
11841186
} else {

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ two different reasons:
113113
stored in metadata store.
114114
* This mode can be set as a configuration property
115115
`fs.s3a.metadatastore.authoritative`
116+
* It can also be set only on specific directories by setting
117+
`fs.s3a.authoritative.path` to one or more prefixes, for example
118+
`s3a://bucket/path` or "/auth1,/auth2".
116119
* All interactions with the S3 bucket(s) must be through S3A clients sharing
117120
the same metadata store.
118121
* This is independent from which metadata store implementation is used.

0 commit comments

Comments
 (0)