Skip to content

Commit 61f369c

Browse files
authored
HDFS-16890: RBF: Ensures router periodically refreshes its record of a namespace's state. (apache#5298)
1 parent 8798b94 commit 61f369c

File tree

4 files changed

+110
-7
lines changed

4 files changed

+110
-7
lines changed

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
201201
FEDERATION_ROUTER_PREFIX + "observer.federated.state.propagation.maxsize";
202202
public static final int DFS_ROUTER_OBSERVER_FEDERATED_STATE_PROPAGATION_MAXSIZE_DEFAULT = 5;
203203

204+
public static final String DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY =
205+
FEDERATION_ROUTER_PREFIX + "observer.state.id.refresh.period";
206+
public static final String DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_DEFAULT = "15s";
207+
204208
public static final String FEDERATION_STORE_SERIALIZER_CLASS =
205209
FEDERATION_STORE_PREFIX + "serializer";
206210
public static final Class<StateStoreSerializerPBImpl>

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import java.util.concurrent.ThreadFactory;
5858
import java.util.concurrent.ThreadPoolExecutor;
5959
import java.util.concurrent.TimeUnit;
60+
import java.util.concurrent.atomic.LongAccumulator;
6061
import java.util.concurrent.atomic.LongAdder;
6162
import java.util.regex.Matcher;
6263
import java.util.regex.Pattern;
@@ -86,6 +87,7 @@
8687
import org.apache.hadoop.net.NetUtils;
8788
import org.apache.hadoop.security.UserGroupInformation;
8889
import org.apache.hadoop.util.StringUtils;
90+
import org.apache.hadoop.util.Time;
8991
import org.eclipse.jetty.util.ajax.JSON;
9092
import org.slf4j.Logger;
9193
import org.slf4j.LoggerFactory;
@@ -136,6 +138,14 @@ public class RouterRpcClient {
136138
private final boolean observerReadEnabledDefault;
137139
/** Nameservice specific overrides of the default setting for enabling observer reads. */
138140
private HashSet<String> observerReadEnabledOverrides = new HashSet<>();
141+
/**
142+
* Period to refresh namespace stateID using active namenode.
143+
* This ensures the namespace stateID is fresh even when an
144+
* observer is trailing behind.
145+
*/
146+
private long activeNNStateIdRefreshPeriodMs;
147+
/** Time of the last call sent to the active namenode, for each namespace. */
148+
private final ConcurrentHashMap<String, LongAccumulator> lastActiveNNRefreshTimes;
139149

140150
/** Pattern to parse a stack trace line. */
141151
private static final Pattern STACK_TRACE_PATTERN =
@@ -211,13 +221,25 @@ public RouterRpcClient(Configuration conf, Router router,
211221
this.observerReadEnabledDefault = conf.getBoolean(
212222
RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_KEY,
213223
RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_VALUE);
214-
String[] observerReadOverrides = conf.getStrings(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_OVERRIDES);
224+
String[] observerReadOverrides =
225+
conf.getStrings(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_OVERRIDES);
215226
if (observerReadOverrides != null) {
216227
observerReadEnabledOverrides.addAll(Arrays.asList(observerReadOverrides));
217228
}
218229
if (this.observerReadEnabledDefault) {
219230
LOG.info("Observer read is enabled for router.");
220231
}
232+
this.activeNNStateIdRefreshPeriodMs = conf.getTimeDuration(
233+
RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY,
234+
RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_DEFAULT,
235+
TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
236+
if (activeNNStateIdRefreshPeriodMs < 0) {
237+
LOG.info("Periodic stateId freshness check is disabled"
238+
+ " since '{}' is {}ms, which is less than 0.",
239+
RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY,
240+
activeNNStateIdRefreshPeriodMs);
241+
}
242+
this.lastActiveNNRefreshTimes = new ConcurrentHashMap<>();
221243
}
222244

223245
/**
@@ -1707,10 +1729,13 @@ private List<? extends FederationNamenodeContext> getOrderedNamenodes(String nsI
17071729
boolean isObserverRead) throws IOException {
17081730
final List<? extends FederationNamenodeContext> namenodes;
17091731

1710-
if (RouterStateIdContext.getClientStateIdFromCurrentCall(nsId) > Long.MIN_VALUE) {
1711-
namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, isObserverRead);
1712-
} else {
1713-
namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, false);
1732+
boolean listObserverNamenodesFirst = isObserverRead
1733+
&& isNamespaceStateIdFresh(nsId)
1734+
&& (RouterStateIdContext.getClientStateIdFromCurrentCall(nsId) > Long.MIN_VALUE);
1735+
namenodes = namenodeResolver.getNamenodesForNameserviceId(nsId, listObserverNamenodesFirst);
1736+
if (!listObserverNamenodesFirst) {
1737+
// Refresh time of last call to active NameNode.
1738+
getTimeOfLastCallToActive(nsId).accumulate(Time.monotonicNow());
17141739
}
17151740

17161741
if (namenodes == null || namenodes.isEmpty()) {
@@ -1721,7 +1746,8 @@ private List<? extends FederationNamenodeContext> getOrderedNamenodes(String nsI
17211746
}
17221747

17231748
private boolean isObserverReadEligible(String nsId, Method method) {
1724-
boolean isReadEnabledForNamespace = observerReadEnabledDefault != observerReadEnabledOverrides.contains(nsId);
1749+
boolean isReadEnabledForNamespace =
1750+
observerReadEnabledDefault != observerReadEnabledOverrides.contains(nsId);
17251751
return isReadEnabledForNamespace && isReadCall(method);
17261752
}
17271753

@@ -1735,4 +1761,24 @@ private static boolean isReadCall(Method method) {
17351761
}
17361762
return !method.getAnnotationsByType(ReadOnly.class)[0].activeOnly();
17371763
}
1764+
1765+
/**
1766+
* Checks whether a namespace's stateId was refreshed recently enough.
1767+
* Returns true if the check is disabled or the refresh time is newer than threshold.
1768+
* Otherwise, return false and call should be handled by active namenode.
1769+
* @param nsId namespaceID
1770+
*/
1771+
@VisibleForTesting
1772+
boolean isNamespaceStateIdFresh(String nsId) {
1773+
if (activeNNStateIdRefreshPeriodMs < 0) {
1774+
return true;
1775+
}
1776+
long timeSinceRefreshMs = Time.monotonicNow() - getTimeOfLastCallToActive(nsId).get();
1777+
return (timeSinceRefreshMs <= activeNNStateIdRefreshPeriodMs);
1778+
}
1779+
1780+
private LongAccumulator getTimeOfLastCallToActive(String namespaceId) {
1781+
return lastActiveNNRefreshTimes
1782+
.computeIfAbsent(namespaceId, key -> new LongAccumulator(Math::max, 0));
1783+
}
17381784
}

hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,4 +884,14 @@
884884
of namespaces in use and the latency of the msync requests.
885885
</description>
886886
</property>
887+
888+
<property>
889+
<name>dfs.federation.router.observer.state.id.refresh.period</name>
890+
<value>15s</value>
891+
<description>
892+
Period to refresh namespace stateID using active namenode. This ensures the
893+
namespace stateID is fresh even when an observer is trailing behind.
894+
If this is below 0, the auto-refresh is disabled.
895+
</description>
896+
</property>
887897
</configuration>

hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@
3434
import java.util.concurrent.atomic.LongAccumulator;
3535

3636
import org.apache.hadoop.conf.Configuration;
37+
import org.apache.hadoop.fs.FileStatus;
3738
import org.apache.hadoop.fs.FileSystem;
3839
import org.apache.hadoop.fs.Path;
3940
import org.apache.hadoop.hdfs.ClientGSIContext;
41+
import org.apache.hadoop.hdfs.DFSClient;
4042
import org.apache.hadoop.hdfs.DFSConfigKeys;
4143
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
4244
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RouterFederatedStateProto;
@@ -50,6 +52,7 @@
5052
import org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver;
5153
import org.apache.hadoop.hdfs.server.namenode.NameNode;
5254
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos;
55+
import org.apache.hadoop.test.GenericTestUtils;
5356
import org.junit.jupiter.api.Assertions;
5457
import org.junit.jupiter.api.Test;
5558
import org.junit.jupiter.api.AfterEach;
@@ -95,7 +98,9 @@ public void startUpCluster(int numberOfObserver, Configuration confOverrides) th
9598
conf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "0ms");
9699
conf.setBoolean(DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY, true);
97100
if (confOverrides != null) {
98-
conf.addResource(confOverrides);
101+
confOverrides
102+
.iterator()
103+
.forEachRemaining(entry -> conf.set(entry.getKey(), entry.getValue()));
99104
}
100105
cluster = new MiniRouterDFSCluster(true, 2, numberOfNamenode);
101106
cluster.addNamenodeOverrides(conf);
@@ -639,4 +644,42 @@ public void testRouterStateIdContextCleanup() throws Exception {
639644
assertEquals("ns0", namespace1.get(0));
640645
assertTrue(namespace2.isEmpty());
641646
}
647+
648+
@Test
649+
@Tag(SKIP_BEFORE_EACH_CLUSTER_STARTUP)
650+
public void testPeriodicStateRefreshUsingActiveNamenode() throws Exception {
651+
Path rootPath = new Path("/");
652+
653+
Configuration confOverride = new Configuration(false);
654+
confOverride.set(RBFConfigKeys.DFS_ROUTER_OBSERVER_STATE_ID_REFRESH_PERIOD_KEY, "500ms");
655+
confOverride.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "3s");
656+
startUpCluster(1, confOverride);
657+
658+
fileSystem = routerContext.getFileSystem(getConfToEnableObserverReads());
659+
fileSystem.listStatus(rootPath);
660+
int initialLengthOfRootListing = fileSystem.listStatus(rootPath).length;
661+
662+
DFSClient activeClient = cluster.getNamenodes("ns0")
663+
.stream()
664+
.filter(nnContext -> nnContext.getNamenode().isActiveState())
665+
.findFirst().orElseThrow(() -> new IllegalStateException("No active namenode."))
666+
.getClient();
667+
668+
for (int i = 0; i < 10; i++) {
669+
activeClient.mkdirs("/dir" + i, null, false);
670+
}
671+
activeClient.close();
672+
673+
// Wait long enough for state in router to be considered stale.
674+
GenericTestUtils.waitFor(
675+
() -> !routerContext
676+
.getRouterRpcClient()
677+
.isNamespaceStateIdFresh("ns0"),
678+
100,
679+
10000,
680+
"Timeout: Namespace state was never considered stale.");
681+
FileStatus[] rootFolderAfterMkdir = fileSystem.listStatus(rootPath);
682+
assertEquals("List-status should show newly created directories.",
683+
initialLengthOfRootListing + 10, rootFolderAfterMkdir.length);
684+
}
642685
}

0 commit comments

Comments
 (0)