Skip to content

Commit 1046925

Browse files
顾鹏顾鹏
authored andcommitted
HDFS-17223. Add journalnode maintenance node list
1 parent 000a39b commit 1046925

File tree

9 files changed

+211
-11
lines changed

9 files changed

+211
-11
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,6 +1466,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
14661466
"dfs.journalnode.edit-cache-size.fraction";
14671467
public static final float DFS_JOURNALNODE_EDIT_CACHE_SIZE_FRACTION_DEFAULT = 0.5f;
14681468

1469+
public static final String DFS_JOURNALNODE_MAINTENANCE_NODES_KEY =
1470+
"dfs.journalnode.maintenance.nodes";
1471+
public static final String[] DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT = {};
1472+
14691473
// Journal-node related configs for the client side.
14701474
public static final String DFS_QJOURNAL_QUEUE_SIZE_LIMIT_KEY = "dfs.qjournal.queued-edits.limit.mb";
14711475
public static final int DFS_QJOURNAL_QUEUE_SIZE_LIMIT_DEFAULT = 10;

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
import org.apache.hadoop.classification.InterfaceStability;
7171
import org.apache.hadoop.fs.ParentNotDirectoryException;
7272
import org.apache.hadoop.fs.UnresolvedLinkException;
73+
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
7374
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
7475
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
7576
import org.apache.hadoop.hdfs.server.namenode.INodesInPath;
@@ -1982,4 +1983,32 @@ public static void addTransferRateMetric(final DataNodeMetrics metrics, final lo
19821983
LOG.warn("Unexpected value for data transfer bytes={} duration={}", read, duration);
19831984
}
19841985
}
1986+
1987+
/**
1988+
* Construct a HostSet from an array of "ip:port" strings.
1989+
* @param nodesHostPort ip port string array.
1990+
* @return HostSet of InetSocketAddress.
1991+
*/
1992+
public static HostSet getHostSet(String[] nodesHostPort) {
1993+
HostSet retSet = new HostSet();
1994+
for (String hostPort : nodesHostPort) {
1995+
try {
1996+
URI uri = new URI("dummy", hostPort, null, null, null);
1997+
int port = uri.getPort();
1998+
if (port < 0) {
1999+
LOG.warn(String.format("The ip:port `%s` is invalid, skip this node.", hostPort));
2000+
continue;
2001+
}
2002+
InetSocketAddress inetSocketAddress = new InetSocketAddress(uri.getHost(), port);
2003+
if (inetSocketAddress.isUnresolved()) {
2004+
LOG.warn(String.format("Failed to resolve address `%s`", hostPort));
2005+
continue;
2006+
}
2007+
retSet.add(inetSocketAddress);
2008+
} catch (URISyntaxException e) {
2009+
LOG.warn(String.format("Failed to parse `%s`", hostPort));
2010+
}
2011+
}
2012+
return retSet;
2013+
}
19852014
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,15 @@ class AsyncLoggerSet {
5353

5454
private static final long INVALID_EPOCH = -1;
5555
private long myEpoch = INVALID_EPOCH;
56+
private final int majoritySize;
5657

57-
public AsyncLoggerSet(List<AsyncLogger> loggers) {
58+
AsyncLoggerSet(List<AsyncLogger> loggers) {
59+
this(loggers, loggers.size());
60+
}
61+
62+
AsyncLoggerSet(List<AsyncLogger> loggers, int quorumJournalCount) {
5863
this.loggers = ImmutableList.copyOf(loggers);
64+
this.majoritySize = quorumJournalCount / 2 + 1;
5965
}
6066

6167
void setEpoch(long e) {
@@ -151,7 +157,7 @@ <V> Map<AsyncLogger, V> waitForWriteQuorum(QuorumCall<AsyncLogger, V> q,
151157
* @return the number of nodes which are required to obtain a quorum.
152158
*/
153159
int getMajoritySize() {
154-
return loggers.size() / 2 + 1;
160+
return this.majoritySize;
155161
}
156162

157163
/**
@@ -161,6 +167,11 @@ String getMajorityString() {
161167
return getMajoritySize() + "/" + loggers.size();
162168
}
163169

170+
@VisibleForTesting
171+
List<AsyncLogger> getLoggerListForTests() {
172+
return loggers;
173+
}
174+
164175
/**
165176
* @return the number of loggers behind this set
166177
*/

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
*/
1818
package org.apache.hadoop.hdfs.qjournal.client;
1919

20+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT;
21+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_JOURNALNODE_MAINTENANCE_NODES_KEY;
22+
2023
import java.io.IOException;
2124
import java.net.InetSocketAddress;
2225
import java.net.URI;
@@ -31,6 +34,7 @@
3134
import java.util.concurrent.TimeUnit;
3235
import java.util.concurrent.TimeoutException;
3336

37+
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
3438
import org.apache.hadoop.util.Lists;
3539
import org.slf4j.Logger;
3640
import org.slf4j.LoggerFactory;
@@ -108,6 +112,7 @@ public class QuorumJournalManager implements JournalManager {
108112
private static final int OUTPUT_BUFFER_CAPACITY_DEFAULT = 512 * 1024;
109113
private int outputBufferCapacity;
110114
private final URLConnectionFactory connectionFactory;
115+
private int quorumJournalCount;
111116

112117
/** Limit logging about input stream selection to every 5 seconds max. */
113118
private static final long SELECT_INPUT_STREAM_LOG_INTERVAL_MS = 5000;
@@ -144,7 +149,18 @@ public QuorumJournalManager(Configuration conf,
144149
this.uri = uri;
145150
this.nsInfo = nsInfo;
146151
this.nameServiceId = nameServiceId;
147-
this.loggers = new AsyncLoggerSet(createLoggers(loggerFactory));
152+
153+
// createLoggers() will set quorumJournalCount to total number of journal nodes while return a
154+
// list of healthy/good journal nodes.
155+
List<AsyncLogger> asyncLoggerList = createLoggers(loggerFactory);
156+
this.loggers = new AsyncLoggerSet(asyncLoggerList, this.quorumJournalCount);
157+
158+
// Check whether the number of jn maintenance lists is valid
159+
int quorumThreshold = quorumJournalCount / 2 + 1;
160+
Preconditions.checkArgument(
161+
this.loggers.size() >= quorumThreshold,
162+
"The total journalnode minus %s the number of blacklists must be greater than or equal to"
163+
+ " %s!", DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, quorumThreshold);
148164

149165
this.maxTxnsPerRpc =
150166
conf.getInt(QJM_RPC_MAX_TXNS_KEY, QJM_RPC_MAX_TXNS_DEFAULT);
@@ -250,6 +266,10 @@ Map<AsyncLogger, NewEpochResponseProto> createNewUniqueEpoch()
250266

251267
@Override
252268
public void format(NamespaceInfo nsInfo, boolean force) throws IOException {
269+
if (isJNInMaintenanceMode()) {
270+
throw new IOException(
271+
"Formatting a journal node is not support while in jn maintenance mode");
272+
}
253273
QuorumCall<AsyncLogger, Void> call = loggers.format(nsInfo, force);
254274
try {
255275
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -406,21 +426,39 @@ private void recoverUnclosedSegment(long segmentTxId) throws IOException {
406426
logToSync.getStartTxId(),
407427
logToSync.getEndTxId()));
408428
}
409-
410-
static List<AsyncLogger> createLoggers(Configuration conf,
429+
430+
List<AsyncLogger> createLoggers(Configuration conf,
431+
URI uri,
432+
NamespaceInfo nsInfo,
433+
AsyncLogger.Factory factory,
434+
String nameServiceId)
435+
throws IOException {
436+
String[] skipNodesHostPort = conf.getTrimmedStrings(
437+
DFS_JOURNALNODE_MAINTENANCE_NODES_KEY, DFS_JOURNALNODE_MAINTENANCE_NODES_DEFAULT);
438+
return createLoggers(conf, uri, nsInfo, factory, nameServiceId, skipNodesHostPort);
439+
}
440+
441+
private List<AsyncLogger> createLoggers(Configuration conf,
411442
URI uri,
412443
NamespaceInfo nsInfo,
413444
AsyncLogger.Factory factory,
414-
String nameServiceId)
445+
String nameServiceId,
446+
String[] skipNodesHostPort)
415447
throws IOException {
416448
List<AsyncLogger> ret = Lists.newArrayList();
417449
List<InetSocketAddress> addrs = Util.getAddressesList(uri, conf);
418450
if (addrs.size() % 2 == 0) {
419451
LOG.warn("Quorum journal URI '" + uri + "' has an even number " +
420452
"of Journal Nodes specified. This is not recommended!");
421453
}
454+
setQuorumJournalCount(addrs.size());
455+
HostSet skipSet = DFSUtil.getHostSet(skipNodesHostPort);
422456
String jid = parseJournalId(uri);
423457
for (InetSocketAddress addr : addrs) {
458+
if(skipSet.match(addr)) {
459+
LOG.info("The node {} is a maintenance node and will be skipped.", addr);
460+
continue;
461+
}
424462
ret.add(factory.createLogger(conf, nsInfo, jid, nameServiceId, addr));
425463
}
426464
return ret;
@@ -667,6 +705,9 @@ AsyncLoggerSet getLoggerSetForTests() {
667705

668706
@Override
669707
public void doPreUpgrade() throws IOException {
708+
if (isJNInMaintenanceMode()) {
709+
throw new IOException("doPreUpgrade() is not support while in jn maintenance mode");
710+
}
670711
QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade();
671712
try {
672713
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -684,6 +725,9 @@ public void doPreUpgrade() throws IOException {
684725

685726
@Override
686727
public void doUpgrade(Storage storage) throws IOException {
728+
if (isJNInMaintenanceMode()) {
729+
throw new IOException("doUpgrade() is not support while in jn maintenance mode");
730+
}
687731
QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage);
688732
try {
689733
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -701,6 +745,9 @@ public void doUpgrade(Storage storage) throws IOException {
701745

702746
@Override
703747
public void doFinalize() throws IOException {
748+
if (isJNInMaintenanceMode()) {
749+
throw new IOException("doFinalize() is not support while in jn maintenance mode");
750+
}
704751
QuorumCall<AsyncLogger, Void> call = loggers.doFinalize();
705752
try {
706753
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -719,6 +766,9 @@ public void doFinalize() throws IOException {
719766
@Override
720767
public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
721768
int targetLayoutVersion) throws IOException {
769+
if (isJNInMaintenanceMode()) {
770+
throw new IOException("canRollBack() is not support while in jn maintenance mode");
771+
}
722772
QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage,
723773
prevStorage, targetLayoutVersion);
724774
try {
@@ -753,6 +803,9 @@ public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
753803

754804
@Override
755805
public void doRollback() throws IOException {
806+
if (isJNInMaintenanceMode()) {
807+
throw new IOException("doRollback() is not support while in jn maintenance mode");
808+
}
756809
QuorumCall<AsyncLogger, Void> call = loggers.doRollback();
757810
try {
758811
call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
@@ -770,6 +823,9 @@ public void doRollback() throws IOException {
770823

771824
@Override
772825
public void discardSegments(long startTxId) throws IOException {
826+
if (isJNInMaintenanceMode()) {
827+
throw new IOException("discardSegments() is not support while in jn maintenance mode");
828+
}
773829
QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId);
774830
try {
775831
call.waitFor(loggers.size(), loggers.size(), 0,
@@ -789,6 +845,9 @@ public void discardSegments(long startTxId) throws IOException {
789845

790846
@Override
791847
public long getJournalCTime() throws IOException {
848+
if (isJNInMaintenanceMode()) {
849+
throw new IOException("getJournalCTime() is not support while in jn maintenance mode");
850+
}
792851
QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime();
793852
try {
794853
call.waitFor(loggers.size(), loggers.size(), 0,
@@ -819,4 +878,12 @@ public long getJournalCTime() throws IOException {
819878

820879
throw new AssertionError("Unreachable code.");
821880
}
881+
882+
public void setQuorumJournalCount(int quorumJournalCount) {
883+
this.quorumJournalCount = quorumJournalCount;
884+
}
885+
886+
private boolean isJNInMaintenanceMode() {
887+
return this.loggers.size() < quorumJournalCount;
888+
}
822889
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class HostSet implements Iterable<InetSocketAddress> {
4545
* The function that checks whether there exists an entry foo in the set
4646
* so that foo &lt;= addr.
4747
*/
48-
boolean matchedBy(InetSocketAddress addr) {
48+
public boolean matchedBy(InetSocketAddress addr) {
4949
Collection<Integer> ports = addrs.get(addr.getAddress());
5050
return addr.getPort() == 0 ? !ports.isEmpty() : ports.contains(addr
5151
.getPort());
@@ -55,23 +55,23 @@ boolean matchedBy(InetSocketAddress addr) {
5555
* The function that checks whether there exists an entry foo in the set
5656
* so that addr &lt;= foo.
5757
*/
58-
boolean match(InetSocketAddress addr) {
58+
public boolean match(InetSocketAddress addr) {
5959
int port = addr.getPort();
6060
Collection<Integer> ports = addrs.get(addr.getAddress());
6161
boolean exactMatch = ports.contains(port);
6262
boolean genericMatch = ports.contains(0);
6363
return exactMatch || genericMatch;
6464
}
6565

66-
boolean isEmpty() {
66+
public boolean isEmpty() {
6767
return addrs.isEmpty();
6868
}
6969

70-
int size() {
70+
public int size() {
7171
return addrs.size();
7272
}
7373

74-
void add(InetSocketAddress addr) {
74+
public void add(InetSocketAddress addr) {
7575
Preconditions.checkArgument(!addr.isUnresolved());
7676
addrs.put(addr.getAddress(), addr.getPort());
7777
}

hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6333,6 +6333,23 @@
63336333
</description>
63346334
</property>
63356335

6336+
<property>
6337+
<name>dfs.journalnode.maintenance.nodes</name>
6338+
<value></value>
6339+
<description>
6340+
In the case that one out of three journal nodes being down, theoretically HDFS can still
6341+
function. However, in reality, the unavailable journal node may not recover quickly. During
6342+
this period, when we need to restart an Namenode, the Namenode will try to connect to the
6343+
unavailable journal node through the lengthy RPC retry mechanism, resulting in a long
6344+
initialization time for the Namenode. By adding these unavailable journal nodes to the
6345+
maintenance nodes, we will skip these unavailable journal nodes during Namenode initialization
6346+
and thus reduce namenode startup time.
6347+
1-node example values: jn01:8485
6348+
2-node example values: jn01:8485,jn02:8485
6349+
</description>
6350+
</property>
6351+
6352+
63366353
<property>
63376354
<name>dfs.namenode.lease-hard-limit-sec</name>
63386355
<value>1200</value>

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import static org.junit.Assert.assertArrayEquals;
4242
import static org.junit.Assert.assertEquals;
4343
import static org.junit.Assert.assertFalse;
44+
import static org.junit.Assert.assertNotNull;
4445
import static org.junit.Assert.assertNull;
4546
import static org.junit.Assert.assertThat;
4647
import static org.junit.Assert.assertTrue;
@@ -72,6 +73,7 @@
7273
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
7374
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
7475
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
76+
import org.apache.hadoop.hdfs.server.blockmanagement.HostSet;
7577
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
7678
import org.apache.hadoop.hdfs.server.namenode.NameNode;
7779
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
@@ -1137,4 +1139,24 @@ public void testAddTransferRateMetricForInvalidValue() {
11371139
DFSUtil.addTransferRateMetric(mockMetrics, 100, 0);
11381140
verify(mockMetrics, times(0)).addReadTransferRate(anyLong());
11391141
}
1142+
1143+
@Test
1144+
public void testGetHostSet() {
1145+
String[] testAddrs = new String[] {"unreachable-host1.com:9000", "unreachable-host2.com:9000"};
1146+
HostSet hostSet = DFSUtil.getHostSet(testAddrs);
1147+
assertNotNull(hostSet);
1148+
assertEquals(0, hostSet.size());
1149+
1150+
String strAddress = "localhost";
1151+
testAddrs = new String[] {strAddress};
1152+
hostSet = DFSUtil.getHostSet(testAddrs);
1153+
assertEquals(0, hostSet.size());
1154+
1155+
strAddress = "localhost:9000";
1156+
InetSocketAddress inetSocketAddress = new InetSocketAddress("localhost", 9000);
1157+
testAddrs = new String[] {strAddress};
1158+
hostSet = DFSUtil.getHostSet(testAddrs);
1159+
assertNotNull(hostSet);
1160+
assertTrue(hostSet.match(inetSocketAddress));
1161+
}
11401162
}

0 commit comments

Comments
 (0)