Skip to content

Commit 6d92aa7

Browse files
symat, nkalmar
authored and committed
HADOOP-16579. Upgrade to Curator 4.2.0 and ZooKeeper 3.5.5 (#1656). Contributed by Norbert Kalmár, Mate Szalay-Beko
* HADOOP-16579 - Upgrade to Apache Curator 4.2.0 and ZooKeeper 3.5.5 - Add a static initializer for the unit tests using ZooKeeper to enable the four-letter-words diagnostic telnet commands. (This interface became disabled by default, so to keep the ZooKeeper 3.4.x behavior we enabled it for the tests.) - Also fix ZKFailoverController to look for relevant fail-over ActiveAttempt records. The new ZooKeeper seems to respond quicker during the fail-over tests than the old ZooKeeper, so we made sure to catch all the relevant records by adding a new parameter to ZKFailoverController.waitForActiveAttempt(). Co-authored-by: Norbert Kalmár <[email protected]>
1 parent 3d41f33 commit 6d92aa7

File tree

11 files changed

+159
-54
lines changed

11 files changed

+159
-54
lines changed

hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515

1616
import com.google.common.annotations.VisibleForTesting;
1717
import java.nio.ByteBuffer;
18-
import java.nio.charset.Charset;
1918
import java.security.SecureRandom;
2019
import java.util.Collections;
2120
import java.util.HashMap;
@@ -36,7 +35,7 @@
3635
import org.apache.hadoop.classification.InterfaceStability;
3736
import org.apache.zookeeper.KeeperException;
3837
import org.apache.zookeeper.ZooDefs.Perms;
39-
import org.apache.zookeeper.client.ZooKeeperSaslClient;
38+
import org.apache.zookeeper.client.ZKClientConfig;
4039
import org.apache.zookeeper.data.ACL;
4140
import org.apache.zookeeper.data.Id;
4241
import org.apache.zookeeper.data.Stat;
@@ -368,7 +367,7 @@ protected CuratorFramework createCuratorClient(Properties config)
368367
LOG.info("Connecting to ZooKeeper with SASL/Kerberos"
369368
+ "and using 'sasl' ACLs");
370369
String principal = setJaasConfiguration(config);
371-
System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
370+
System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
372371
JAAS_LOGIN_ENTRY_NAME);
373372
System.setProperty("zookeeper.authProvider.1",
374373
"org.apache.zookeeper.server.auth.SASLAuthenticationProvider");

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -442,14 +442,16 @@ private void recordActiveAttempt(
442442
* </ul>
443443
*
444444
* @param timeoutMillis number of millis to wait
445+
* @param onlyAfterNanoTime accept attempt records only after a given
446+
* timestamp. Use this parameter to ignore the old attempt records from a
447+
* previous fail-over attempt.
445448
* @return the published record, or null if the timeout elapses or the
446449
* service becomes unhealthy
447450
* @throws InterruptedException if the thread is interrupted.
448451
*/
449-
private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis)
450-
throws InterruptedException {
451-
long st = System.nanoTime();
452-
long waitUntil = st + TimeUnit.NANOSECONDS.convert(
452+
private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis,
453+
long onlyAfterNanoTime) throws InterruptedException {
454+
long waitUntil = onlyAfterNanoTime + TimeUnit.NANOSECONDS.convert(
453455
timeoutMillis, TimeUnit.MILLISECONDS);
454456

455457
do {
@@ -466,7 +468,7 @@ private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis)
466468

467469
synchronized (activeAttemptRecordLock) {
468470
if ((lastActiveAttemptRecord != null &&
469-
lastActiveAttemptRecord.nanoTime >= st)) {
471+
lastActiveAttemptRecord.nanoTime >= onlyAfterNanoTime)) {
470472
return lastActiveAttemptRecord;
471473
}
472474
// Only wait 1sec so that we periodically recheck the health state
@@ -660,6 +662,7 @@ private void doGracefulFailover()
660662
List<ZKFCProtocol> otherZkfcs = new ArrayList<ZKFCProtocol>(otherNodes.size());
661663

662664
// Phase 3: ask the other nodes to yield from the election.
665+
long st = System.nanoTime();
663666
HAServiceTarget activeNode = null;
664667
for (HAServiceTarget remote : otherNodes) {
665668
// same location, same node - may not always be == equality
@@ -678,7 +681,7 @@ private void doGracefulFailover()
678681

679682
// Phase 4: wait for the normal election to make the local node
680683
// active.
681-
ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000);
684+
ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000, st);
682685

683686
if (attempt == null) {
684687
// We didn't even make an attempt to become active.

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
import org.apache.zookeeper.KeeperException;
6060
import org.apache.zookeeper.KeeperException.NoNodeException;
6161
import org.apache.zookeeper.ZooDefs.Perms;
62-
import org.apache.zookeeper.client.ZooKeeperSaslClient;
62+
import org.apache.zookeeper.client.ZKClientConfig;
6363
import org.apache.zookeeper.data.ACL;
6464
import org.apache.zookeeper.data.Id;
6565
import org.slf4j.Logger;
@@ -173,8 +173,8 @@ public ZKDelegationTokenSecretManager(Configuration conf) {
173173
LOG.info("Connecting to ZooKeeper with SASL/Kerberos"
174174
+ "and using 'sasl' ACLs");
175175
String principal = setJaasConfiguration(conf);
176-
System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
177-
JAAS_LOGIN_ENTRY_NAME);
176+
System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
177+
JAAS_LOGIN_ENTRY_NAME);
178178
System.setProperty("zookeeper.authProvider.1",
179179
"org.apache.zookeeper.server.auth.SASLAuthenticationProvider");
180180
aclProvider = new SASLOwnerACLProvider(principal);

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020
import java.io.IOException;
2121
import java.nio.charset.Charset;
2222
import java.util.ArrayList;
23+
import java.util.LinkedList;
2324
import java.util.List;
2425

2526
import org.apache.curator.framework.AuthInfo;
2627
import org.apache.curator.framework.CuratorFramework;
2728
import org.apache.curator.framework.CuratorFrameworkFactory;
28-
import org.apache.curator.framework.api.transaction.CuratorTransaction;
29-
import org.apache.curator.framework.api.transaction.CuratorTransactionFinal;
29+
import org.apache.curator.framework.api.transaction.CuratorOp;
3030
import org.apache.curator.retry.RetryNTimes;
3131
import org.apache.hadoop.classification.InterfaceAudience;
3232
import org.apache.hadoop.conf.Configuration;
@@ -387,43 +387,45 @@ public SafeTransaction createTransaction(List<ACL> fencingACL,
387387
/**
388388
* Use curator transactions to ensure zk-operations are performed in an all
389389
* or nothing fashion. This is equivalent to using ZooKeeper#multi.
390-
*
391-
* TODO (YARN-3774): Curator 3.0 introduces CuratorOp similar to Op. We ll
392-
* have to rewrite this inner class when we adopt that.
393390
*/
394391
public class SafeTransaction {
395-
private CuratorTransactionFinal transactionFinal;
396392
private String fencingNodePath;
393+
private List<CuratorOp> curatorOperations = new LinkedList<>();
397394

398395
SafeTransaction(List<ACL> fencingACL, String fencingNodePath)
399396
throws Exception {
400397
this.fencingNodePath = fencingNodePath;
401-
CuratorTransaction transaction = curator.inTransaction();
402-
transactionFinal = transaction.create()
403-
.withMode(CreateMode.PERSISTENT).withACL(fencingACL)
404-
.forPath(fencingNodePath, new byte[0]).and();
398+
curatorOperations.add(curator.transactionOp().create()
399+
.withMode(CreateMode.PERSISTENT)
400+
.withACL(fencingACL)
401+
.forPath(fencingNodePath, new byte[0]));
405402
}
406403

407404
public void commit() throws Exception {
408-
transactionFinal = transactionFinal.delete()
409-
.forPath(fencingNodePath).and();
410-
transactionFinal.commit();
405+
curatorOperations.add(curator.transactionOp().delete()
406+
.forPath(fencingNodePath));
407+
curator.transaction().forOperations(curatorOperations);
408+
curatorOperations.clear();
411409
}
412410

413411
public void create(String path, byte[] data, List<ACL> acl, CreateMode mode)
414412
throws Exception {
415-
transactionFinal = transactionFinal.create()
416-
.withMode(mode).withACL(acl).forPath(path, data).and();
413+
curatorOperations.add(curator.transactionOp().create()
414+
.withMode(mode)
415+
.withACL(acl)
416+
.forPath(path, data));
417417
}
418418

419419
public void delete(String path) throws Exception {
420-
transactionFinal = transactionFinal.delete().forPath(path).and();
420+
curatorOperations.add(curator.transactionOp().delete()
421+
.forPath(path));
421422
}
422423

423424
public void setData(String path, byte[] data, int version)
424425
throws Exception {
425-
transactionFinal = transactionFinal.setData()
426-
.withVersion(version).forPath(path, data).and();
426+
curatorOperations.add(curator.transactionOp().setData()
427+
.withVersion(version)
428+
.forPath(path, data));
427429
}
428430
}
429431
}

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,15 @@ public abstract class ClientBaseWithFixes extends ZKTestCase {
6565
public static int CONNECTION_TIMEOUT = 30000;
6666
static final File BASETEST = GenericTestUtils.getTestDir();
6767

68+
static {
69+
// The 4-letter-words commands are simple diagnostics telnet commands in
70+
// ZooKeeper. Since ZooKeeper 3.5, these are disabled by default due to
71+
// security concerns: https://issues.apache.org/jira/browse/ZOOKEEPER-2693
72+
// We are enabling them for the tests here, as some tests in hadoop or in
73+
// other projects might still use them
74+
System.setProperty("zookeeper.4lw.commands.whitelist", "*");
75+
}
76+
6877
protected final String hostPort = initHostPort();
6978
protected int maxCnxns = 0;
7079
protected ServerCnxnFactory serverFactory = null;

hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.apache.hadoop.util.ZKUtil;
3333
import org.apache.zookeeper.Environment;
3434
import org.apache.zookeeper.ZooDefs;
35+
import org.apache.zookeeper.client.ZKClientConfig;
3536
import org.apache.zookeeper.client.ZooKeeperSaslClient;
3637
import org.apache.zookeeper.data.ACL;
3738
import org.apache.zookeeper.data.Id;
@@ -769,19 +770,19 @@ public void applySecurityEnvironment(CuratorFrameworkFactory.Builder
769770
JaasConfiguration jconf =
770771
new JaasConfiguration(jaasClientEntry, principal, keytab);
771772
javax.security.auth.login.Configuration.setConfiguration(jconf);
772-
setSystemPropertyIfUnset(ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY,
773-
"true");
774-
setSystemPropertyIfUnset(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
775-
jaasClientEntry);
773+
setSystemPropertyIfUnset(ZKClientConfig.ENABLE_CLIENT_SASL_KEY,
774+
"true");
775+
setSystemPropertyIfUnset(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
776+
jaasClientEntry);
776777
} else {
777778
// in this case, jaas config is specified so we will not change it
778779
LOG.info("Using existing ZK sasl configuration: " +
779-
"jaasClientEntry = " + System.getProperty(
780-
ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, "Client") +
781-
", sasl client = " + System.getProperty(
782-
ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY,
783-
ZooKeeperSaslClient.ENABLE_CLIENT_SASL_DEFAULT) +
784-
", jaas = " + existingJaasConf);
780+
"jaasClientEntry = " + System.getProperty(
781+
ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, "Client") +
782+
", sasl client = " + System.getProperty(
783+
ZKClientConfig.ENABLE_CLIENT_SASL_KEY,
784+
ZKClientConfig.ENABLE_CLIENT_SASL_DEFAULT) +
785+
", jaas = " + existingJaasConf);
785786
}
786787
break;
787788

hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZookeeperConfigOptions.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
package org.apache.hadoop.registry.client.impl.zk;
2020

21-
import org.apache.zookeeper.client.ZooKeeperSaslClient;
21+
import org.apache.zookeeper.client.ZKClientConfig;
2222
import org.apache.zookeeper.server.ZooKeeperSaslServer;
2323

2424
/**
@@ -62,10 +62,10 @@ public interface ZookeeperConfigOptions {
6262
*
6363
* <p>
6464
* Default value is derived from
65-
* {@link ZooKeeperSaslClient#LOGIN_CONTEXT_NAME_KEY}
65+
* {@link ZKClientConfig#LOGIN_CONTEXT_NAME_KEY}
6666
*/
6767
String PROP_ZK_SASL_CLIENT_CONTEXT =
68-
ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY;
68+
ZKClientConfig.LOGIN_CONTEXT_NAME_KEY;
6969

7070
/**
7171
* The SASL client username: {@value}.

hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import java.io.PrintWriter;
4343
import java.io.StringWriter;
4444
import java.net.InetSocketAddress;
45+
import java.net.ServerSocket;
4546
import java.net.UnknownHostException;
4647

4748
/**
@@ -121,7 +122,7 @@ public InetSocketAddress getConnectionAddress() {
121122
* @throws UnknownHostException if the server cannot resolve the host
122123
*/
123124
private InetSocketAddress getAddress(int port) throws UnknownHostException {
124-
return new InetSocketAddress(host, port < 0 ? 0 : port);
125+
return new InetSocketAddress(host, port <= 0 ? getRandomAvailablePort() : port);
125126
}
126127

127128
/**
@@ -227,10 +228,8 @@ protected void serviceStart() throws Exception {
227228

228229
setupSecurity();
229230

230-
ZooKeeperServer zkServer = new ZooKeeperServer();
231231
FileTxnSnapLog ftxn = new FileTxnSnapLog(dataDir, dataDir);
232-
zkServer.setTxnLogFactory(ftxn);
233-
zkServer.setTickTime(tickTime);
232+
ZooKeeperServer zkServer = new ZooKeeperServer(ftxn, tickTime);
234233

235234
LOG.info("Starting Local Zookeeper service");
236235
factory = ServerCnxnFactory.createFactory();
@@ -245,7 +244,7 @@ protected void serviceStart() throws Exception {
245244
PrintWriter pw = new PrintWriter(sw);
246245
zkServer.dumpConf(pw);
247246
pw.flush();
248-
LOG.debug(sw.toString());
247+
LOG.debug("ZooKeeper config:\n" + sw.toString());
249248
}
250249
binding = new BindingInformation();
251250
binding.ensembleProvider = new FixedEnsembleProvider(connectString);
@@ -279,4 +278,20 @@ public BindingInformation supplyBindingInformation() {
279278
"Service is not started: binding information undefined");
280279
return binding;
281280
}
281+
282+
/**
283+
* Returns a random open port that can be used as the server port for ZooKeeper.
284+
* @return a random open port or 0 (in case of error)
285+
*/
286+
private int getRandomAvailablePort() {
287+
port = 0;
288+
try {
289+
final ServerSocket s = new ServerSocket(0);
290+
port = s.getLocalPort();
291+
s.close();
292+
} catch (IOException e) {
293+
LOG.warn("ERROR during selecting random port for ZooKeeper server to bind." , e);
294+
}
295+
return port;
296+
}
282297
}

0 commit comments

Comments
 (0)