diff options
author | Stig Rohde Døssing <stig@humio.com> | 2021-03-06 19:38:43 +0000 |
---|---|---|
committer | Mate Szalay-Beko <mszalay@cloudera.com> | 2022-05-16 19:57:45 +0200 |
commit | d1ec2f346b43ce6c00be45a68871fb7bda6e098a (patch) | |
tree | 21c8a12eb030b895b0840df7932c3c06c46d289a | |
parent | 22d0c856aeb1eb19da2810dacb5585c055365ed6 (diff) | |
download | zookeeper-d1ec2f346b43ce6c00be45a68871fb7bda6e098a.tar.gz |
ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is true
snapshot.trust.empty is an escape hatch for users upgrading from 3.4.x to later Zookeeper
versions, allowing nodes to start with a non-empty transaction log but no snapshot.
The intent is for this setting to be enabled for a short while during the upgrade,
and then disabled again, as the check it disables is a safety feature.
Prior to this PR, a node would only write a snapshot locally if it became leader,
or if it had fallen so far behind the leader that the leader sent a SNAP message instead
of a DIFF. This made the upgrade process inconvenient, as not all nodes would create
a snapshot when snapshot.trust.empty was true, meaning that the safety check could
not be flipped back on.
This PR makes follower nodes write a local snapshot when they receive NEWLEADER,
if they have no local snapshot and snapshot.trust.empty is true.
Author: Stig Rohde Døssing <stig@humio.com>
Reviewers: Enrico Olivelli <eolivelli@apache.org>, Damien Diederen <ddiederen@apache.org>
Closes #1581 from srdo/zookeeper-3781
(cherry picked from commit 1214d3bf611d153ae8c3987523da01d3d6c82686)
Signed-off-by: Damien Diederen <ddiederen@apache.org>
(cherry picked from commit 679cc2b1015db6a0b41cf9223c826a4b565387e9)
5 files changed, 82 insertions, 14 deletions
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java index 2bab9378f..a12c1117d 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java @@ -353,6 +353,10 @@ public class ZooKeeperServer implements SessionExpirer, ServerStats.Provider { } } + public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() { + return txnLogFactory.shouldForceWriteInitialSnapshotAfterLeaderElection(); + } + @Override public long getDataDirSize() { if (zkDb == null) { diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java index 0267eb480..ff9b914d6 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java @@ -218,6 +218,15 @@ public class FileTxnSnapLog { } /** + * whether to force the write of an initial snapshot after a leader election, + * to address ZOOKEEPER-3781 after upgrading from Zookeeper 3.4.x. + * @return true if an initial snapshot should be written even if not otherwise required, false otherwise. + */ + public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() { + return trustEmptySnapshot && getLastSnapshotInfo() == null; + } + + /** * this function restores the server * database after reading from the * snapshots and transaction logs diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java index 51b103882..a8d89b28d 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java @@ -390,7 +390,13 @@ public class Learner { synchronized (zk) { if (qp.getType() == Leader.DIFF) { LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid())); - snapshotNeeded = false; + if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) { + LOG.info("Forcing a snapshot write as part of upgrading from an older Zookeeper. This should only happen while upgrading."); + snapshotNeeded = true; + syncSnapshot = true; + } else { + snapshotNeeded = false; + } } else if (qp.getType() == Leader.SNAP) { LOG.info("Getting a snapshot from leader 0x" + Long.toHexString(qp.getZxid())); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java index 2ff750ea1..64c48ef5e 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.nio.file.Files; import java.util.List; import org.apache.log4j.Logger; @@ -39,7 +40,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog; import org.junit.Assert; import org.junit.Test; -/** If snapshots are corrupted to the empty file or deleted, Zookeeper should +/** If snapshots are corrupted to the empty file or deleted, Zookeeper should * not proceed to read its transaction log files * Test that zxid == -1 in the presence of emptied/deleted snapshots */ @@ -145,6 +146,54 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher runTest(false, true); } + @Test + public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws Exception { + QuorumUtil qu = new QuorumUtil(1); + try { + qu.startAll(); + String connString = qu.getConnectionStringForServer(1); + try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) { + for (int i = 0; i < N_TRANSACTIONS; i++) { + zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + } + int leaderIndex = qu.getLeaderServer(); + //Shut down the cluster and delete the snapshots from the followers + for (int i = 1; i <= qu.ALL; i++) { + qu.shutdown(i); + if (i != leaderIndex) { + FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory(); + List<File> snapshots = txnLogFactory.findNRecentSnapshots(10); + Assert.assertTrue("We have a snapshot to corrupt", snapshots.size() > 0); + for (File file : snapshots) { + Files.delete(file.toPath()); + } + assertEquals(txnLogFactory.findNRecentSnapshots(10).size(), 0); + } + } + //Start while trusting empty snapshots, verify that the followers save snapshots + System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true"); + qu.start(leaderIndex); + for (int i = 1; i <= qu.ALL; i++) { + if (i != leaderIndex) { + qu.restart(i); + FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory(); + List<File> snapshots = txnLogFactory.findNRecentSnapshots(10); + Assert.assertTrue("A snapshot should have been created on follower " + i, snapshots.size() > 0); + } + } + //Check that the created nodes are still there + try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) { + for (int i = 0; i < N_TRANSACTIONS; i++) { + Assert.assertNotNull(zk.exists("/node-" + i, false)); + } + } + } finally { + System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY); + qu.tearDown(); + } + } + public void process(WatchedEvent event) { // do nothing } diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java index 113452a95..d6e8b9963 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java @@ -107,7 +107,7 @@ public class QuorumUtil { ps.clientPort = PortAssignment.unique(); peers.put(i, ps); - peersView.put(Long.valueOf(i), new QuorumServer(i, + peersView.put(Long.valueOf(i), new QuorumServer(i, new InetSocketAddress("127.0.0.1", PortAssignment.unique()), new InetSocketAddress("127.0.0.1", PortAssignment.unique()), new InetSocketAddress("127.0.0.1", ps.clientPort), @@ -136,7 +136,7 @@ public class QuorumUtil { // This was added to avoid running into the problem of ZOOKEEPER-1539 public boolean disableJMXTest = false; - + public void enableLocalSession(boolean localSessionEnabled) { this.localSessionEnabled = localSessionEnabled; @@ -158,7 +158,7 @@ public class QuorumUtil { // This was added to avoid running into the problem of ZOOKEEPER-1539 if (disableJMXTest) return; - + // interesting to see what's there... try { JMXEnv.dump(); @@ -250,22 +250,22 @@ public class QuorumUtil { public void shutdown(int id) { QuorumPeer qp = getPeer(id).peer; try { - LOG.info("Shutting down quorum peer " + qp.getName()); + LOG.info("Shutting down quorum peer {} with id {}", qp.getName(), id); qp.shutdown(); Election e = qp.getElectionAlg(); if (e != null) { - LOG.info("Shutting down leader election " + qp.getName()); + LOG.info("Shutting down leader election {} with id {}", qp.getName(), id); e.shutdown(); } else { - LOG.info("No election available to shutdown " + qp.getName()); + LOG.info("No election available to shutdown {} with id {}", qp.getName(), id); } - LOG.info("Waiting for " + qp.getName() + " to exit thread"); + LOG.info("Waiting for {} with id {} to exit thread", qp.getName(), id); qp.join(30000); if (qp.isAlive()) { - Assert.fail("QP failed to shutdown in 30 seconds: " + qp.getName()); + Assert.fail("QP failed to shutdown in 30 seconds: " + qp.getName() + " " + id); } } catch (InterruptedException e) { - LOG.debug("QP interrupted: " + qp.getName(), e); + LOG.debug("QP interrupted: {} {}", qp.getName(), id, e); } } @@ -293,11 +293,11 @@ public class QuorumUtil { } public List<QuorumPeer> getFollowerQuorumPeers() { - List<QuorumPeer> peerList = new ArrayList<QuorumPeer>(ALL - 1); + List<QuorumPeer> peerList = new ArrayList<QuorumPeer>(ALL - 1); for (PeerStruct ps: peers.values()) { if (ps.peer.leader == null) { - peerList.add(ps.peer); + peerList.add(ps.peer); } } @@ -308,7 +308,7 @@ public class QuorumUtil { LOG.info("TearDown started"); OSMXBean osMbean = new OSMXBean(); - if (osMbean.getUnix() == true) { + if (osMbean.getUnix() == true) { LOG.info("fdcount after test is: " + osMbean.getOpenFileDescriptorCount()); } |