summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStig Rohde Døssing <stig@humio.com>2021-03-06 19:38:43 +0000
committerMate Szalay-Beko <mszalay@cloudera.com>2022-05-16 19:57:45 +0200
commitd1ec2f346b43ce6c00be45a68871fb7bda6e098a (patch)
tree21c8a12eb030b895b0840df7932c3c06c46d289a
parent22d0c856aeb1eb19da2810dacb5585c055365ed6 (diff)
downloadzookeeper-d1ec2f346b43ce6c00be45a68871fb7bda6e098a.tar.gz
ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is true
snapshot.trust.empty is an escape hatch for users upgrading from 3.4.x to later Zookeeper versions, allowing nodes to start with a non-empty transaction log but no snapshot. The intent is for this setting to be enabled for a short while during the upgrade, and then disabled again, as the check it disables is a safety feature. Prior to this PR, a node would only write a snapshot locally if it became leader, or if it had fallen so far behind the leader that the leader sent a SNAP message instead of a DIFF. This made the upgrade process inconvenient, as not all nodes would create a snapshot when snapshot.trust.empty was true, meaning that the safety check could not be flipped back on. This PR makes follower nodes write a local snapshot when they receive NEWLEADER, if they have no local snapshot and snapshot.trust.empty is true. Author: Stig Rohde Døssing <stig@humio.com> Reviewers: Enrico Olivelli <eolivelli@apache.org>, Damien Diederen <ddiederen@apache.org> Closes #1581 from srdo/zookeeper-3781 (cherry picked from commit 1214d3bf611d153ae8c3987523da01d3d6c82686) Signed-off-by: Damien Diederen <ddiederen@apache.org> (cherry picked from commit 679cc2b1015db6a0b41cf9223c826a4b565387e9)
-rw-r--r--zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java4
-rw-r--r--zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java9
-rw-r--r--zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java8
-rw-r--r--zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java51
-rw-r--r--zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java24
5 files changed, 82 insertions, 14 deletions
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
index 2bab9378f..a12c1117d 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java
@@ -353,6 +353,10 @@ public class ZooKeeperServer implements SessionExpirer, ServerStats.Provider {
}
}
+ public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
+ return txnLogFactory.shouldForceWriteInitialSnapshotAfterLeaderElection();
+ }
+
@Override
public long getDataDirSize() {
if (zkDb == null) {
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index 0267eb480..ff9b914d6 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -218,6 +218,15 @@ public class FileTxnSnapLog {
}
/**
+ * whether to force the write of an initial snapshot after a leader election,
+ * to address ZOOKEEPER-3781 after upgrading from Zookeeper 3.4.x.
+ * @return true if an initial snapshot should be written even if not otherwise required, false otherwise.
+ */
+ public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
+ return trustEmptySnapshot && getLastSnapshotInfo() == null;
+ }
+
+ /**
* this function restores the server
* database after reading from the
* snapshots and transaction logs
diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
index 51b103882..a8d89b28d 100644
--- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
+++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java
@@ -390,7 +390,13 @@ public class Learner {
synchronized (zk) {
if (qp.getType() == Leader.DIFF) {
LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid()));
- snapshotNeeded = false;
+ if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) {
+ LOG.info("Forcing a snapshot write as part of upgrading from an older Zookeeper. This should only happen while upgrading.");
+ snapshotNeeded = true;
+ syncSnapshot = true;
+ } else {
+ snapshotNeeded = false;
+ }
}
else if (qp.getType() == Leader.SNAP) {
LOG.info("Getting a snapshot from leader 0x" + Long.toHexString(qp.getZxid()));
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
index 2ff750ea1..64c48ef5e 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
+import java.nio.file.Files;
import java.util.List;
import org.apache.log4j.Logger;
@@ -39,7 +40,7 @@ import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
import org.junit.Assert;
import org.junit.Test;
-/** If snapshots are corrupted to the empty file or deleted, Zookeeper should
+/** If snapshots are corrupted to the empty file or deleted, Zookeeper should
* not proceed to read its transaction log files
* Test that zxid == -1 in the presence of emptied/deleted snapshots
*/
@@ -145,6 +146,54 @@ public class EmptiedSnapshotRecoveryTest extends ZKTestCase implements Watcher
runTest(false, true);
}
+ @Test
+ public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws Exception {
+ QuorumUtil qu = new QuorumUtil(1);
+ try {
+ qu.startAll();
+ String connString = qu.getConnectionStringForServer(1);
+ try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
+ for (int i = 0; i < N_TRANSACTIONS; i++) {
+ zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
+ }
+ }
+ int leaderIndex = qu.getLeaderServer();
+ //Shut down the cluster and delete the snapshots from the followers
+ for (int i = 1; i <= qu.ALL; i++) {
+ qu.shutdown(i);
+ if (i != leaderIndex) {
+ FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
+ List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
+ Assert.assertTrue("We have a snapshot to corrupt", snapshots.size() > 0);
+ for (File file : snapshots) {
+ Files.delete(file.toPath());
+ }
+ assertEquals(txnLogFactory.findNRecentSnapshots(10).size(), 0);
+ }
+ }
+ //Start while trusting empty snapshots, verify that the followers save snapshots
+ System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true");
+ qu.start(leaderIndex);
+ for (int i = 1; i <= qu.ALL; i++) {
+ if (i != leaderIndex) {
+ qu.restart(i);
+ FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
+ List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
+ Assert.assertTrue("A snapshot should have been created on follower " + i, snapshots.size() > 0);
+ }
+ }
+ //Check that the created nodes are still there
+ try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
+ for (int i = 0; i < N_TRANSACTIONS; i++) {
+ Assert.assertNotNull(zk.exists("/node-" + i, false));
+ }
+ }
+ } finally {
+ System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
+ qu.tearDown();
+ }
+ }
+
public void process(WatchedEvent event) {
// do nothing
}
diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
index 113452a95..d6e8b9963 100644
--- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
+++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java
@@ -107,7 +107,7 @@ public class QuorumUtil {
ps.clientPort = PortAssignment.unique();
peers.put(i, ps);
- peersView.put(Long.valueOf(i), new QuorumServer(i,
+ peersView.put(Long.valueOf(i), new QuorumServer(i,
new InetSocketAddress("127.0.0.1", PortAssignment.unique()),
new InetSocketAddress("127.0.0.1", PortAssignment.unique()),
new InetSocketAddress("127.0.0.1", ps.clientPort),
@@ -136,7 +136,7 @@ public class QuorumUtil {
// This was added to avoid running into the problem of ZOOKEEPER-1539
public boolean disableJMXTest = false;
-
+
public void enableLocalSession(boolean localSessionEnabled) {
this.localSessionEnabled = localSessionEnabled;
@@ -158,7 +158,7 @@ public class QuorumUtil {
// This was added to avoid running into the problem of ZOOKEEPER-1539
if (disableJMXTest) return;
-
+
// interesting to see what's there...
try {
JMXEnv.dump();
@@ -250,22 +250,22 @@ public class QuorumUtil {
public void shutdown(int id) {
QuorumPeer qp = getPeer(id).peer;
try {
- LOG.info("Shutting down quorum peer " + qp.getName());
+ LOG.info("Shutting down quorum peer {} with id {}", qp.getName(), id);
qp.shutdown();
Election e = qp.getElectionAlg();
if (e != null) {
- LOG.info("Shutting down leader election " + qp.getName());
+ LOG.info("Shutting down leader election {} with id {}", qp.getName(), id);
e.shutdown();
} else {
- LOG.info("No election available to shutdown " + qp.getName());
+ LOG.info("No election available to shutdown {} with id {}", qp.getName(), id);
}
- LOG.info("Waiting for " + qp.getName() + " to exit thread");
+ LOG.info("Waiting for {} with id {} to exit thread", qp.getName(), id);
qp.join(30000);
if (qp.isAlive()) {
- Assert.fail("QP failed to shutdown in 30 seconds: " + qp.getName());
+ Assert.fail("QP failed to shutdown in 30 seconds: " + qp.getName() + " " + id);
}
} catch (InterruptedException e) {
- LOG.debug("QP interrupted: " + qp.getName(), e);
+ LOG.debug("QP interrupted: {} {}", qp.getName(), id, e);
}
}
@@ -293,11 +293,11 @@ public class QuorumUtil {
}
public List<QuorumPeer> getFollowerQuorumPeers() {
- List<QuorumPeer> peerList = new ArrayList<QuorumPeer>(ALL - 1);
+ List<QuorumPeer> peerList = new ArrayList<QuorumPeer>(ALL - 1);
for (PeerStruct ps: peers.values()) {
if (ps.peer.leader == null) {
- peerList.add(ps.peer);
+ peerList.add(ps.peer);
}
}
@@ -308,7 +308,7 @@ public class QuorumUtil {
LOG.info("TearDown started");
OSMXBean osMbean = new OSMXBean();
- if (osMbean.getUnix() == true) {
+ if (osMbean.getUnix() == true) {
LOG.info("fdcount after test is: " + osMbean.getOpenFileDescriptorCount());
}