author     Pavel Kvasnička <pavel.kvasnicka@firma.seznam.cz>   2016-11-23 16:41:11 +0100
committer  Alistair Coles <alistair.coles@hpe.com>             2016-11-25 12:41:10 +0000
commit     4fdac38f7ab8a7f8281258f2480cdb609a464ef4 (patch)
tree       c9899813e0b8101e9d1bd808eeb0a17eba0effed
parent     d555000f5f47e2e4e1470168931ed7fb8b5f8e9b (diff)
download   swift-4fdac38f7ab8a7f8281258f2480cdb609a464ef4.tar.gz
Fixed regression in consolidate_hashes
The regression occurs when a new object is written to a new suffix in a
non-empty partition. The suffix is added to the invalidations file but never
makes it into the hashes pickle file. When replication of the partition runs,
replication of that suffix only completes on the first and every tenth run of
the replicator, and rsync is invoked for each new suffix because the
destination does not return a hash for it, even though the suffix content is
already in the same state on both ends. This bug was introduced in 2.7.0.

Co-Authored-By: Alistair Coles <alistair.coles@hpe.com>
Change-Id: Ie2700f6e6171f2ecfa7d07b0f18b79e90cbf1c8a
Closes-Bug: #1634967
(cherry picked from commit 8ac432fff3e01a07f4bff918bb9cc38d93532b43)
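The heart of the fix is a single dict lookup in consolidate_hashes(): with a
plain hashes.get(suffix), a suffix that is listed in the invalidations file
but has no entry yet in the hashes pickle looks the same as one already marked
invalid, so it is skipped. A minimal stand-alone illustration with plain dicts
(not Swift code; the names are ours):

    # contents of the hashes pickle for a partition, as a plain dict
    hashes = {'abc': 'd41d8cd98f00b204e9800998ecf8427e'}
    new_suffix = '123'  # listed in the invalidations file, never hashed before

    # old condition: get() returns None for the missing key, so the brand-new
    # suffix is silently skipped and never marked for rehashing
    print(hashes is not None and hashes.get(new_suffix) is not None)      # False

    # fixed condition: a non-None default ('') distinguishes "missing" from
    # "already marked invalid" (value None), so the new suffix is picked up
    # and hashes[new_suffix] is set to None, forcing a rehash
    print(hashes is not None and hashes.get(new_suffix, '') is not None)  # True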
-rw-r--r--  swift/obj/diskfile.py             3
-rw-r--r--  test/unit/obj/test_diskfile.py  158
2 files changed, 154 insertions, 7 deletions
diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index e1c9370f9..ab5b7acb0 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -271,7 +271,8 @@ def consolidate_hashes(partition_dir):
             with open(invalidations_file, 'rb') as inv_fh:
                 for line in inv_fh:
                     suffix = line.strip()
-                    if hashes is not None and hashes.get(suffix) is not None:
+                    if hashes is not None and \
+                            hashes.get(suffix, '') is not None:
                         hashes[suffix] = None
                         modified = True
         except (IOError, OSError) as e:
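For context, the consolidation flow that this changed line participates in can
be sketched end to end. The following is a simplified, stand-alone sketch, not
the real implementation in swift/obj/diskfile.py (locking, error handling and
the hashes-file bootstrap logic are omitted); only the file names hashes.pkl
and hashes.invalid match Swift's:

    import os
    import pickle
    import tempfile

    HASH_FILE = 'hashes.pkl'                    # pickled per-suffix hashes
    HASH_INVALIDATIONS_FILE = 'hashes.invalid'  # suffixes waiting to be rehashed

    def invalidate_hash(part_dir, suffix):
        # append the suffix to the invalidations file (no locking in this sketch)
        with open(os.path.join(part_dir, HASH_INVALIDATIONS_FILE), 'a') as fh:
            fh.write(suffix + '\n')

    def consolidate_hashes(part_dir):
        # fold invalidated suffixes into the pickled hashes, then truncate
        # the invalidations file (locking and error handling omitted)
        hashes_path = os.path.join(part_dir, HASH_FILE)
        inv_path = os.path.join(part_dir, HASH_INVALIDATIONS_FILE)
        hashes = {}
        if os.path.exists(hashes_path):
            with open(hashes_path, 'rb') as fh:
                hashes = pickle.load(fh)
        if os.path.exists(inv_path):
            with open(inv_path) as fh:
                for line in fh:
                    suffix = line.strip()
                    # the fixed condition: a suffix that was never hashed
                    # (missing key) is marked invalid too, not only known ones
                    if hashes.get(suffix, '') is not None:
                        hashes[suffix] = None
            with open(hashes_path, 'wb') as fh:
                pickle.dump(hashes, fh)
            open(inv_path, 'w').close()  # clear the processed invalidations
        return hashes

    part_dir = tempfile.mkdtemp()
    invalidate_hash(part_dir, '123')     # brand-new suffix, not in hashes.pkl yet
    print(consolidate_hashes(part_dir))  # {'123': None} -> replicator rehashes it

With the old lookup, the final call in this sketch would return an empty dict:
the destination keeps reporting no hash for the new suffix, which is what made
rsync run on every replication pass as described in the commit message.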
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 6c77bf94d..df064bb08 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -4996,9 +4996,11 @@ class TestSuffixHashes(unittest.TestCase):
             self.assertFalse(os.path.exists(hashes_file))
             self.assertFalse(os.path.exists(inv_file))
 
-    def test_invalidate_hash_file_exists(self):
+    def test_invalidate_hash_empty_file_exists(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                      policy=policy)
@@ -5007,6 +5009,34 @@ class TestSuffixHashes(unittest.TestCase):
             suffix = os.path.basename(suffix_dir)
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertIn(suffix, hashes)  # sanity
+
+    def test_invalidate_hash_consolidation(self):
+        def assert_consolidation(suffixes):
+            # verify that suffixes are invalidated after consolidation
+            with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
+                hashes = df_mgr.consolidate_hashes(part_path)
+            self.assertTrue(mock_lock.called)
+            for suffix in suffixes:
+                self.assertIn(suffix, hashes)
+                self.assertIsNone(hashes[suffix])
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(hashes, pickle.load(f))
+            with open(invalidations_file, 'rb') as f:
+                self.assertEqual("", f.read())
+            return hashes
+
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            # create something to hash
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            df.delete(self.ts())
+            suffix_dir = os.path.dirname(df._datadir)
+            suffix = os.path.basename(suffix_dir)
+            original_hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, original_hashes)  # sanity
+            self.assertIsNotNone(original_hashes[suffix])
+
             # sanity check hashes file
             part_path = os.path.join(self.devices, 'sda1',
                                      diskfile.get_data_dir(policy), '0')
@@ -5014,24 +5044,57 @@ class TestSuffixHashes(unittest.TestCase):
             invalidations_file = os.path.join(
                 part_path, diskfile.HASH_INVALIDATIONS_FILE)
             with open(hashes_file, 'rb') as f:
-                self.assertEqual(hashes, pickle.load(f))
+                self.assertEqual(original_hashes, pickle.load(f))
+            self.assertFalse(os.path.exists(invalidations_file))
             # invalidate the hash
             with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
                 df_mgr.invalidate_hash(suffix_dir)
             self.assertTrue(mock_lock.called)
+            # suffix should be in invalidations file
             with open(invalidations_file, 'rb') as f:
                 self.assertEqual(suffix + "\n", f.read())
+            # hashes file is unchanged
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(original_hashes, pickle.load(f))
             # consolidate the hash and the invalidations
-            with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
-                hashes = df_mgr.consolidate_hashes(part_path)
-            self.assertIsNone(hashes.get(suffix))
+            hashes = assert_consolidation([suffix])
+
+            # invalidate a different suffix hash in same partition but not in
+            # existing hashes.pkl
+            i = 0
+            while True:
+                df2 = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o%d' % i,
+                                          policy=policy)
+                i += 1
+                suffix_dir2 = os.path.dirname(df2._datadir)
+                if suffix_dir != suffix_dir2:
+                    break
+            df2.delete(self.ts())
+            suffix2 = os.path.basename(suffix_dir2)
+            # suffix2 should be in invalidations file
+            with open(invalidations_file, 'rb') as f:
+                self.assertEqual(suffix2 + "\n", f.read())
+            # hashes file is not yet changed
             with open(hashes_file, 'rb') as f:
                 self.assertEqual(hashes, pickle.load(f))
+
+            # consolidate hashes
+            hashes = assert_consolidation([suffix, suffix2])
+
+            # invalidating suffix2 multiple times is ok
+            df2.delete(self.ts())
+            df2.delete(self.ts())
+            # suffix2 should be in invalidations file
             with open(invalidations_file, 'rb') as f:
-                self.assertEqual("", f.read())
+                self.assertEqual("%s\n%s\n" % (suffix2, suffix2), f.read())
+            # hashes file is not yet changed
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(hashes, pickle.load(f))
+            # consolidate hashes
+            assert_consolidation([suffix, suffix2])
 
     # invalidate_hash tests - error handling
@@ -5126,6 +5189,89 @@ class TestSuffixHashes(unittest.TestCase):
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertEqual(hashes, {})
 
+    def test_hash_suffix_ts_cleanup_after_recalc(self):
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            df = df_mgr.get_diskfile(
+                'sda1', '0', 'a', 'c', 'o', policy=policy)
+            suffix_dir = os.path.dirname(df._datadir)
+            suffix = os.path.basename(suffix_dir)
+
+            # scale back reclaim age a bit
+            df_mgr.reclaim_age = 1000
+            # write a valid tombstone
+            old_time = time() - 500
+            timestamp = Timestamp(old_time)
+            df.delete(timestamp.internal)
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])
+
+            # we have tombstone entry
+            tombstone = '%s.ts' % timestamp.internal
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))
+
+            # lower reclaim age to force tombstone reclaiming
+            df_mgr.reclaim_age = 200
+
+            # not cleaning up because suffix not invalidated
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])
+
+            # recalculating suffix hash cause cleanup
+            hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
+
+            self.assertEqual(hashes, {})
+            self.assertFalse(os.path.exists(df._datadir))
+
+    def test_hash_suffix_ts_cleanup_after_invalidate_hash(self):
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            df = df_mgr.get_diskfile(
+                'sda1', '0', 'a', 'c', 'o', policy=policy)
+            suffix_dir = os.path.dirname(df._datadir)
+            suffix = os.path.basename(suffix_dir)
+
+            # scale back reclaim age a bit
+            df_mgr.reclaim_age = 1000
+            # write a valid tombstone
+            old_time = time() - 500
+            timestamp = Timestamp(old_time)
+            df.delete(timestamp.internal)
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])
+
+            # we have tombstone entry
+            tombstone = '%s.ts' % timestamp.internal
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))
+
+            # lower reclaim age to force tombstone reclaiming
+            df_mgr.reclaim_age = 200
+
+            # not cleaning up because suffix not invalidated
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])
+
+            # However if we call invalidate_hash for the suffix dir,
+            # get_hashes can reclaim the tombstone
+            with mock.patch('swift.obj.diskfile.lock_path'):
+                df_mgr.invalidate_hash(suffix_dir)
+
+            # updating invalidated hashes cause cleanup
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+
+            self.assertEqual(hashes, {})
+            self.assertFalse(os.path.exists(df._datadir))
+
     def test_hash_suffix_one_reclaim_and_one_valid_tombstone(self):
         for policy in self.iter_policies():
             paths, suffix = find_paths_with_matching_suffixes(2, 1)
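The two tombstone-reclaim tests added above rely on simple timestamp
arithmetic: a tombstone written 500 seconds ago stays put while reclaim_age is
1000, becomes reclaimable once reclaim_age is lowered to 200, and is actually
removed only when the suffix is rehashed (via an explicit recalculation or a
prior invalidate_hash). A tiny stand-alone illustration of just the age check
(the variable names are ours, not Swift's):

    from time import time

    tombstone_written_at = time() - 500   # matches the tests' old_time

    for reclaim_age in (1000, 200):
        is_reclaimable = time() - tombstone_written_at > reclaim_age
        print(reclaim_age, is_reclaimable)  # 1000 -> False, 200 -> True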