summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlistair Coles <alistairncoles@gmail.com>2021-03-03 17:30:24 +0000
committerClay Gerrard <clay.gerrard@gmail.com>2021-03-12 09:24:49 -0600
commit4bb78de611b0662918852044681548fdb2cebf6d (patch)
tree982fda2b42118463cfe384a385a5231a17c558fd
parent1f8eadd6416aa354c9c8e0067242da9f76bd96b6 (diff)
downloadswift-4bb78de611b0662918852044681548fdb2cebf6d.tar.gz
Fix os.link exceptions in diskfile.relink_paths
Previously, when a diskfile was relinked in a new partition, the diskfile.relink_paths() would test for the existence of the link before attempting to create it. This 'test then set' might race with another process creating the same diskfile (e.g. a concurrent PUT which created the very same object in an old partition that the relinker is also relinking). The race might lead to an attempt to create a link that already exists (despite the pre-check) and an EEXIST exception being raised. This patch modifies relink_paths to tolerate EEXIST exceptions but only if the existing file is a link to the target file. Otherwise the EEXIST exception is still raised. The 'check_existing' argument for relink_paths is removed since it is no longer relevant. relink_paths() might also raise an ENOENT exception if, while a diskfile is being relinked in the 'new' partition dir, another process PUTs a newer version of the same object and as a result cleans up the older diskfile in the 'old' partition before the first process has called os.link(). This patch modifies relink_paths() to tolerate ENOENT exceptions from os.link() but only if the target path (i.e. the file in the 'old' partition) no longer exists. Otherwise the ENOENT will still be raised. This patch also modifies relink_paths() to return a boolean indicating if the hard link was created by the call to the method (True) or not (False). Closes-Bug: 1917658 Change-Id: I65d4b83c56699ed566fbfb7068f9d2681ca67aa3
-rw-r--r--swift/cli/relinker.py3
-rw-r--r--swift/obj/diskfile.py51
-rw-r--r--test/unit/cli/test_relinker.py55
-rw-r--r--test/unit/obj/test_diskfile.py160
-rw-r--r--test/unit/obj/test_server.py61
5 files changed, 300 insertions, 30 deletions
diff --git a/swift/cli/relinker.py b/swift/cli/relinker.py
index 19a59f8fe..f35a7e16b 100644
--- a/swift/cli/relinker.py
+++ b/swift/cli/relinker.py
@@ -300,7 +300,8 @@ def relink(conf, logger, device):
for fname, _, _ in locations:
newfname = replace_partition_in_path(fname, next_part_power)
try:
- diskfile.relink_paths(fname, newfname, check_existing=True)
+ logger.debug('Relinking %s to %s', fname, newfname)
+ diskfile.relink_paths(fname, newfname)
relinked += 1
suffix_dir = os.path.dirname(os.path.dirname(newfname))
diskfile.invalidate_hash(suffix_dir)
diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index aa3bb84c6..6423d0253 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -443,20 +443,20 @@ def invalidate_hash(suffix_dir):
inv_fh.write(suffix + b"\n")
-def relink_paths(target_path, new_target_path, check_existing=False):
+def relink_paths(target_path, new_target_path):
"""
Hard-links a file located in target_path using the second path
new_target_path. Creates intermediate directories if required.
:param target_path: current absolute filename
:param new_target_path: new absolute filename for the hardlink
- :param check_existing: if True, check whether the link is already present
- before attempting to create a new one
+ :raises: OSError if the hard link could not be created, unless the intended
+ hard link already exists or the target_path does not exist.
+ :returns: True if the link was created by the call to this method, False
+ otherwise.
"""
-
+ link_created = False
if target_path != new_target_path:
- logging.debug('Relinking %s to %s due to next_part_power set',
- target_path, new_target_path)
new_target_dir = os.path.dirname(new_target_path)
try:
os.makedirs(new_target_dir)
@@ -464,17 +464,33 @@ def relink_paths(target_path, new_target_path, check_existing=False):
if err.errno != errno.EEXIST:
raise
- link_exists = False
- if check_existing:
- try:
- new_stat = os.stat(new_target_path)
- orig_stat = os.stat(target_path)
- link_exists = (new_stat.st_ino == orig_stat.st_ino)
- except OSError:
- pass # if anything goes wrong, try anyway
-
- if not link_exists:
+ try:
os.link(target_path, new_target_path)
+ link_created = True
+ except OSError as err:
+ # there are some circumstances in which it may be ok that the
+ # attempted link failed
+ ok = False
+ if err.errno == errno.ENOENT:
+ # this is ok if the *target* path doesn't exist anymore
+ ok = not os.path.exists(target_path)
+ if err.errno == errno.EEXIST:
+ # this is ok *if* the intended link has already been made
+ try:
+ orig_stat = os.stat(target_path)
+ except OSError as sub_err:
+ # this is ok: the *target* path doesn't exist anymore
+ ok = sub_err.errno == errno.ENOENT
+ else:
+ try:
+ new_stat = os.stat(new_target_path)
+ ok = new_stat.st_ino == orig_stat.st_ino
+ except OSError:
+ # squash this exception; the original will be raised
+ pass
+ if not ok:
+ raise err
+ return link_created
def get_part_path(dev_path, policy, partition):
@@ -1845,6 +1861,9 @@ class BaseDiskFileWriter(object):
if target_path != new_target_path:
try:
fsync_dir(os.path.dirname(target_path))
+ self.manager.logger.debug(
+ 'Relinking %s to %s due to next_part_power set',
+ target_path, new_target_path)
relink_paths(target_path, new_target_path)
except OSError as exc:
self.manager.logger.exception(
diff --git a/test/unit/cli/test_relinker.py b/test/unit/cli/test_relinker.py
index ee5c7183c..eab781c3c 100644
--- a/test/unit/cli/test_relinker.py
+++ b/test/unit/cli/test_relinker.py
@@ -37,7 +37,8 @@ from swift.common.exceptions import PathNotDir
from swift.common.storage_policy import (
StoragePolicy, StoragePolicyCollection, POLICIES, ECStoragePolicy)
-from swift.obj.diskfile import write_metadata, DiskFileRouter, DiskFileManager
+from swift.obj.diskfile import write_metadata, DiskFileRouter, \
+ DiskFileManager, relink_paths
from test.unit import FakeLogger, skip_if_no_xattrs, DEFAULT_TEST_EC_TYPE, \
patch_policies
@@ -424,6 +425,58 @@ class TestRelinker(unittest.TestCase):
self.assertFalse(os.path.exists(
os.path.join(self.part_dir, 'hashes.invalid')))
+ def test_relink_link_already_exists(self):
+ self.rb.prepare_increase_partition_power()
+ self._save_ring()
+ orig_relink_paths = relink_paths
+
+ def mock_relink_paths(target_path, new_target_path):
+ # pretend another process has created the link before this one
+ os.makedirs(self.expected_dir)
+ os.link(target_path, new_target_path)
+ orig_relink_paths(target_path, new_target_path)
+
+ with mock.patch('swift.cli.relinker.diskfile.relink_paths',
+ mock_relink_paths):
+ self.assertEqual(0, relinker.main([
+ 'relink',
+ '--swift-dir', self.testdir,
+ '--devices', self.devices,
+ '--skip-mount',
+ ]))
+
+ self.assertTrue(os.path.isdir(self.expected_dir))
+ self.assertTrue(os.path.isfile(self.expected_file))
+ stat_old = os.stat(os.path.join(self.objdir, self.object_fname))
+ stat_new = os.stat(self.expected_file)
+ self.assertEqual(stat_old.st_ino, stat_new.st_ino)
+
+ def test_relink_link_target_disappears(self):
+ # we need object name in lower half of current part so that there is no
+ # rehash of the new partition which wold erase the empty new partition
+ # - we want to assert it was created
+ self._setup_object(lambda part: part < 2 ** (PART_POWER - 1))
+ self.rb.prepare_increase_partition_power()
+ self._save_ring()
+ orig_relink_paths = relink_paths
+
+ def mock_relink_paths(target_path, new_target_path):
+ # pretend another process has cleaned up the target path
+ os.unlink(target_path)
+ orig_relink_paths(target_path, new_target_path)
+
+ with mock.patch('swift.cli.relinker.diskfile.relink_paths',
+ mock_relink_paths):
+ self.assertEqual(0, relinker.main([
+ 'relink',
+ '--swift-dir', self.testdir,
+ '--devices', self.devices,
+ '--skip-mount',
+ ]))
+
+ self.assertTrue(os.path.isdir(self.expected_dir))
+ self.assertFalse(os.path.isfile(self.expected_file))
+
def test_relink_no_applicable_policy(self):
# NB do not prepare part power increase
self._save_ring()
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index b80ce3f89..3727191d6 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -207,7 +207,8 @@ class TestDiskFileModuleMethods(unittest.TestCase):
with open(target_path, 'w') as fd:
fd.write('junk')
new_target_path = os.path.join(self.testdir, 'd2', 'f1')
- diskfile.relink_paths(target_path, new_target_path)
+ created = diskfile.relink_paths(target_path, new_target_path)
+ self.assertTrue(created)
self.assertTrue(os.path.isfile(new_target_path))
with open(new_target_path, 'r') as fd:
self.assertEqual('junk', fd.read())
@@ -225,8 +226,9 @@ class TestDiskFileModuleMethods(unittest.TestCase):
diskfile.relink_paths(target_path, new_target_path)
self.assertEqual('oops', str(cm.exception))
- def test_relink_paths_race(self):
- # test two concurrent relinks of the same object hash dir
+ def test_relink_paths_makedirs_race(self):
+ # test two concurrent relinks of the same object hash dir with race
+ # around makedirs
target_dir = os.path.join(self.testdir, 'd1')
# target dir exists
os.mkdir(target_dir)
@@ -236,31 +238,34 @@ class TestDiskFileModuleMethods(unittest.TestCase):
new_target_dir = os.path.join(self.testdir, 'd2')
new_target_path_1 = os.path.join(new_target_dir, 't1.data')
new_target_path_2 = os.path.join(new_target_dir, 't2.data')
+ created = []
def write_and_relink(target_path, new_target_path):
with open(target_path, 'w') as fd:
fd.write(target_path)
- diskfile.relink_paths(target_path, new_target_path)
+ created.append(diskfile.relink_paths(target_path, new_target_path))
calls = []
orig_makedirs = os.makedirs
- def mock_makedirs(path):
+ def mock_makedirs(path, *args):
calls.append(path)
if len(calls) == 1:
# pretend another process jumps in here and relinks same dirs
write_and_relink(target_path_2, new_target_path_2)
- return orig_makedirs(path)
+ return orig_makedirs(path, *args)
with mock.patch('swift.obj.diskfile.os.makedirs', mock_makedirs):
write_and_relink(target_path_1, new_target_path_1)
+ self.assertEqual([new_target_dir, new_target_dir], calls)
self.assertTrue(os.path.isfile(new_target_path_1))
with open(new_target_path_1, 'r') as fd:
self.assertEqual(target_path_1, fd.read())
self.assertTrue(os.path.isfile(new_target_path_2))
with open(new_target_path_2, 'r') as fd:
self.assertEqual(target_path_2, fd.read())
+ self.assertEqual([True, True], created)
def test_relink_paths_object_dir_exists_but_not_dir(self):
target_dir = os.path.join(self.testdir, 'd1')
@@ -285,6 +290,149 @@ class TestDiskFileModuleMethods(unittest.TestCase):
diskfile.relink_paths(target_path, new_target_path)
self.assertEqual(errno.ENOTDIR, cm.exception.errno)
+ def test_relink_paths_os_link_error(self):
+ # check relink_paths raises exception from os.link
+ target_dir = os.path.join(self.testdir, 'd1')
+ os.mkdir(target_dir)
+ target_path = os.path.join(target_dir, 'f1')
+ with open(target_path, 'w') as fd:
+ fd.write('junk')
+ new_target_path = os.path.join(self.testdir, 'd2', 'f1')
+ with mock.patch('swift.obj.diskfile.os.link',
+ side_effect=OSError(errno.EPERM, 'nope')):
+ with self.assertRaises(Exception) as cm:
+ diskfile.relink_paths(target_path, new_target_path)
+ self.assertEqual(errno.EPERM, cm.exception.errno)
+
+ def test_relink_paths_target_path_does_not_exist(self):
+ # check relink_paths does not raise exception
+ target_dir = os.path.join(self.testdir, 'd1')
+ os.mkdir(target_dir)
+ target_path = os.path.join(target_dir, 'f1')
+ new_target_path = os.path.join(self.testdir, 'd2', 'f1')
+ created = diskfile.relink_paths(target_path, new_target_path)
+ self.assertFalse(os.path.exists(target_path))
+ self.assertFalse(os.path.exists(new_target_path))
+ self.assertFalse(created)
+
+ def test_relink_paths_os_link_race(self):
+ # test two concurrent relinks of the same object hash dir with race
+ # around os.link
+ target_dir = os.path.join(self.testdir, 'd1')
+ # target dir exists
+ os.mkdir(target_dir)
+ target_path = os.path.join(target_dir, 't1.data')
+ # new target dir and file do not exist
+ new_target_dir = os.path.join(self.testdir, 'd2')
+ new_target_path = os.path.join(new_target_dir, 't1.data')
+ created = []
+
+ def write_and_relink(target_path, new_target_path):
+ with open(target_path, 'w') as fd:
+ fd.write(target_path)
+ created.append(diskfile.relink_paths(target_path, new_target_path))
+
+ calls = []
+ orig_link = os.link
+
+ def mock_link(path, new_path):
+ calls.append((path, new_path))
+ if len(calls) == 1:
+ # pretend another process jumps in here and links same files
+ write_and_relink(target_path, new_target_path)
+ return orig_link(path, new_path)
+
+ with mock.patch('swift.obj.diskfile.os.link', mock_link):
+ write_and_relink(target_path, new_target_path)
+
+ self.assertEqual([(target_path, new_target_path)] * 2, calls)
+ self.assertTrue(os.path.isfile(new_target_path))
+ with open(new_target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read())
+ with open(target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read())
+ self.assertEqual([True, False], created)
+
+ def test_relink_paths_different_file_exists(self):
+ # check for an exception if a hard link cannot be made because a
+ # different file already exists at new_target_path
+ target_dir = os.path.join(self.testdir, 'd1')
+ # target dir and file exists
+ os.mkdir(target_dir)
+ target_path = os.path.join(target_dir, 't1.data')
+ with open(target_path, 'w') as fd:
+ fd.write(target_path)
+ # new target dir and different file exist
+ new_target_dir = os.path.join(self.testdir, 'd2')
+ os.mkdir(new_target_dir)
+ new_target_path = os.path.join(new_target_dir, 't1.data')
+ with open(new_target_path, 'w') as fd:
+ fd.write(new_target_path)
+
+ with self.assertRaises(OSError) as cm:
+ diskfile.relink_paths(target_path, new_target_path)
+
+ self.assertEqual(errno.EEXIST, cm.exception.errno)
+ # check nothing got deleted...
+ self.assertTrue(os.path.isfile(target_path))
+ with open(target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read())
+ self.assertTrue(os.path.isfile(new_target_path))
+ with open(new_target_path, 'r') as fd:
+ self.assertEqual(new_target_path, fd.read())
+
+ def test_relink_paths_same_file_exists(self):
+ # check for no exception if a hard link cannot be made because a link
+ # to the same file already exists at the path
+ target_dir = os.path.join(self.testdir, 'd1')
+ # target dir and file exists
+ os.mkdir(target_dir)
+ target_path = os.path.join(target_dir, 't1.data')
+ with open(target_path, 'w') as fd:
+ fd.write(target_path)
+ # new target dir and link to same file exist
+ new_target_dir = os.path.join(self.testdir, 'd2')
+ os.mkdir(new_target_dir)
+ new_target_path = os.path.join(new_target_dir, 't1.data')
+ os.link(target_path, new_target_path)
+ with open(new_target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read()) # sanity check
+
+ # existing link checks ok
+ created = diskfile.relink_paths(target_path, new_target_path)
+ with open(new_target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read()) # sanity check
+ self.assertFalse(created)
+
+ # now pretend there is an error when checking that the link already
+ # exists - expect the EEXIST exception to be raised
+ orig_stat = os.stat
+
+ def mocked_stat(path):
+ if path == new_target_path:
+ raise OSError(errno.EPERM, 'cannot be sure link exists :(')
+ return orig_stat(path)
+
+ with mock.patch('swift.obj.diskfile.os.stat', mocked_stat):
+ with self.assertRaises(OSError) as cm:
+ diskfile.relink_paths(target_path, new_target_path)
+ self.assertEqual(errno.EEXIST, cm.exception.errno, str(cm.exception))
+ with open(new_target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read()) # sanity check
+
+ # ...unless while checking for an existing link the target file is
+ # found to no longer exists, which is ok
+ def mocked_stat(path):
+ if path == target_path:
+ raise OSError(errno.ENOENT, 'target longer here :)')
+ return orig_stat(path)
+
+ with mock.patch('swift.obj.diskfile.os.stat', mocked_stat):
+ created = diskfile.relink_paths(target_path, new_target_path)
+ with open(new_target_path, 'r') as fd:
+ self.assertEqual(target_path, fd.read()) # sanity check
+ self.assertFalse(created)
+
def test_extract_policy(self):
# good path names
pn = 'objects/0/606/1984527ed7ef6247c78606/1401379842.14643.data'
diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py
index a041d4f5f..21e1a4a2e 100644
--- a/test/unit/obj/test_server.py
+++ b/test/unit/obj/test_server.py
@@ -2921,12 +2921,61 @@ class TestObjectController(unittest.TestCase):
self.assertFalse(os.path.isfile(data_file(new_part, ts_1)))
self.assertFalse(os.path.isfile(data_file(old_part, ts_1)))
error_lines = self.logger.get_lines_for_level('error')
- # the older request's data file in the old partition will have been
- # cleaned up by the newer request, so it's attempt to relink will fail
- self.assertEqual(1, len(error_lines))
- self.assertIn(ts_1.internal + '.data failed: '
- '[Errno 2] No such file or directory',
- error_lines[0])
+ self.assertEqual([], error_lines)
+
+ def test_PUT_next_part_power_races_around_makedirs_enoent(self):
+ hash_path_ = hash_path('a', 'c', 'o')
+ part_power = 10
+ old_part = utils.get_partition_for_hash(hash_path_, part_power)
+ new_part = utils.get_partition_for_hash(hash_path_, part_power + 1)
+ policy = POLICIES.default
+
+ def make_request(timestamp):
+ headers = {'X-Timestamp': timestamp.internal,
+ 'Content-Length': '6',
+ 'Content-Type': 'application/octet-stream',
+ 'X-Backend-Storage-Policy-Index': int(policy),
+ 'X-Backend-Next-Part-Power': part_power + 1}
+ req = Request.blank(
+ '/sda1/%s/a/c/o' % old_part, method='PUT',
+ headers=headers, body=b'VERIFY')
+ resp = req.get_response(self.object_controller)
+ self.assertEqual(resp.status_int, 201)
+
+ def data_file(part, timestamp):
+ return os.path.join(
+ self.testdir, 'sda1',
+ storage_directory(diskfile.get_data_dir(int(policy)),
+ part, hash_path_),
+ timestamp.internal + '.data')
+
+ ts_1 = next(self.ts)
+ ts_2 = next(self.ts)
+ calls = []
+ orig_makedirs = os.makedirs
+
+ def mock_makedirs(path, *args, **kwargs):
+ # let another request race ahead just as the first is about to
+ # create the next part power object dir
+ if path == os.path.dirname(data_file(new_part, ts_1)):
+ calls.append(path)
+ if len(calls) == 1:
+ # pretend 'yield' to other request process
+ make_request(ts_2)
+ return orig_makedirs(path, *args, **kwargs)
+
+ with mock.patch('swift.obj.diskfile.os.makedirs', mock_makedirs):
+ make_request(ts_1)
+
+ self.assertEqual(
+ [os.path.dirname(data_file(new_part, ts_1)),
+ os.path.dirname(data_file(new_part, ts_1))], calls)
+ self.assertTrue(os.path.isfile(data_file(old_part, ts_2)))
+ self.assertTrue(os.path.isfile(data_file(new_part, ts_2)))
+ self.assertFalse(os.path.isfile(data_file(new_part, ts_1)))
+ self.assertFalse(os.path.isfile(data_file(old_part, ts_1)))
+ error_lines = self.logger.get_lines_for_level('error')
+ self.assertEqual([], error_lines)
def test_HEAD(self):
# Test swift.obj.server.ObjectController.HEAD