summaryrefslogtreecommitdiff
path: root/swift/container
diff options
context:
space:
mode:
author: Alistair Coles <alistairncoles@gmail.com> 2022-08-12 15:33:46 +0100
committer: Alistair Coles <alistairncoles@gmail.com> 2022-11-16 16:12:32 +0000
commit: 2bcf3d1a8eb2e465b3d7afea7a6efb3b4a7d43e8 (patch)
tree: 1edf25d13a3337a3226f926f989164ca37579119 /swift/container
parent: c82c73122b4ceeca1740989bc213d7ea9ec6fdda (diff)
download: swift-2bcf3d1a8eb2e465b3d7afea7a6efb3b4a7d43e8.tar.gz
sharder: merge shard shard_ranges from root while sharding
We've seen shards become stuck while sharding because they had incomplete or stale deleted shard ranges. The root container had more complete and useful shard ranges into which objects could have been cleaved, but the shard never merged the root's shard ranges.

While the sharder is auditing shard container DBs it would previously only merge shard ranges fetched from root into the shard DB if the shard was shrinking or the shard ranges were known to be children of the shard. With this patch the sharder will now merge other shard ranges from root during sharding as well as shrinking.

Shard ranges from root are only merged if they would not result in overlaps or gaps in the set of shard ranges in the shard DB. Shard ranges that are known to be ancestors of the shard are never merged, except the root shard range which may be merged into a shrinking shard. These checks were not previously applied when merging shard ranges into a shrinking shard.

The two substantive changes with this patch are therefore:

  - shard ranges from root are now merged during sharding, subject to checks.
  - shard ranges from root are still merged during shrinking, but are now subjected to checks.

Change-Id: I066cfbd9062c43cd9638710882ae9bd85a5b4c37
Diffstat (limited to 'swift/container')
-rw-r--r--  swift/container/backend.py    5
-rw-r--r--  swift/container/sharder.py  125
2 files changed, 99 insertions, 31 deletions
diff --git a/swift/container/backend.py b/swift/container/backend.py
index 980616193..760a72697 100644
--- a/swift/container/backend.py
+++ b/swift/container/backend.py
@@ -454,10 +454,7 @@ class ContainerBroker(DatabaseBroker):
for sharding to have been initiated, False otherwise.
"""
own_shard_range = self.get_own_shard_range()
- if own_shard_range.state in (ShardRange.SHARDING,
- ShardRange.SHRINKING,
- ShardRange.SHARDED,
- ShardRange.SHRUNK):
+ if own_shard_range.state in ShardRange.CLEAVING_STATES:
return bool(self.get_shard_ranges())
return False
diff --git a/swift/container/sharder.py b/swift/container/sharder.py
index 71f2140c6..ab25a19ca 100644
--- a/swift/container/sharder.py
+++ b/swift/container/sharder.py
@@ -40,7 +40,7 @@ from swift.common.utils import get_logger, config_true_value, \
Everything, config_auto_int_value, ShardRangeList, config_percent_value
from swift.container.backend import ContainerBroker, \
RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \
- SHARD_UPDATE_STATES
+ SHARD_UPDATE_STATES, sift_shard_ranges
from swift.container.replicator import ContainerReplicator
@@ -101,7 +101,7 @@ def _find_discontinuity(paths, start):
return longest_start_path, longest_end_path
-def find_paths_with_gaps(shard_ranges):
+def find_paths_with_gaps(shard_ranges, within_range=None):
"""
Find gaps in the shard ranges and pairs of shard range paths that lead to
and from those gaps. For each gap a single pair of adjacent paths is
@@ -109,6 +109,9 @@ def find_paths_with_gaps(shard_ranges):
entire namespace with no overlaps.
:param shard_ranges: a list of instances of ShardRange.
+ :param within_range: an optional ShardRange that constrains the search
+ space; the method will only return gaps within this range. The default
+ is the entire namespace.
:return: A list of tuples of ``(start_path, gap_range, end_path)`` where
``start_path`` is a list of ShardRanges leading to the gap,
``gap_range`` is a ShardRange synthesized to describe the namespace
@@ -119,6 +122,7 @@ def find_paths_with_gaps(shard_ranges):
namespace.
"""
timestamp = Timestamp.now()
+ within_range = within_range or ShardRange('entire/namespace', timestamp)
shard_ranges = ShardRangeList(shard_ranges)
# note: find_paths results do not include shrinking ranges
paths = find_paths(shard_ranges)
@@ -149,7 +153,8 @@ def find_paths_with_gaps(shard_ranges):
timestamp,
lower=start_path.upper,
upper=end_path.lower)
- paths_with_gaps.append((start_path, gap_range, end_path))
+ if gap_range.overlaps(within_range):
+ paths_with_gaps.append((start_path, gap_range, end_path))
return paths_with_gaps
@@ -497,6 +502,27 @@ def rank_paths(paths, shard_range_to_span):
return paths
+def combine_shard_ranges(new_shard_ranges, existing_shard_ranges):
+ """
+ Combines new and existing shard ranges based on most recent state.
+
+ :param new_shard_ranges: a list of ShardRange instances.
+ :param existing_shard_ranges: a list of ShardRange instances.
+ :return: a list of ShardRange instances.
+ """
+ new_shard_ranges = [dict(sr) for sr in new_shard_ranges]
+ existing_shard_ranges = [dict(sr) for sr in existing_shard_ranges]
+ to_add, to_delete = sift_shard_ranges(
+ new_shard_ranges,
+ dict((sr['name'], sr) for sr in existing_shard_ranges))
+ result = [ShardRange.from_dict(existing)
+ for existing in existing_shard_ranges
+ if existing['name'] not in to_delete]
+ result.extend([ShardRange.from_dict(sr) for sr in to_add])
+ return sorted([sr for sr in result if not sr.deleted],
+ key=ShardRange.sort_key)
+
+
class CleavingContext(object):
"""
Encapsulates metadata associated with the process of cleaving a retiring
@@ -916,9 +942,7 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
if db_state not in (UNSHARDED, SHARDING, SHARDED):
return
own_shard_range = broker.get_own_shard_range()
- if own_shard_range.state not in (
- ShardRange.SHARDING, ShardRange.SHARDED,
- ShardRange.SHRINKING, ShardRange.SHRUNK):
+ if own_shard_range.state not in ShardRange.CLEAVING_STATES:
return
if db_state == SHARDED:
@@ -1159,7 +1183,7 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
warnings = []
own_shard_range = broker.get_own_shard_range()
- if own_shard_range.state in (ShardRange.SHARDING, ShardRange.SHARDED):
+ if own_shard_range.state in ShardRange.SHARDING_STATES:
shard_ranges = [sr for sr in broker.get_shard_ranges()
if sr.state != ShardRange.SHRINKING]
paths_with_gaps = find_paths_with_gaps(shard_ranges)
@@ -1245,9 +1269,8 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
own_shard_range = broker.get_own_shard_range()
if (orig_own_shard_range != own_shard_range or
orig_own_shard_range.state != own_shard_range.state):
- self.logger.info(
- 'Updated own shard range from %s to %s',
- orig_own_shard_range, own_shard_range)
+ self.logger.info('Updated own shard range from %s to %s',
+ orig_own_shard_range, own_shard_range)
elif shard_range.is_child_of(own_shard_range):
children_shard_ranges.append(shard_range)
else:
@@ -1262,19 +1285,70 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
len(children_shard_ranges))
broker.merge_shard_ranges(children_shard_ranges)
- if (other_shard_ranges and
- own_shard_range.state in ShardRange.SHRINKING_STATES):
- # If own_shard_range state is shrinking, save off *all* shards
- # returned because these may contain shards into which this
- # shard is to shrink itself; shrinking is the only case when we
- # want to learn about *other* shard ranges from the root.
- # We need to include shrunk state too, because one replica of a
- # shard may already have moved the own_shard_range state to
- # shrunk while another replica may still be in the process of
- # shrinking.
- self.logger.debug('Updating %s other shard range(s) from root',
- len(other_shard_ranges))
- broker.merge_shard_ranges(other_shard_ranges)
+ if (other_shard_ranges
+ and own_shard_range.state in ShardRange.CLEAVING_STATES
+ and not broker.is_sharded()):
+ # Other shard ranges returned from the root may need to be merged
+ # for the purposes of sharding or shrinking this shard:
+ #
+ # Shrinking states: If the up-to-date state is shrinking, the
+ # shards fetched from root may contain shards into which this shard
+ # is to shrink itself. Shrinking is initiated by modifying multiple
+ # neighboring shard range states *in the root*, rather than
+ # modifying a shard directly. We therefore need to learn about
+ # *other* neighboring shard ranges from the root, possibly
+ # including the root itself. We need to include shrunk state too,
+ # because one replica of a shard may already have moved the
+ # own_shard_range state to shrunk while another replica may still
+ # be in the process of shrinking.
+ #
+ # Sharding states: Normally a shard will shard to its own children.
+ # However, in some circumstances a shard may need to shard to other
+ # non-children sub-shards. For example, a shard range repair may
+ # cause a child sub-shard to be deleted and its namespace covered
+ # by another 'acceptor' shard.
+ #
+ # Therefore, if the up-to-date own_shard_range state indicates that
+ # sharding or shrinking is in progress, then other shard ranges
+ # will be merged, with the following caveats: we never expect a
+ # shard to shard to any ancestor shard range including the root,
+ # but containers might ultimately *shrink* to root; we never want
+ # to cleave to a container that is itself sharding or shrinking;
+ # the merged shard ranges should not result in gaps or overlaps in
+ # the namespace of this shard.
+ #
+ # Note: the search for ancestors is guaranteed to find the parent
+ # and root *if they are present*, but if any ancestor is missing
+ # then there is a chance that older generations in the
+ # other_shard_ranges will not be filtered and could be merged. That
+ # is only a problem if they are somehow still in ACTIVE state, and
+ # no overlap is detected, so the ancestor is merged.
+ ancestor_names = [
+ sr.name for sr in own_shard_range.find_ancestors(shard_ranges)]
+ filtered_other_shard_ranges = [
+ sr for sr in other_shard_ranges
+ if (sr.name not in ancestor_names
+ and (sr.state not in ShardRange.CLEAVING_STATES
+ or sr.deleted))
+ ]
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
+ root_shard_range = own_shard_range.find_root(
+ other_shard_ranges)
+ if (root_shard_range and
+ root_shard_range.state == ShardRange.ACTIVE):
+ filtered_other_shard_ranges.append(root_shard_range)
+ existing_shard_ranges = broker.get_shard_ranges()
+ combined_shard_ranges = combine_shard_ranges(
+ filtered_other_shard_ranges, existing_shard_ranges)
+ overlaps = find_overlapping_ranges(combined_shard_ranges)
+ paths_with_gaps = find_paths_with_gaps(
+ combined_shard_ranges, own_shard_range)
+ if not (overlaps or paths_with_gaps):
+ # only merge if shard ranges appear to be *good*
+ self.logger.debug(
+ 'Updating %s other shard range(s) from root',
+ len(filtered_other_shard_ranges))
+ broker.merge_shard_ranges(filtered_other_shard_ranges)
return own_shard_range, own_shard_range_from_root
@@ -2168,10 +2242,7 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
broker, shard_ranges=[broker.get_own_shard_range()])
own_shard_range = broker.get_own_shard_range()
- if own_shard_range.state in (ShardRange.SHARDING,
- ShardRange.SHRINKING,
- ShardRange.SHARDED,
- ShardRange.SHRUNK):
+ if own_shard_range.state in ShardRange.CLEAVING_STATES:
if broker.get_shard_ranges():
# container has been given shard ranges rather than
# found them e.g. via replication or a shrink event,