summary | refs | log | tree | commit | diff
path: root/swift/proxy/controllers/base.py
diff options
context:
space:
mode:
author: Clay Gerrard <clay.gerrard@gmail.com> 2020-08-05 11:19:48 -0500
committer: Tim Burke <tim.burke@gmail.com> 2020-09-08 14:33:09 -0700
commit: 754defc39c0ffd7d68c9913d4da1e38c503bf914 (patch)
tree: 287298ea7a7381ac45e6fe694016eac8a34e9394 /swift/proxy/controllers/base.py
parent: 06091172c218929bcee9c66c50e846acd8326a32 (diff)
download: swift-754defc39c0ffd7d68c9913d4da1e38c503bf914.tar.gz
Client should retry when there's just one 404 and a bunch of errors
During a rebalance, it's expected that we may get a 404 for data that does exist elsewhere in the cluster. Normally this isn't a problem; the proxy sees the 404, keeps digging, and one of the other primaries will serve the response.

Previously, if the other replicas were heavily loaded, the proxy would see a bunch of timeouts and the fresh (empty) primary, treat the 404 as good, and send that on to the client.

Now, have the proxy throw out that first 404 (provided it doesn't have a timestamp); it will then return a 503 to the client, indicating that it should try again.

Add a new (per-policy) proxy-server config option, rebalance_missing_suppression_count; operators may use this to increase the number of 404-no-timestamp responses to discard if their rebalances are going faster than replication can keep up, or set it to zero to return to the previous behavior.

Change-Id: If4bd39788642c00d66579b26144af8f116735b4d
Diffstat (limited to 'swift/proxy/controllers/base.py')
-rw-r--r--  swift/proxy/controllers/base.py  11
1 files changed, 11 insertions, 0 deletions
diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py
index 94f61434a..209feaa9e 100644
--- a/swift/proxy/controllers/base.py
+++ b/swift/proxy/controllers/base.py
@@ -873,6 +873,10 @@ class GetOrHeadHandler(object):
self.policy = policy
self.node = None
self.latest_404_timestamp = Timestamp(0)
+ policy_options = self.app.get_policy_options(self.policy)
+ self.rebalance_missing_suppression_count = min(
+ policy_options.rebalance_missing_suppression_count,
+ node_iter.num_primary_nodes - 1)
# stuff from request
self.req_method = req.method
@@ -1320,6 +1324,13 @@ class GetOrHeadHandler(object):
# throw out 5XX and 404s from handoff nodes unless the data is
# really on disk and had been DELETEd
return False
+
+ if self.rebalance_missing_suppression_count > 0 and \
+ possible_source.status == HTTP_NOT_FOUND and \
+ not Timestamp(src_headers.get('x-backend-timestamp', 0)):
+ self.rebalance_missing_suppression_count -= 1
+ return False
+
self.statuses.append(possible_source.status)
self.reasons.append(possible_source.reason)
self.bodies.append(possible_source.read())