diff options
author | Florian Ludwig <f.ludwig@greyrook.com> | 2020-12-19 08:37:39 +0100 |
---|---|---|
committer | Florian Ludwig <f.ludwig@greyrook.com> | 2020-12-19 08:39:58 +0100 |
commit | f9d61695936933d4254377e49609eca9063b0ff1 (patch) | |
tree | 6095197eac91c7a4ab90629dd36f8c96bf363996 | |
parent | 508caa425d7ab4857666a73ed23853ad02044d46 (diff) | |
download | rdflib-f9d61695936933d4254377e49609eca9063b0ff1.tar.gz |
document reason for backoff in retry
-rw-r--r-- | test/helper.py | 36 |
1 files changed, 32 insertions, 4 deletions
diff --git a/test/helper.py b/test/helper.py index 12c529d5..240d90c7 100644 --- a/test/helper.py +++ b/test/helper.py @@ -5,8 +5,36 @@ import rdflib import rdflib.query -MAX_RETRY = 3 +MAX_RETRY = 10 +BACKOFF_FACTOR = 1.5 def query_with_retry(graph: rdflib.Graph, query: str, **kwargs) -> rdflib.query.Result: + """Query graph an retry on failure, returns preloaded result + + The tests run against outside network targets which results + in flaky tests. Therefor retries are needed to increase stability. + + There are two main categories why these might fail: + + * Resource shortage on the server running the tests (e.g. out of ports) + * Issues outside the server (network, target server, etc) + + As fast feedback is important the retry should be done quickly. + Therefor the first retry is done after 100ms. But if the issue is + outside the server running the tests it we need to be good + citizenship of the internet and not hit servers of others at + a constant rate. (Also it might get us banned) + + Therefor this function implements a backoff mechanism. + + When adjusting the parameters please keep in mind that several + tests might run on the same machine at the same time + on our CI, and we really don't want to hit any rate limiting. + + The maximum time the function waits is: + + >>> sum((BACKOFF_FACTOR ** backoff) / 10 for backoff in range(MAX_RETRY)) + 11.3330078125 + """ backoff = 0 for i in range(MAX_RETRY): try: @@ -17,7 +45,7 @@ def query_with_retry(graph: rdflib.Graph, query: str, **kwargs) -> rdflib.query. if i == MAX_RETRY -1: raise e - backoff_s = 1.2 ** backoff - print(f"Network eroror {e} during query, waiting for {backoff_s}s and retrying") - time.sleep(1) + backoff_s = (BACKOFF_FACTOR ** backoff) / 10 + print(f"Network error {e} during query, waiting for {backoff_s:.2f}s and retrying") + time.sleep(backoff_s) backoff += 1 |