summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Merritt <sam@swiftstack.com>2017-10-12 10:45:12 -0700
committerTim Burke <tim.burke@gmail.com>2017-11-01 12:19:04 -0700
commit633998983c2bd69c003bc343ff65872440be5439 (patch)
tree86e696dc60836e8df4eac9b51192b08004ca5261
parent35ab6a5da363aa10c8fa88df0a388ad6518164e7 (diff)
downloadswift-633998983c2bd69c003bc343ff65872440be5439.tar.gz
Use "poll" or "selects" Eventlet hub for all Swift daemons.
Previously, Swift's WSGI servers, the object replicator, and the object reconstructor were setting Eventlet's hub to either "poll" or "selects", depending on availability. Other daemons were letting Eventlet use its default hub, which is "epoll". In any daemons that fork, we really don't want to use epoll. Epoll instances end up shared between the parent and all children, and you get some awful messes when file descriptors are shared. Here's an example where two processes are trying to wait on the same file descriptor using the same epoll instance, and everything goes wrong: [proc A] epoll_ctl(6, EPOLL_CTL_ADD, 3, ...) = 0 [proc B] epoll_ctl(6, EPOLL_CTL_ADD, 3, ...) = -1 EEXIST (File exists) [proc B] epoll_wait(6, ...) = 1 [proc B] epoll_ctl(6, EPOLL_CTL_DEL, 3, ...) = 0 [proc A] epoll_wait(6, ...) This primarily affects the container updater and object updater since they fork. I've decided to change the hub for all Swift daemons so that we don't add multiprocessing support to some other daemon someday and suffer through this same bug again. This problem was made more apparent by commit 6d16079, which made our logging mutex use file descriptors. However, it could have struck on any shared file descriptor on which a read or write returned EAGAIN. Change-Id: Ic2c1178ac918c88b0b901e581eb4fab3b2666cfe Closes-Bug: 1722951
-rw-r--r--swift/common/daemon.py3
-rw-r--r--swift/common/utils.py19
-rw-r--r--swift/obj/reconstructor.py8
-rw-r--r--swift/obj/replicator.py6
-rw-r--r--test/unit/common/test_daemon.py5
5 files changed, 30 insertions, 11 deletions
diff --git a/swift/common/daemon.py b/swift/common/daemon.py
index 39fc66c80..7127b6c58 100644
--- a/swift/common/daemon.py
+++ b/swift/common/daemon.py
@@ -20,6 +20,7 @@ import signal
from re import sub
import eventlet.debug
+from eventlet.hubs import use_hub
from swift.common import utils
@@ -83,6 +84,8 @@ def run_daemon(klass, conf_file, section_name='', once=False, **kwargs):
# and results in an exit code of 1.
sys.exit(e)
+ use_hub(utils.get_hub())
+
# once on command line (i.e. daemonize=false) will over-ride config
once = once or not utils.config_true_value(conf.get('daemonize', 'true'))
diff --git a/swift/common/utils.py b/swift/common/utils.py
index 3add489a9..eb8bb76d0 100644
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -1938,6 +1938,25 @@ def get_hub():
getting swallowed somewhere. Then when that file descriptor
was re-used, eventlet would freak right out because it still
thought it was waiting for activity from it in some other coro.
+
+ Another note about epoll: it's hard to use when forking. epoll works
+ like so:
+
+ * create an epoll instance: efd = epoll_create(...)
+
+ * register file descriptors of interest with epoll_ctl(efd,
+ EPOLL_CTL_ADD, fd, ...)
+
+ * wait for events with epoll_wait(efd, ...)
+
+ If you fork, you and all your child processes end up using the same
+ epoll instance, and everyone becomes confused. It is possible to use
+ epoll and fork and still have a correct program as long as you do the
+ right things, but eventlet doesn't do those things. Really, it can't
+ even try to do those things since it doesn't get notified of forks.
+
+ In contrast, both poll() and select() specify the set of interesting
+ file descriptors with each call, so there's no problem with forking.
"""
try:
import select
diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py
index 35d70b01d..711f87be9 100644
--- a/swift/obj/reconstructor.py
+++ b/swift/obj/reconstructor.py
@@ -25,14 +25,13 @@ import six
import six.moves.cPickle as pickle
import shutil
-from eventlet import (GreenPile, GreenPool, Timeout, sleep, hubs, tpool,
- spawn)
+from eventlet import (GreenPile, GreenPool, Timeout, sleep, tpool, spawn)
from eventlet.support.greenlets import GreenletExit
from swift import gettext_ as _
from swift.common.utils import (
whataremyips, unlink_older_than, compute_eta, get_logger,
- dump_recon_cache, mkdirs, config_true_value, list_from_csv, get_hub,
+ dump_recon_cache, mkdirs, config_true_value, list_from_csv,
tpool_reraise, GreenAsyncPile, Timestamp, remove_file)
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.bufferedhttp import http_connect
@@ -50,9 +49,6 @@ from swift.common.exceptions import ConnectionTimeout, DiskFileError, \
SYNC, REVERT = ('sync_only', 'sync_revert')
-hubs.use_hub(get_hub())
-
-
def _get_partners(frag_index, part_nodes):
"""
Returns the left and right partners of the node whose index is
diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py
index fc7f353f0..0b016c30f 100644
--- a/swift/obj/replicator.py
+++ b/swift/obj/replicator.py
@@ -25,7 +25,7 @@ import six.moves.cPickle as pickle
from swift import gettext_ as _
import eventlet
-from eventlet import GreenPool, tpool, Timeout, sleep, hubs
+from eventlet import GreenPool, tpool, Timeout, sleep
from eventlet.green import subprocess
from eventlet.support.greenlets import GreenletExit
@@ -33,7 +33,7 @@ from swift.common.ring.utils import is_local_device
from swift.common.utils import whataremyips, unlink_older_than, \
compute_eta, get_logger, dump_recon_cache, ismount, \
rsync_module_interpolation, mkdirs, config_true_value, list_from_csv, \
- get_hub, tpool_reraise, config_auto_int_value, storage_directory
+ tpool_reraise, config_auto_int_value, storage_directory
from swift.common.bufferedhttp import http_connect
from swift.common.daemon import Daemon
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
@@ -43,8 +43,6 @@ from swift.common.storage_policy import POLICIES, REPL_POLICY
DEFAULT_RSYNC_TIMEOUT = 900
-hubs.use_hub(get_hub())
-
def _do_listdir(partition, replication_cycle):
return (((partition + replication_cycle) % 10) == 0)
diff --git a/test/unit/common/test_daemon.py b/test/unit/common/test_daemon.py
index 92376ad8b..a8bd9ddae 100644
--- a/test/unit/common/test_daemon.py
+++ b/test/unit/common/test_daemon.py
@@ -102,11 +102,14 @@ class TestRunDaemon(unittest.TestCase):
def test_run_daemon(self):
sample_conf = "[my-daemon]\nuser = %s\n" % getuser()
- with tmpfile(sample_conf) as conf_file:
+ with tmpfile(sample_conf) as conf_file, \
+ mock.patch('swift.common.daemon.use_hub') as mock_use_hub:
with mock.patch.dict('os.environ', {'TZ': ''}):
daemon.run_daemon(MyDaemon, conf_file)
self.assertEqual(MyDaemon.forever_called, True)
self.assertTrue(os.environ['TZ'] is not '')
+ self.assertEqual(mock_use_hub.mock_calls,
+ [mock.call(utils.get_hub())])
daemon.run_daemon(MyDaemon, conf_file, once=True)
self.assertEqual(MyDaemon.once_called, True)