diff options
-rw-r--r-- | ceilometer/agent/base.py | 64 | ||||
-rw-r--r-- | ceilometer/notification.py | 76 | ||||
-rw-r--r-- | ceilometer/pipeline.py | 39 | ||||
-rw-r--r-- | ceilometer/service_base.py | 70 | ||||
-rw-r--r-- | ceilometer/tests/agent/agentbase.py | 102 | ||||
-rw-r--r-- | ceilometer/tests/test_notification.py | 95 |
6 files changed, 402 insertions, 44 deletions
diff --git a/ceilometer/agent/base.py b/ceilometer/agent/base.py index 254152ea..bde2acd9 100644 --- a/ceilometer/agent/base.py +++ b/ceilometer/agent/base.py @@ -26,7 +26,6 @@ import random from oslo_config import cfg from oslo_context import context from oslo_log import log -from oslo_service import service as os_service import six from six import moves from six.moves.urllib import parse as urlparse @@ -34,8 +33,9 @@ from stevedore import extension from ceilometer.agent import plugin_base from ceilometer import coordination -from ceilometer.i18n import _ +from ceilometer.i18n import _, _LI from ceilometer import pipeline as publish_pipeline +from ceilometer import service_base from ceilometer import utils LOG = log.getLogger(__name__) @@ -191,7 +191,7 @@ class PollingTask(object): exc_info=True) -class AgentManager(os_service.Service): +class AgentManager(service_base.BaseService): def __init__(self, namespaces, pollster_list, group_prefix=None): # features of using coordination and pollster-list are exclusive, and @@ -256,16 +256,16 @@ class AgentManager(os_service.Service): ) def join_partitioning_groups(self): - groups = set([self.construct_group_id(d.obj.group_id) - for d in self.discovery_manager]) + self.groups = set([self.construct_group_id(d.obj.group_id) + for d in self.discovery_manager]) # let each set of statically-defined resources have its own group static_resource_groups = set([ self.construct_group_id(utils.hash_of_set(p.resources)) for p in self.pipeline_manager.pipelines if p.resources ]) - groups.update(static_resource_groups) - for group in groups: + self.groups.update(static_resource_groups) + for group in self.groups: self.partition_coordinator.join_group(group) def create_polling_task(self): @@ -290,12 +290,7 @@ class AgentManager(os_service.Service): discovery_group_id) if discovery_group_id else None) - def start(self): - self.pipeline_manager = publish_pipeline.setup_pipeline() - - self.partition_coordinator.start() - self.join_partitioning_groups() - + def configure_polling_tasks(self): # allow time for coordination if necessary delay_start = self.partition_coordinator.is_active() @@ -303,16 +298,29 @@ class AgentManager(os_service.Service): delay_polling_time = random.randint( 0, cfg.CONF.shuffle_time_before_polling_task) + pollster_timers = [] for interval, task in six.iteritems(self.setup_polling_tasks()): delay_time = (interval + delay_polling_time if delay_start else delay_polling_time) - self.tg.add_timer(interval, - self.interval_task, - initial_delay=delay_time, - task=task) + pollster_timers.append(self.tg.add_timer(interval, + self.interval_task, + initial_delay=delay_time, + task=task)) self.tg.add_timer(cfg.CONF.coordination.heartbeat, self.partition_coordinator.heartbeat) + return pollster_timers + + def start(self): + self.pipeline_manager = publish_pipeline.setup_pipeline() + + self.partition_coordinator.start() + self.join_partitioning_groups() + + self.pollster_timers = self.configure_polling_tasks() + + self.init_pipeline_refresh() + def stop(self): if self.partition_coordinator: self.partition_coordinator.stop() @@ -356,3 +364,25 @@ class AgentManager(os_service.Service): else: LOG.warning(_('Unknown discovery extension: %s') % name) return resources + + def stop_pollsters(self): + for x in self.pollster_timers: + try: + x.stop() + self.tg.timer_done(x) + except Exception: + LOG.error(_('Error stopping pollster.'), exc_info=True) + self.pollster_timers = [] + + def reload_pipeline(self): + LOG.info(_LI("Reconfiguring polling tasks.")) + + # stop existing pollsters and leave partitioning groups + self.stop_pollsters() + for group in self.groups: + self.partition_coordinator.leave_group(group) + + # re-create partitioning groups according to pipeline + # and configure polling tasks with latest pipeline conf + self.join_partitioning_groups() + self.pollster_timers = self.configure_polling_tasks() diff --git a/ceilometer/notification.py b/ceilometer/notification.py index 900bdd9c..1e281be2 100644 --- a/ceilometer/notification.py +++ b/ceilometer/notification.py @@ -17,15 +17,15 @@ from oslo_config import cfg from oslo_context import context from oslo_log import log import oslo_messaging -from oslo_service import service as os_service from stevedore import extension from ceilometer.agent import plugin_base as base from ceilometer import coordination from ceilometer.event import endpoint as event_endpoint -from ceilometer.i18n import _, _LW +from ceilometer.i18n import _, _LI, _LW from ceilometer import messaging from ceilometer import pipeline +from ceilometer import service_base from ceilometer import utils @@ -66,7 +66,7 @@ cfg.CONF.import_opt('telemetry_driver', 'ceilometer.publisher.messaging', group='publisher_notifier') -class NotificationService(os_service.Service): +class NotificationService(service_base.BaseService): """Notification service. When running multiple agents, additional queuing sequence is required for @@ -100,30 +100,50 @@ class NotificationService(os_service.Service): publisher_id='ceilometer.notification', topic='%s-%s' % (self.NOTIFICATION_IPC, pipe.name)) - def start(self): - super(NotificationService, self).start() - self.pipeline_manager = pipeline.setup_pipeline() - if cfg.CONF.notification.store_events: - self.event_pipeline_manager = pipeline.setup_event_pipeline() - - transport = messaging.get_transport() - self.partition_coordinator = coordination.PartitionCoordinator() - self.partition_coordinator.start() + def _get_pipe_manager(self, transport, pipeline_manager): - event_pipe_manager = None if cfg.CONF.notification.workload_partitioning: pipe_manager = pipeline.SamplePipelineTransportManager() - for pipe in self.pipeline_manager.pipelines: + for pipe in pipeline_manager.pipelines: pipe_manager.add_transporter( (pipe.source.support_meter, self._get_notifier(transport, pipe))) - if cfg.CONF.notification.store_events: + else: + pipe_manager = pipeline_manager + + return pipe_manager + + def _get_event_pipeline_manager(self, transport): + + if cfg.CONF.notification.store_events: + self.event_pipeline_manager = pipeline.setup_event_pipeline() + + if cfg.CONF.notification.workload_partitioning: event_pipe_manager = pipeline.EventPipelineTransportManager() for pipe in self.event_pipeline_manager.pipelines: event_pipe_manager.add_transporter( (pipe.source.support_event, self._get_notifier(transport, pipe))) + else: + event_pipe_manager = self.event_pipeline_manager + + return event_pipe_manager + def start(self): + super(NotificationService, self).start() + + self.pipeline_manager = pipeline.setup_pipeline() + self.transport = messaging.get_transport() + + self.pipe_manager = self._get_pipe_manager(self.transport, + self.pipeline_manager) + self.event_pipe_manager = self._get_event_pipeline_manager( + self.transport) + + self.partition_coordinator = coordination.PartitionCoordinator() + self.partition_coordinator.start() + + if cfg.CONF.notification.workload_partitioning: self.ctxt = context.get_admin_context() self.group_id = self.NOTIFICATION_NAMESPACE else: @@ -133,14 +153,12 @@ class NotificationService(os_service.Service): # the notification_topics in an other way # we must create a transport to ensure the option have # beeen registered by oslo_messaging - messaging.get_notifier(transport, '') - pipe_manager = self.pipeline_manager - if cfg.CONF.notification.store_events: - event_pipe_manager = self.event_pipeline_manager + messaging.get_notifier(self.transport, '') self.group_id = None self.listeners, self.pipeline_listeners = [], [] - self._configure_main_queue_listeners(pipe_manager, event_pipe_manager) + self._configure_main_queue_listeners(self.pipe_manager, + self.event_pipe_manager) if cfg.CONF.notification.workload_partitioning: self.partition_coordinator.join_group(self.group_id) @@ -160,6 +178,8 @@ class NotificationService(os_service.Service): # Add a dummy thread to have wait() working self.tg.add_timer(604800, lambda: None) + self.init_pipeline_refresh() + def _configure_main_queue_listeners(self, pipe_manager, event_pipe_manager): notification_manager = self._get_notifications_manager(pipe_manager) @@ -231,3 +251,19 @@ class NotificationService(os_service.Service): self.partition_coordinator.stop() utils.kill_listeners(self.listeners + self.pipeline_listeners) super(NotificationService, self).stop() + + def reload_pipeline(self): + LOG.info(_LI("Reloading notification agent and listeners.")) + + self.pipe_manager = self._get_pipe_manager( + self.transport, self.pipeline_manager) + + # re-start the main queue listeners. + utils.kill_listeners(self.listeners) + self._configure_main_queue_listeners( + self.pipe_manager, self.event_pipe_manager) + + # re-start the pipeline listeners if workload partitioning + # is enabled. + if cfg.CONF.notification.workload_partitioning: + self._refresh_agent(None) diff --git a/ceilometer/pipeline.py b/ceilometer/pipeline.py index acd4b489..17f6e7db 100644 --- a/ceilometer/pipeline.py +++ b/ceilometer/pipeline.py @@ -19,6 +19,7 @@ import abc import fnmatch +import hashlib import os from oslo_config import cfg @@ -45,6 +46,15 @@ OPTS = [ default="event_pipeline.yaml", help="Configuration file for event pipeline definition." ), + cfg.BoolOpt('refresh_pipeline_cfg', + default=False, + help="Refresh Pipeline configuration on-the-fly." + ), + cfg.IntOpt('pipeline_polling_interval', + default=20, + help="Polling interval for pipeline file configuration" + " in seconds." + ), ] cfg.CONF.register_opts(OPTS) @@ -723,3 +733,32 @@ def setup_pipeline(transformer_manager=None): """Setup pipeline manager according to yaml config file.""" cfg_file = cfg.CONF.pipeline_cfg_file return _setup_pipeline_manager(cfg_file, transformer_manager) + + +def _get_pipeline_cfg_file(p_type=SAMPLE_TYPE): + if p_type == EVENT_TYPE: + cfg_file = cfg.CONF.event_pipeline_cfg_file + else: + cfg_file = cfg.CONF.pipeline_cfg_file + + if not os.path.exists(cfg_file): + cfg_file = cfg.CONF.find_file(cfg_file) + + return cfg_file + + +def get_pipeline_mtime(p_type=SAMPLE_TYPE): + cfg_file = _get_pipeline_cfg_file(p_type) + return os.path.getmtime(cfg_file) + + +def get_pipeline_hash(p_type=SAMPLE_TYPE): + + cfg_file = _get_pipeline_cfg_file(p_type) + with open(cfg_file) as fap: + data = fap.read() + if six.PY3: + data = data.encode('utf-8') + + file_hash = hashlib.md5(data).hexdigest() + return file_hash diff --git a/ceilometer/service_base.py b/ceilometer/service_base.py new file mode 100644 index 00000000..0da8cdf0 --- /dev/null +++ b/ceilometer/service_base.py @@ -0,0 +1,70 @@ +# +# Copyright 2015 Hewlett Packard +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import abc + +from oslo_config import cfg +from oslo_log import log +from oslo_service import service as os_service +import six + +from ceilometer.i18n import _, _LE, _LI +from ceilometer import pipeline + +LOG = log.getLogger(__name__) + + +@six.add_metaclass(abc.ABCMeta) +class BaseService(os_service.Service): + + def init_pipeline_refresh(self): + if cfg.CONF.refresh_pipeline_cfg: + + self.pipeline_mtime = pipeline.get_pipeline_mtime() + self.pipeline_hash = pipeline.get_pipeline_hash() + + self.tg.add_timer(cfg.CONF.pipeline_polling_interval, + self.refresh_pipeline) + + @abc.abstractmethod + def reload_pipeline(self): + """Reload pipeline in the agents.""" + + def refresh_pipeline(self): + mtime = pipeline.get_pipeline_mtime() + if mtime > self.pipeline_mtime: + LOG.info(_LI('Pipeline configuration file has been updated.')) + + self.pipeline_mtime = mtime + _hash = pipeline.get_pipeline_hash() + + if _hash != self.pipeline_hash: + LOG.info(_LI("Detected change in pipeline configuration.")) + + try: + self.pipeline_manager = pipeline.setup_pipeline() + LOG.debug(_("Pipeline has been refreshed. " + "old hash: %(old)s, new hash: %(new)s") % + ({'old': self.pipeline_hash, + 'new': _hash})) + except Exception as err: + LOG.debug(_("Active pipeline config's hash is %s") % + self.pipeline_hash) + LOG.exception(_LE('Unable to load changed pipeline: %s') + % err) + return + + self.pipeline_hash = _hash + self.reload_pipeline() diff --git a/ceilometer/tests/agent/agentbase.py b/ceilometer/tests/agent/agentbase.py index 34e93f72..26a92a65 100644 --- a/ceilometer/tests/agent/agentbase.py +++ b/ceilometer/tests/agent/agentbase.py @@ -24,14 +24,20 @@ import abc import copy import datetime +import shutil +import eventlet import mock from oslo_config import fixture as fixture_config +from oslo_service import service as os_service +from oslo_utils import timeutils from oslotest import mockpatch import six from stevedore import extension +import yaml from ceilometer.agent import plugin_base +from ceilometer.openstack.common import fileutils from ceilometer import pipeline from ceilometer import publisher from ceilometer.publisher import test as test_publisher @@ -293,6 +299,102 @@ class BaseAgentManagerTestCase(base.BaseTestCase): timer_call = mock.call(1.0, self.mgr.partition_coordinator.heartbeat) self.assertEqual([timer_call], self.mgr.tg.add_timer.call_args_list) + @mock.patch('ceilometer.pipeline.setup_pipeline') + def test_start_with_pipeline_poller(self, setup_pipeline): + self.mgr.join_partitioning_groups = mock.MagicMock() + self.mgr.setup_polling_tasks = mock.MagicMock() + + self.CONF.set_override('heartbeat', 1.0, group='coordination') + self.CONF.set_override('refresh_pipeline_cfg', True) + self.CONF.set_override('pipeline_polling_interval', 5) + self.mgr.start() + setup_pipeline.assert_called_once_with() + self.mgr.partition_coordinator.start.assert_called_once_with() + self.mgr.join_partitioning_groups.assert_called_once_with() + self.mgr.setup_polling_tasks.assert_called_once_with() + timer_call = mock.call(1.0, self.mgr.partition_coordinator.heartbeat) + pipeline_poller_call = mock.call(5, self.mgr.refresh_pipeline) + self.assertEqual([timer_call, pipeline_poller_call], + self.mgr.tg.add_timer.call_args_list) + + def test_start_with_reloadable_pipeline(self): + + def setup_pipeline_file(pipeline): + if six.PY3: + pipeline = pipeline.encode('utf-8') + + pipeline_cfg_file = fileutils.write_to_tempfile(content=pipeline, + prefix="pipeline", + suffix="yaml") + return pipeline_cfg_file + + self.CONF.set_override('heartbeat', 1.0, group='coordination') + self.CONF.set_override('refresh_pipeline_cfg', True) + self.CONF.set_override('pipeline_polling_interval', 2) + + pipeline = yaml.dump({ + 'sources': [{ + 'name': 'test_pipeline', + 'interval': 1, + 'meters': ['test'], + 'resources': ['test://'] if self.source_resources else [], + 'sinks': ['test_sink']}], + 'sinks': [{ + 'name': 'test_sink', + 'transformers': [], + 'publishers': ["test"]}] + }) + + pipeline_cfg_file = setup_pipeline_file(pipeline) + + self.CONF.set_override("pipeline_cfg_file", pipeline_cfg_file) + self.mgr.tg = os_service.threadgroup.ThreadGroup(1000) + self.mgr.start() + pub = self.mgr.pipeline_manager.pipelines[0].publishers[0] + self.expected_samples = 1 + start = timeutils.utcnow() + while timeutils.delta_seconds(start, timeutils.utcnow()) < 600: + if len(pub.samples) >= self.expected_samples: + break + eventlet.sleep(0) + + del pub.samples[0].resource_metadata['resources'] + self.assertEqual(self.Pollster.test_data, pub.samples[0]) + + # Flush publisher samples to test reloading + pub.samples = [] + # Modify the collection targets + pipeline = yaml.dump({ + 'sources': [{ + 'name': 'test_pipeline', + 'interval': 1, + 'meters': ['testanother'], + 'resources': ['test://'] if self.source_resources else [], + 'sinks': ['test_sink']}], + 'sinks': [{ + 'name': 'test_sink', + 'transformers': [], + 'publishers': ["test"]}] + }) + + updated_pipeline_cfg_file = setup_pipeline_file(pipeline) + # Move/re-name the updated pipeline file to the original pipeline + # file path as recorded in oslo config + shutil.move(updated_pipeline_cfg_file, pipeline_cfg_file) + # Random sleep to let the pipeline poller complete the reloading + eventlet.sleep(3) + + pub = self.mgr.pipeline_manager.pipelines[0].publishers[0] + self.expected_samples = 1 + start = timeutils.utcnow() + while timeutils.delta_seconds(start, timeutils.utcnow()) < 600: + if len(pub.samples) >= self.expected_samples: + break + eventlet.sleep(0) + + del pub.samples[0].resource_metadata['resources'] + self.assertEqual(self.PollsterAnother.test_data, pub.samples[0]) + def test_join_partitioning_groups(self): self.mgr.discovery_manager = self.create_discovery_manager() self.mgr.join_partitioning_groups() diff --git a/ceilometer/tests/test_notification.py b/ceilometer/tests/test_notification.py index 6827cc2c..3337d6ba 100644 --- a/ceilometer/tests/test_notification.py +++ b/ceilometer/tests/test_notification.py @@ -14,6 +14,8 @@ # under the License. """Tests for Ceilometer notify daemon.""" +import shutil + import eventlet import mock from oslo_config import fixture as fixture_config @@ -171,16 +173,12 @@ class TestNotification(tests_base.BaseTestCase): class BaseRealNotification(tests_base.BaseTestCase): - def setUp(self): - super(BaseRealNotification, self).setUp() - self.CONF = self.useFixture(fixture_config.Config()).conf - self.setup_messaging(self.CONF, 'nova') - + def setup_pipeline(self, counter_names): pipeline = yaml.dump({ 'sources': [{ 'name': 'test_pipeline', 'interval': 5, - 'meters': ['instance', 'memory'], + 'meters': counter_names, 'sinks': ['test_sink'] }], 'sinks': [{ @@ -191,12 +189,22 @@ class BaseRealNotification(tests_base.BaseTestCase): }) if six.PY3: pipeline = pipeline.encode('utf-8') - self.expected_samples = 2 + pipeline_cfg_file = fileutils.write_to_tempfile(content=pipeline, prefix="pipeline", suffix="yaml") + return pipeline_cfg_file + + def setUp(self): + super(BaseRealNotification, self).setUp() + self.CONF = self.useFixture(fixture_config.Config()).conf + self.setup_messaging(self.CONF, 'nova') + + pipeline_cfg_file = self.setup_pipeline(['instance', 'memory']) self.CONF.set_override("pipeline_cfg_file", pipeline_cfg_file) + self.expected_samples = 2 + self.CONF.set_override("store_events", True, group="notification") self.CONF.set_override("disable_non_metric_meters", False, group="notification") @@ -245,6 +253,79 @@ class BaseRealNotification(tests_base.BaseTestCase): self.assertEqual(["9f9d01b9-4a58-4271-9e27-398b21ab20d1"], resources) +class TestRealNotificationReloadablePipeline(BaseRealNotification): + + def setUp(self): + super(TestRealNotificationReloadablePipeline, self).setUp() + self.CONF.set_override('refresh_pipeline_cfg', True) + self.CONF.set_override('pipeline_polling_interval', 1) + self.srv = notification.NotificationService() + + @mock.patch('ceilometer.publisher.test.TestPublisher') + def test_notification_pipeline_poller(self, fake_publisher_cls): + fake_publisher_cls.return_value = self.publisher + self.srv.tg = mock.MagicMock() + self.srv.start() + + pipeline_poller_call = mock.call(1, self.srv.refresh_pipeline) + self.assertIn(pipeline_poller_call, + self.srv.tg.add_timer.call_args_list) + + @mock.patch('ceilometer.publisher.test.TestPublisher') + def test_notification_reloaded_pipeline(self, fake_publisher_cls): + fake_publisher_cls.return_value = self.publisher + + pipeline_cfg_file = self.setup_pipeline(['instance']) + self.CONF.set_override("pipeline_cfg_file", pipeline_cfg_file) + + self.expected_samples = 1 + self.srv.start() + + notifier = messaging.get_notifier(self.transport, + "compute.vagrant-precise") + notifier.info(context.RequestContext(), 'compute.instance.create.end', + TEST_NOTICE_PAYLOAD) + + start = timeutils.utcnow() + while timeutils.delta_seconds(start, timeutils.utcnow()) < 600: + if (len(self.publisher.samples) >= self.expected_samples and + len(self.publisher.events) >= self.expected_events): + break + eventlet.sleep(0) + + self.assertEqual(self.expected_samples, len(self.publisher.samples)) + + # Flush publisher samples to test reloading + self.publisher.samples = [] + # Modify the collection targets + updated_pipeline_cfg_file = self.setup_pipeline(['vcpus', + 'disk.root.size']) + # Move/re-name the updated pipeline file to the original pipeline + # file path as recorded in oslo config + shutil.move(updated_pipeline_cfg_file, pipeline_cfg_file) + + self.expected_samples = 2 + # Random sleep to let the pipeline poller complete the reloading + eventlet.sleep(3) + # Send message again to verify the reload works + notifier = messaging.get_notifier(self.transport, + "compute.vagrant-precise") + notifier.info(context.RequestContext(), 'compute.instance.create.end', + TEST_NOTICE_PAYLOAD) + + start = timeutils.utcnow() + while timeutils.delta_seconds(start, timeutils.utcnow()) < 600: + if (len(self.publisher.samples) >= self.expected_samples and + len(self.publisher.events) >= self.expected_events): + break + eventlet.sleep(0) + + self.assertEqual(self.expected_samples, len(self.publisher.samples)) + + (self.assertIn(sample.name, ['disk.root.size', 'vcpus']) + for sample in self.publisher.samples) + + class TestRealNotification(BaseRealNotification): def setUp(self): |