summaryrefslogtreecommitdiff
path: root/ironic_python_agent/inspect.py
blob: 17a6b91d3bea3a4cbbe95d5cb941322470af268c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import random
import select
import threading

from ironic_lib import exception
from oslo_config import cfg
from oslo_log import log

from ironic_python_agent import errors
from ironic_python_agent import inspector

LOG = log.getLogger(__name__)


class IronicInspection(threading.Thread):
    """Class for manual inspection functionality.

    The thread performs one inspection via ``inspector.inspect()``. When the
    ``introspection_daemon`` option is enabled it then keeps repeating the
    inspection, sleeping roughly ``introspection_daemon_post_interval``
    seconds (with random jitter) between attempts and backing off
    exponentially after failures. The loop ends when the character ``'a'``
    is read from the internal pipe (i.e. written to ``self.writer``).
    """

    # If we could wait at most N seconds between inspections (or in case of
    # an error) we will instead wait r x N seconds, where r is a random value
    # between these multipliers.
    min_jitter_multiplier = 0.7
    max_jitter_multiplier = 1.2

    # Exponential backoff values used in case of an error. In reality we will
    # only wait a portion of either of these delays based on the jitter
    # multipliers.
    # NOTE: max_delay is computed when the class body is executed, so it
    # reflects the value the option has at that moment.
    max_delay = 4 * cfg.CONF.introspection_daemon_post_interval
    backoff_factor = 2.7

    def __init__(self):
        super(IronicInspection, self).__init__()
        # Pipe ends used to interrupt the wait between inspections. They are
        # only open while the daemon loop in _run() is active.
        self.reader = None
        self.writer = None
        if bool(cfg.CONF.keyfile) != bool(cfg.CONF.certfile):
            LOG.warning("Only one of 'keyfile' and 'certfile' options is "
                        "defined in config file. Its value will be ignored.")

    def _run(self):
        """Inspect once and, in daemon mode, keep inspecting periodically.

        :raises: errors.InspectionError if the initial inspection fails.
            Failures of the periodic inspections are logged and retried
            after a backed-off delay instead of being raised.
        """
        try:
            daemon_mode = cfg.CONF.introspection_daemon
            post_interval = cfg.CONF.introspection_daemon_post_interval

            inspector.inspect()
            if not daemon_mode:
                # No reason to continue unless we're in daemon mode.
                return

            self.reader, self.writer = os.pipe()
            try:
                # Created inside the try block so that the pipe is closed
                # even if setting up the poll object fails.
                p = select.poll()
                p.register(self.reader)

                # Un-jittered delay between inspections. It grows on
                # failures and returns to the configured value on success.
                # Jitter is applied to a copy on every iteration so that it
                # never accumulates from one iteration to the next.
                base_interval = post_interval

                while True:
                    jitter = random.uniform(self.min_jitter_multiplier,
                                            self.max_jitter_multiplier)
                    delay = base_interval * jitter
                    log_msg = 'sleeping before next inspection, interval: %s'
                    LOG.info(log_msg, delay)

                    # poll() takes milliseconds. A non-empty result means
                    # something was written to the pipe before the timeout.
                    if p.poll(delay * 1000):
                        if os.read(self.reader, 1).decode() == 'a':
                            break
                    try:
                        inspector.inspect()
                    except errors.InspectionError as e:
                        # Failures happen, no reason to exit as
                        # the failure could be intermittent.
                        LOG.warning('Error reporting introspection '
                                    'data: %(err)s',
                                    {'err': e})
                    except exception.ServiceLookupFailure as e:
                        # Likely a mDNS lookup failure. We should
                        # keep retrying.
                        LOG.error('Error looking up introspection '
                                  'endpoint: %(err)s',
                                  {'err': e})
                    except Exception as e:
                        # General failure such as requests ConnectionError
                        LOG.error('Error occurred attempting to '
                                  'connect to the introspection service. '
                                  'Error: %(err)s',
                                  {'err': e})
                    else:
                        # Success: go back to the configured interval.
                        base_interval = post_interval
                        continue

                    # Only reached after a failed inspection: back off,
                    # but never beyond max_delay.
                    base_interval = min(base_interval * self.backoff_factor,
                                        self.max_delay)
            finally:
                os.close(self.reader)
                os.close(self.writer)
                self.reader = None
                self.writer = None
        except errors.InspectionError as e:
            msg = "Inspection failed: %s" % e
            raise errors.InspectionError(msg)

    def run(self):
        """Run Inspection.

        If no ``inspection_callback_url`` is configured, the option is
        overridden with ``'mdns'`` before the inspection starts.
        """
        if not cfg.CONF.inspection_callback_url:
            cfg.CONF.set_override('inspection_callback_url', 'mdns')
        self._run()