# Copyright (C) 2012 Canonical Ltd.
# Copyright (C) 2012, 2013 Hewlett-Packard Development Company, L.P.
# Copyright (C) 2012 Yahoo! Inc.
#
# Author: Scott Moser
# Author: Juerg Haefliger
# Author: Joshua Harlow
#
# This file is part of cloud-init. See LICENSE file for license information.

"""Helpers for walking a MIME message and dispatching parts to handlers."""

import abc
import os

from cloudinit import importer
from cloudinit import log as logging
from cloudinit import type_utils, util
from cloudinit.settings import FREQUENCIES, PER_ALWAYS, PER_INSTANCE

LOG = logging.getLogger(__name__)

# Used as the content type when a message is not multipart
# and it doesn't contain its own content-type
NOT_MULTIPART_TYPE = "text/x-not-multipart"

# When none is assigned this gets used
OCTET_TYPE = "application/octet-stream"

# Special content types that signal the start and end of processing
CONTENT_END = "__end__"
CONTENT_START = "__begin__"
CONTENT_SIGNALS = [CONTENT_START, CONTENT_END]

# Used when a part-handler type is encountered
# to allow for registration of new types.
PART_CONTENT_TYPES = ["text/part-handler"]
PART_HANDLER_FN_TMPL = "part-handler-%03d"

# For parts without filenames
PART_FN_TPL = "part-%03d"

# Different file beginnings to their content type
INCLUSION_TYPES_MAP = {
    "#include": "text/x-include-url",
    "#include-once": "text/x-include-once-url",
    "#!": "text/x-shellscript",
    "#cloud-config": "text/cloud-config",
    "#part-handler": "text/part-handler",
    "#cloud-boothook": "text/cloud-boothook",
    "#cloud-config-archive": "text/cloud-config-archive",
    "#cloud-config-jsonp": "text/cloud-config-jsonp",
    "## template: jinja": "text/jinja2",
    # Note: for the next 3 entries, the prefix doesn't matter because these
    # are for types that can only be used as part of a MIME message. However,
    # including these entries suppresses warnings during `cloudinit devel
    # make-mime`, which otherwise would require `--force`.
    "text/x-shellscript-per-boot": "text/x-shellscript-per-boot",
    "text/x-shellscript-per-instance": "text/x-shellscript-per-instance",
    "text/x-shellscript-per-once": "text/x-shellscript-per-once",
}

# Sorted longest first so that a longer prefix (e.g. "#include-once") is
# tried before a shorter prefix it starts with (e.g. "#include").
# The sort is stable, so equal-length prefixes keep their mapping order.
INCLUSION_SRCH = sorted(INCLUSION_TYPES_MAP, key=len, reverse=True)


class Handler(metaclass=abc.ABCMeta):
    """Abstract base class for part handlers.

    Subclasses must implement handle_part() and must define a ``prefixes``
    attribute whose items are keys of INCLUSION_TYPES_MAP.
    """

    def __init__(self, frequency, version=2):
        self.handler_version = version
        self.frequency = frequency

    def __repr__(self):
        return "%s: [%s]" % (type_utils.obj_name(self), self.list_types())

    def list_types(self):
        """Return the content types mapped from this handler's prefixes.

        Raises NotImplementedError when no ``prefixes`` attribute exists.
        """
        # Each subclass must define the supported content prefixes it handles.
        if not hasattr(self, "prefixes"):
            raise NotImplementedError("Missing prefixes subclass attribute")
        return [
            INCLUSION_TYPES_MAP[prefix] for prefix in getattr(self, "prefixes")
        ]

    @abc.abstractmethod
    def handle_part(self, *args, **kwargs):
        raise NotImplementedError()


def run_part(mod, data, filename, payload, frequency, headers):
    """Call mod.handle_part() with the arguments its handler version expects.

    The handler is only called when its own frequency is PER_ALWAYS, or when
    both its frequency and the requested frequency are PER_INSTANCE.
    Any exception raised while calling the handler (including an unknown
    handler version) is logged via util.logexc instead of being re-raised
    here.
    """
    mod_freq = mod.frequency
    if not (
        mod_freq == PER_ALWAYS
        or (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)
    ):
        return
    # Sanity checks on version (should be an int convertible)
    try:
        mod_ver = mod.handler_version
        mod_ver = int(mod_ver)
    except (TypeError, ValueError, AttributeError):
        # Missing or malformed version: fall back to the v1 calling style.
        mod_ver = 1
    content_type = headers["Content-Type"]
    try:
        LOG.debug(
            "Calling handler %s (%s, %s, %s) with frequency %s",
            mod,
            content_type,
            filename,
            mod_ver,
            frequency,
        )
        if mod_ver == 3:
            # Treat as v. 3 which does get a frequency + headers
            mod.handle_part(
                data, content_type, filename, payload, frequency, headers
            )
        elif mod_ver == 2:
            # Treat as v. 2 which does get a frequency
            mod.handle_part(data, content_type, filename, payload, frequency)
        elif mod_ver == 1:
            # Treat as v. 1 which gets no frequency
            mod.handle_part(data, content_type, filename, payload)
        else:
            raise ValueError("Unknown module version %s" % (mod_ver))
    except Exception:
        util.logexc(
            LOG,
            "Failed calling handler %s (%s, %s, %s) with frequency %s",
            mod,
            content_type,
            filename,
            mod_ver,
            frequency,
        )


def call_begin(mod, data, frequency):
    """Run the handler with the CONTENT_START signal as its content type."""
    # Create a fake header set
    headers = {
        "Content-Type": CONTENT_START,
    }
    run_part(mod, data, None, None, frequency, headers)


def call_end(mod, data, frequency):
    """Run the handler with the CONTENT_END signal as its content type."""
    # Create a fake header set
    headers = {
        "Content-Type": CONTENT_END,
    }
    run_part(mod, data, None, None, frequency, headers)


def walker_handle_handler(pdata, _ctype, _filename, payload):
    """Write a part-handler payload to disk, import it and register it.

    Reads "handlercount", "frequency", "handlerdir", "handlers" and "data"
    from pdata. On success "handlercount" is incremented; on any failure the
    error is logged and the count is left untouched.
    """
    curcount = pdata["handlercount"]
    modname = PART_HANDLER_FN_TMPL % (curcount)
    frequency = pdata["frequency"]
    modfname = os.path.join(pdata["handlerdir"], "%s" % (modname))
    if not modfname.endswith(".py"):
        modfname = "%s.py" % (modfname)
    # TODO(harlowja): Check if path exists??
    util.write_file(modfname, payload, 0o600)
    handlers = pdata["handlers"]
    try:
        # The module is imported by name, not by path.
        # NOTE(review): presumably handlerdir is on sys.path -- confirm.
        mod = fixup_handler(importer.import_module(modname))
        call_begin(mod, pdata["data"], frequency)
        # Only register and increment after the above have worked, so we don't
        # register if it fails starting.
        handlers.register(mod, initialized=True)
        pdata["handlercount"] = curcount + 1
    except Exception:
        util.logexc(
            LOG,
            "Failed at registering python file: %s (part handler %s)",
            modfname,
            curcount,
        )


def _extract_first_or_bytes(blob, size):
    """Return the first line of blob, truncated to at most size items."""
    # Extract the first line or up to X symbols for text objects
    # Extract first X bytes for binary objects
    try:
        if isinstance(blob, str):
            start = blob.split("\n", 1)[0]
        else:
            # We want to avoid decoding the whole blob (it might be huge)
            # By taking 4*size bytes we guarantee to decode size utf8 chars
            start = blob[: 4 * size].decode(errors="ignore").split("\n", 1)[0]
        # Slicing is a no-op when start is already shorter than size.
        start = start[:size]
    except UnicodeDecodeError:
        # Bytes array doesn't contain text so return chunk of raw bytes
        # NOTE: with errors="ignore" above, bytes.decode() does not raise
        # this, so the branch is purely defensive.
        start = blob[0:size]
    return start


def _escape_string(text):
    """Return an escaped form of text suitable for a log message.

    On Python 3 a str has no "string_escape" codec, so the "unicode_escape"
    encoding (a bytes object) is returned instead. Objects without an
    encode() method, such as bytes, are returned unchanged.
    """
    try:
        return text.encode("string_escape")
    except (LookupError, TypeError):
        try:
            # Unicode (and Python 3's str) doesn't support string_escape...
            return text.encode("unicode_escape")
        except TypeError:
            # Give up...
            pass
    except AttributeError:
        # We're in Python3 and received blob as bytes (no encode() method)
        # No escaping is needed because bytes are printed
        # as 'b\xAA\xBB' automatically in Python3
        pass
    return text


def walker_callback(data, filename, payload, headers):
    """Dispatch one message part to the handler for its content type.

    Excluded content types are skipped, part-handler parts are registered as
    new handlers, and parts with a registered handler are run through it.
    Anything else is only logged: a warning with a short preview for a
    non-empty payload, a debug message for an empty one.
    """
    content_type = headers["Content-Type"]
    # A missing (or None) "excluded" entry means nothing is excluded; without
    # the fallback the membership test would raise TypeError on None.
    excluded = data.get("excluded") or ()
    if content_type in excluded:
        LOG.debug('content_type "%s" is excluded', content_type)
        return

    if content_type in PART_CONTENT_TYPES:
        walker_handle_handler(data, content_type, filename, payload)
        return
    handlers = data["handlers"]
    if content_type in handlers:
        run_part(
            handlers[content_type],
            data["data"],
            filename,
            payload,
            data["frequency"],
            headers,
        )
    elif payload:
        # Extract the first line or 24 bytes for displaying in the log
        start = _extract_first_or_bytes(payload, 24)
        details = "'%s...'" % (_escape_string(start))
        if content_type == NOT_MULTIPART_TYPE:
            LOG.warning(
                "Unhandled non-multipart (%s) userdata: %s",
                content_type,
                details,
            )
        else:
            LOG.warning(
                "Unhandled unknown content-type (%s) userdata: %s",
                content_type,
                details,
            )
    else:
        LOG.debug("Empty payload of type %s", content_type)


# Callback is a function that will be called with
# (data, filename, payload, headers)
def walk(msg, callback, data):
    """Invoke callback once for every non-multipart part of msg.

    Parts without a filename get one generated from PART_FN_TPL and the
    running part number. The headers passed to the callback are a dict copy
    of the part's headers with "Content-Type" set to the part's content type.
    """
    partnum = 0
    for part in msg.walk():
        # multipart/* are just containers
        if part.get_content_maintype() == "multipart":
            continue

        ctype = part.get_content_type()
        if ctype is None:
            ctype = OCTET_TYPE

        filename = part.get_filename()
        if not filename:
            filename = PART_FN_TPL % (partnum)

        headers = dict(part)
        LOG.debug(headers)
        headers["Content-Type"] = ctype
        payload = util.fully_decoded_payload(part)
        callback(data, filename, payload, headers)
        partnum += 1


def fixup_handler(mod, def_freq=PER_INSTANCE):
    """Fill in missing handler attributes on mod and return it.

    A missing ``handler_version`` defaults to 1 and a missing ``frequency``
    defaults to def_freq. An existing, truthy frequency that is not in
    FREQUENCIES only triggers a warning; it is not modified.
    """
    if not hasattr(mod, "handler_version"):
        setattr(mod, "handler_version", 1)
    if not hasattr(mod, "frequency"):
        setattr(mod, "frequency", def_freq)
    else:
        freq = mod.frequency
        if freq and freq not in FREQUENCIES:
            LOG.warning("Handler %s has an unknown frequency %s", mod, freq)
    return mod


def type_from_starts_with(payload, default=None):
    """Guess a content type from the leading marker of payload.

    The payload is decoded with util.decode_binary, lower-cased and stripped
    of leading whitespace, then compared against the prefixes in
    INCLUSION_SRCH in order. The content type of the first matching prefix
    is returned. ``default`` is returned when decoding raises
    UnicodeDecodeError or when no prefix matches.
    """
    try:
        lowered = util.decode_binary(payload).lower()
    except UnicodeDecodeError:
        return default

    candidate = lowered.lstrip()
    # INCLUSION_SRCH order decides which prefix wins when several match.
    matched = next(
        (prefix for prefix in INCLUSION_SRCH if candidate.startswith(prefix)),
        None,
    )
    if matched is None:
        return default
    return INCLUSION_TYPES_MAP[matched]


# vi: ts=4 expandtab