summaryrefslogtreecommitdiff
path: root/cloudinit/url_helper.py
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/url_helper.py')
-rw-r--r--cloudinit/url_helper.py509
1 files changed, 0 insertions, 509 deletions
diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py
deleted file mode 100644
index c05e9d90..00000000
--- a/cloudinit/url_helper.py
+++ /dev/null
@@ -1,509 +0,0 @@
-# vi: ts=4 expandtab
-#
-# Copyright (C) 2012 Canonical Ltd.
-# Copyright (C) 2012 Hewlett-Packard Development Company, L.P.
-# Copyright (C) 2012 Yahoo! Inc.
-#
-# Author: Scott Moser <scott.moser@canonical.com>
-# Author: Juerg Haefliger <juerg.haefliger@hp.com>
-# Author: Joshua Harlow <harlowja@yahoo-inc.com>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 3, as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-import json
-import os
-import requests
-import six
-import time
-
-from email.utils import parsedate
-from functools import partial
-
-import oauthlib.oauth1 as oauth1
-from requests import exceptions
-
-from six.moves.urllib.parse import (
- urlparse, urlunparse,
- quote as urlquote)
-
-from cloudinit import log as logging
-from cloudinit import version
-
-LOG = logging.getLogger(__name__)
-
-if six.PY2:
- import httplib
- NOT_FOUND = httplib.NOT_FOUND
-else:
- import http.client
- NOT_FOUND = http.client.NOT_FOUND
-
-
-# Check if requests has ssl support (added in requests >= 0.8.8)
-SSL_ENABLED = False
-CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0)
-_REQ_VER = None
-try:
- from distutils.version import LooseVersion
- import pkg_resources
- _REQ = pkg_resources.get_distribution('requests')
- _REQ_VER = LooseVersion(_REQ.version)
- if _REQ_VER >= LooseVersion('0.8.8'):
- SSL_ENABLED = True
- if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'):
- CONFIG_ENABLED = True
-except ImportError:
- pass
-
-
-def _cleanurl(url):
- parsed_url = list(urlparse(url, scheme='http'))
- if not parsed_url[1] and parsed_url[2]:
- # Swap these since this seems to be a common
- # occurrence when given urls like 'www.google.com'
- parsed_url[1] = parsed_url[2]
- parsed_url[2] = ''
- return urlunparse(parsed_url)
-
-
-def combine_url(base, *add_ons):
-
- def combine_single(url, add_on):
- url_parsed = list(urlparse(url))
- path = url_parsed[2]
- if path and not path.endswith("/"):
- path += "/"
- path += urlquote(str(add_on), safe="/:")
- url_parsed[2] = path
- return urlunparse(url_parsed)
-
- url = base
- for add_on in add_ons:
- url = combine_single(url, add_on)
- return url
-
-
-# Made to have same accessors as UrlResponse so that the
-# read_file_or_url can return this or that object and the
-# 'user' of those objects will not need to know the difference.
-class StringResponse(object):
- def __init__(self, contents, code=200):
- self.code = code
- self.headers = {}
- self.contents = contents
- self.url = None
-
- def ok(self, *args, **kwargs):
- if self.code != 200:
- return False
- return True
-
- def __str__(self):
- return self.contents
-
-
-class FileResponse(StringResponse):
- def __init__(self, path, contents, code=200):
- StringResponse.__init__(self, contents, code=code)
- self.url = path
-
-
-class UrlResponse(object):
- def __init__(self, response):
- self._response = response
-
- @property
- def contents(self):
- return self._response.content
-
- @property
- def url(self):
- return self._response.url
-
- def ok(self, redirects_ok=False):
- upper = 300
- if redirects_ok:
- upper = 400
- if self.code >= 200 and self.code < upper:
- return True
- else:
- return False
-
- @property
- def headers(self):
- return self._response.headers
-
- @property
- def code(self):
- return self._response.status_code
-
- def __str__(self):
- return self._response.text
-
-
-class UrlError(IOError):
- def __init__(self, cause, code=None, headers=None, url=None):
- IOError.__init__(self, str(cause))
- self.cause = cause
- self.code = code
- self.headers = headers
- if self.headers is None:
- self.headers = {}
- self.url = url
-
-
-def _get_ssl_args(url, ssl_details):
- ssl_args = {}
- scheme = urlparse(url).scheme
- if scheme == 'https' and ssl_details:
- if not SSL_ENABLED:
- LOG.warn("SSL is not supported in requests v%s, "
- "cert. verification can not occur!", _REQ_VER)
- else:
- if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
- ssl_args['verify'] = ssl_details['ca_certs']
- else:
- ssl_args['verify'] = True
- if 'cert_file' in ssl_details and 'key_file' in ssl_details:
- ssl_args['cert'] = [ssl_details['cert_file'],
- ssl_details['key_file']]
- elif 'cert_file' in ssl_details:
- ssl_args['cert'] = str(ssl_details['cert_file'])
- return ssl_args
-
-
-def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
- headers=None, headers_cb=None, ssl_details=None,
- check_status=True, allow_redirects=True, exception_cb=None):
- url = _cleanurl(url)
- req_args = {
- 'url': url,
- }
- req_args.update(_get_ssl_args(url, ssl_details))
- req_args['allow_redirects'] = allow_redirects
- req_args['method'] = 'GET'
- if timeout is not None:
- req_args['timeout'] = max(float(timeout), 0)
- if data:
- req_args['method'] = 'POST'
- # It doesn't seem like config
- # was added in older library versions (or newer ones either), thus we
- # need to manually do the retries if it wasn't...
- if CONFIG_ENABLED:
- req_config = {
- 'store_cookies': False,
- }
- # Don't use the retry support built-in
- # since it doesn't allow for 'sleep_times'
- # in between tries....
- # if retries:
- # req_config['max_retries'] = max(int(retries), 0)
- req_args['config'] = req_config
- manual_tries = 1
- if retries:
- manual_tries = max(int(retries) + 1, 1)
-
- def_headers = {
- 'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
- }
- if headers:
- def_headers.update(headers)
- headers = def_headers
-
- if not headers_cb:
- def _cb(url):
- return headers
- headers_cb = _cb
- if data:
- req_args['data'] = data
- if sec_between is None:
- sec_between = -1
-
- excps = []
- # Handle retrying ourselves since the built-in support
- # doesn't handle sleeping between tries...
- for i in range(0, manual_tries):
- req_args['headers'] = headers_cb(url)
- filtered_req_args = {}
- for (k, v) in req_args.items():
- if k == 'data':
- continue
- filtered_req_args[k] = v
- try:
- LOG.debug("[%s/%s] open '%s' with %s configuration", i,
- manual_tries, url, filtered_req_args)
-
- r = requests.request(**req_args)
- if check_status:
- r.raise_for_status()
- LOG.debug("Read from %s (%s, %sb) after %s attempts", url,
- r.status_code, len(r.content), (i + 1))
- # Doesn't seem like we can make it use a different
- # subclass for responses, so add our own backward-compat
- # attrs
- return UrlResponse(r)
- except exceptions.RequestException as e:
- if (isinstance(e, (exceptions.HTTPError)) and
- hasattr(e, 'response') and # This appeared in v 0.10.8
- hasattr(e.response, 'status_code')):
- excps.append(UrlError(e, code=e.response.status_code,
- headers=e.response.headers,
- url=url))
- else:
- excps.append(UrlError(e, url=url))
- if SSL_ENABLED and isinstance(e, exceptions.SSLError):
- # ssl exceptions are not going to get fixed by waiting a
- # few seconds
- break
- if exception_cb and exception_cb(req_args.copy(), excps[-1]):
- # if an exception callback was given it should return None
- # a true-ish value means to break and re-raise the exception
- break
- if i + 1 < manual_tries and sec_between > 0:
- LOG.debug("Please wait %s seconds while we wait to try again",
- sec_between)
- time.sleep(sec_between)
- if excps:
- raise excps[-1]
- return None # Should throw before this...
-
-
-def wait_for_url(urls, max_wait=None, timeout=None,
- status_cb=None, headers_cb=None, sleep_time=1,
- exception_cb=None):
- """
- urls: a list of urls to try
- max_wait: roughly the maximum time to wait before giving up
- The max time is *actually* len(urls)*timeout as each url will
- be tried once and given the timeout provided.
- a number <= 0 will always result in only one try
- timeout: the timeout provided to urlopen
- status_cb: call method with string message when a url is not available
- headers_cb: call method with single argument of url to get headers
- for request.
- exception_cb: call method with 2 arguments 'msg' (per status_cb) and
- 'exception', the exception that occurred.
-
- the idea of this routine is to wait for the EC2 metdata service to
- come up. On both Eucalyptus and EC2 we have seen the case where
- the instance hit the MD before the MD service was up. EC2 seems
- to have permenantely fixed this, though.
-
- In openstack, the metadata service might be painfully slow, and
- unable to avoid hitting a timeout of even up to 10 seconds or more
- (LP: #894279) for a simple GET.
-
- Offset those needs with the need to not hang forever (and block boot)
- on a system where cloud-init is configured to look for EC2 Metadata
- service but is not going to find one. It is possible that the instance
- data host (169.254.169.254) may be firewalled off Entirely for a sytem,
- meaning that the connection will block forever unless a timeout is set.
- """
- start_time = time.time()
-
- def log_status_cb(msg, exc=None):
- LOG.debug(msg)
-
- if status_cb is None:
- status_cb = log_status_cb
-
- def timeup(max_wait, start_time):
- return ((max_wait <= 0 or max_wait is None) or
- (time.time() - start_time > max_wait))
-
- loop_n = 0
- while True:
- sleep_time = int(loop_n / 5) + 1
- for url in urls:
- now = time.time()
- if loop_n != 0:
- if timeup(max_wait, start_time):
- break
- if timeout and (now + timeout > (start_time + max_wait)):
- # shorten timeout to not run way over max_time
- timeout = int((start_time + max_wait) - now)
-
- reason = ""
- url_exc = None
- try:
- if headers_cb is not None:
- headers = headers_cb(url)
- else:
- headers = {}
-
- response = readurl(url, headers=headers, timeout=timeout,
- check_status=False)
- if not response.contents:
- reason = "empty response [%s]" % (response.code)
- url_exc = UrlError(ValueError(reason), code=response.code,
- headers=response.headers, url=url)
- elif not response.ok():
- reason = "bad status code [%s]" % (response.code)
- url_exc = UrlError(ValueError(reason), code=response.code,
- headers=response.headers, url=url)
- else:
- return url
- except UrlError as e:
- reason = "request error [%s]" % e
- url_exc = e
- except Exception as e:
- reason = "unexpected error [%s]" % e
- url_exc = e
-
- time_taken = int(time.time() - start_time)
- status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
- time_taken,
- max_wait,
- reason)
- status_cb(status_msg)
- if exception_cb:
- # This can be used to alter the headers that will be sent
- # in the future, for example this is what the MAAS datasource
- # does.
- exception_cb(msg=status_msg, exception=url_exc)
-
- if timeup(max_wait, start_time):
- break
-
- loop_n = loop_n + 1
- LOG.debug("Please wait %s seconds while we wait to try again",
- sleep_time)
- time.sleep(sleep_time)
-
- return False
-
-
-class OauthUrlHelper(object):
- def __init__(self, consumer_key=None, token_key=None,
- token_secret=None, consumer_secret=None,
- skew_data_file="/run/oauth_skew.json"):
- self.consumer_key = consumer_key
- self.consumer_secret = consumer_secret or ""
- self.token_key = token_key
- self.token_secret = token_secret
- self.skew_data_file = skew_data_file
- self._do_oauth = True
- self.skew_change_limit = 5
- required = (self.token_key, self.token_secret, self.consumer_key)
- if not any(required):
- self._do_oauth = False
- elif not all(required):
- raise ValueError("all or none of token_key, token_secret, or "
- "consumer_key can be set")
-
- old = self.read_skew_file()
- self.skew_data = old or {}
-
- def read_skew_file(self):
- if self.skew_data_file and os.path.isfile(self.skew_data_file):
- with open(self.skew_data_file, mode="r") as fp:
- return json.load(fp)
- return None
-
- def update_skew_file(self, host, value):
- # this is not atomic
- if not self.skew_data_file:
- return
- cur = self.read_skew_file()
- if cur is None:
- cur = {}
- cur[host] = value
- with open(self.skew_data_file, mode="w") as fp:
- fp.write(json.dumps(cur))
-
- def exception_cb(self, msg, exception):
- if not (isinstance(exception, UrlError) and
- (exception.code == 403 or exception.code == 401)):
- return
-
- if 'date' not in exception.headers:
- LOG.warn("Missing header 'date' in %s response", exception.code)
- return
-
- date = exception.headers['date']
- try:
- remote_time = time.mktime(parsedate(date))
- except Exception as e:
- LOG.warn("Failed to convert datetime '%s': %s", date, e)
- return
-
- skew = int(remote_time - time.time())
- host = urlparse(exception.url).netloc
- old_skew = self.skew_data.get(host, 0)
- if abs(old_skew - skew) > self.skew_change_limit:
- self.update_skew_file(host, skew)
- LOG.warn("Setting oauth clockskew for %s to %d", host, skew)
- self.skew_data[host] = skew
-
- return
-
- def headers_cb(self, url):
- if not self._do_oauth:
- return {}
-
- timestamp = None
- host = urlparse(url).netloc
- if self.skew_data and host in self.skew_data:
- timestamp = int(time.time()) + self.skew_data[host]
-
- return oauth_headers(
- url=url, consumer_key=self.consumer_key,
- token_key=self.token_key, token_secret=self.token_secret,
- consumer_secret=self.consumer_secret, timestamp=timestamp)
-
- def _wrapped(self, wrapped_func, args, kwargs):
- kwargs['headers_cb'] = partial(
- self._headers_cb, kwargs.get('headers_cb'))
- kwargs['exception_cb'] = partial(
- self._exception_cb, kwargs.get('exception_cb'))
- return wrapped_func(*args, **kwargs)
-
- def wait_for_url(self, *args, **kwargs):
- return self._wrapped(wait_for_url, args, kwargs)
-
- def readurl(self, *args, **kwargs):
- return self._wrapped(readurl, args, kwargs)
-
- def _exception_cb(self, extra_exception_cb, msg, exception):
- ret = None
- try:
- if extra_exception_cb:
- ret = extra_exception_cb(msg, exception)
- finally:
- self.exception_cb(msg, exception)
- return ret
-
- def _headers_cb(self, extra_headers_cb, url):
- headers = {}
- if extra_headers_cb:
- headers = extra_headers_cb(url)
- headers.update(self.headers_cb(url))
- return headers
-
-
-def oauth_headers(url, consumer_key, token_key, token_secret, consumer_secret,
- timestamp=None):
- if timestamp:
- timestamp = str(timestamp)
- else:
- timestamp = None
-
- client = oauth1.Client(
- consumer_key,
- client_secret=consumer_secret,
- resource_owner_key=token_key,
- resource_owner_secret=token_secret,
- signature_method=oauth1.SIGNATURE_PLAINTEXT,
- timestamp=timestamp)
- uri, signed_headers, body = client.sign(url)
- return signed_headers