From cc477756a30eecd0fe45e3728f9c7ae031aa4a43 Mon Sep 17 00:00:00 2001 From: Gustavo Serra Scalet Date: Thu, 20 Sep 2018 14:09:28 -0300 Subject: Remove gsutil dependency (#637) --- debian/control | 2 +- .../metadata_scripts/script_retriever.py | 82 ++++++------ .../tests/script_retriever_test.py | 137 +++++++++++++++------ google_compute_engine/metadata_watcher.py | 18 ++- .../tests/metadata_watcher_test.py | 34 ++++- 5 files changed, 187 insertions(+), 86 deletions(-) diff --git a/debian/control b/debian/control index 922aee7..50690e2 100644 --- a/debian/control +++ b/debian/control @@ -30,7 +30,7 @@ Depends: google-compute-engine-oslogin, python3-google-compute-engine (= ${source:Version}), system-log-daemon, systemd -Recommends: google-cloud-sdk, rsyslog +Recommends: rsyslog Provides: irqbalance Conflicts: google-compute-engine-jessie, google-compute-engine-init-jessie, diff --git a/google_compute_engine/metadata_scripts/script_retriever.py b/google_compute_engine/metadata_scripts/script_retriever.py index 924bde2..96d84ac 100644 --- a/google_compute_engine/metadata_scripts/script_retriever.py +++ b/google_compute_engine/metadata_scripts/script_retriever.py @@ -15,6 +15,7 @@ """Retrieve and store user provided metadata scripts.""" +import ast import re import socket import subprocess @@ -23,11 +24,15 @@ import tempfile from google_compute_engine import metadata_watcher from google_compute_engine.compat import httpclient from google_compute_engine.compat import urlerror +from google_compute_engine.compat import urlrequest from google_compute_engine.compat import urlretrieve class ScriptRetriever(object): """A class for retrieving and storing user provided metadata scripts.""" + token_metadata_key = 'instance/service-accounts/default/token' + # Cached authentication token to be used when downloading from bucket. + token = None def __init__(self, logger, script_type): """Constructor. @@ -40,8 +45,10 @@ class ScriptRetriever(object): self.script_type = script_type self.watcher = metadata_watcher.MetadataWatcher(logger=self.logger) - def _DownloadGsUrl(self, url, dest_dir): - """Download a Google Storage URL using gsutil. + def _DownloadAuthUrl(self, url, dest_dir): + """Download a Google Storage URL using an authentication token. + + If the token cannot be fetched, fallback to unauthenticated download. Args: url: string, the URL to download. @@ -50,29 +57,39 @@ class ScriptRetriever(object): Returns: string, the path to the file storing the metadata script. """ - try: - subprocess.check_call( - ['which', 'gsutil'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except subprocess.CalledProcessError: - self.logger.warning( - 'gsutil is not installed, cannot download items from Google Storage.') - return None - dest_file = tempfile.NamedTemporaryFile(dir=dest_dir, delete=False) dest_file.close() dest = dest_file.name - self.logger.info('Downloading url from %s to %s using gsutil.', url, dest) + self.logger.info( + 'Downloading url from %s to %s using authentication token.', url, dest) + + if not self.token: + response = self.watcher.GetMetadata( + self.token_metadata_key, recursive=False, retry=False) + + if not response: + self.logger.info( + 'Authentication token not found. Attempting unauthenticated ' + 'download.') + return self._DownloadUrl(url, dest_dir) + + self.token = '%s %s' % ( + response.get('token_type', ''), response.get('access_token', '')) + try: - subprocess.check_call(['gsutil', 'cp', url, dest]) - return dest - except subprocess.CalledProcessError as e: - self.logger.warning( - 'Could not download %s using gsutil. %s.', url, str(e)) - except Exception as e: - self.logger.warning( - 'Exception downloading %s using gsutil. %s.', url, str(e)) - return None + request = urlrequest.Request(url) + request.add_unredirected_header('Metadata-Flavor', 'Google') + request.add_unredirected_header('Authorization', self.token) + content = urlrequest.urlopen(request).read() + except (httpclient.HTTPException, socket.error, urlerror.URLError) as e: + self.logger.warning('Could not download %s. %s.', url, str(e)) + return None + + with open(dest, 'w') as f: + f.write(content) + + return dest def _DownloadUrl(self, url, dest_dir): """Download a script from a given URL. @@ -111,7 +128,9 @@ class ScriptRetriever(object): # Check for the preferred Google Storage URL format: # gs:/// if url.startswith(r'gs://'): - return self._DownloadGsUrl(url, dest_dir) + # Convert the string into a standard URL. + url = re.sub('^gs://', 'https://storage.googleapis.com/', url) + return self._DownloadAuthUrl(url, dest_dir) header = r'http[s]?://' domain = r'storage\.googleapis\.com' @@ -122,10 +141,6 @@ class ScriptRetriever(object): bucket = r'(?P[a-z0-9][-_.a-z0-9]*[a-z0-9])' # Accept any non-empty string that doesn't contain a wildcard character - # gsutil interprets some characters as wildcards. - # These characters in object names make it difficult or impossible - # to perform various wildcard operations using gsutil - # For a complete list use "gsutil help naming". obj = r'(?P[^\*\?]+)' # Check for the Google Storage URLs: @@ -134,10 +149,7 @@ class ScriptRetriever(object): gs_regex = re.compile(r'\A%s%s\.%s/%s\Z' % (header, bucket, domain, obj)) match = gs_regex.match(url) if match: - gs_url = r'gs://%s/%s' % (match.group('bucket'), match.group('obj')) - # In case gsutil is not installed, continue as a normal URL. - return (self._DownloadGsUrl(gs_url, dest_dir) or - self._DownloadUrl(url, dest_dir)) + return self._DownloadAuthUrl(url, dest_dir) # Check for the other possible Google Storage URLs: # http://storage.googleapis.com// @@ -150,10 +162,7 @@ class ScriptRetriever(object): r'\A%s(commondata)?%s/%s/%s\Z' % (header, domain, bucket, obj)) match = gs_regex.match(url) if match: - gs_url = r'gs://%s/%s' % (match.group('bucket'), match.group('obj')) - # In case gsutil is not installed, continue as a normal URL. - return (self._DownloadGsUrl(gs_url, dest_dir) or - self._DownloadUrl(url, dest_dir)) + return self._DownloadAuthUrl(url, dest_dir) # Unauthenticated download of the object. return self._DownloadUrl(url, dest_dir) @@ -173,7 +182,7 @@ class ScriptRetriever(object): metadata_key = '%s-script' % self.script_type metadata_value = attribute_data.get(metadata_key) if metadata_value: - self.logger.info('Found %s in metadata.' % metadata_key) + self.logger.info('Found %s in metadata.', metadata_key) with tempfile.NamedTemporaryFile( mode='w', dir=dest_dir, delete=False) as dest: dest.write(metadata_value.lstrip()) @@ -182,8 +191,9 @@ class ScriptRetriever(object): metadata_key = '%s-script-url' % self.script_type metadata_value = attribute_data.get(metadata_key) if metadata_value: - self.logger.info('Found %s in metadata.' % metadata_key) - script_dict[metadata_key] = self._DownloadScript(metadata_value, dest_dir) + self.logger.info('Found %s in metadata.', metadata_key) + script_dict[metadata_key] = self._DownloadScript( + metadata_value, dest_dir) return script_dict diff --git a/google_compute_engine/metadata_scripts/tests/script_retriever_test.py b/google_compute_engine/metadata_scripts/tests/script_retriever_test.py index 229e621..de7179e 100644 --- a/google_compute_engine/metadata_scripts/tests/script_retriever_test.py +++ b/google_compute_engine/metadata_scripts/tests/script_retriever_test.py @@ -17,7 +17,10 @@ import subprocess +from google_compute_engine.compat import urlerror from google_compute_engine.metadata_scripts import script_retriever +from google_compute_engine.metadata_watcher import MetadataWatcher +from google_compute_engine.test_compat import builtin from google_compute_engine.test_compat import mock from google_compute_engine.test_compat import unittest @@ -33,48 +36,100 @@ class ScriptRetrieverTest(unittest.TestCase): self.retriever = script_retriever.ScriptRetriever( self.mock_logger, self.script_type) - @mock.patch('google_compute_engine.metadata_scripts.script_retriever.subprocess.check_call') - def testDownloadGsNoExec(self, mock_call): - mock_call.side_effect = subprocess.CalledProcessError('foo', 'bar') - gs_url = 'gs://fake/url' - self.assertIsNone(self.retriever._DownloadGsUrl(gs_url, self.dest_dir)) - mock_call.assert_called_once_with( - ['which', 'gsutil'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - self.mock_logger.warning.assert_called_once_with(mock.ANY) - - @mock.patch('google_compute_engine.metadata_scripts.script_retriever.subprocess.check_call') @mock.patch('google_compute_engine.metadata_scripts.script_retriever.tempfile.NamedTemporaryFile') - def testDownloadGsUrl(self, mock_tempfile, mock_call): - gs_url = 'gs://fake/url' + @mock.patch('google_compute_engine.metadata_scripts.script_retriever.urlrequest.Request') + @mock.patch('google_compute_engine.metadata_scripts.script_retriever.urlrequest.urlopen') + def testDownloadAuthUrl(self, mock_urlopen, mock_request, mock_tempfile): + auth_url = 'https://storage.googleapis.com/fake/url' mock_tempfile.return_value = mock_tempfile mock_tempfile.name = self.dest - self.assertEqual( - self.retriever._DownloadGsUrl(gs_url, self.dest_dir), self.dest) + self.retriever.token = 'bar' + + mock_open = mock.mock_open() + with mock.patch('%s.open' % builtin, mock_open): + self.assertEqual( + self.retriever._DownloadAuthUrl(auth_url, self.dest_dir), self.dest) + mock_tempfile.assert_called_once_with(dir=self.dest_dir, delete=False) mock_tempfile.close.assert_called_once_with() - self.mock_logger.info.assert_called_once_with(mock.ANY, gs_url, self.dest) - mock_call.assert_called_with(['gsutil', 'cp', gs_url, self.dest]) + + self.mock_logger.info.assert_called_once_with( + mock.ANY, auth_url, self.dest) + mock_request.assert_called_with(auth_url) + mocked_request = mock_request() + mocked_request.add_unredirected_header.assert_called_with( + 'Authorization', 'bar') + mock_urlopen.assert_called_with(mocked_request) + urlopen_read = mock_urlopen().read(return_value='foo') self.mock_logger.warning.assert_not_called() - @mock.patch('google_compute_engine.metadata_scripts.script_retriever.subprocess.check_call') + mock_open.assert_called_once_with(self.dest, 'w') + handle = mock_open() + handle.write.assert_called_once_with(urlopen_read) + @mock.patch('google_compute_engine.metadata_scripts.script_retriever.tempfile.NamedTemporaryFile') - def testDownloadGsUrlProcessError(self, mock_tempfile, mock_call): - gs_url = 'gs://fake/url' + @mock.patch('google_compute_engine.metadata_scripts.script_retriever.urlrequest.Request') + @mock.patch('google_compute_engine.metadata_watcher.MetadataWatcher.GetMetadata') + def testDownloadAuthUrlExceptionAndToken( + self, mock_get_metadata, mock_request, mock_tempfile): + auth_url = 'https://storage.googleapis.com/fake/url' + metadata_prefix = 'http://metadata.google.internal/computeMetadata/v1/' + token_url = metadata_prefix + 'instance/service-accounts/default/token' mock_tempfile.return_value = mock_tempfile mock_tempfile.name = self.dest - mock_call.side_effect = [0, subprocess.CalledProcessError(1, 'Test')] - self.assertIsNone(self.retriever._DownloadGsUrl(gs_url, self.dest_dir)) + self.retriever.token = None + + mock_get_metadata.return_value = { + 'token_type': 'foo', 'access_token': 'bar'} + mock_request.return_value = mock_request + mock_request.side_effect = urlerror.URLError('Error.') + + self.assertIsNone(self.retriever._DownloadAuthUrl(auth_url, self.dest_dir)) + + mock_get_metadata.return_value = mock_get_metadata + # GetMetadata includes a prefix, so remove it. + stripped_url = token_url.replace(metadata_prefix, '') + mock_get_metadata.assert_called_once_with( + stripped_url, recursive=False, retry=False) + + self.assertEqual(self.retriever.token, 'foo bar') + + self.mock_logger.info.assert_called_once_with( + mock.ANY, auth_url, self.dest) self.assertEqual(self.mock_logger.warning.call_count, 1) - @mock.patch('google_compute_engine.metadata_scripts.script_retriever.subprocess.check_call') @mock.patch('google_compute_engine.metadata_scripts.script_retriever.tempfile.NamedTemporaryFile') - def testDownloadGsUrlException(self, mock_tempfile, mock_call): - gs_url = 'gs://fake/url' + @mock.patch('google_compute_engine.metadata_scripts.script_retriever.ScriptRetriever._DownloadUrl') + @mock.patch('google_compute_engine.metadata_watcher.MetadataWatcher.GetMetadata') + def testDownloadAuthUrlFallback( + self, mock_get_metadata, mock_download_url, mock_tempfile): + auth_url = 'https://storage.googleapis.com/fake/url' + metadata_prefix = 'http://metadata.google.internal/computeMetadata/v1/' + token_url = metadata_prefix + 'instance/service-accounts/default/token' mock_tempfile.return_value = mock_tempfile mock_tempfile.name = self.dest - mock_call.side_effect = [0, Exception('Error.')] - self.assertIsNone(self.retriever._DownloadGsUrl(gs_url, self.dest_dir)) - self.assertEqual(self.mock_logger.warning.call_count, 1) + self.retriever.token = None + + mock_get_metadata.return_value = None + mock_download_url.return_value = None + + self.assertIsNone(self.retriever._DownloadAuthUrl(auth_url, self.dest_dir)) + + mock_get_metadata.return_value = mock_get_metadata + # GetMetadata includes a prefix, so remove it. + prefix = 'http://metadata.google.internal/computeMetadata/v1/' + stripped_url = token_url.replace(prefix, '') + mock_get_metadata.assert_called_once_with( + stripped_url, recursive=False, retry=False) + mock_download_url.assert_called_once_with(auth_url, self.dest_dir) + + self.assertIsNone(self.retriever.token) + + expected_calls = [ + mock.call(mock.ANY, auth_url, self.dest), + mock.call(mock.ANY), + ] + self.assertEqual(self.mock_logger.info.mock_calls, expected_calls) @mock.patch('google_compute_engine.metadata_scripts.script_retriever.tempfile.NamedTemporaryFile') @mock.patch('google_compute_engine.metadata_scripts.script_retriever.urlretrieve.urlretrieve') @@ -82,7 +137,8 @@ class ScriptRetrieverTest(unittest.TestCase): url = 'http://www.google.com/fake/url' mock_tempfile.return_value = mock_tempfile mock_tempfile.name = self.dest - self.assertEqual(self.retriever._DownloadUrl(url, self.dest_dir), self.dest) + self.assertEqual( + self.retriever._DownloadUrl(url, self.dest_dir), self.dest) mock_tempfile.assert_called_once_with(dir=self.dest_dir, delete=False) mock_tempfile.close.assert_called_once_with() self.mock_logger.info.assert_called_once_with(mock.ANY, url, self.dest) @@ -140,8 +196,8 @@ class ScriptRetrieverTest(unittest.TestCase): return (url_formats, gs_urls) def testDownloadScript(self): - mock_download_gs = mock.Mock() - self.retriever._DownloadGsUrl = mock_download_gs + mock_auth_download = mock.Mock() + self.retriever._DownloadAuthUrl = mock_auth_download mock_download = mock.Mock() self.retriever._DownloadUrl = mock_download download_urls = [] @@ -171,25 +227,28 @@ class ScriptRetrieverTest(unittest.TestCase): download_urls.extend(urls) download_gs_urls.update(gs_urls) + # All Google Storage URLs are downloaded with an authentication token. + for url, gs_url in download_gs_urls.items(): + mock_download.reset_mock() + mock_auth_download.reset_mock() + self.retriever._DownloadScript(gs_url, self.dest_dir) + new_gs_url = gs_url.replace('gs://', 'https://storage.googleapis.com/') + mock_auth_download.assert_called_once_with(new_gs_url, self.dest_dir) + mock_download.assert_not_called() + for url in download_urls: mock_download.reset_mock() self.retriever._DownloadScript(url, self.dest_dir) mock_download.assert_called_once_with(url, self.dest_dir) - for url, gs_url in download_gs_urls.items(): - mock_download_gs.reset_mock() - self.retriever._DownloadScript(url, self.dest_dir) - mock_download_gs.assert_called_once_with(gs_url, self.dest_dir) - for url, gs_url in download_gs_urls.items(): if url.startswith('gs://'): continue - mock_download_gs.reset_mock() - mock_download_gs.return_value = None + mock_auth_download.reset_mock() + mock_auth_download.return_value = None mock_download.reset_mock() self.retriever._DownloadScript(url, self.dest_dir) - mock_download_gs.assert_called_once_with(gs_url, self.dest_dir) - mock_download.assert_called_once_with(url, self.dest_dir) + mock_auth_download.assert_called_once_with(url, self.dest_dir) @mock.patch('google_compute_engine.metadata_scripts.script_retriever.tempfile.NamedTemporaryFile') def testGetAttributeScripts(self, mock_tempfile): diff --git a/google_compute_engine/metadata_watcher.py b/google_compute_engine/metadata_watcher.py index c9036f6..1f088d9 100644 --- a/google_compute_engine/metadata_watcher.py +++ b/google_compute_engine/metadata_watcher.py @@ -153,7 +153,8 @@ class MetadataWatcher(object): return json.loads(response.read().decode('utf-8')) def _HandleMetadataUpdate( - self, metadata_key='', recursive=True, wait=True, timeout=None): + self, metadata_key='', recursive=True, wait=True, timeout=None, + retry=True): """Wait for a successful metadata response. Args: @@ -161,6 +162,7 @@ class MetadataWatcher(object): recursive: bool, True if we should recursively watch for metadata changes. wait: bool, True if we should wait for a metadata change. timeout: int, timeout in seconds for returning metadata output. + retry: bool, True if we should retry on failure. Returns: json, the deserialized contents of the metadata server. @@ -172,11 +174,13 @@ class MetadataWatcher(object): metadata_key=metadata_key, recursive=recursive, wait=wait, timeout=timeout) except (httpclient.HTTPException, socket.error, urlerror.URLError) as e: - if isinstance(e, type(exception)): - continue - else: + if not isinstance(e, type(exception)): exception = e self.logger.error('GET request error retrieving metadata. %s.', e) + if retry: + continue + else: + break def WatchMetadata( self, handler, metadata_key='', recursive=True, timeout=None): @@ -197,17 +201,19 @@ class MetadataWatcher(object): except Exception as e: self.logger.exception('Exception calling the response handler. %s.', e) - def GetMetadata(self, metadata_key='', recursive=True, timeout=None): + def GetMetadata( + self, metadata_key='', recursive=True, timeout=None, retry=True): """Retrieve the contents of metadata server for a metadata key. Args: metadata_key: string, the metadata key to watch for changes. recursive: bool, True if we should recursively watch for metadata changes. timeout: int, timeout in seconds for returning metadata output. + retry: bool, True if we should retry on failure. Returns: json, the deserialized contents of the metadata server or None if error. """ return self._HandleMetadataUpdate( metadata_key=metadata_key, recursive=recursive, wait=False, - timeout=timeout) + timeout=timeout, retry=retry) diff --git a/google_compute_engine/tests/metadata_watcher_test.py b/google_compute_engine/tests/metadata_watcher_test.py index e07ea94..1bce509 100644 --- a/google_compute_engine/tests/metadata_watcher_test.py +++ b/google_compute_engine/tests/metadata_watcher_test.py @@ -259,11 +259,12 @@ class MetadataWatcherTest(unittest.TestCase): metadata_key = 'instance/id' recursive = False wait = False + retry = True self.assertEqual( self.mock_watcher._HandleMetadataUpdate( metadata_key=metadata_key, recursive=recursive, wait=wait, - timeout=None), + timeout=None, retry=retry), {}) expected_calls = [ mock.call( @@ -274,6 +275,28 @@ class MetadataWatcherTest(unittest.TestCase): expected_calls = [mock.call.error(mock.ANY, mock.ANY)] * 2 self.assertEqual(self.mock_logger.mock_calls, expected_calls) + def testHandleMetadataUpdateExceptionNoRetry(self): + mock_response = mock.Mock() + mock_response.side_effect = metadata_watcher.socket.timeout() + self.mock_watcher._GetMetadataUpdate = mock_response + metadata_key = 'instance/id' + recursive = False + wait = False + retry = False + + self.assertIsNone( + self.mock_watcher._HandleMetadataUpdate( + metadata_key=metadata_key, recursive=recursive, wait=wait, + timeout=None, retry=retry)) + expected_calls = [ + mock.call( + metadata_key=metadata_key, recursive=recursive, wait=wait, + timeout=None), + ] + self.assertEqual(mock_response.mock_calls, expected_calls) + expected_calls = [mock.call.error(mock.ANY, mock.ANY)] + self.assertEqual(self.mock_logger.mock_calls, expected_calls) + def testWatchMetadata(self): mock_response = mock.Mock() mock_response.return_value = {} @@ -310,7 +333,7 @@ class MetadataWatcherTest(unittest.TestCase): self.assertEqual(self.mock_watcher.GetMetadata(), {}) mock_response.assert_called_once_with( - metadata_key='', recursive=True, wait=False, timeout=None) + metadata_key='', recursive=True, wait=False, timeout=None, retry=True) self.mock_watcher.logger.exception.assert_not_called() def testGetMetadataArgs(self): @@ -319,12 +342,15 @@ class MetadataWatcherTest(unittest.TestCase): self.mock_watcher._HandleMetadataUpdate = mock_response metadata_key = 'instance/id' recursive = False + retry = False response = self.mock_watcher.GetMetadata( - metadata_key=metadata_key, recursive=recursive, timeout=60) + metadata_key=metadata_key, recursive=recursive, timeout=60, + retry=retry) self.assertEqual(response, {}) mock_response.assert_called_once_with( - metadata_key=metadata_key, recursive=False, wait=False, timeout=60) + metadata_key=metadata_key, recursive=False, wait=False, timeout=60, + retry=False) self.mock_watcher.logger.exception.assert_not_called() -- cgit v1.2.1