summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Hesketh <josh@nitrotech.org>2014-12-02 16:33:09 +1100
committerJoshua Hesketh <josh@nitrotech.org>2014-12-03 15:02:46 +1100
commit05baf01cf1be10ad7f5471929c2e03eac505d798 (patch)
tree132c59eda8821b2e61f80cf9d73a4578359eac73
parentaae9786f1eab4433edda78445c39818454df71ed (diff)
downloadturbo-hipster-05baf01cf1be10ad7f5471929c2e03eac505d798.tar.gz
Improve log uploading and index generation
Turbo-hipster now produces different levels of logs for each job. These need to be uploaded and, in some cases, have indexes generated for them (e.g. swift has no directory listing). Support for zuul's swift instructions still needs updating. Change-Id: I572c8edfc856bb33998d1cfa0a8d31d274ab1bef
-rw-r--r--requirements.txt1
-rw-r--r--turbo_hipster/lib/models.py7
-rw-r--r--turbo_hipster/lib/utils.py269
-rw-r--r--turbo_hipster/task_plugins/real_db_upgrade/handle_results.py54
4 files changed, 209 insertions, 122 deletions
diff --git a/requirements.txt b/requirements.txt
index 07e1603..97ec161 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,4 @@ PyYAML>=3.1.0,<4.0.0
jenkins-job-builder
xmltodict
+python-magic
diff --git a/turbo_hipster/lib/models.py b/turbo_hipster/lib/models.py
index 726a64d..5e8abc3 100644
--- a/turbo_hipster/lib/models.py
+++ b/turbo_hipster/lib/models.py
@@ -224,9 +224,12 @@ class Task(object):
self.log.debug("Process the resulting files (upload/push)")
+ dir_list = os.listdir(self.job_results_dir)
+ path_list = [os.path.join(self.job_results_dir, i) for i in dir_list]
+
if 'publish_logs' in self.worker_server.config:
- index_url = utils.push_file(
- self.job_arguments['LOG_PATH'], self.job_results_dir,
+ index_url = utils.push_files(
+ self.job_arguments['LOG_PATH'], path_list,
self.worker_server.config['publish_logs'])
self.log.debug("Index URL found at %s" % index_url)
self.work_data['url'] = index_url
diff --git a/turbo_hipster/lib/utils.py b/turbo_hipster/lib/utils.py
index 3a8fc2c..0673869 100644
--- a/turbo_hipster/lib/utils.py
+++ b/turbo_hipster/lib/utils.py
@@ -15,12 +15,15 @@
import git
import logging
+import magic
import os
import requests
import select
import shutil
import subprocess
import swiftclient
+import sys
+import tempfile
import time
@@ -197,74 +200,10 @@ def execute_to_log(cmd, logfile, timeout=-1, watch_logs=[], heartbeat=30,
return p.returncode
-def push_file(results_set_name, file_path, publish_config):
- """ Push a log file to a server. Returns the public URL """
- method = publish_config['type'] + '_push_file'
- if method in globals() and hasattr(globals()[method], '__call__'):
- return globals()[method](results_set_name, file_path, publish_config)
-
-
-def swift_push_file(results_set_name, file_path, swift_config):
- """ Push a log file to a swift server. """
- def _push_individual_file(results_set_name, file_path, swift_config):
- with open(file_path, 'r') as fd:
- name = os.path.join(results_set_name, os.path.basename(file_path))
- con = swiftclient.client.Connection(
- authurl=swift_config['authurl'],
- user=swift_config['user'],
- key=swift_config['password'],
- os_options={'region_name': swift_config['region']},
- tenant_name=swift_config['tenant'],
- auth_version=2.0)
- con.put_object(swift_config['container'], name, fd)
-
- if os.path.isfile(file_path):
- _push_individual_file(results_set_name, file_path, swift_config)
- elif os.path.isdir(file_path):
- for path, folders, files in os.walk(file_path):
- for f in files:
- f_path = os.path.join(path, f)
- _push_individual_file(results_set_name, f_path, swift_config)
-
- return (swift_config['prepend_url'] +
- os.path.join(results_set_name, os.path.basename(file_path)))
-
-
-def local_push_file(results_set_name, file_path, local_config):
- """ Copy the file locally somewhere sensible """
- def _push_file_or_dir(results_set_name, file_path, local_config):
- dest_dir = os.path.join(local_config['path'], results_set_name)
- dest_filename = os.path.basename(file_path)
- if not os.path.isdir(dest_dir):
- os.makedirs(dest_dir)
-
- dest_file = os.path.join(dest_dir, dest_filename)
-
- if os.path.isfile(file_path):
- shutil.copyfile(file_path, dest_file)
- elif os.path.isdir(file_path):
- shutil.copytree(file_path, dest_file)
-
- if os.path.isfile(file_path):
- _push_file_or_dir(results_set_name, file_path, local_config)
- elif os.path.isdir(file_path):
- for f in os.listdir(file_path):
- f_path = os.path.join(file_path, f)
- _push_file_or_dir(results_set_name, f_path, local_config)
-
- dest_filename = os.path.basename(file_path)
- return local_config['prepend_url'] + os.path.join(results_set_name,
- dest_filename)
-
-
-def scp_push_file(results_set_name, file_path, local_config):
- """ Copy the file remotely over ssh """
- # TODO!
- pass
-
-
def zuul_swift_upload(file_path, job_arguments):
"""Upload working_dir to swift as per zuul's instructions"""
+ # TODO(jhesketh): replace with swift_form_post_submit from below
+
# NOTE(jhesketh): Zuul specifies an object prefix in the destination so
# we don't need to be concerned with results_set_name
@@ -299,3 +238,201 @@ def zuul_swift_upload(file_path, job_arguments):
return (logserver_prefix +
job_arguments['ZUUL_EXTRA_SWIFT_DESTINATION_PREFIX'])
+
+
def generate_log_index(file_list, logserver_prefix, results_set_name):
    """Create an HTML index page linking to each entry in file_list.

    :param file_list: iterable of file/folder names relative to the results
                      set. Folder entries carry a trailing slash.
    :param logserver_prefix: base URL of the log server.
    :param results_set_name: path component identifying this results set.
    :returns: the index page as an HTML string.
    """
    items = []
    for entry in file_list:
        file_url = os.path.join(logserver_prefix, results_set_name, entry)
        # file_list is simply a list of names to build an index for; the
        # entries aren't necessarily on disk, so we can't stat them to tell
        # folders from files. Normalise the name to get the final component,
        # then restore any trailing slash so folder links stay recognisable.
        # (endswith also copes with an empty entry, which previously raised
        # IndexError on entry[-1].)
        suffix = '/' if entry.endswith('/') else ''
        name = os.path.basename(os.path.normpath(entry)) + suffix
        items.append('<li><a href="%s">%s</a></li>' % (file_url, name))

    return ('<html><head><title>Index of results</title></head><body>'
            '<ul>' + ''.join(items) + '</ul></body></html>')
+
+
def make_index_file(file_list, logserver_prefix, results_set_name,
                    index_filename='index.html'):
    """Write an index of file_list into a temporary file for pushing.

    :param file_list: names to index (see generate_log_index).
    :param logserver_prefix: base URL of the log server.
    :param results_set_name: path component identifying this results set.
    :param index_filename: name of the generated index file.
    :returns: full path to the index file inside a fresh temporary
              directory (the caller is responsible for cleaning it up).
    """
    index_content = generate_log_index(file_list, logserver_prefix,
                                       results_set_name)
    index_path = os.path.join(tempfile.mkdtemp(), index_filename)
    # Use a context manager so the handle is flushed and closed before the
    # path is handed to an uploader (the original leaked the open handle,
    # risking a partially-written index being pushed).
    with open(index_path, 'w') as fd:
        fd.write(index_content)
    return index_path
+
+
def get_file_mime(file_path):
    """Return the mime type of file_path via libmagic, or None.

    Supports both the python-magic package (magic.from_file) and the raw
    libmagic python bindings (magic.open).
    """
    if not os.path.isfile(file_path):
        return None

    from_file = getattr(magic, 'from_file', None)
    if from_file is not None:
        return from_file(file_path, mime=True)

    # Fall back to the libmagic bindings, whose detector returns strings
    # like "text/plain; charset=us-ascii" -- keep only the mime type.
    detector = magic.open(magic.MAGIC_MIME)
    detector.load()
    return detector.file(file_path).split(';')[0]
+
+
def swift_form_post_submit(file_list, url, hmac_body, signature):
    """Upload file_list to swift via the FormPost middleware.

    :param file_list: list of dicts with 'path' (location on disk) and
                      'filename' (destination name appended to url).
    :param url: the swift FormPost endpoint.
    :param hmac_body: the signed string carrying the object prefix,
                      redirect, max file size/count and expiry epoch.
    :param signature: HMAC signature authorising the upload.
    :raises Exception: if the signature expires before all files are sent.
    """
    # The files are uploaded as an HTTP POST multipart form. First pull the
    # fields swift expects back out of the hmac_body that was signed.
    payload = {}
    (object_prefix,
     payload['redirect'],
     payload['max_file_size'],
     payload['max_file_count'],
     payload['expires']) = hmac_body.split('\n')
    payload['signature'] = signature

    max_file_size = int(payload['max_file_size'])
    max_file_count = int(payload['max_file_count'])
    # hmac_body fields are strings; the expiry must be compared numerically
    # (the original compared str < float, which under Python 2 is always
    # False, so the timeout guard never fired).
    expires = int(payload['expires'])

    # Zuul's log path is generated without a trailing slash, so the object
    # prefix would otherwise be glued directly onto the first filename.
    # Insert a slash so the destination url looks like a folder.
    filename_prefix = '/' if url[-1] != '/' else ''

    # FormPost accepts at most max_file_count files per request, so send
    # the list in chunks of that size.
    for pos in xrange(0, len(file_list), max_file_count):
        if expires < time.time():
            raise Exception("Ran out of time uploading files!")
        files = {}
        try:
            for i, f in enumerate(file_list[pos:pos + max_file_count]):
                if os.path.getsize(f['path']) > max_file_size:
                    sys.stderr.write(
                        'Warning: %s exceeds %d bytes. Skipping...\n'
                        % (f['path'], max_file_size))
                    continue
                # Binary mode: logs may be compressed artefacts.
                files['file%d' % (i + 1)] = (filename_prefix + f['filename'],
                                             open(f['path'], 'rb'),
                                             get_file_mime(f['path']))
            # Don't POST an empty form if every file in the chunk was
            # skipped for being oversized.
            if files:
                requests.post(url, data=payload, files=files)
        finally:
            # Close the handles we opened (the original leaked one per
            # uploaded file).
            for _, fd, _ in files.values():
                fd.close()
+
+
def build_file_list(file_path, logserver_prefix, results_set_name,
                    create_dir_indexes=True):
    """Build the list of files to upload for file_path.

    Walks directories recursively and, when requested, generates an
    index.html for every directory visited.

    :param file_path: a file or directory to publish.
    :param logserver_prefix: base URL used inside generated indexes.
    :param results_set_name: path component identifying this results set.
    :param create_dir_indexes: whether to generate per-directory indexes.
    :returns: list of dicts with 'path' (location on disk) and 'filename'
              (name appended to the destination object; may contain path
              separators).
    """
    results = []

    if os.path.isfile(file_path):
        results.append({'filename': os.path.basename(file_path),
                        'path': file_path})
        return results

    if not os.path.isdir(file_path):
        return results

    # Normalise away a trailing separator so dirname() yields the real
    # parent; uploaded names are stored relative to that parent.
    if file_path.endswith(os.sep):
        file_path = file_path[:-1]
    parent_dir = os.path.dirname(file_path)

    for current_dir, subdirs, filenames in os.walk(file_path):
        contents = []

        for name in filenames:
            on_disk = os.path.join(current_dir, name)
            relative = os.path.relpath(on_disk, parent_dir)
            results.append({'filename': relative, 'path': on_disk})
            contents.append(relative)

        for name in subdirs:
            relative = os.path.relpath(os.path.join(current_dir, name),
                                       parent_dir)
            # Trailing slash marks folder links in the generated index.
            contents.append(relative + '/')

        if create_dir_indexes:
            index_file = make_index_file(contents, logserver_prefix,
                                         results_set_name)
            results.append({
                'filename': os.path.join(
                    os.path.relpath(current_dir, parent_dir),
                    os.path.basename(index_file)),
                'path': index_file})

    return results
+
+
def push_files(results_set_name, path_list, publish_config,
               generate_indexes=True):
    """Push a set of log files/folders to the configured server.

    Dispatches to <type>_push_files based on publish_config['type'] and
    returns the public URL of the results (the root index when indexes
    are generated).

    :param results_set_name: path component identifying this results set.
    :param path_list: list of files/folders on disk to publish.
    :param publish_config: dict with at least 'type' and 'prepend_url'.
    :param generate_indexes: whether to build index.html files.
    :returns: the public URL for the pushed results.
    """
    prepend_url = publish_config['prepend_url']
    to_push = []
    root_entries = []

    for raw_path in path_list:
        path = os.path.normpath(raw_path)
        entry = os.path.basename(path)
        # Folders get a trailing slash so the root index links them as such.
        root_entries.append(entry if os.path.isfile(path) else entry + '/')
        to_push += build_file_list(path, prepend_url, results_set_name,
                                   generate_indexes)

    index_file = ''
    if generate_indexes:
        index_file = make_index_file(root_entries, prepend_url,
                                     results_set_name)
        to_push.append({'filename': os.path.basename(index_file),
                        'path': index_file})

    # Dispatch to e.g. swift_push_files / local_push_files if defined.
    pusher = globals().get(publish_config['type'] + '_push_files')
    if callable(pusher):
        pusher(results_set_name, to_push, publish_config)

    return os.path.join(prepend_url, results_set_name,
                        os.path.basename(index_file))
+
+
def swift_push_files(results_set_name, file_list, swift_config):
    """Push a list of log files to a swift server.

    :param results_set_name: object-name prefix for this results set.
    :param file_list: dicts with 'path' (location on disk) and 'filename'
                      (destination name within the results set).
    :param swift_config: swift credentials plus the target 'container'.
    """
    # Authenticate once for the whole batch; the original re-created (and
    # re-authenticated) the connection for every single file.
    con = swiftclient.client.Connection(
        authurl=swift_config['authurl'],
        user=swift_config['user'],
        key=swift_config['password'],
        os_options={'region_name': swift_config['region']},
        tenant_name=swift_config['tenant'],
        auth_version=2.0)
    for file_item in file_list:
        # Binary mode: logs may be compressed (e.g. .gz) and text mode
        # would corrupt them on some platforms.
        with open(file_item['path'], 'rb') as fd:
            filename = os.path.join(results_set_name, file_item['filename'])
            con.put_object(swift_config['container'], filename, fd)
+
+
def local_push_files(results_set_name, file_list, local_config):
    """Copy the published files into the local log directory.

    Each item lands under local_config['path']/results_set_name with its
    'filename' (which may include sub-folders) preserved.
    """
    dest_root = os.path.join(local_config['path'], results_set_name)
    for item in file_list:
        relative = item['filename']
        target_dir = os.path.join(dest_root, os.path.dirname(relative))
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        shutil.copyfile(item['path'],
                        os.path.join(target_dir,
                                     os.path.basename(relative)))
+
+
def scp_push_files(results_set_name, file_path, local_config):
    """ Copy the file remotely over ssh """
    # TODO(review): not implemented yet -- configuring publish type 'scp'
    # is currently a silent no-op (push_files resolves this callable and
    # nothing is uploaded).
    pass
diff --git a/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py b/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py
index cf8c6df..561898f 100644
--- a/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py
+++ b/turbo_hipster/task_plugins/real_db_upgrade/handle_results.py
@@ -19,65 +19,11 @@ Primarily place the log files somewhere useful and optionally email
somebody """
import calendar
-import tempfile
import time
import os
import re
-from turbo_hipster.lib.utils import push_file
-
-
-def generate_log_index(datasets):
- """ Create an index of logfiles and links to them """
- # Loop over logfile URLs
- # Create summary and links
- output = '<html><head><title>Index of results</title></head><body>'
- output += '<ul>'
- for dataset in datasets:
- output += '<li>'
- output += '<a href="%s">%s</a>' % (dataset['result_uri'],
- dataset['name'])
- output += ' <span class="%s">%s</span>' % (dataset['result'],
- dataset['result'])
- output += '</li>'
-
- output += '</ul>'
- output += '</body></html>'
- return output
-
-
-def make_index_file(datasets, index_filename):
- """ Writes an index into a file for pushing """
- index_content = generate_log_index(datasets)
- tempdir = tempfile.mkdtemp()
- fd = open(os.path.join(tempdir, index_filename), 'w')
- fd.write(index_content)
- return os.path.join(tempdir, index_filename)
-
-
-def generate_push_results(datasets, publish_config):
- """ Generates and pushes results """
-
- last_link_uri = None
- for i, dataset in enumerate(datasets):
- result_uri = push_file(dataset['determined_path'],
- dataset['job_log_file_path'],
- publish_config)
- datasets[i]['result_uri'] = result_uri
- last_link_uri = result_uri
-
- if len(datasets) > 1:
- index_file = make_index_file(datasets, 'index.html')
- # FIXME: the determined path here is just copied from the last dataset.
- # Probably should be stored elsewhere...
- index_file_url = push_file(dataset['determined_path'], index_file,
- publish_config)
- return index_file_url
- else:
- return last_link_uri
-
-
MIGRATION_NUMBER_RE = re.compile('^([0-9]+).*\.py$')
MIGRATION_START_RE = re.compile('.* ([0-9]+) -\> ([0-9]+)\.\.\..*$')
MIGRATION_END_RE = re.compile('done$')