diff options
author | Christoph Zwerschke <cito@online.de> | 2010-08-19 17:03:52 +0200 |
---|---|---|
committer | Christoph Zwerschke <cito@online.de> | 2010-08-19 17:03:52 +0200 |
commit | 8bac38f24dd068c859da813a2981129ab3827284 (patch) | |
tree | a82c2ad0c27ba2893e320efcb7fb1066ca82973d | |
parent | aa7d0fb24140051ebffe8fd3717ae437193541a8 (diff) | |
download | paste-8bac38f24dd068c859da813a2981129ab3827284.tar.gz |
Added unit tests for util.mimeparse and made the module more robust and somewhat faster.
This also closes tickets #290, #330, #370 and #381.
-rw-r--r-- | .hgignore | 12 | ||||
-rw-r--r-- | paste/util/mimeparse.py | 114 | ||||
-rw-r--r-- | tests/test_util/test_mimeparse.py | 181 |
3 files changed, 256 insertions, 51 deletions
@@ -1,5 +1,13 @@ syntax: glob -docs/_build -Paste.egg-info + +.project +.pydevproject +.settings +*.pyc +*.pyo +*.log +*.tmp +*.egg-info dist/ build/ +docs/_build diff --git a/paste/util/mimeparse.py b/paste/util/mimeparse.py index a8793b2..7a68772 100644 --- a/paste/util/mimeparse.py +++ b/paste/util/mimeparse.py @@ -28,15 +28,23 @@ def parse_mime_type(mime_type): ('application', 'xhtml', {'q', '0.5'}) """ - parts = mime_type.split(";") - params = dict([tuple([s.strip() for s in param.split("=")])\ - for param in parts[1:] ]) - full_type = parts[0].strip() - # Java URLConnection class sends an Accept header that includes a single "*" - # Turn it into a legal wildcard. - if full_type == '*': full_type = '*/*' - (type, subtype) = full_type.split("/") - return (type.strip(), subtype.strip(), params) + type = mime_type.split(';') + type, plist = type[0], type[1:] + try: + type, subtype = type.split('/', 1) + except ValueError: + type, subtype = type.strip() or '*', '*' + else: + type = type.strip() or '*' + subtype = subtype.strip() or '*' + params = {} + for param in plist: + param = param.split('=', 1) + if len(param) == 2: + key, value = param[0].strip(), param[1].strip() + if key and value: + params[key] = value + return type, subtype, params def parse_media_range(range): """Carves up a media range and returns a tuple of the @@ -51,12 +59,17 @@ def parse_media_range(range): is a value for 'q' in the params dictionary, filling it in with a proper default if necessary. 
""" - (type, subtype, params) = parse_mime_type(range) - if not params.has_key('q') or not params['q'] or \ - not float(params['q']) or float(params['q']) > 1\ - or float(params['q']) < 0: + type, subtype, params = parse_mime_type(range) + if 'q' in params: + try: + q = float(params['q']) + if not 0 <= q <= 1: + raise ValueError + except ValueError: + params['q'] = '1' + else: params['q'] = '1' - return (type, subtype, params) + return type, subtype, params def fitness_and_quality_parsed(mime_type, parsed_ranges): """Find the best match for a given mime-type against @@ -65,24 +78,26 @@ def fitness_and_quality_parsed(mime_type, parsed_ranges): the fitness value and the value of the 'q' quality parameter of the best match, or (-1, 0) if no match was found. Just as for quality_parsed(), 'parsed_ranges' - must be a list of parsed media ranges. """ - best_fitness = -1 - best_fit_q = 0 - (target_type, target_subtype, target_params) =\ - parse_media_range(mime_type) - for (type, subtype, params) in parsed_ranges: - if (type == target_type or type == '*' or target_type == '*') and \ - (subtype == target_subtype or subtype == '*' or target_subtype == '*'): - param_matches = reduce(lambda x, y: x+y, [1 for (key, value) in \ - target_params.iteritems() if key != 'q' and \ - params.has_key(key) and value == params[key]], 0) - fitness = (type == target_type) and 100 or 0 - fitness += (subtype == target_subtype) and 10 or 0 - fitness += param_matches + must be a list of parsed media ranges.""" + best_fitness, best_fit_q = -1, 0 + target_type, target_subtype, target_params = parse_media_range(mime_type) + for type, subtype, params in parsed_ranges: + if (type == target_type + or type == '*' or target_type == '*') and ( + subtype == target_subtype + or subtype == '*' or target_subtype == '*'): + fitness = 0 + if type == target_type: + fitness += 100 + if subtype == target_subtype: + fitness += 10 + for key in target_params: + if key != 'q' and key in params: + if params[key] == 
target_params[key]: + fitness += 1 if fitness > best_fitness: best_fitness = fitness best_fit_q = params['q'] - return best_fitness, float(best_fit_q) def quality_parsed(mime_type, parsed_ranges): @@ -90,9 +105,9 @@ def quality_parsed(mime_type, parsed_ranges): a list of media_ranges that have already been parsed by parse_media_range(). Returns the 'q' quality parameter of the best match, 0 if no - match was found. This function bahaves the same as quality() + match was found. This function behaves the same as quality() except that 'parsed_ranges' must be a list of - parsed media ranges. """ + parsed media ranges.""" return fitness_and_quality_parsed(mime_type, parsed_ranges)[1] def quality(mime_type, ranges): @@ -103,24 +118,27 @@ def quality(mime_type, ranges): 0.7 """ - parsed_ranges = [parse_media_range(r) for r in ranges.split(",")] + parsed_ranges = map(parse_media_range, ranges.split(',')) return quality_parsed(mime_type, parsed_ranges) def best_match(supported, header): """Takes a list of supported mime-types and finds the best - match for all the media-ranges listed in header. The value of - header must be a string that conforms to the format of the - HTTP Accept: header. The value of 'supported' is a list of - mime-types. + match for all the media-ranges listed in header. In case of + ambiguity, whatever comes first in the list will be chosen. + The value of header must be a string that conforms to the format + of the HTTP Accept: header. The value of 'supported' is a list + of mime-types. 
>>> best_match(['application/xbel+xml', 'text/xml'], 'text/*;q=0.5,*/*; q=0.1') 'text/xml' """ - parsed_header = [parse_media_range(r) for r in header.split(",")] - weighted_matches = [(fitness_and_quality_parsed(mime_type, parsed_header), mime_type)\ - for mime_type in supported] - weighted_matches.sort() - return weighted_matches[-1][0][1] and weighted_matches[-1][1] or '' + if not supported: + return '' + parsed_header = map(parse_media_range, header.split(',')) + best_type = max([ + (fitness_and_quality_parsed(mime_type, parsed_header), -n) + for n, mime_type in enumerate(supported)]) + return best_type[0][1] and supported[-best_type[1]] or '' def desired_matches(desired, header): """Takes a list of desired mime-types in the order the server prefers to @@ -132,15 +150,13 @@ def desired_matches(desired, header): the desired list in the same order should each one be Accepted by the browser. - >>> sorted_match(['text/html', 'application/xml'], \ + >>> desired_matches(['text/html', 'application/xml'], \ ... 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png') ['text/html', 'application/xml'] - >>> sorted_match(['text/html', 'application/xml'], 'application/xml,application/json') + >>> desired_matches(['text/html', 'application/xml'], 'application/xml,application/json') ['application/xml'] """ - matches = [] - parsed_ranges = [parse_media_range(r) for r in header.split(",")] - for mimetype in desired: - if quality_parsed(mimetype, parsed_ranges): - matches.append(mimetype) - return matches + parsed_ranges = map(parse_media_range, header.split(',')) + return [mimetype for mimetype in desired + if quality_parsed(mimetype, parsed_ranges)] + diff --git a/tests/test_util/test_mimeparse.py b/tests/test_util/test_mimeparse.py new file mode 100644 index 0000000..c24b5e5 --- /dev/null +++ b/tests/test_util/test_mimeparse.py @@ -0,0 +1,181 @@ +# (c) 2010 Ch. Zwerschke and contributors
+# This module is part of the Python Paste Project and is released under
+# the MIT License: http://www.opensource.org/licenses/mit-license.php
+
+from time import localtime
+from datetime import date
+from paste.util.mimeparse import *
+
+def test_parse_mime_type():
+ parse = parse_mime_type
+ assert parse('*/*') == ('*', '*', {})
+ assert parse('text/html') == ('text', 'html', {})
+ assert parse('audio/*; q=0.2') == ('audio', '*', {'q': '0.2'})
+ assert parse('text/x-dvi;level=1') == ('text', 'x-dvi', {'level': '1'})
+ assert parse('image/gif; level=2; q=0.4') == (
+ 'image', 'gif', {'level': '2', 'q': '0.4'})
+ assert parse('application/xhtml;level=3;q=0.5') == (
+ 'application', 'xhtml', {'level': '3', 'q': '0.5'})
+
+def test_parse_illformed_mime_type():
+ parse = parse_mime_type
+ assert parse('*') == ('*', '*', {})
+ assert parse('text') == ('text', '*', {})
+ assert parse('text/') == ('text', '*', {})
+ assert parse('/plain') == ('*', 'plain', {})
+ assert parse('/') == ('*', '*', {})
+ assert parse('text/plain;') == ('text', 'plain', {})
+ assert parse(';q=0.5') == ('*', '*', {'q': '0.5'})
+ assert parse('*; q=.2') == ('*', '*', {'q': '.2'})
+ assert parse('image; q=.7; level=3') == (
+ 'image', '*', {'q': '.7', 'level': '3'})
+ assert parse('*;q=1') == ('*', '*', {'q': '1'})
+ assert parse('*;q=') == ('*', '*', {})
+ assert parse('*;=0.5') == ('*', '*', {})
+ assert parse('*;q=foobar') == ('*', '*', {'q': 'foobar'})
+ assert parse('image/gif; level=2; q=2') == (
+ 'image', 'gif', {'level': '2', 'q': '2'})
+
+def test_parse_media_range():
+ parse = parse_media_range
+ assert parse('application/*;q=0.5') == ('application', '*', {'q': '0.5'})
+ assert parse('text/plain') == ('text', 'plain', {'q': '1'})
+ assert parse('*') == ('*', '*', {'q': '1'})
+ assert parse(';q=0.5') == ('*', '*', {'q': '0.5'})
+ assert parse('*;q=0.5') == ('*', '*', {'q': '0.5'})
+ assert parse('*;q=1') == ('*', '*', {'q': '1'})
+ assert parse('*;q=') == ('*', '*', {'q': '1'})
+ assert parse('*;q=-1') == ('*', '*', {'q': '1'})
+ assert parse('*;q=foobar') == ('*', '*', {'q': '1'})
+ assert parse('*;q=0.0001') == ('*', '*', {'q': '0.0001'})
+ assert parse('*;q=1000.0') == ('*', '*', {'q': '1'})
+ assert parse('*;q=0') == ('*', '*', {'q': '0'})
+ assert parse('*;q=0.0000') == ('*', '*', {'q': '0.0000'})
+ assert parse('*;q=1.0001') == ('*', '*', {'q': '1'})
+ assert parse('*;q=2') == ('*', '*', {'q': '1'})
+ assert parse('*;q=1e3') == ('*', '*', {'q': '1'})
+ assert parse('image/gif; level=2') == (
+ 'image', 'gif', {'level': '2', 'q': '1'})
+ assert parse('image/gif; level=2; q=0.5') == (
+ 'image', 'gif', {'level': '2', 'q': '0.5'})
+ assert parse('image/gif; level=2; q=2') == (
+ 'image', 'gif', {'level': '2', 'q': '1'})
+
+def test_fitness_and_quality_parsed():
+ faq = fitness_and_quality_parsed
+ assert faq('*/*;q=0.7', [
+ ('foo', 'bar', {'q': '0.5'})]) == (0, 0.5)
+ assert faq('foo/*;q=0.7', [
+ ('foo', 'bar', {'q': '0.5'})]) == (100, 0.5)
+ assert faq('*/bar;q=0.7', [
+ ('foo', 'bar', {'q': '0.5'})]) == (10, 0.5)
+ assert faq('foo/bar;q=0.7', [
+ ('foo', 'bar', {'q': '0.5'})]) == (110, 0.5)
+ assert faq('text/html;q=0.7', [
+ ('foo', 'bar', {'q': '0.5'})]) == (-1, 0)
+ assert faq('text/html;q=0.7', [
+ ('text', 'bar', {'q': '0.5'})]) == (-1, 0)
+ assert faq('text/html;q=0.7', [
+ ('foo', 'html', {'q': '0.5'})]) == (-1, 0)
+ assert faq('text/html;q=0.7', [
+ ('text', '*', {'q': '0.5'})]) == (100, 0.5)
+ assert faq('text/html;q=0.7', [
+ ('*', 'html', {'q': '0.5'})]) == (10, 0.5)
+ assert faq('text/html;q=0.7', [
+ ('*', '*', {'q': '0'}), ('text', 'html', {'q': '0.5'})]) == (110, 0.5)
+ assert faq('text/html;q=0.7', [
+ ('*', '*', {'q': '0.5'}), ('audio', '*', {'q': '0'})]) == (0, 0.5)
+ assert faq('audio/mp3;q=0.7', [
+ ('*', '*', {'q': '0'}), ('audio', '*', {'q': '0.5'})]) == (100, 0.5)
+ assert faq('*/mp3;q=0.7', [
+ ('foo', 'mp3', {'q': '0.5'}), ('audio', '*', {'q': '0'})]) == (10, 0.5)
+ assert faq('audio/mp3;q=0.7', [
+ ('audio', 'ogg', {'q': '0'}), ('*', 'mp3', {'q': '0.5'})]) == (10, 0.5)
+ assert faq('audio/mp3;q=0.7', [
+ ('*', 'ogg', {'q': '0'}), ('*', 'mp3', {'q': '0.5'})]) == (10, 0.5)
+ assert faq('text/html;q=0.7', [
+ ('text', 'plain', {'q': '0'}),
+ ('plain', 'html', {'q': '0'}),
+ ('text', 'html', {'q': '0.5'}),
+ ('html', 'text', {'q': '0'})]) == (110, 0.5)
+ assert faq('text/html;q=0.7;level=2', [
+ ('plain', 'html', {'q': '0', 'level': '2'}),
+ ('text', '*', {'q': '0.5', 'level': '3'}),
+ ('*', 'html', {'q': '0.5', 'level': '2'}),
+ ('image', 'gif', {'q': '0.5', 'level': '2'})]) == (100, 0.5)
+ assert faq('text/html;q=0.7;level=2', [
+ ('text', 'plain', {'q': '0'}), ('text', 'html', {'q': '0'}),
+ ('text', 'plain', {'q': '0', 'level': '2'}),
+ ('text', 'html', {'q': '0.5', 'level': '2'}),
+ ('*', '*', {'q': '0', 'level': '2'}),
+ ('text', 'html', {'q': '0', 'level': '3'})]) == (111, 0.5)
+ assert faq('text/html;q=0.7;level=2;opt=3', [
+ ('text', 'html', {'q': '0'}),
+ ('text', 'html', {'q': '0', 'level': '2'}),
+ ('text', 'html', {'q': '0', 'opt': '3'}),
+ ('*', '*', {'q': '0', 'level': '2', 'opt': '3'}),
+ ('text', 'html', {'q': '0', 'level': '3', 'opt': '3'}),
+ ('text', 'html', {'q': '0.5', 'level': '2', 'opt': '3'}),
+ ('*', '*', {'q': '0', 'level': '3', 'opt': '3'})]) == (112, 0.5)
+
+def test_quality_parsed():
+ qp = quality_parsed
+ assert qp('image/gif;q=0.7', [('image', 'jpg', {'q': '0.5'})]) == 0
+ assert qp('image/gif;q=0.7', [('image', '*', {'q': '0.5'})]) == 0.5
+ assert qp('audio/mp3;q=0.7;quality=100', [
+ ('*', '*', {'q': '0', 'quality': '100'}),
+ ('audio', '*', {'q': '0', 'quality': '100'}),
+ ('*', 'mp3', {'q': '0', 'quality': '100'}),
+ ('audio', 'mp3', {'q': '0', 'quality': '50'}),
+ ('audio', 'mp3', {'q': '0.5', 'quality': '100'}),
+ ('audio', 'mp3', {'q': '0.5'})]) == 0.5
+
+def test_quality():
+ assert quality('text/html',
+ 'text/*;q=0.3, text/html;q=0.7, text/html;level=1,'
+ ' text/html;level=2;q=0.4, */*;q=0.5') == 0.7
+ assert quality('text/html;level=2',
+ 'text/*;q=0.3, text/html;q=0.7, text/html;level=1,'
+ ' text/html;level=2;q=0.4, */*;q=0.5') == 0.4
+ assert quality('text/plain',
+ 'text/*;q=0.3, text/html;q=0.7, text/html;level=1,'
+ ' text/html;level=2;q=0.4, */*;q=0.5') == 0.3
+ assert quality('plain/text',
+ 'text/*;q=0.3, text/html;q=0.7, text/html;level=1,'
+ ' text/html;level=2;q=0.4, */*;q=0.5') == 0.5
+
+def test_best_match():
+ bm = best_match
+ assert bm([], '*/*') == ''
+ assert bm(['application/xbel+xml', 'text/xml'],
+ 'text/*;q=0.5,*/*; q=0.1') == 'text/xml'
+ assert bm(['application/xbel+xml', 'audio/mp3'],
+ 'text/*;q=0.5,*/*; q=0.1') == 'application/xbel+xml'
+ assert bm(['application/xbel+xml', 'audio/mp3'],
+ 'text/*;q=0.5,*/mp3; q=0.1') == 'audio/mp3'
+ assert bm(['application/xbel+xml', 'text/plain', 'text/html'],
+ 'text/*;q=0.5,*/plain; q=0.1') == 'text/plain'
+ assert bm(['application/xbel+xml', 'text/html', 'text/xhtml'],
+ 'text/*;q=0.1,*/xhtml; q=0.5') == 'text/html'
+ assert bm(['application/xbel+xml', 'text/html', 'text/xhtml'],
+ '*/html;q=0.1,*/xhtml; q=0.5') == 'text/xhtml'
+
+def test_illformed_best_match():
+ bm = best_match
+ assert bm(['image/png', 'image/jpeg', 'image/gif', 'text/html'],
+ 'text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2') == 'image/jpeg'
+ assert bm(['image/png', 'image/jpg', 'image/tif', 'text/html'],
+ 'text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2') == 'text/html'
+ assert bm(['image/png', 'image/jpg', 'image/tif', 'audio/mp3'],
+ 'text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2') == 'image/png'
+
+def test_sorted_match():
+ dm = desired_matches
+ assert dm(['text/html', 'application/xml'],
+ 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,'
+ 'text/plain;q=0.8,image/png') == ['text/html', 'application/xml']
+ assert dm(['text/html', 'application/xml'],
+ 'application/xml,application/json') == ['application/xml']
+ assert dm(['text/xhtml', 'text/plain', 'application/xhtml'],
+ 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,'
+ 'text/plain;q=0.8,image/png') == ['text/plain']
|