From c3320d5a262c7698d00e6fcf3b89ab384287d626 Mon Sep 17 00:00:00 2001
From: Steve Vinoski <vinoski@ieee.org>
Date: Tue, 23 Sep 2008 09:57:35 -0700
Subject: * Add Erlang version of mimeparse. * Fix problem in best_match where
 a wildcard match was being preferred to an exact match. * Handle single star
 "*" mime_type that Java's URLConnection class emits by turning it into "*/*".
 * Added unit tests for the changes.

---
 mimeparse.py | 65 +++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 23 deletions(-)

(limited to 'mimeparse.py')

diff --git a/mimeparse.py b/mimeparse.py
index 701ef31..f9b6bb4 100644
--- a/mimeparse.py
+++ b/mimeparse.py
@@ -2,25 +2,25 @@
 
 This module provides basic functions for handling mime-types. It can handle
 matching mime-types against a list of media-ranges. See section 14.1 of 
-the HTTP specification [RFC 2616] for a complete explaination.
+the HTTP specification [RFC 2616] for a complete explanation.
 
    http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
 
 Contents:
-    - parse_mime_type():   Parses a mime-type into it's component parts.
+    - parse_mime_type():   Parses a mime-type into its component parts.
     - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q' quality parameter.
     - quality():           Determines the quality ('q') of a mime-type when compared against a list of media-ranges.
     - quality_parsed():    Just like quality() except the second parameter must be pre-parsed.
     - best_match():        Choose the mime-type with the highest quality ('q') from a list of candidates. 
 """
 
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 __author__ = 'Joe Gregorio'
 __email__ = "joe@bitworking.org"
 __credits__ = ""
 
 def parse_mime_type(mime_type):
-    """Carves up a mime_type and returns a tuple of the
+    """Carves up a mime-type and returns a tuple of the
        (type, subtype, params) where 'params' is a dictionary
        of all the parameters for the media range.
        For example, the media range 'application/xhtml;q=0.5' would
@@ -31,7 +31,11 @@ def parse_mime_type(mime_type):
     parts = mime_type.split(";")
     params = dict([tuple([s.strip() for s in param.split("=")])\
             for param in parts[1:] ])
-    (type, subtype) = parts[0].split("/")
+    full_type = parts[0].strip()
+    # Java's URLConnection class sends an Accept header that includes a single "*".
+    # Turn it into a legal wildcard.
+    if full_type == '*': full_type = '*/*'
+    (type, subtype) = full_type.split("/")
     return (type.strip(), subtype.strip(), params)
 
 def parse_media_range(range):
@@ -54,25 +58,24 @@ def parse_media_range(range):
         params['q'] = '1'
     return (type, subtype, params)
 
-def quality_parsed(mime_type, parsed_ranges):
-    """Find the best match for a given mime_type against 
+def fitness_and_quality_parsed(mime_type, parsed_ranges):
+    """Find the best match for a given mime-type against 
        a list of media_ranges that have already been 
-       parsed by parse_media_range(). Returns the 
-       'q' quality parameter of the best match, 0 if no
-       match was found. This function bahaves the same as quality()
-       except that 'parsed_ranges' must be a list of
-       parsed media ranges. """
+       parsed by parse_media_range(). Returns a tuple of
+       the fitness value and the value of the 'q' quality
+       parameter of the best match, or (-1, 0) if no match
+       was found. Just as for quality_parsed(), 'parsed_ranges'
+       must be a list of parsed media ranges. """
     best_fitness = -1 
-    best_match = ""
     best_fit_q = 0
     (target_type, target_subtype, target_params) =\
             parse_media_range(mime_type)
     for (type, subtype, params) in parsed_ranges:
-        param_matches = reduce(lambda x, y: x+y, [1 for (key, value) in \
-                target_params.iteritems() if key != 'q' and \
-                params.has_key(key) and value == params[key]], 0)
         if (type == target_type or type == '*' or target_type == '*') and \
                 (subtype == target_subtype or subtype == '*' or target_subtype == '*'):
+            param_matches = reduce(lambda x, y: x+y, [1 for (key, value) in \
+                    target_params.iteritems() if key != 'q' and \
+                    params.has_key(key) and value == params[key]], 0)
             fitness = (type == target_type) and 100 or 0
             fitness += (subtype == target_subtype) and 10 or 0
             fitness += param_matches
@@ -80,10 +83,20 @@ def quality_parsed(mime_type, parsed_ranges):
                 best_fitness = fitness
                 best_fit_q = params['q']
             
-    return float(best_fit_q)
-    
+    return best_fitness, float(best_fit_q)
+
+def quality_parsed(mime_type, parsed_ranges):
+    """Find the best match for a given mime-type against
+    a list of media_ranges that have already been
+    parsed by parse_media_range(). Returns the
+    'q' quality parameter of the best match, 0 if no
+    match was found. This function behaves the same as quality()
+    except that 'parsed_ranges' must be a list of
+    parsed media ranges. """
+    return fitness_and_quality_parsed(mime_type, parsed_ranges)[1]
+
 def quality(mime_type, ranges):
-    """Returns the quality 'q' of a mime_type when compared
+    """Returns the quality 'q' of a mime-type when compared
     against the media-ranges in ranges. For example:
 
     >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
@@ -104,10 +117,10 @@ def best_match(supported, header):
     'text/xml'
     """
     parsed_header = [parse_media_range(r) for r in header.split(",")]
-    weighted_matches = [(quality_parsed(mime_type, parsed_header), mime_type)\
+    weighted_matches = [(fitness_and_quality_parsed(mime_type, parsed_header), mime_type)\
             for mime_type in supported]
     weighted_matches.sort()
-    return weighted_matches[-1][0] and weighted_matches[-1][1] or ''
+    return weighted_matches[-1][0][1] and weighted_matches[-1][1] or ''
 
 if __name__ == "__main__":
     import unittest
@@ -121,6 +134,8 @@ if __name__ == "__main__":
             self.assertEqual(('application', 'xml', {'q': '1'}), parse_media_range('application/xml ; q='))
             self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=1;b=other'))
             self.assertEqual(('application', 'xml', {'q': '1', 'b': 'other'}), parse_media_range('application/xml ; q=2;b=other'))
+            # Java URLConnection class sends an Accept header that includes a single *
+            self.assertEqual(('*', '*', {'q': '.2'}), parse_media_range(" *; q=.2"))
 
         def test_rfc_2616_example(self):
             accept = "text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"
@@ -150,6 +165,12 @@ if __name__ == "__main__":
             # fail to match anything
             self.assertEqual(best_match(mime_types_supported, 'text/html,application/atom+xml; q=0.9'), '')
 
+            # common AJAX scenario
+            mime_types_supported = ['application/json', 'text/html']
+            self.assertEqual(best_match(mime_types_supported, 'application/json, text/javascript, */*'), 'application/json')
+            # verify fitness ordering
+            self.assertEqual(best_match(mime_types_supported, 'application/json, text/html;q=0.9'), 'application/json')
+
         def test_support_wildcards(self):
             mime_types_supported = ['image/*', 'application/xml']
             # match using a type wildcard
@@ -158,5 +179,3 @@ if __name__ == "__main__":
             self.assertEqual(best_match(mime_types_supported, 'image/*'), 'image/*')
 
     unittest.main() 
-
-
-- 
cgit v1.2.1