summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Guido van Rossum <guido@python.org> 2003-01-09 05:43:08 +0000
committer Guido van Rossum <guido@python.org> 2003-01-09 05:43:08 +0000
commit a194ef817f567813e15fa340f7555a5a2c250e7a (patch)
tree c117fc28b7515c4f121885c1947de21978e94349
parent f099da317939d9c96e318a0a18642b6864c49634 (diff)
download cpython-a194ef817f567813e15fa340f7555a5a2c250e7a.tar.gz
Roll back the introduction of urlsplit() and urlunsplit() to
urlparse.py. These were new features in 2.2, and shouldn't be added to 2.1 this late in the game. There was one use of urlsplit() in httplib.py (the urlparse.py change was done as part of a backport of a bugfix to httplib.py); this use is replaced with a call to urlparse() without changing the effect (only the returned netloc is used).
-rw-r--r-- Lib/httplib.py 4
-rw-r--r-- Lib/urlparse.py 84
2 files changed, 37 insertions, 51 deletions
diff --git a/Lib/httplib.py b/Lib/httplib.py
index 2b277e0799..021a76236a 100644
--- a/Lib/httplib.py
+++ b/Lib/httplib.py
@@ -69,7 +69,7 @@ Req-sent-unread-response _CS_REQ_SENT <response_class>
import errno
import mimetools
import socket
-from urlparse import urlsplit
+from urlparse import urlparse
try:
from cStringIO import StringIO
@@ -610,7 +610,7 @@ class HTTPConnection:
netloc = ''
if url.startswith('http'):
- nil, netloc, nil, nil, nil = urlsplit(url)
+ nil, netloc, nil, nil, nil, nil = urlparse(url)
if netloc:
self.putheader('Host', netloc)
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index ee99645d59..1df83d68d3 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -43,42 +43,19 @@ def clear_cache():
_parse_cache = {}
-def urlparse(url, scheme='', allow_fragments=1):
+def urlparse(url, scheme = '', allow_fragments = 1):
"""Parse a URL into 6 components:
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
Note that we don't break the components up in smaller bits
(e.g. netloc is a single string) and we don't expand % escapes."""
- tuple = urlsplit(url, scheme, allow_fragments)
- scheme, netloc, url, query, fragment = tuple
- if scheme in uses_params and ';' in url:
- url, params = _splitparams(url)
- else:
- params = ''
- return scheme, netloc, url, params, query, fragment
-
-def _splitparams(url):
- if '/' in url:
- i = url.find(';', url.rfind('/'))
- if i < 0:
- return url, ''
- else:
- i = url.find(';')
- return url[:i], url[i+1:]
-
-def urlsplit(url, scheme='', allow_fragments=1):
- """Parse a URL into 5 components:
- <scheme>://<netloc>/<path>?<query>#<fragment>
- Return a 5-tuple: (scheme, netloc, path, query, fragment).
- Note that we don't break the components up in smaller bits
- (e.g. netloc is a single string) and we don't expand % escapes."""
key = url, scheme, allow_fragments
cached = _parse_cache.get(key, None)
if cached:
return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
- netloc = query = fragment = ''
+ netloc = path = params = query = fragment = ''
i = url.find(':')
if i > 0:
if url[:i] == 'http': # optimize the common case
@@ -87,16 +64,23 @@ def urlsplit(url, scheme='', allow_fragments=1):
if url[:2] == '//':
i = url.find('/', 2)
if i < 0:
- i = url.find('#')
- if i < 0:
- i = len(url)
+ i = len(url)
netloc = url[2:i]
url = url[i:]
- if allow_fragments and '#' in url:
- url, fragment = url.split('#', 1)
- if '?' in url:
- url, query = url.split('?', 1)
- tuple = scheme, netloc, url, query, fragment
+ if allow_fragments:
+ i = url.rfind('#')
+ if i >= 0:
+ fragment = url[i+1:]
+ url = url[:i]
+ i = url.find('?')
+ if i >= 0:
+ query = url[i+1:]
+ url = url[:i]
+ i = url.find(';')
+ if i >= 0:
+ params = url[i+1:]
+ url = url[:i]
+ tuple = scheme, netloc, url, params, query, fragment
_parse_cache[key] = tuple
return tuple
for c in url[:i]:
@@ -110,11 +94,19 @@ def urlsplit(url, scheme='', allow_fragments=1):
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
- if allow_fragments and scheme in uses_fragment and '#' in url:
- url, fragment = url.split('#', 1)
- if scheme in uses_query and '?' in url:
- url, query = url.split('?', 1)
- tuple = scheme, netloc, url, query, fragment
+ if allow_fragments and scheme in uses_fragment:
+ i = url.rfind('#')
+ if i >= 0:
+ url, fragment = url[:i], url[i+1:]
+ if scheme in uses_query:
+ i = url.find('?')
+ if i >= 0:
+ url, query = url[:i], url[i+1:]
+ if scheme in uses_params:
+ i = url.find(';')
+ if i >= 0:
+ url, params = url[:i], url[i+1:]
+ tuple = scheme, netloc, url, params, query, fragment
_parse_cache[key] = tuple
return tuple
@@ -123,16 +115,13 @@ def urlunparse((scheme, netloc, url, params, query, fragment)):
slightly different, but equivalent URL, if the URL that was parsed
originally had redundant delimiters, e.g. a ? with an empty query
(the draft states that these are equivalent)."""
- if params:
- url = "%s;%s" % (url, params)
- return urlunsplit((scheme, netloc, url, query, fragment))
-
-def urlunsplit((scheme, netloc, url, query, fragment)):
if netloc or (scheme in uses_netloc and url[:2] == '//'):
if url and url[:1] != '/': url = '/' + url
url = '//' + (netloc or '') + url
if scheme:
url = scheme + ':' + url
+ if params:
+ url = url + ';' + params
if query:
url = url + '?' + query
if fragment:
@@ -198,12 +187,9 @@ def urldefrag(url):
the URL contained no fragments, the second element is the
empty string.
"""
- if '#' in url:
- s, n, p, a, q, frag = urlparse(url)
- defrag = urlunparse((s, n, p, a, q, ''))
- return defrag, frag
- else:
- return url, ''
+ s, n, p, a, q, frag = urlparse(url)
+ defrag = urlunparse((s, n, p, a, q, ''))
+ return defrag, frag
test_input = """