summaryrefslogtreecommitdiff
path: root/Lib/urllib
diff options
context:
space:
mode:
author Senthil Kumaran <senthil@uthcode.com> 2013-05-29 05:57:21 -0700
committer Senthil Kumaran <senthil@uthcode.com> 2013-05-29 05:57:21 -0700
commit 5ab81807b8d6f7e077a211ffa6a3e061fdb361ab (patch)
tree fcea77dc5f4fa6b9d0ced8c819572a548a149ed8 /Lib/urllib
parent 94d3481a5942af5da0bc3708c17d76329a219094 (diff)
parent c9c1dc401720cff3d98faceee70ad26e25aea598 (diff)
download cpython-5ab81807b8d6f7e077a211ffa6a3e061fdb361ab.tar.gz
merge from 3.3
#17403: urllib.robotparser normalizes URLs before adding them to a RuleLine. This helps handle certain types of invalid URLs in a conservative manner. Patch contributed by Mher Movsisyan.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- Lib/urllib/robotparser.py 1
1 files changed, 1 insertions, 0 deletions
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 75be4af409..978ba58d84 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -157,6 +157,7 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
+ path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
self.path = urllib.parse.quote(path)
self.allowance = allowance