From 936852fc80e09622a7461d66bd6a46547dbb3eb5 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Wed, 8 Feb 2017 22:38:05 +0100 Subject: Fix POST form submission for Python 3 In Python 3, urlencode() returns a Unicode string; but urlopen() wants only bytes in data, so it needs to be encoded first. Fixes: >>> form = lxml.html.fromstring('
', base_url='http://localhost') >>> lxml.html.submit_form(form) Traceback (most recent call last): File "", line 1, in File ".../lxml/html/__init__.py", line 1119, in submit_form return open_http(form.method, url, values) File ".../lxml/html/__init__.py", line 1140, in open_http_urllib return urlopen(url, data) File ".../urllib/request.py", line 163, in urlopen return opener.open(url, data, timeout) File ".../urllib/request.py", line 464, in open req = meth(req) File ".../urllib/request.py", line 1183, in do_request_ raise TypeError(msg) TypeError: POST data should be bytes or an iterable of bytes. It cannot be of type str. --- src/lxml/html/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py index 525f9dc2..b0ab2c1e 100644 --- a/src/lxml/html/__init__.py +++ b/src/lxml/html/__init__.py @@ -1137,6 +1137,8 @@ def open_http_urllib(method, url, values): data = None else: data = urlencode(values) + if not isinstance(data, bytes): + data = data.encode('ASCII') return urlopen(url, data) -- cgit v1.2.1