diff options
author | Jan Rieger <jrx@centrum.cz> | 2020-04-15 19:47:41 +0200 |
---|---|---|
committer | Sviatoslav Sydorenko <wk@sydorenko.org.ua> | 2020-04-17 17:56:24 +0200 |
commit | f1cef2d67f49a76131cf7b9c6cbb0fcdebb36c7c (patch) | |
tree | 9b9c63ebf4d6b800d354451c67e3e844b240ff77 | |
parent | e55f190fe86fe8fc42275de88578e87bb27f5a91 (diff) | |
download | cherrypy-git-f1cef2d67f49a76131cf7b9c6cbb0fcdebb36c7c.tar.gz |
Fix serving files with diacritics in name under their original name
This adds an extra ``filename*`` parameter to the Content-Disposition
header when serving content with non-ASCII filenames, as recommended
in Appendix D of RFC 6266.
Refs:
* https://stackoverflow.com/a/8996249/2173868
* https://tools.ietf.org/html/rfc6266#appendix-D
Fixes #1776
-rw-r--r-- | cherrypy/lib/static.py | 34 | ||||
-rw-r--r-- | cherrypy/test/test_static.py | 29 |
2 files changed, 55 insertions, 8 deletions
diff --git a/cherrypy/lib/static.py b/cherrypy/lib/static.py index 9a3b8e83..fbc2e09d 100644 --- a/cherrypy/lib/static.py +++ b/cherrypy/lib/static.py @@ -6,6 +6,7 @@ import re import stat import mimetypes import urllib.parse +import unicodedata from email.generator import _make_boundary as make_boundary from io import UnsupportedOperation @@ -28,6 +29,21 @@ def _setup_mimetypes(): _setup_mimetypes() +def _utf8_content_disposition(disposition, file_name): + """Create HTTP header for downloading a file with UTF-8 filename. + See this and related answers: + https://stackoverflow.com/a/8996249/2173868 + """ + ascii_name = ( + unicodedata.normalize('NFKD', file_name). + encode('ascii', errors='ignore').decode() + ) + quoted_name = urllib.parse.quote(file_name) + header = u'{}; filename="{}"'.format(disposition, ascii_name) + header += u'; filename*=UTF-8\'\'{}'.format(quoted_name) + return header + + def serve_file(path, content_type=None, disposition=None, name=None, debug=False): """Set status, headers, and body in order to serve the given path. @@ -37,9 +53,10 @@ def serve_file(path, content_type=None, disposition=None, name=None, of the 'path' argument. If disposition is not None, the Content-Disposition header will be set - to "<disposition>; filename=<name>". If name is None, it will be set - to the basename of path. If disposition is None, no Content-Disposition - header will be written. + to "<disposition>; filename=<name>; filename*=utf-8''<name>" + as described in RFC6266 (https://tools.ietf.org/html/rfc6266#appendix-D). + If name is None, it will be set to the basename of path. + If disposition is None, no Content-Disposition header will be written. 
""" response = cherrypy.serving.response @@ -92,7 +109,7 @@ def serve_file(path, content_type=None, disposition=None, name=None, if disposition is not None: if name is None: name = os.path.basename(path) - cd = '%s; filename="%s"' % (disposition, name) + cd = _utf8_content_disposition(disposition, name) response.headers['Content-Disposition'] = cd if debug: cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') @@ -111,9 +128,10 @@ def serve_fileobj(fileobj, content_type=None, disposition=None, name=None, The Content-Type header will be set to the content_type arg, if provided. If disposition is not None, the Content-Disposition header will be set - to "<disposition>; filename=<name>". If name is None, 'filename' will - not be set. If disposition is None, no Content-Disposition header will - be written. + to "<disposition>; filename=<name>; filename*=utf-8''<name>" + as described in RFC6266 (https://tools.ietf.org/html/rfc6266#appendix-D). + If name is None, 'filename' will not be set. + If disposition is None, no Content-Disposition header will be written. CAUTION: If the request contains a 'Range' header, one or more seek()s will be performed on the file object. 
This may cause undesired behavior if @@ -149,7 +167,7 @@ def serve_fileobj(fileobj, content_type=None, disposition=None, name=None, if name is None: cd = disposition else: - cd = '%s; filename="%s"' % (disposition, name) + cd = _utf8_content_disposition(disposition, name) response.headers['Content-Disposition'] = cd if debug: cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') diff --git a/cherrypy/test/test_static.py b/cherrypy/test/test_static.py index 3de658af..42c2fdee 100644 --- a/cherrypy/test/test_static.py +++ b/cherrypy/test/test_static.py @@ -97,6 +97,22 @@ class StaticTest(helper.CPWebCase): f = io.BytesIO(b'Fee\nfie\nfo\nfum') return static.serve_fileobj(f, content_type='text/plain') + @cherrypy.expose + def serve_file_utf8_filename(self): + file_path = os.path.join(curdir, 'style.css') + return static.serve_file( + file_path, + disposition='attachment', + name='has_utf-8_character_☃.html') + + @cherrypy.expose + def serve_fileobj_utf8_filename(self): + f = open(os.path.join(curdir, 'style.css'), 'rb') + return static.serve_fileobj( + f, + disposition='attachment', + name='has_utf-8_character_☃.html') + class Static: @cherrypy.expose @@ -193,6 +209,19 @@ class StaticTest(helper.CPWebCase): # we just check the content self.assertMatchesBody('^Dummy stylesheet') + # Check a filename with utf-8 characters in it + ascii_fn = 'has_utf-8_character_.html' + url_quote_fn = 'has_utf-8_character_%E2%98%83.html' + cd = '''attachment; filename="%s"; filename*=UTF-8\'\'%s''' + + self.getPage('/serve_file_utf8_filename') + self.assertStatus('200 OK') + self.assertHeader('Content-Disposition', cd % (ascii_fn, url_quote_fn)) + + self.getPage('/serve_fileobj_utf8_filename') + self.assertStatus('200 OK') + self.assertHeader('Content-Disposition', cd % (ascii_fn, url_quote_fn)) + @pytest.mark.skipif(platform.system() != 'Windows', reason='Windows only') def test_static_longpath(self): """Test serving of a file in subdir of a Windows long-path |