summary refs log tree commit diff
diff options
context:
space:
mode:
author: Jan Rieger <jrx@centrum.cz> 2020-04-15 19:47:41 +0200
committer: Sviatoslav Sydorenko <wk@sydorenko.org.ua> 2020-04-17 17:56:24 +0200
commit: f1cef2d67f49a76131cf7b9c6cbb0fcdebb36c7c (patch)
tree: 9b9c63ebf4d6b800d354451c67e3e844b240ff77
parent: e55f190fe86fe8fc42275de88578e87bb27f5a91 (diff)
download: cherrypy-git-f1cef2d67f49a76131cf7b9c6cbb0fcdebb36c7c.tar.gz
Fix serving files with diacritics in name under their original name
This adds an extra ``filename*`` parameter to the Content-Disposition header when serving content with non-ASCII filenames, as recommended in Appendix D of RFC 6266. Refs: * https://stackoverflow.com/a/8996249/2173868 * https://tools.ietf.org/html/rfc6266#appendix-D Fixes #1776
-rw-r--r-- cherrypy/lib/static.py 34
-rw-r--r-- cherrypy/test/test_static.py 29
2 files changed, 55 insertions, 8 deletions
diff --git a/cherrypy/lib/static.py b/cherrypy/lib/static.py
index 9a3b8e83..fbc2e09d 100644
--- a/cherrypy/lib/static.py
+++ b/cherrypy/lib/static.py
@@ -6,6 +6,7 @@ import re
import stat
import mimetypes
import urllib.parse
+import unicodedata
from email.generator import _make_boundary as make_boundary
from io import UnsupportedOperation
@@ -28,6 +29,21 @@ def _setup_mimetypes():
_setup_mimetypes()
+def _utf8_content_disposition(disposition, file_name):
+ """Create HTTP header for downloading a file with UTF-8 filename.
+ See this and related answers:
+ https://stackoverflow.com/a/8996249/2173868
+ """
+ ascii_name = (
+ unicodedata.normalize('NFKD', file_name).
+ encode('ascii', errors='ignore').decode()
+ )
+ quoted_name = urllib.parse.quote(file_name)
+ header = u'{}; filename="{}"'.format(disposition, ascii_name)
+ header += u'; filename*=UTF-8\'\'{}'.format(quoted_name)
+ return header
+
+
def serve_file(path, content_type=None, disposition=None, name=None,
debug=False):
"""Set status, headers, and body in order to serve the given path.
@@ -37,9 +53,10 @@ def serve_file(path, content_type=None, disposition=None, name=None,
of the 'path' argument.
If disposition is not None, the Content-Disposition header will be set
- to "<disposition>; filename=<name>". If name is None, it will be set
- to the basename of path. If disposition is None, no Content-Disposition
- header will be written.
+ to "<disposition>; filename=<name>; filename*=utf-8''<name>"
+ as described in RFC6266 (https://tools.ietf.org/html/rfc6266#appendix-D).
+ If name is None, it will be set to the basename of path.
+ If disposition is None, no Content-Disposition header will be written.
"""
response = cherrypy.serving.response
@@ -92,7 +109,7 @@ def serve_file(path, content_type=None, disposition=None, name=None,
if disposition is not None:
if name is None:
name = os.path.basename(path)
- cd = '%s; filename="%s"' % (disposition, name)
+ cd = _utf8_content_disposition(disposition, name)
response.headers['Content-Disposition'] = cd
if debug:
cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC')
@@ -111,9 +128,10 @@ def serve_fileobj(fileobj, content_type=None, disposition=None, name=None,
The Content-Type header will be set to the content_type arg, if provided.
If disposition is not None, the Content-Disposition header will be set
- to "<disposition>; filename=<name>". If name is None, 'filename' will
- not be set. If disposition is None, no Content-Disposition header will
- be written.
+ to "<disposition>; filename=<name>; filename*=utf-8''<name>"
+ as described in RFC6266 (https://tools.ietf.org/html/rfc6266#appendix-D).
+ If name is None, 'filename' will not be set.
+ If disposition is None, no Content-Disposition header will be written.
CAUTION: If the request contains a 'Range' header, one or more seek()s will
be performed on the file object. This may cause undesired behavior if
@@ -149,7 +167,7 @@ def serve_fileobj(fileobj, content_type=None, disposition=None, name=None,
if name is None:
cd = disposition
else:
- cd = '%s; filename="%s"' % (disposition, name)
+ cd = _utf8_content_disposition(disposition, name)
response.headers['Content-Disposition'] = cd
if debug:
cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC')
diff --git a/cherrypy/test/test_static.py b/cherrypy/test/test_static.py
index 3de658af..42c2fdee 100644
--- a/cherrypy/test/test_static.py
+++ b/cherrypy/test/test_static.py
@@ -97,6 +97,22 @@ class StaticTest(helper.CPWebCase):
f = io.BytesIO(b'Fee\nfie\nfo\nfum')
return static.serve_fileobj(f, content_type='text/plain')
+ @cherrypy.expose
+ def serve_file_utf8_filename(self):
+ file_path = os.path.join(curdir, 'style.css')
+ return static.serve_file(
+ file_path,
+ disposition='attachment',
+ name='has_utf-8_character_☃.html')
+
+ @cherrypy.expose
+ def serve_fileobj_utf8_filename(self):
+ f = open(os.path.join(curdir, 'style.css'), 'rb')
+ return static.serve_fileobj(
+ f,
+ disposition='attachment',
+ name='has_utf-8_character_☃.html')
+
class Static:
@cherrypy.expose
@@ -193,6 +209,19 @@ class StaticTest(helper.CPWebCase):
# we just check the content
self.assertMatchesBody('^Dummy stylesheet')
+ # Check a filename with utf-8 characters in it
+ ascii_fn = 'has_utf-8_character_.html'
+ url_quote_fn = 'has_utf-8_character_%E2%98%83.html'
+ cd = '''attachment; filename="%s"; filename*=UTF-8\'\'%s'''
+
+ self.getPage('/serve_file_utf8_filename')
+ self.assertStatus('200 OK')
+ self.assertHeader('Content-Disposition', cd % (ascii_fn, url_quote_fn))
+
+ self.getPage('/serve_fileobj_utf8_filename')
+ self.assertStatus('200 OK')
+ self.assertHeader('Content-Disposition', cd % (ascii_fn, url_quote_fn))
+
@pytest.mark.skipif(platform.system() != 'Windows', reason='Windows only')
def test_static_longpath(self):
"""Test serving of a file in subdir of a Windows long-path