summaryrefslogtreecommitdiff
path: root/paste/urlparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'paste/urlparser.py')
-rw-r--r--paste/urlparser.py639
1 files changed, 639 insertions, 0 deletions
diff --git a/paste/urlparser.py b/paste/urlparser.py
new file mode 100644
index 0000000..19bcbac
--- /dev/null
+++ b/paste/urlparser.py
@@ -0,0 +1,639 @@
+# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
+# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
+"""
+WSGI applications that parse the URL and dispatch to on-disk resources
+"""
+
+import os
+import six
+import sys
+import imp
+import mimetypes
+try:
+ import pkg_resources
+except ImportError:
+ pkg_resources = None
+from paste import request
+from paste import fileapp
+from paste.util import import_string
+from paste import httpexceptions
+from .httpheaders import ETAG
+from paste.util import converters
+
class NoDefault(object):
    """
    Sentinel meaning "no value was supplied".

    The class object itself is used as the marker (compared with
    ``is``), so that ``None`` stays available as a real value.
    """
+
+__all__ = ['URLParser', 'StaticURLParser', 'PkgResourcesParser']
+
class URLParser(object):

    """
    WSGI middleware

    Application dispatching, based on URL.  An instance of `URLParser` is
    an application that loads and delegates to other applications.  It
    looks for files in its directory that match the first part of
    PATH_INFO; these may have an extension, but are not required to have
    one, in which case the available files are searched to find the
    appropriate file.  If it is ambiguous, a 404 is returned and an error
    logged.

    By default there is a constructor for .py files that loads the module,
    and looks for an attribute ``application``, which is a ready
    application object, or an attribute that matches the module name,
    which is a factory for building applications, and is called with no
    arguments.

    URLParser will also look in __init__.py for special overrides.
    These overrides are:

    ``urlparser_hook(environ)``
        This can modify the environment.  Its return value is ignored,
        and it cannot be used to change the response in any way.  You
        *can* use this, for example, to manipulate SCRIPT_NAME/PATH_INFO
        (try to keep them consistent with the original URL -- but
        consuming PATH_INFO and moving that to SCRIPT_NAME is ok).

    ``urlparser_wrap(environ, start_response, app)``:
        After URLParser finds the application, it calls this function
        (if present).  If this function doesn't call
        ``app(environ, start_response)`` then the application won't be
        called at all!  This can be used to allocate resources (with
        ``try:finally:``) or otherwise filter the output of the
        application.

    ``not_found_hook(environ, start_response)``:
        If no file can be found (*in this directory*) to match the
        request, then this WSGI application will be called.  You can
        use this to change the URL and pass the request back to
        URLParser again, or on to some other application.  This
        doesn't catch all ``404 Not Found`` responses, just missing
        files.

    ``application(environ, start_response)``:
        This basically overrides URLParser completely, and the given
        application is used for all requests.  ``urlparser_wrap`` and
        ``urlparser_hook`` are still called, but the filesystem isn't
        searched in any way.
    """

    # Parsers already built by get_parser(), keyed by
    # (directory, base_python_name).  Defined on the class, so the
    # cache is shared by every instance.
    parsers_by_directory = {}

    # This is lazily initialized
    init_module = NoDefault

    # Constructors added through register_constructor(); each instance
    # starts from a copy of this mapping.
    global_constructors = {}

    def __init__(self, global_conf,
                 directory, base_python_name,
                 index_names=NoDefault,
                 hide_extensions=NoDefault,
                 ignore_extensions=NoDefault,
                 constructors=None,
                 **constructor_conf):
        """
        Create a URLParser object that looks at `directory`.
        `base_python_name` is the package that this directory
        represents, thus any Python modules in this directory will
        be given names under this package.

        Extra keyword arguments must be named ``constructor .ext``;
        a string value is resolved with ``import_string.eval_import``.
        Any other extra keyword raises ``ValueError``.
        """
        if global_conf:
            import warnings
            warnings.warn(
                'The global_conf argument to URLParser is deprecated; '
                'either pass in None or {}, or use make_url_parser',
                DeprecationWarning)
        else:
            global_conf = {}
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        self.base_python_name = base_python_name
        # This logic here should be deprecated since it is in
        # make_url_parser
        if index_names is NoDefault:
            index_names = global_conf.get(
                'index_names', ('index', 'Index', 'main', 'Main'))
        self.index_names = converters.aslist(index_names)
        if hide_extensions is NoDefault:
            hide_extensions = global_conf.get(
                'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
        self.hide_extensions = converters.aslist(hide_extensions)
        if ignore_extensions is NoDefault:
            ignore_extensions = global_conf.get(
                'ignore_extensions', ())
        self.ignore_extensions = converters.aslist(ignore_extensions)
        self.constructors = self.global_constructors.copy()
        if constructors:
            self.constructors.update(constructors)
        # @@: Should we also check the global options for constructors?
        prefix = 'constructor '
        for name, value in constructor_conf.items():
            if not name.startswith(prefix):
                raise ValueError(
                    "Only extra configuration keys allowed are "
                    "'constructor .ext = import_expr'; you gave %r "
                    "(=%r)" % (name, value))
            ext = name[len(prefix):].strip()
            # six.string_types instead of (str, unicode): the bare name
            # ``unicode`` does not exist on Python 3.
            if isinstance(value, six.string_types):
                value = import_string.eval_import(value)
            self.constructors[ext] = value

    def _init_hook(self, name):
        """
        Return attribute `name` of this directory's ``__init__``
        module, or a false value when there is no module or no such
        attribute.
        """
        if not self.init_module:
            return None
        return getattr(self.init_module, name, None)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: locate the application for the next
        PATH_INFO segment and delegate to it, honouring the optional
        hooks found in ``__init__.py``.
        """
        environ['paste.urlparser.base_python_name'] = self.base_python_name
        if self.init_module is NoDefault:
            self.init_module = self.find_init_module(environ)
        if not environ.get('PATH_INFO', ''):
            return self.add_slash(environ, start_response)
        urlparser_hook = self._init_hook('urlparser_hook')
        if urlparser_hook:
            urlparser_hook(environ)
        # Remembered after the hook has run, because find_application()
        # rewrites both keys and not_found_hook must see these values.
        orig_path_info = environ['PATH_INFO']
        orig_script_name = environ['SCRIPT_NAME']
        application, filename = self.find_application(environ)
        if not application:
            not_found_hook = self._init_hook('not_found_hook')
            # The marker in environ keeps this parser from calling its
            # own not_found_hook twice for one request.
            if (not_found_hook
                    and environ.get('paste.urlparser.not_found_parser')
                    is not self):
                environ['paste.urlparser.not_found_parser'] = self
                environ['PATH_INFO'] = orig_path_info
                environ['SCRIPT_NAME'] = orig_script_name
                return not_found_hook(environ, start_response)
            if filename is None:
                name, rest_of_path = request.path_info_split(
                    environ['PATH_INFO'])
                if not name:
                    name = 'one of %s' % ', '.join(
                        self.index_names or
                        ['(no index_names defined)'])
                tried = name
            else:
                environ['wsgi.errors'].write(
                    'Found resource %s, but could not construct application\n'
                    % filename)
                tried = filename
            return self.not_found(
                environ, start_response,
                'Tried to load %s from directory %s'
                % (tried, self.directory))
        urlparser_wrap = self._init_hook('urlparser_wrap')
        if urlparser_wrap:
            return urlparser_wrap(environ, start_response, application)
        return application(environ, start_response)

    def find_application(self, environ):
        """
        Return ``(application, filename)`` for the request.

        ``application`` is None when nothing could be found or built;
        ``filename`` is None when no file matched (or when the
        ``application`` override from ``__init__.py`` is used).
        Moves the consumed segment from PATH_INFO to SCRIPT_NAME.
        """
        init_application = self._init_hook('application')
        # The SCRIPT_NAME stored in environ marks that the override was
        # already returned for this exact location.
        if (init_application
                and environ.get('paste.urlparser.init_application')
                != environ['SCRIPT_NAME']):
            environ['paste.urlparser.init_application'] = environ['SCRIPT_NAME']
            return init_application, None
        name, rest_of_path = request.path_info_split(environ['PATH_INFO'])
        environ['PATH_INFO'] = rest_of_path
        if name is not None:
            environ['SCRIPT_NAME'] = environ.get('SCRIPT_NAME', '') + '/' + name
        if name:
            filename = self.find_file(environ, name)
        else:
            # No segment given: try each index name in order.
            filename = None
            for index_name in self.index_names:
                filename = self.find_file(environ, index_name)
                if filename:
                    break
            else:
                # None of the index files found
                filename = None
        if filename is None:
            return None, filename
        return self.get_application(environ, filename), filename

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with 404; `debug_message` goes into the comment."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment=debug_message)
        return exc.wsgi_application(environ, start_response)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def find_file(self, environ, base_filename):
        """
        Return the full path of the entry in ``self.directory`` that
        matches `base_filename`, or None.

        An entry matches when its whole name equals `base_filename`,
        or when its name without extension does and the extension is
        not in ``ignore_extensions``.  Entries whose extension is in
        ``hide_extensions``, or whose base name is empty, never match.
        If several entries match, the exact-name one wins; without an
        exact match the ambiguity is logged and None is returned.
        """
        possible = []
        exact_match = None
        for filename in os.listdir(self.directory):
            base, ext = os.path.splitext(filename)
            if ext in self.hide_extensions or not base:
                continue
            full_filename = os.path.join(self.directory, filename)
            if filename == base_filename:
                exact_match = full_filename
                possible.append(full_filename)
                continue
            if ext in self.ignore_extensions:
                continue
            if base == base_filename:
                possible.append(full_filename)
        if not possible:
            return None
        if len(possible) > 1:
            # If there is an exact match, this isn't 'ambiguous'
            # per se; it might mean foo.gif and foo.gif.back for
            # instance.  The match is recorded inside the loop rather
            # than read from the loop variable afterwards, which only
            # holds whatever entry listdir() returned last.
            if exact_match is not None:
                return exact_match
            environ['wsgi.errors'].write(
                'Ambiguous URL: %s; matches files %s\n'
                % (request.construct_url(environ),
                   ', '.join(possible)))
            return None
        return possible[0]

    def get_application(self, environ, filename):
        """
        Build the application for `filename` with the constructor
        registered for its type: ``'dir'`` for directories, otherwise
        the file extension, falling back to ``'*'``.  Returns None when
        no constructor applies or the constructor returns None.
        """
        if os.path.isdir(filename):
            t = 'dir'
        else:
            t = os.path.splitext(filename)[1]
        constructor = self.constructors.get(t, self.constructors.get('*'))
        if constructor is None:
            return None
        return constructor(self, environ, filename)

    @classmethod
    def register_constructor(cls, extension, constructor):
        """
        Register a function as a constructor.  Registered constructors
        apply to all instances of `URLParser`.

        The extension should have a leading ``.``, or the special
        extensions ``dir`` (for directories) and ``*`` (a catch-all).

        `constructor` must be a callable that takes three arguments:
        the parser, ``environ`` and ``filename``, and returns a WSGI
        application.

        Raises ``AssertionError`` if the extension is already taken.
        """
        d = cls.global_constructors
        assert extension not in d, (
            "A constructor already exists for the extension %r (%r) "
            "when attemption to register constructor %r"
            % (extension, d[extension], constructor))
        d[extension] = constructor

    def get_parser(self, directory, base_python_name):
        """
        Get a parser for the given directory, or create one if
        necessary.  This way parsers can be cached and reused.

        # @@: settings are inherited from the first caller
        """
        key = (directory, base_python_name)
        try:
            return self.parsers_by_directory[key]
        except KeyError:
            parser = self.__class__(
                {},
                directory, base_python_name,
                index_names=self.index_names,
                hide_extensions=self.hide_extensions,
                ignore_extensions=self.ignore_extensions,
                constructors=self.constructors)
            self.parsers_by_directory[key] = parser
            return parser

    def find_init_module(self, environ):
        """
        Load and return this directory's ``__init__.py`` module, or
        None when the file does not exist.
        """
        filename = os.path.join(self.directory, '__init__.py')
        if not os.path.exists(filename):
            return None
        return load_module(environ, filename)

    def __repr__(self):
        return '<%s directory=%r; module=%s at %s>' % (
            self.__class__.__name__,
            self.directory,
            self.base_python_name,
            hex(abs(id(self))))
+
def make_directory(parser, environ, filename):
    """
    Constructor for directories: return the parser responsible for
    the sub-directory `filename`.

    The sub-parser's package name is the current
    ``paste.urlparser.base_python_name`` from `environ` with the
    directory's base name appended (or just the base name when the
    current package name is empty).
    """
    package = environ['paste.urlparser.base_python_name']
    leaf = os.path.basename(filename)
    if package:
        qualified = package + "." + leaf
    else:
        qualified = leaf
    return parser.get_parser(filename, qualified)
+
+URLParser.register_constructor('dir', make_directory)
+
def make_unknown(parser, environ, filename):
    """
    Constructor for files without a more specific constructor: wrap
    `filename` in a ``fileapp.FileApp``.  `parser` and `environ` are
    accepted for the constructor signature but not used.
    """
    file_application = fileapp.FileApp(filename)
    return file_application
+
+URLParser.register_constructor('*', make_unknown)
+
def load_module(environ, filename):
    """
    Load the Python file `filename` as a module.

    The module name is the file's base name without extension,
    prefixed with ``environ['paste.urlparser.base_python_name']`` when
    that is non-empty.  Problems are reported to
    ``environ['wsgi.errors']``.
    """
    package = environ['paste.urlparser.base_python_name']
    stem = os.path.splitext(os.path.basename(filename))[0]
    if package:
        qualified_name = package + '.' + stem
    else:
        qualified_name = stem
    error_stream = environ['wsgi.errors']
    return load_module_from_name(environ, filename, qualified_name,
                                 error_stream)
+
def load_module_from_name(environ, filename, module_name, errors):
    """
    Import the file `filename` under the dotted name `module_name`
    and return the module.

    A module already present in ``sys.modules`` is returned as is.
    If the directory containing `filename` has no ``__init__.py``,
    one is created; when that fails, a message is written to `errors`
    (a file-like object) and None is returned.  For a dotted
    `module_name` the parent package is loaded first, recursively.

    Exceptions raised while locating or executing the module (for
    example ``ImportError``) propagate to the caller.
    """
    if module_name in sys.modules:
        return sys.modules[module_name]
    directory = os.path.dirname(filename)
    init_filename = os.path.join(directory, '__init__.py')
    if not os.path.exists(init_filename):
        try:
            # ``with`` closes the handle even if the write fails, and a
            # failed write is reported the same way as a failed open.
            with open(init_filename, 'w') as f:
                f.write('#\n')
        except (OSError, IOError) as e:
            errors.write(
                'Cannot write __init__.py file into directory %s (%s)\n'
                % (directory, e))
            return None
    if '.' in module_name:
        parent_name, base_name = module_name.rsplit('.', 1)
        # Called for its side effect of importing the parent package;
        # the returned module is not needed here.
        load_module_from_name(environ, directory, parent_name, errors)
        # Importing the parent may have imported this module already;
        # reuse it instead of executing the file a second time.
        if module_name in sys.modules:
            return sys.modules[module_name]
    else:
        base_name = module_name
    # NOTE: ``imp`` is deprecated and no longer shipped with
    # Python 3.12+; this call needs an importlib port to run there.
    fp = None
    try:
        fp, pathname, stuff = imp.find_module(base_name, [directory])
        module = imp.load_module(module_name, fp, pathname, stuff)
    finally:
        if fp is not None:
            fp.close()
    return module
+
def make_py(parser, environ, filename):
    """
    Constructor for ``.py`` files: load the module and return the WSGI
    application it provides, or None.

    Looked up in order: a true ``application`` attribute (its
    ``wsgi_application`` attribute if it has one), then an attribute
    named like the module (its ``wsgi_application`` if present,
    otherwise the result of calling it with no arguments).  When
    neither exists a message is written to ``environ['wsgi.errors']``.
    """
    module = load_module(environ, filename)
    if not module:
        return None
    declared = getattr(module, 'application', None)
    if declared:
        return getattr(declared, 'wsgi_application', declared)
    base_name = module.__name__.split('.')[-1]
    if not hasattr(module, base_name):
        environ['wsgi.errors'].write(
            "Cound not find application or %s in %s\n"
            % (base_name, module))
        return None
    obj = getattr(module, base_name)
    if hasattr(obj, 'wsgi_application'):
        return obj.wsgi_application
    # @@: Old behavior; should probably be deprecated eventually:
    return obj()
+
+URLParser.register_constructor('.py', make_py)
+
class StaticURLParser(object):
    """
    Like ``URLParser`` but only serves static files.

    ``directory``:
        directory whose files are served
    ``root_directory``:
        directory that requests may never leave; defaults to
        ``directory``
    ``cache_max_age``:
        integer specifies Cache-Control max_age in seconds
    """
    # @@: Should URLParser subclass from this?

    def __init__(self, directory, root_directory=None,
                 cache_max_age=None):
        self.directory = self.normpath(directory)
        self.root_directory = self.normpath(root_directory or directory)
        self.cache_max_age = cache_max_age

    @staticmethod
    def normpath(path):
        """Return `path` made absolute and case-normalised."""
        return os.path.normcase(os.path.abspath(path))

    def _within_root(self, full):
        """
        Tell whether the normalised path `full` is ``root_directory``
        itself or lies below it.

        The comparison is anchored at a path separator: a plain
        ``startswith(root)`` would also accept a sibling such as
        ``/var/www-secret`` for the root ``/var/www``.
        """
        root = self.root_directory
        if full == root:
            return True
        if not root.endswith(os.sep):
            root += os.sep
        return full.startswith(root)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the file named by the next PATH_INFO
        segment, descend into a sub-directory, or answer 404.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        full = self.normpath(os.path.join(self.directory, filename))
        if not self._within_root(full):
            # Out of bounds
            return self.not_found(environ, start_response)
        if not os.path.exists(full):
            return self.not_found(environ, start_response)
        if os.path.isdir(full):
            # @@: Cache?
            child = self.__class__(full, root_directory=self.root_directory,
                                   cache_max_age=self.cache_max_age)
            return child(environ, start_response)
        if environ.get('PATH_INFO') and environ.get('PATH_INFO') != '/':
            return self.error_extra_path(environ, start_response)
        if_none_match = environ.get('HTTP_IF_NONE_MATCH')
        if if_none_match:
            mytime = os.stat(full).st_mtime
            if str(mytime) == if_none_match:
                headers = []
                ## FIXME: probably should be
                ## ETAG.update(headers, '"%s"' % mytime)
                ETAG.update(headers, mytime)
                start_response('304 Not Modified', headers)
                # WSGI response bodies are byte strings; a text ''
                # is not a valid body item on Python 3.
                return [b'']  # empty body

        fa = self.make_app(full)
        if self.cache_max_age:
            fa.cache_control(max_age=self.cache_max_age)
        return fa(environ, start_response)

    def make_app(self, filename):
        """Return the application that serves `filename`."""
        return fileapp.FileApp(filename)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with 404, recording the lookup state in the comment."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def error_extra_path(self, environ, start_response):
        """Respond with 404 because PATH_INFO continues past a file."""
        exc = httpexceptions.HTTPNotFound(
            'The trailing path %r is not allowed' % environ['PATH_INFO'])
        return exc.wsgi_application(environ, start_response)

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.directory)
+
def make_static(global_conf, document_root, cache_max_age=None):
    """
    Return a WSGI application that serves a directory (configured
    with document_root)

    cache_max_age - integer specifies CACHE_CONTROL max_age in seconds;
    a non-None value is converted with ``int()``.
    """
    max_age = None if cache_max_age is None else int(cache_max_age)
    return StaticURLParser(document_root, cache_max_age=max_age)
+
class PkgResourcesParser(StaticURLParser):
    """
    Static file parser that reads resources from a distribution
    through ``pkg_resources`` instead of from a directory.

    ``egg_or_spec``:
        a distribution object, or a string passed to
        ``pkg_resources.get_distribution``
    ``resource_name``:
        resource path inside the distribution that this parser serves
    ``manager``:
        resource manager; a new ``pkg_resources.ResourceManager`` is
        created when omitted
    ``root_resource``:
        resource path that requests may never leave; defaults to
        ``resource_name``
    """

    def __init__(self, egg_or_spec, resource_name, manager=None, root_resource=None):
        if pkg_resources is None:
            raise NotImplementedError("This class requires pkg_resources.")
        if isinstance(egg_or_spec, (six.binary_type, six.text_type)):
            self.egg = pkg_resources.get_distribution(egg_or_spec)
        else:
            self.egg = egg_or_spec
        self.resource_name = resource_name
        if manager is None:
            manager = pkg_resources.ResourceManager()
        self.manager = manager
        if root_resource is None:
            root_resource = resource_name
        # Normalised exactly like the requested resource in __call__
        # (normpath + normcase) so the containment test compares like
        # with like.
        self.root_resource = os.path.normcase(os.path.normpath(root_resource))

    def __repr__(self):
        return '<%s for %s:%r>' % (
            self.__class__.__name__,
            self.egg.project_name,
            self.resource_name)

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the resource named by the next
        PATH_INFO segment, descend into a resource directory, or
        answer 404 / 403.
        """
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            return self.add_slash(environ, start_response)
        if path_info == '/':
            # @@: This should obviously be configurable
            filename = 'index.html'
        else:
            filename = request.path_info_pop(environ)
        resource = os.path.normcase(os.path.normpath(
            self.resource_name + '/' + filename))
        root = self.root_resource
        if root is not None and resource != root:
            # Anchor the prefix at a separator so that a sibling such
            # as "static-private" is not accepted for root "static".
            # NOTE(review): with an empty resource_name the root
            # normalises to "." and no resource passes this test (the
            # unanchored test rejected them too) -- confirm whether an
            # empty base is meant to be supported.
            prefix = root if root.endswith(os.sep) else root + os.sep
            if not resource.startswith(prefix):
                # Out of bounds
                return self.not_found(environ, start_response)
        if not self.egg.has_resource(resource):
            return self.not_found(environ, start_response)
        if self.egg.resource_isdir(resource):
            # @@: Cache?
            child_root = (self.root_resource if self.root_resource
                          else self.resource_name)
            child = self.__class__(self.egg, resource, self.manager,
                                   root_resource=child_root)
            return child(environ, start_response)
        if environ.get('PATH_INFO') and environ.get('PATH_INFO') != '/':
            return self.error_extra_path(environ, start_response)

        content_type, encoding = mimetypes.guess_type(resource)
        if not content_type:
            content_type = 'application/octet-stream'
        # @@: I don't know what to do with the encoding.
        try:
            stream = self.egg.get_resource_stream(self.manager, resource)
        except (IOError, OSError) as e:
            exc = httpexceptions.HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(environ, start_response)
        start_response('200 OK',
                       [('content-type', content_type)])
        return fileapp._FileIter(stream)

    def not_found(self, environ, start_response, debug_message=None):
        """Respond with 404, recording the lookup state in the comment."""
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in egg:%s#%r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.egg, self.resource_name, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)
+
def make_pkg_resources(global_conf, egg, resource_name=''):
    """
    A static file parser that loads data from an egg using
    ``pkg_resources``.  Takes a configuration value ``egg``, which is
    an egg spec, and a base ``resource_name`` (default empty string)
    which is the path in the egg that this starts at.

    Raises ``NotImplementedError`` when ``pkg_resources`` could not
    be imported.
    """
    if pkg_resources is not None:
        return PkgResourcesParser(egg, resource_name)
    raise NotImplementedError("This function requires pkg_resources.")
+
def make_url_parser(global_conf, directory, base_python_name,
                    index_names=None, hide_extensions=None,
                    ignore_extensions=None,
                    **constructor_conf):
    """
    Create a URLParser application that looks in ``directory``, which
    should be the directory for the Python package named in
    ``base_python_name``.  ``index_names`` are used when viewing the
    directory (like ``'index'`` for ``'index.html'``).
    ``hide_extensions`` are extensions that are not viewable (like
    ``'.pyc'``) and ``ignore_extensions`` are viewable but only if an
    explicit extension is given.

    Each of the three settings falls back to the key of the same name
    in ``global_conf`` and then to a built-in default.  Extra keyword
    arguments are handed to ``URLParser`` unchanged.
    """
    if index_names is None:
        index_names = global_conf.get(
            'index_names', ('index', 'Index', 'main', 'Main'))
    index_names = converters.aslist(index_names)

    if hide_extensions is None:
        # Every entry carries its leading dot: URLParser compares
        # these against os.path.splitext() extensions, so 'bak' or
        # 'py~' without the dot could never match.  The tuple is the
        # same default URLParser.__init__ uses.
        hide_extensions = global_conf.get(
            'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
    hide_extensions = converters.aslist(hide_extensions)

    if ignore_extensions is None:
        ignore_extensions = global_conf.get(
            'ignore_extensions', ())
    ignore_extensions = converters.aslist(ignore_extensions)
    # There's no real way to set constructors currently...

    return URLParser({}, directory, base_python_name,
                     index_names=index_names,
                     hide_extensions=hide_extensions,
                     ignore_extensions=ignore_extensions,
                     **constructor_conf)