diff options
Diffstat (limited to 'wsgikit/urlparser.py')
-rw-r--r-- | wsgikit/urlparser.py | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/wsgikit/urlparser.py b/wsgikit/urlparser.py new file mode 100644 index 0000000..8b07b5f --- /dev/null +++ b/wsgikit/urlparser.py @@ -0,0 +1,329 @@ +""" +WSGI middleware + +Application dispatching, based on URL. An instance of `URLParser` is +an application that loads and delegates to other applications. It +looks for files in its directory that match the first part of +PATH_INFO; these may have an extension, but are not required to have +one, in which case the available files are searched to find the +appropriate file. If it is ambiguous, a 404 is returned and an error +logged. + +Each URLParser has a set of options, which can be local to that +URLParser. Also, there are default options: + +``index_name``: + The name of the index file, sans extension. + +``hide_extensions``: + A list of extensions (with leading ``.``) that should not ever + be served. + +``ignore_extensions``: + Extensions that will be ignored when searching for a file. If + the extension is given explicitly, files with these extensions + will still be served. + +``constructors``: + A dictionary of extensions as keys, and application constructors + as values. Also the key ``dir`` for directories, and ``*`` when + no other constructor is found. + + Each constructor is called like ``constructor(environ, filename)`` + and should return an application or ``None``. + +By default there is a constructor for .py files that loads the module, +and looks for an attribute ``application``, which is a ready +application object, or an attribute that matches the module name, +which is a factory for building applications, and is called with no +arguments. + +URLParser will also look in __init__.py for special overrides. Currently +the only override is urlparser_hook(environ), which can modify the +environment; its return value is ignored. You can use this, for example, +to manipulate SCRIPT_NAME/PATH_INFO (try to keep them consistent with the +original URL -- but consuming PATH_INFO and moving that to SCRIPT_NAME +is ok). +""" + +import os +import sys +import imp +import wsgilib + +class NoDefault: + pass + +class URLParser(object): + + default_options = { + 'index_names': ['index', 'Index', 'main', 'Main'], + 'hide_extensions': ['.pyc', '.bak', '.py~'], + 'ignore_extensions': [], + 'constructors': {}, + } + + parsers_by_directory = {} + + # This is lazily initialized + init_module = NoDefault + + def __init__(self, directory, base_python_name, add_options=None): + if os.path.sep != '/': + directory = directory.replace(os.path.sep, '/') + self.directory = directory + self.add_options = add_options + self.base_python_name = base_python_name + + def __call__(self, environ, start_response): + environ['wsgikit.urlparser.base_python_name'] = self.base_python_name + if self.add_options: + if environ.has_key('wsgikit.urlparser.options'): + environ['wsgikit.urlparser.options'].update(self.add_options) + else: + environ['wsgikit.urlparser.options'] = self.add_options.copy() + if self.init_module is NoDefault: + self.init_module = self.find_init_module(environ) + path_info = environ.get('PATH_INFO', '') + if not path_info: + return self.add_slash(environ, start_response) + if (self.init_module + and getattr(self.init_module, 'urlparser_hook', None)): + self.init_module.urlparser_hook(environ) + name, rest_of_path = wsgilib.path_info_split(environ['PATH_INFO']) + orig_path_info = environ['PATH_INFO'] + orig_script_name = environ['SCRIPT_NAME'] + environ['PATH_INFO'] = rest_of_path + if name is not None: + environ['SCRIPT_NAME'] = environ.get('SCRIPT_NAME', '') + '/' + name + if not name: + names = self.option(environ, 'index_names') or [] + for index_name in names: + filename = self.find_file(environ, index_name) + if filename: + break + else: + # None of the index files found + filename = None + else: + filename = self.find_file(environ, name) + if filename is None: + application = None + else: + application = self.get_application(environ, filename) + if not application: + if (self.init_module + and getattr(self.init_module, 'not_found_hook', None) + and environ.get('wsgikit.urlparser.not_found_parser') is not self): + not_found_hook = self.init_module.not_found_hook + environ['wsgikit.urlparser.not_found_parser'] = self + environ['PATH_INFO'] = orig_path_info + environ['SCRIPT_NAME'] = orig_script_name + return not_found_hook(environ, start_response) + if filename is None: + if not name: + desc = 'one of %s' % ', '.join( + self.option(environ, 'index_names') or + ['(no index_names defined)']) + else: + desc = name + + return self.not_found( + environ, start_response, + 'Tried to load %s from directory %s' + % (desc, self.directory)) + else: + environ['wsgi.errors'].write( + 'Found resource %s, but could not construct application\n' + % filename) + return self.not_found( + environ, start_response, + 'Tried to load %s from directory %s' + % (filename, self.directory)) + return application(environ, start_response) + + def not_found(self, environ, start_response, debug_message=None): + status, headers, body = wsgilib.error_response( + environ, + '404 Not Found', + 'The resource at %s could not be found' + % wsgilib.construct_url(environ), + debug_message=debug_message) + start_response(status, headers) + return [body] + + def option(self, environ, name): + return environ.get('wsgikit.urlparser.options', {}).get( + name, self.default_options.get(name)) + + def add_slash(self, environ, start_response): + """ + This happens when you try to get to a directory + without a trailing / + """ + url = wsgilib.construct_url(environ, with_query_string=False) + url += '/' + if environ.get('QUERY_STRING'): + url += '?' + environ['QUERY_STRING'] + status = '301 Moved Permanently' + status, headers, body = wsgilib.error_response( + environ, + status, + ''' + <p>The resource has moved to <a href="%s">%s</a>. You should be redirected automatically.</p>''' % (url, url)) + start_response(status, headers + [('Location', url)]) + return [body] + + def find_file(self, environ, base_filename): + possible = [] + """Cache a few values to reduce function call overhead""" + ignore_extensions = self.option(environ, 'ignore_extensions') + hide_extensions = self.option(environ, 'hide_extensions') + for filename in os.listdir(self.directory): + base, ext = os.path.splitext(filename) + full_filename = os.path.join(self.directory, filename) + if (ext in hide_extensions + or not base): + continue + if filename == base_filename: + possible.append(full_filename) + continue + if ext in ignore_extensions: + continue + if base == base_filename: + possible.append(full_filename) + if not possible: + #environ['wsgi.errors'].write( + # 'No file found matching %r in %s\n' + # % (base_filename, self.directory)) + return None + if len(possible) > 1: + environ['wsgi.errors'].write( + 'Ambiguous URL: %s; matches files %s\n' + % (wsgilib.construct_url(environ), + ', '.join(possible))) + return None + return possible[0] + + def get_application(self, environ, filename): + constructors = self.option(environ, 'constructors') + if os.path.isdir(filename): + t = 'dir' + else: + t = os.path.splitext(filename)[1] + constructor = constructors.get(t, constructors.get('*')) + if constructor is None: + #environ['wsgi.errors'].write( + # 'No constructor found for %s\n' % t) + return constructor + app = constructor(environ, filename) + if app is None: + #environ['wsgi.errors'].write( + # 'Constructor %s return None for %s\n' % + # (constructor, filename)) + pass + return app + + def register_constructor(cls, extension, constructor): + d = cls.default_options['constructors'] + assert not d.has_key(extension), ( + "A constructor already exists for the extension %r (%r) " + "when attemption to register constructor %r" + % (extension, d[extension], constructor)) + d[extension] = constructor + register_constructor = classmethod(register_constructor) + + def get_parser(cls, directory, base_python_name): + try: + return cls.parsers_by_directory[(directory, base_python_name)] + except KeyError: + parser = cls(directory, base_python_name) + cls.parsers_by_directory[(directory, base_python_name)] = parser + return parser + get_parser = classmethod(get_parser) + + def find_init_module(self, environ): + filename = os.path.join(self.directory, '__init__.py') + if not os.path.exists(filename): + return None + return load_module(environ, filename) + + def __repr__(self): + return '<%s directory=%r; module=%s at %s>' % ( + self.__class__.__name__, + self.directory, + self.base_python_name, + hex(abs(id(self)))) + +def make_directory(environ, filename): + base_python_name = environ['wsgikit.urlparser.base_python_name'] + if base_python_name: + base_python_name += "." + os.path.basename(filename) + else: + base_python_name = os.path.basename(filename) + return URLParser.get_parser(filename, base_python_name) + +URLParser.register_constructor('dir', make_directory) + +def make_unknown(environ, filename): + return wsgilib.send_file(filename) + +URLParser.register_constructor('*', make_unknown) + +def load_module(environ, filename): + base_python_name = environ['wsgikit.urlparser.base_python_name'] + module_name = os.path.splitext(os.path.basename(filename))[0] + if base_python_name: + module_name = base_python_name + '.' + module_name + return load_module_from_name(environ, filename, module_name, + environ['wsgi.errors']) + +def load_module_from_name(environ, filename, module_name, errors): + if sys.modules.has_key(module_name): + return sys.modules[module_name] + init_filename = os.path.join(os.path.dirname(filename), '__init__.py') + if not os.path.exists(init_filename): + try: + f = open(init_filename, 'w') + except (OSError, IOError), e: + errors.write( + 'Cannot write __init__.py file into directory %s (%s)\n' + % (os.path.dirname(filename), e)) + return None + f.write('#\n') + f.close() + fp = None + if sys.modules.has_key(module_name): + return sys.modules[module_name] + if '.' in module_name: + parent_name = '.'.join(module_name.split('.')[:-1]) + base_name = module_name.split('.')[-1] + parent = load_module_from_name(environ, os.path.dirname(filename), + parent_name, errors) + else: + base_name = module_name + fp = None + try: + fp, pathname, stuff = imp.find_module( + base_name, [os.path.dirname(filename)]) + module = imp.load_module(module_name, fp, pathname, stuff) + finally: + if fp is not None: + fp.close() + return module + +def make_py(environ, filename): + module = load_module(environ, filename) + if not module: + return None + if hasattr(module, 'application') and module.application: + return module.application + base_name = module.__name__.split('.')[-1] + if hasattr(module, base_name): + return getattr(module, base_name)() + environ['wsgi.errors'].write( + "Cound not find application or %s in %s\n" + % (base_name, module)) + return None + +URLParser.register_constructor('.py', make_py) |