From 21f56253814fa55110fcfc1d82b8a14cd179c2b5 Mon Sep 17 00:00:00 2001 From: Kevin Turner Date: Sat, 12 Jan 2008 00:51:12 +0000 Subject: [project @ contrib/openid-parse: debugging utility to grab OpenID messages from the clipboard and pretty-print them.] --- contrib/openid-parse | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 contrib/openid-parse (limited to 'contrib') diff --git a/contrib/openid-parse b/contrib/openid-parse new file mode 100644 index 0000000..1776ce0 --- /dev/null +++ b/contrib/openid-parse @@ -0,0 +1,140 @@ +#!/usr/bin/env python +"""Grab URLs from the clipboard, interpret the queries as OpenID, and print. + +In addition to URLs, I also scan for queries as they appear in httpd log files, +with a pattern like 'GET /foo?bar=baz HTTP'. + +Requires the 'xsel' program to get the contents of the clipboard. +""" + +from pprint import pformat +from urlparse import urlsplit, urlunsplit +import cgi, re, subprocess, sys + +from openid import message + +OPENID_SORT_ORDER = ['mode', 'identity', 'claimed_id'] + +class NoQuery(Exception): + def __init__(self, url): + self.url = url + + def __str__(self): + return "No query in url %s" % (self.url,) + + +def getClipboard(): + xsel = subprocess.Popen(["xsel", "-o", "-b"], stdout=subprocess.PIPE) + output = xsel.communicate()[0] + return output + + +def main(): + source = getClipboard() + urls = find_urls(source) + + errors = [] + output = [] + queries = [] + + for url in urls: + try: + queries.append(queryFromURL(url)) + except NoQuery, err: + errors.append(err) + + queries.extend(queriesFromLogs(source)) + + for where, query in queries: + output.append('at %s:\n%s' % (where, openidFromQuery(query))) + + if output: + print '\n\n'.join(output) + elif errors: + for err in errors: + print err + + +def queryFromURL(url): + split_url = urlsplit(url) + query = cgi.parse_qs(split_url[3]) + + if not query: + raise NoQuery(url) + + url_without_query = urlunsplit(split_url[:3] + (None, None)) + + return (url_without_query, query) + + +def openidFromQuery(query): + try: + msg = message.Message.fromPostArgs(unlistify(query)) + s = formatOpenIDMessage(msg) + except Exception, err: + # XXX - side effect. + sys.stderr.write(str(err)) + s = pformat(query) + + return s + + +def formatOpenIDMessage(msg): + value_lists = {} + for (ns_uri, ns_key), value in msg.args.items(): + l = value_lists.setdefault(ns_uri, {}) + l[ns_key] = value + + output = [] + + for ns_uri, values in value_lists.items(): + ns_output = [] + + alias = msg.namespaces.getAlias(ns_uri) + if alias is message.NULL_NAMESPACE: + alias = 'openid' + ns_output.append(" %s <%s>" % (alias, ns_uri)) + + for key in OPENID_SORT_ORDER: + try: + ns_output.append(" %s = %s" % (key, values.pop(key))) + except KeyError: + pass + + values = values.items() + values.sort() + + for k, v in values: + ns_output.append(" %s = %s" % (k, v)) + + output.append('\n'.join(ns_output)) + + return '\n\n'.join(output) + + +def unlistify(d): + return dict((i[0], i[1][0]) for i in d.items()) + + +def queriesFromLogs(s): + qre = re.compile(r'GET (/.*)?\?(.+) HTTP') + + return [(match.group(1), cgi.parse_qs(match.group(2))) + for match in qre.finditer(s)] + +def find_urls(s): + # Regular expression borrowed from urlscan + # by Daniel Burrows , GPL. + urlinternalpattern=r'[{}a-zA-Z/\-_0-9%?&.=:;+,#~]' + urltrailingpattern=r'[{}a-zA-Z/\-_0-9%&=+#]' + httpurlpattern = r'(?:https?://' + urlinternalpattern + r'*' + urltrailingpattern + r')' + # Used to guess that blah.blah.blah.TLD is a URL. + tlds=['biz', 'com', 'edu', 'info', 'org'] + guessedurlpattern=r'(?:[a-zA-Z0-9_\-%]+(?:\.[a-zA-Z0-9_\-%]+)*\.(?:' + '|'.join(tlds) + '))' + urlre = re.compile(r'(?:<(?:URL:)?)?(' + httpurlpattern + '|' + guessedurlpattern + '|(?:mailto:[a-zA-Z0-9\-_]*@[0-9a-zA-Z_\-.]*[0-9a-zA-Z_\-]))>?') + + return [match.group(1) for match in urlre.finditer(s)] + + +if __name__ == '__main__': + main() -- cgit v1.2.1