summaryrefslogtreecommitdiff
path: root/xmlutils.py
diff options
context:
space:
mode:
authorOlivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr>2009-08-26 14:47:05 +0200
committerOlivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr>2009-08-26 14:47:05 +0200
commitd91784c06bac1975b22e0c56a1017332fd106536 (patch)
treedf1551b3df1d47b1b0c76bcbcd92baf00ecde6d2 /xmlutils.py
parent1b31b5f655b08f3ba2112b3b4575ec92706498d9 (diff)
downloadlogilab-common-d91784c06bac1975b22e0c56a1017332fd106536.tar.gz
Added a function for parsing processing instructions in XML data
Diffstat (limited to 'xmlutils.py')
-rw-r--r--xmlutils.py44
1 files changed, 44 insertions, 0 deletions
diff --git a/xmlutils.py b/xmlutils.py
new file mode 100644
index 0000000..5a27f0f
--- /dev/null
+++ b/xmlutils.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""XML utilities.
+
+This module contains useful functions for parsing and using XML data. For the
+moment, there is only one function that can parse the data inside a processing
+instruction and return a Python dictionary.
+
+:copyright: 2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
+:license: General Public License version 2 - http://www.gnu.org/licenses
+"""
+__docformat__ = "restructuredtext en"
+
+import re
+
+# Kept for backward compatibility (same patterns as before, now written as raw
+# strings so that ``\w`` and friends are not invalid string escapes).
+RE_DOUBLE_QUOTE = re.compile(r'([\w\-\.]+)="([^"]+)"')
+RE_SIMPLE_QUOTE = re.compile(r"([\w\-\.]+)='([^']+)'")
+# One item of processing-instruction data: either ``kwd="val"`` / ``kwd='val'``
+# (the quoted value may be empty or contain whitespace; non-blank characters
+# glued after the closing quote are dropped) or a bare run of non-blank
+# characters. Groups: keyword, double-quoted value, single-quoted value, bare.
+_RE_PI_ITEM = re.compile(r'''([\w\-.]+)=(?:"([^"]*)"|'([^']*)')\S*|(\S+)''')
+
+def parse_pi_data(pi_data):
+    """
+    Parse the data contained in an XML processing instruction and return a
+    dictionary of keywords and their associated values.
+
+    Most of the time, processing instructions contain data like
+    ``keyword="value"`` or ``keyword='value'``. A quoted value may be empty
+    or contain whitespace. A token that is not of that form, for example a
+    bare ``keyword``, is used whole as a key associated to ``None``. If a
+    keyword appears several times, its last value wins.
+
+    :param pi_data: data contained in an XML processing instruction.
+    :type pi_data: unicode
+
+    :returns: Dictionary of the keywords (Unicode strings) associated to
+              their values (Unicode strings, or ``None``) as they were
+              defined in the data.
+    :rtype: dict
+    """
+    results = {}
+    # Scan the whole string instead of splitting on whitespace first, so that
+    # quoted values containing blanks stay in one piece.
+    for match in _RE_PI_ITEM.finditer(pi_data):
+        kwd, dq_val, sq_val, bare = match.groups()
+        if kwd is None:
+            # Not a ``keyword=<quoted value>`` item: keep the raw token.
+            results[bare] = None
+        elif dq_val is not None:
+            results[kwd] = dq_val
+        else:
+            results[kwd] = sq_val
+    return results