Added a function for parsing processing instructions in XML data

author: Olivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr> 2009-08-26 14:47:05 +0200
committer: Olivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr> 2009-08-26 14:47:05 +0200
commit: d91784c06bac1975b22e0c56a1017332fd106536 (patch)
tree: df1551b3df1d47b1b0c76bcbcd92baf00ecde6d2 /xmlutils.py
parent: 1b31b5f655b08f3ba2112b3b4575ec92706498d9 (diff)
download: logilab-common-d91784c06bac1975b22e0c56a1017332fd106536.tar.gz
1 files changed, 44 insertions, 0 deletions
diff --git a/xmlutils.py b/xmlutils.py
new file mode 100644
index 0000000..5a27f0f
--- /dev/null
+++ b/xmlutils.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+"""XML utilities.
+
+This module contains useful functions for parsing and using XML data. For the
+moment, there is only one function that can parse the data inside a processing
+instruction and return a Python dictionary.
+
+:copyright: 2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
+:license: General Public License version 2 - http://www.gnu.org/licenses
+"""
+__docformat__ = "restructuredtext en"
+
+import re
+
+RE_DOUBLE_QUOTE = re.compile(r'([\w\-\.]+)="([^"]+)"')  # keyword="value"
+RE_SIMPLE_QUOTE = re.compile(r"([\w\-\.]+)='([^']+)'")  # keyword='value'
+
+def parse_pi_data(pi_data):
+    """Parse the data of an XML processing instruction into a dictionary.
+
+    ``keyword="value"`` and ``keyword='value'`` items map the keyword to its
+    value (which may contain whitespace); a bare ``keyword`` maps to ``None``.
+
+    :param pi_data: data contained in an XML processing instruction.
+    :type pi_data: unicode
+    :returns: dictionary mapping each keyword to its value (or ``None``).
+    :rtype: dict
+    """
+    results = {}
+    pos, end = 0, len(pi_data)
+    while pos < end:
+        if pi_data[pos].isspace():
+            pos += 1
+            continue
+        match = (RE_DOUBLE_QUOTE.match(pi_data, pos)
+                 or RE_SIMPLE_QUOTE.match(pi_data, pos))
+        start, pos = pos, match.end() if match else pos
+        # Run to the next whitespace: ends a bare keyword, drops trailing junk.
+        while pos < end and not pi_data[pos].isspace():
+            pos += 1
+        kwd, val = match.groups() if match else (pi_data[start:pos], None)
+        results[kwd] = val
+    return results
author	Olivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr>	2009-08-26 14:47:05 +0200
committer	Olivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr>	2009-08-26 14:47:05 +0200
commit	d91784c06bac1975b22e0c56a1017332fd106536 (patch)
tree	df1551b3df1d47b1b0c76bcbcd92baf00ecde6d2 /xmlutils.py
parent	1b31b5f655b08f3ba2112b3b4575ec92706498d9 (diff)
download	logilab-common-d91784c06bac1975b22e0c56a1017332fd106536.tar.gz