diff options
author | Olivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr> | 2009-08-26 14:47:05 +0200 |
---|---|---|
committer | Olivier Cayrol (Logilab) <Olivier.Cayrol@logilab.fr> | 2009-08-26 14:47:05 +0200 |
commit | d91784c06bac1975b22e0c56a1017332fd106536 (patch) | |
tree | df1551b3df1d47b1b0c76bcbcd92baf00ecde6d2 /xmlutils.py | |
parent | 1b31b5f655b08f3ba2112b3b4575ec92706498d9 (diff) | |
download | logilab-common-d91784c06bac1975b22e0c56a1017332fd106536.tar.gz |
Added a function for parsing processing instructions in XML data
Diffstat (limited to 'xmlutils.py')
-rw-r--r-- | xmlutils.py | 44 |
1 files changed, 44 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""XML utilities.

This module contains useful functions for parsing and using XML data. For the
moment, there is only one function that can parse the data inside a processing
instruction and return a Python dictionary.

:copyright: 2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
:license: General Public License version 2 - http://www.gnu.org/licenses
"""
__docformat__ = "restructuredtext en"

import re

# Public patterns matching a single ``keyword="value"`` / ``keyword='value'``
# item. They are no longer used by `parse_pi_data` but are kept, with their
# original patterns, for code that may reference them. Raw strings avoid the
# invalid-escape warnings that ``\w`` and friends trigger in plain literals.
RE_DOUBLE_QUOTE = re.compile(r'([\w\-\.]+)="([^"]+)"')
RE_SIMPLE_QUOTE = re.compile(r"([\w\-\.]+)='([^']+)'")

# Tokenizer used by `parse_pi_data`. It scans the raw data instead of
# whitespace-separated chunks, so that quoted values may contain whitespace.
_RE_PI_TOKEN = re.compile(
    r"""
    (?P<kwd>[\w\-.]+)=          # keyword immediately followed by an equals sign
    (?:
        "(?P<dq>[^"]*)"         # double-quoted value, possibly empty or with spaces
      | '(?P<sq>[^']*)'         # single-quoted value, possibly empty or with spaces
    )
    \S*                         # characters glued to the closing quote are dropped
  |
    (?P<bare>\S+)               # anything else is taken as a bare keyword
    """,
    re.VERBOSE,
)


def parse_pi_data(pi_data):
    """
    Utility function that parses the data contained in an XML
    processing instruction and returns a dictionary of keywords and their
    associated values (most of the time, the processing instructions contain
    data like ``keyword="value"``, if a keyword is not associated to a value,
    for example ``keyword``, it will be associated to ``None``).

    Values may be enclosed in double or single quotes, may contain
    whitespace and may be empty (``keyword=""`` gives an empty string). Any
    whitespace-delimited item that is not a well-formed ``keyword="value"``
    pair is stored verbatim as a keyword associated to ``None``. When a
    keyword appears several times, the last occurrence wins.

    :param pi_data: data contained in an XML processing instruction; ``None``
                    or an empty string gives an empty dictionary.
    :type pi_data: unicode or None

    :returns: Dictionary of the keywords (Unicode strings) associated to
              their values (Unicode strings) as they were defined in the
              data.
    :rtype: dict
    """
    results = {}
    if not pi_data:
        # A processing instruction without any data carries no keywords.
        return results
    for match in _RE_PI_TOKEN.finditer(pi_data):
        bare = match.group('bare')
        if bare is not None:
            results[bare] = None
            continue
        # Exactly one of the two quoted groups took part in the match; test
        # against None (not truthiness) so that empty values are preserved.
        value = match.group('dq')
        if value is None:
            value = match.group('sq')
        results[match.group('kwd')] = value
    return results