summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Simionato <michele.simionato@gmail.com>2011-01-01 06:13:13 +0100
committerMichele Simionato <michele.simionato@gmail.com>2011-01-01 06:13:13 +0100
commit6a0b8ae432cde46a621aa4b363ab76835d376add (patch)
treeaab89a2da858481e2eb23a011c3fd51dab1f8f96
parent2c4bcccf9459345e6485dfacb137f209fce6f831 (diff)
downloadmicheles-6a0b8ae432cde46a621aa4b363ab76835d376add.tar.gz
Some work on my record papers
-rw-r--r--artima/python/Makefile2
-rw-r--r--artima/python/easydb.py24
-rw-r--r--artima/python/records2.py294
-rw-r--r--artima/python/records3.py328
4 files changed, 647 insertions, 1 deletions
diff --git a/artima/python/Makefile b/artima/python/Makefile
index 8e36744..0f71c65 100644
--- a/artima/python/Makefile
+++ b/artima/python/Makefile
@@ -36,7 +36,7 @@ records2: records2.py
$(MINIDOC) -d records2; $(POST) /tmp/records2.rst 269269
records3: records3.py
- $(MINIDOC) -d records3; $(POST) /tmp/records3.rst
+ $(MINIDOC) -d records3; $(POST) /tmp/records3.rst 301076
decorator3: decorator3.txt
$(POST) decorator3.txt 243843
diff --git a/artima/python/easydb.py b/artima/python/easydb.py
new file mode 100644
index 0000000..cd9f069
--- /dev/null
+++ b/artima/python/easydb.py
@@ -0,0 +1,24 @@
+# easydb.py
+from operator import itemgetter
+from collections import namedtuple # for Python >= 2.6
+
def get_table_from_db(cursor, query_templ, query_args=(), ntuple=None):
    """Run a query and yield its result as a table of named tuples.

    The first record yielded is the header, i.e. a record whose values are
    the column names read from ``cursor.description``; it is followed by one
    record for each fetched row.

    :param cursor: a DB API 2 cursor
    :param query_templ: the SQL query, possibly containing placeholders
    :param query_args: the arguments for the placeholders; when empty the
        query is executed without arguments
    :param ntuple: optional namedtuple class used to build the records; when
        omitted a ``DBTuple`` class is generated from the column names
    """
    if query_args:
        cursor.execute(query_templ, query_args)
    else:
        cursor.execute(query_templ)
    rows = cursor.fetchall()
    # The names are consumed twice (namedtuple definition and header row),
    # so they must be a real list: in Python 3 map() is a one-shot iterator.
    fields = list(map(itemgetter(0), cursor.description))
    Ntuple = ntuple or namedtuple('DBTuple', fields)
    yield Ntuple(*fields)
    for row in rows:
        yield Ntuple(*row)
+
if __name__ == '__main__':  # test
    from sqlite3 import dbapi2
    conn = dbapi2.connect(':memory:')
    try:
        conn.execute('create table test(id integer, descr varchar)')
        conn.execute("insert into test values (1,'one')")
        conn.execute("insert into test values (2,'two')")
        for rec in get_table_from_db(conn.cursor(), 'select * from test'):
            # print(x) with a single argument gives the same output with
            # the Python 2 statement and with the Python 3 function
            print(rec)
    finally:
        conn.close()  # release the connection even if the test fails
diff --git a/artima/python/records2.py b/artima/python/records2.py
new file mode 100644
index 0000000..281d492
--- /dev/null
+++ b/artima/python/records2.py
@@ -0,0 +1,294 @@
+# .. -*- coding: utf-8 -*-
+"""
+In the previous installment I discussed the namedtuple_ type which was
+introduced in the standard library with Python 2.6 (if you are using
+an older Python version you can just download the original
+`Hettinger's recipe`_). In this installment I will use namedtuples to manage
+the records coming from a database and I will give some advice on
+how to process and how to display such records.
+
+How to dump a database table
+-------------------------------------------------------------------
+
+The simplest approach to extract the content of a database table is
+to convert it into a sequence of named tuples. We can do so by
+defining a function
+``get_table_from_db`` analogous to the ``get_table`` function we discussed
+in the first installment of this series. I assume here a working familiarity
+with the `DB API 2`_ (aka PEP 249), the standard way to interact
+with a relational database from Python:
+
+$$easydb
+
+Notice in particular the line
+
+.. code-block:: python
+
+ fields = map(itemgetter(0), cursor.description)
+
+Here, we are extracting the field names from the ``.description`` attribute
+of the cursor, which returns a list of tuples. We just take the first
+element of each tuple by using ``itemgetter``, a utility function
+defined in the operator_ module. You could do the same with a list
+comprehension ``fields = [x[0] for x in cursor.description]`` but
+``itemgetter`` is the most idiomatic solution.
+
+The example here uses the SQLite_ database, since drivers for it are
+included in the standard library starting from Python 2.5; however,
+you can easily adapt the code to any other database.
+Finally, let me note that if you know the database schema in
+advance, you can just pass a pre-defined namedtuple to
+``get_table_from_db``: there is no need to autogenerate it
+from the query. This is useful if you want to give aliases to
+the field names, especially in case the name of a column
+conflicts with a Python keyword.
+
+If you run the script you will get::
+
+ $ python easydb.py
+ DBTuple(id='id', descr='descr')
+ DBTuple(id=1, descr=u'one')
+ DBTuple(id=2, descr=u'two')
+
+
+.. _DB API 2: http://www.python.org/dev/peps/pep-0249/
+.. _SQLite: http://www.sqlite.org/
+
+A higher level approach
+----------------------------------------------------------
+
+Using the DB API 2 is a very low level approach; nowadays most people
+prefer to use an Object Relational Mapper (ORM_); the most powerful
+one is SQLAlchemy_. Using SQLAlchemy my example can be
+rewritten as
+
+.. include-code:: sa.py
+
+I am not a fan of ORMs. I find them too sophisticated
+(*simple is better than complex*)
+and they hide the SQL from the programmer (*explicit is better than implicit*).
+That said, I am the first to say that there is a real need for an
+official DB API 3, higher level than the DB API 2, without being
+an ORM. In practice, I would like to have an equivalent
+of the SQLAlchemy engine in the standard library, and I would like the
+*recordset* returned by a query to be made of namedtuples, not of
+ordinary tuples.
+
+.. _SQLAlchemy: http://www.sqlalchemy.org/
+.. _ORM: http://en.wikipedia.org/wiki/Object-relational_mapping
+
+Generare tabelle
+-----------------------------------------------------
+
+Un lavoro comunissimo è quello di leggere dei dati da un database,
+processarli e produrre come output una tabella di risultati. L'output
+potrebbe essere un file CSV di numeri da usare per un grafico
+oppure semplicemente una tabella HTML da
+pubblicare nel sito aziendale. Un workflow tipico è il
+seguente, da leggere dall'alto verso il basso::
+
+ <data source>
+ |
+ | get_table
+ |
+ <initial table>
+ |
+ | processor
+ |
+ <intermediate table>
+ |
+ | processor
+ |
+ <final table>
+ |
+ | renderer
+ |
+ <output>
+
+Il processore è un oggetto che prende
+una tabella in ingresso e ritorna una tabella in uscita, eventualmente con
+un numero di righe e/o di colonne diverso di quello in ingresso.
+Siccome le tabelle sono degli oggetti iterabili, è naturale implementare
+un processore in Python tramite un generatore che prende
+un iterabile e ritorna un iterabile.
+In generale, vi possono essere più processori che agiscono uno dopo l'altro e
+quindi più tabelle intermedie. L'ultimo processore ritorna la tabella
+finale che viene successivamente convertita in una stringa e
+salvata in un file, in formato CSV, HTML, XML o altro.
+
+Per esempio, supponiamo di voler generare una tabella HTML.
+In tal caso ci serve un (pre)processore che converte una tabella di
+record astratti in una tabella di record concreti, che non sono
+altro che sequenze di stringhe in cui i caratteri speciali
+dell'HTML sono stati *escaped*; tale processore può essere
+implementato come un semplice generatore:
+
+$$htmlescape
+
+Si noti che ``htmlescape`` è un processore del tutto generico che
+non ha neppure bisogno che i record in ingresso siano delle namedtuple:
+è sufficiente che siano delle sequenze generiche.
+
+Il renderer finale può essere implementato come segue:
+
+$$HtmlTable
+
+Notate che ``HtmlTable`` può essere interpretato anche come un processore,
+visto che ``HtmlTable(table)`` è un oggetto iterabile che ritorna
+blocchi di codice HTML. Il metodo ``.render`` può essere pensato
+come il renderizzatore di default, ma è possibile usare dei renderizzatori
+più sofisticati, in almeno due modi:
+
+1. tramite l'ereditarietà, ovvero derivando una sottoclasse di ``HtmlTable``
+ e sovrascrivendo il metodo ``render``;
+
+2. in maniera funzionale, usando ``HtmlTable`` come un processore e passando
+ il suo output ad un renderizzatore completamente indipendente.
+
+Entrambe le possibilità hanno dei pro e dei contro, ma
+l'approccio funzionale è più indicato se lo scopo finale
+è quello di disaccoppiare il codice. Inoltre, la composizione funzionale
+è concettualmente più leggera di una gerarchia di ereditarietà.
+Questo assicura semplicità e maggiore scalabilità a casi più complessi.
+
+È banale verificare che il tutto funziona con un semplice test:
+
+$$test
+
+In questo esempio ``get_test_table`` legge la tabella iniziale, ``htmlescape``
+è il processore e ``HtmlTable`` è il renderer. Eseguendo il test si ottiene
+la tabella seguente:
+
+.. raw:: html
+
+ <div class="rawhtml">
+ <style>
+ tr.even { background-color: lightgreen }
+ tr.odd { background-color: lightgray }
+ th { background-color: lightblue }
+ </style>
+ <table id="noname" border="1" summary="">
+ <thead>
+ <tr><th>A</th>
+ <th>B</th>
+ <th>C</th>
+ <th>D</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr class="even">
+ <td>1</td>
+ <td>2</td>
+ <td>3</td>
+ <td>4</td>
+ </tr>
+ <tr class="odd">
+ <td>5</td>
+ <td>6</td>
+ <td>7</td>
+ <td>8</td>
+ </tr>
+ <tr class="even">
+ <td>&gt;</td>
+ <td>&lt;</td>
+ <td>&amp;</td>
+ <td>&quot;</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+
+È chiaro che l'approccio che ho delineato in questo articolo è del tutto
+generale e si applica direttamente anche ad altri casi; lascio come
+esercizio scrivere un processore/renderizzatore che converte
+in formato XML, Latex o CSV.
+
+I lettori delle `Avventure di un Pythonista in Schemeland`_ avranno
+riconosciuto l'influenza della programmazione funzionale.
+Non si tratta di un caso fortuito: io
+sono dell'idea che la conoscenza di linguaggi non-mainstream sia molto
+utile anche quando si programma esclusivamente in linguaggi
+mainstream. In particolare, la conoscenza dei linguaggi funzionali vi
+permette di mettere in dubbio concetti che paiono dogmi indiscutibili
+in certi ambienti (tipo la "bontà" della programmazione ad oggetti) e
+di aprirvi a design alternativi. Non è un caso neppure il fatto che
+Python (che fin dall'inizio non è mai stato un linguaggio a oggetti
+bigotto alla Java) si stia muovendo sempre più verso soluzioni
+funzionali, sia nel linguaggio core ( *list comprehensions*,
+*generator expressions*, *tuple unpacking*, ecc) che nelle librerie
+(*itertools*, *namedtuple*, ecc).
+
+La miniserie non finisce qui: c'è ancora molto da dire sul
+problema della visualizzazione di tabelle e a questo argomento
+dedicheremo interamente la terza ed ultima parte. Ci vediamo
+alla prossima, *happy hacking*!
+
+.. _Avventure di un Pythonista in Schemeland: http://stacktrace.it/articoli/2008/02/le-avventure-di-un-pythonista-schemeland-1/
+.. _operator: http://docs.python.org/library/operator.html
+.. _namedtuple: http://docs.python.org/library/collections#collections.namedtuple
+.. _Hettinger's recipe: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/500261
+"""
+
+import os, cgi, easydb
+from tabular_data import headtail
+
class HtmlTable(object):
    """Convert a sequence header+body into a HTML table.

    The constructor splits its argument with ``headtail``, imported from
    ``tabular_data`` (presumably it returns the first item and an iterable
    over the remaining ones -- confirm against that module).  Iterating
    over an instance yields one list of HTML fragments per row, the header
    row first.  The cell values are inserted verbatim: escape them
    beforehand if needed.
    """
    # this is just a pedagogic implementation, in a real implementation
    # you should not hard-code your css at the Python level.
    name = "noname"
    border = "1"
    summary = ""
    css = """\
<style>
tr.even { background-color: lightgreen }
tr.odd { background-color: lightgray }
th { background-color: lightblue }
</style>
"""

    def __init__(self, header_plus_body):
        self.header, self.body = headtail(header_plus_body)

    def render(self):
        "Return the css followed by the complete <table> element as a string"
        join = os.linesep.join
        head, tail = headtail(self)  # post-processed head and tail
        thead = '<thead>\n%s\n</thead>\n' % join(head)
        tbody = '<tbody>\n%s\n</tbody>\n' % join(join(r) for r in tail)
        # Everything is interpolated in a single pass: formatting the
        # template twice would reinterpret any '%' found in css, name,
        # border or summary (e.g. "width: 100%") as a format directive.
        return '''\
%s
<table id="%s" border="%s" summary="%s">
%s
</table>''' % (self.css, self.name, self.border, self.summary,
               thead + tbody)

    def __iter__(self):
        "Yield the header row, then the body rows with alternating classes"
        yield ['<tr>'] + ['<th>%s</th>' % h for h in self.header] + ['</tr>']
        for r, row in enumerate(self.body):
            ls = ['<tr class="%s">' % ["even", "odd"][r % 2]]
            ls.extend('<td>%s</td>' % col for col in row)
            ls.append('</tr>')
            yield ls
+
def htmlescape(table):
    """Converts a table of records into a table of HTML-escaped records.

    Each input record can be any iterable of fields; each output record is
    a list containing ``str(field)`` with ``&``, ``<``, ``>`` and ``"``
    replaced by the corresponding HTML entities.
    """
    # cgi.escape was removed in Python 3.8; saxutils.escape with an entity
    # for the double quote performs exactly the same four replacements and
    # is available in Python 2 too.  The import is local so that the
    # function does not depend on the module-level imports.
    from xml.sax.saxutils import escape
    quote = {'"': '&quot;'}
    for rec in table:
        yield [escape(str(field), quote) for field in rec]
+
def test():
    "Render a small escaped test table and save the page as output.html"
    page = """\
<html>
<head>
</head>
<body>
%s
</body>
</html>
"""
    def get_test_table():
        return 'ABCD', '1234', '5678', '><&"'
    t = HtmlTable(htmlescape(get_test_table()))
    # The with-block closes the file deterministically and, unlike
    # ``print >> file(...)``, is valid syntax in Python 3 as well.
    with open('output.html', 'w') as out:
        out.write(page % t.render() + '\n')  # print used to add the newline

if __name__ == '__main__':
    test()
diff --git a/artima/python/records3.py b/artima/python/records3.py
new file mode 100644
index 0000000..1a74f29
--- /dev/null
+++ b/artima/python/records3.py
@@ -0,0 +1,328 @@
+r"""\
+In the first two installments of this series
+I discussed how to read and process homogeneous records.
+In this final installment I will discuss non-homogeneous records and
+we will devise a small framework to convert text records into CSV, HTML,
+XML or other formats. *En passant*, I will discuss various object oriented
+techniques and patterns.
+
+.. figure:: http://www.phyast.pitt.edu/~micheles/python/patchwork1.jpg
+ :width: 300
+
+ Fig 1: object-oriented design
+
+A micro-framework to convert records into text
+-------------------------------------------------------------------
+
+It is well known that I am not a framework lover and there are certainly
+many Python programmers sharing this attitude, starting from Guido.
+Actually, my dislike of frameworks is directly proportional to their
+size: I hate the mega-frameworks, I tolerate the medium-sized frameworks
+and I rather like the micro-frameworks. In this installment I will define
+a micro-framework to render non-homogeneous records into text. The
+framework is based on the `template pattern`_: in order to define a
+renderer class, the programmer inherits from a mother class ``RecordRenderer``
+and fills in the rendering methods: then the framework will automatically
+call them, but without too much magic.
+
+This approach is acceptable only when the base class is simple: it is much
+less acceptable when you start already from a deep hierarchy. For me a
+hierarchy is deep if there are more than two levels: if looking at
+mother and children is not enough, and I am forced to look even at
+the grandparent classes, the framework is already too complex.
+
+Inheritance-based frameworks have the tendency to go out of control,
+because it becomes natural to extend the hierarchy too much. In
+traditional object-oriented languages it is quite natural to use
+inheritance, but as I said elsewhere one should always keep in mind
+that alternatives are always possible (a notable new language *without*
+inheritance is Go).
+
+Anyway, one should not fight the language she is using: in Python the
+`template pattern`_ is a perfectly reasonable approach.
+
+.. figure:: http://www.phyast.pitt.edu/~micheles/python/patchwork2.jpg
+
+ Fig 2: the *template pattern*
+
+To convert into text a non-homogeneous
+record with *N* fields requires in general *N+1*
+functions: *N* functions to convert the fields and a function to convert
+the full record. It is natural to group the needed functions as methods
+of a renderer class: the *N* field-associated rendering functions will
+be methods converting values into strings, whereas the *(N+1)*-th function
+will be a ``.render`` method converting the record of strings so obtained
+into a single string. We will use a base class called ``RecordRendererABC``,
+where the ABC suffix means *Abstract Base Class*.
+
+I should point out that an Abstract Base Class in Python can provide
+concrete methods to its subclasses and therefore the meaning of ABC
+in Python is different than in C++/Java: a Python ABC is a mixin class,
+which can provide implementation; it is not necessarily pure interface.
+
+For instance, suppose we want to convert an Article record
+
+ ``Article = namedtuple("Article", "title author pubdate")``
+
+into CSV format.
+
+How do we proceed? First of all we define a suitable subclass of
+``RecordRendererABC``:
+
+$$CSVArticleRenderer
+
+Notice that ``CSVArticleRenderer`` defines a ``.schema`` class
+attribute, a namedtuple containing the names of the rendering methods.
+
+In this example both title and author are converted by using the ``.str``
+method, inherited from the base class, whereas the publication date
+is converted by using the ``.isodate`` method, which is defined
+directly in the ``CSVArticleRenderer`` class.
+The ``.render`` method is inherited and converts the input namedtuple
+into a string by converting into strings the fields with the corresponding
+methods and by joining the results, using a comma as separator.
+Here is an example:
+
+ >>> a = Article("test title", "test author", datetime(2008, 05, 15))
+ >>> r = CSVArticleRenderer(a)
+
+The ``.render`` method works as expected:
+
+ >>> print r.render()
+ test title,test author,2008-05-15
+
+By default the separator (``delimiter``) is set to the empty string ''.
+This is useful for implementing different renderers. For instance,
+suppose we want to define a renderer converting the articles into HTML
+format. Suppose we defined three CSS classes ``title``, ``author`` and
+``pubdate`` to visualize the different fields in different ways, for
+instance with different colors. We could define a renderer using the
+CSS classes as follows:
+
+$$HTMLArticleRenderer
+
+Here is how the renderer works:
+
+ >>> r = HTMLArticleRenderer(a)
+ >>> print r.render()
+ <span class="title">test title</span>
+ <span class="author">test author</span>
+ <span class="pubdate">2008-05-15</span>
+
+Design notes
+--------------------------------------------------------------------
+
+Having discussed the usage of the framework, it is now the time to
+discuss the implementation of the base class and the reasons for the
+design choices I made.
+Here is the source code for ``RecordRendererABC``:
+
+$$RecordRendererABC
+
+Let me start from the constructor. The ``__init__`` method accepts in input
+a single argument, a sequence with length equal to the length of the schema.
+The input sequence *is not required to be a namedtuple*: there is no type check
+such as ``isinstance(input, self.schema.__class__)``.
+A type check here would be a design mistake, since it would restrict without
+reason the field of applicability of the renderer and it would force
+the users to perform type conversions without need. The only requirement for
+the ``input`` object is that ``zip(self.schema, input)`` must not raise
+an exception: in other words, it is enough that ``input`` has the right
+length.
+
+Actually ``zip(self.schema, input)`` would not raise an error even if
+``input`` had a different length. This is potentially dangerous.
+For instance, imagine that for some reason (say a programmer error)
+we passed a sequence of length zero: then ``zip(self.schema, input)``
+would silently return an empty list. Since *errors should never pass silently*,
+I decided to add a check on the length: in this way if there is an error
+we see it immediately, at instantiation time, and not too late,
+when we start iterating on the renderer. It is always better to discover
+errors early.
+
+On the other hand, it is best to not exaggerate with the checks. For instance,
+if ``.input`` is a list, it is theoretically possible for an evil programmer
+to modify the list *after* instantiation, by adding or removing elements.
+Then ``zip(self.schema, input)`` could behave in an unexpected way.
+However, there is no way to protect against evil (or just bad) programmers.
+Even if we replaced ``.input`` with a tuple, which is immutable, its
+size could always be changed, simply by overriding the ``.input``
+attribute after instantiation.
+
+The Python way is to limit the checks to the ones dictated by common
+sense, intended to limit accidental errors which are likely to happen:
+for the rest, the attitude is to trust the user. Checks motivated by
+paranoia and lack of trust in the user are not to be introduced, since
+in a dynamic language the user can do whatever she wants anyway.
+The attitude is borrowed from the `spirit of C`_ (*trust the programmer*).
+According to this maxim I decided not to add additional checks.
+
+In special cases (for instance if you are implementing a subclass of
+``RecordRendererABC`` which requires for ``.input`` to be a record)
+it may be sensible to introduce some additional check. For instance
+you may want to ensure that ``.input`` be a record with the right
+fields. However, even in this case it is best not to introduce a
+type check like
+``isinstance(input, self.schema.__class__)``; you can instead
+use a lighter check like ``input._fields == self.schema._fields``:
+in this way any object with the right fields would be accepted,
+not just a namedtuple. The basic idea is to follow the
+`duck typing`_ principle: don't be too picky and
+accept as good anything with the needed
+attributes.
+
+In this logic you may want to enlarge even more the field of
+acceptable objects: for instance a dictionary-like object
+with the right keys could act as a substitute for a record.
+We could implement such feature by adding an ``if`` in the
+``__init__`` method, by introducing a special case when the input object
+is a dictionary. But that would be bad programming: the point of object
+oriented programming is to avoid ``ifs`` and to replace them with methods.
+In our example, we should remember that Python provides a *classmethod*
+construct, whose *raison d'etre* is exactly to manage this use case:
+it allows the programmer to define alternate constructors, without
+the need for complicating the ``__init__`` method. Using alternate
+constructors is called `factory method pattern`_ and it is one of
+the basic techniques of OOP. The advantages are clear, especially
+in terms of simplicity and ease of maintenance, but also from the
+point of view of code reuse and extensibility.
+
+.. figure:: http://www.phyast.pitt.edu/~micheles/python/patchwork3.jpg
+
+ Fig 3: the *factory method pattern*
+
+In our example dictionaries are rendered through the ``.frommap``
+classmethod:
+
+ >>> r = CSVArticleRenderer.frommap(dict(
+ ... title="test title", author="test author",
+ ... pubdate=datetime(2008, 05, 15)))
+
+There is also a ``.fromobj`` classmethod accepting in input any
+object with a set of attributes which is a superset (proper or
+improper) of the schema's attributes. This is pure *duck typing*.
+If the object lacks an attribute, we will get an ``AttributeError``
+at instantiation time, an absolutely clear and telling error message;
+on the other hand, if the object has enough attributes, it will be
+automatically converted into a namedtuple.
+
+The base class also defines the special methods ``__iter__`` and ``__len__``:
+therefore each renderer instance is a sequence of fixed length and can be
+passed in input to another renderer. In other words, renderers are
+composable in the functional sense.
+
+Renderers are actually homogeneous records with fields which are strings
+and can be passed to the ``HtmlTable`` object defined in the previous
+installment. It is trivial to convert a renderer into a list of strings:
+thanks to the ``__iter__`` method, ``list(renderer)`` works as expected
+(idem for ``tuple(renderer)`` and ``len(renderer)``). ``list``, ``tuple`` and
+``len`` are actually builtin generic functions which play well with
+*duck typing* and are definable for any custom object.
+
+It was good to discuss what was implemented in ``RecordRendererABC``;
+it is also interesting to discuss what was *not* implemented.
+In particular, I did not implement the renderers as namedtuples.
+I wanted to avoid the *blob* antipattern_, when you have a class which
+is everything to everybody. I wanted to keep namedtuples simple, without
+adding any methods to them: renderers are logically an independent concept,
+even if they can be converted into namedtuples, being iterable.
+
+.. figure:: http://www.phyast.pitt.edu/~micheles/python/blob.jpg
+ :width: 350
+
+ Fig 4: the *blob antipattern*
+
+I did define ``CSVArticleRenderer`` and ``HTMLArticleRenderer`` as
+subclasses of ``RecordRendererABC``. An alternate design could have
+introduced different abstract intermediate subclasses, depending on the output
+format: for instance ``CSVRecordRenderer``, ``HTMLRecordRenderer``,
+``XMLRecordRenderer``, etc. However I have decided to follow strictly
+the rule that *flat is better than nested*, and to keep the hierarchies
+as short as possible.
+Actually in Python 2.6+ one could define three abstract interfaces
+``CSVRecordRenderer``, ``HTMLRecordRenderer`` and ``XMLRecordRenderer``
+and one could register her concrete classes with such interfaces: this
+can be done without using inheritance and by keeping the hierarchy flat.
+
+.. _spirit of C: http://www.artima.com/cppsource/spiritofc.html
+.. _duck typing: http://en.wikipedia.org/wiki/Duck_typing
+.. _factory method pattern: http://en.wikipedia.org/wiki/Factory_method_pattern
+.. _antipattern: http://en.wikipedia.org/wiki/Antipattern
+.. _template pattern: http://en.wikipedia.org/wiki/Template_pattern
+"""
+import os, cgi
+from datetime import datetime
+from tabular_data import headtail
+from collections import namedtuple
+
+# in Python 2.6 use abstractmethod, abstractproperty instead
class notimplemented(object):
    """Descriptor raising a meaningful error message for nonoverridden attributes.

    Assign an instance to a class attribute: every attribute lookup, both
    on the class and on its instances, raises ``NotImplementedError`` with
    the given message until the attribute is overridden.
    """

    def __init__(self, message):
        # text of the NotImplementedError raised by __get__
        self.message = message

    def __get__(self, obj, objcls=None):
        error = NotImplementedError(self.message)
        raise error
+
class RecordRendererABC(object):
    """Base class for the renderers converting a record into a string.

    Subclasses set ``schema`` to a namedtuple whose values are the names of
    the converter methods, one per field, and ``delimiter`` to the string
    used to join the converted fields.
    """
    schema = ()  # a namedtuple specifying the names of the converters
    delimiter = ''

    @classmethod
    def frommap(cls, kw):
        "Alternate constructor: build the record from a mapping field->value"
        record = cls.schema.__class__(**kw)
        return cls(record)

    @classmethod
    def fromobj(cls, obj):
        "Alternate constructor: read the schema fields from obj's attributes"
        Schema = cls.schema.__class__
        values = (getattr(obj, name) for name in Schema._fields)
        return cls(Schema._make(values))

    def __init__(self, input):
        # fail early, at instantiation time, when the lengths do not match
        n_input = len(input)
        n_schema = len(self)
        if n_input == n_schema:
            self.input = input
        else:
            raise TypeError(
                '%s has %d fields, expected %d' % (input, n_input, n_schema))

    def __iter__(self):
        # pair each converter name with the corresponding input value
        for convertername, value in zip(self.schema, self.input):
            converter = getattr(self, convertername)
            yield converter(value)

    def __len__(self):
        return len(self.schema)

    def str(self, value):
        "Default converter, delegating to the builtin str"
        return str(value)

    def render(self):
        "Join the converted fields with the delimiter"
        separator = self.delimiter
        return separator.join(self)
+
# the record type handled by the article renderers
Article = namedtuple("Article", "title author pubdate")


class CSVArticleRenderer(RecordRendererABC):
    "Render an Article as a line of comma-separated values"
    # title and author go through the inherited .str converter,
    # pubdate through .isodate
    schema = Article("str", "str", "isodate")
    delimiter = ','

    def isodate(self, date):
        "Return the first ten characters of date.isoformat()"
        isostring = date.isoformat()
        return isostring[:10]
+
class HTMLArticleRenderer(RecordRendererABC):
    "Render an Article as <span> elements carrying a CSS class, one per line"
    schema = Article(title='title', author='author', pubdate="pubdate")
    delimiter = '\n'

    @staticmethod
    def _escape(text):
        "Replace &, < and > with HTML entities, as cgi.escape(text) did"
        # cgi.escape was removed in Python 3.8; saxutils.escape performs the
        # same three replacements and is available in Python 2 too.  The
        # import is local so that the class does not need a new
        # module-level import.
        from xml.sax.saxutils import escape
        return escape(text)

    def title(self, title):
        return '<span class="title">%s</span>' % self._escape(title)

    def author(self, author):
        return '<span class="author">%s</span>' % self._escape(author)

    def pubdate(self, date):
        # the first ten characters of the ISO format need no escaping
        return '<span class="pubdate">%s</span>' % date.isoformat()[:10]
+
def to_xml(ntuple):
    """Convert a namedtuple into an XML fragment.

    The outer element is named after the namedtuple class and contains one
    child element per field, each on its own line.  The field values are
    converted to strings and XML-escaped (``&``, ``<`` and ``>``), so that
    they cannot break the markup.
    """
    from xml.sax.saxutils import escape  # local: no new module-level import
    name = ntuple.__class__.__name__
    xml = ['<%s>' % name]
    for field, value in zip(ntuple._fields, ntuple):
        # '%s' % (value,) is the same conversion the template applied before
        text = escape('%s' % (value,))
        xml.append("<%s>%s</%s>" % (field, text, field))
    xml.append('</%s>' % name)
    return os.linesep.join(xml)
+
if __name__ == '__main__':
    # run the examples embedded in the module docstring
    import doctest
    doctest.testmod()
    # smoke test of the duck-typed alternate constructor; the resulting
    # renderer is discarded
    HTMLArticleRenderer.fromobj(Article("a", 'b', datetime.today()))