Lots of improvements on sqlplain, such as mapping-style arguments and better namedtuples

author: michele.simionato <devnull@localhost> 2009-06-10 09:49:57 +0000
committer: michele.simionato <devnull@localhost> 2009-06-10 09:49:57 +0000
commit: da5a44c9e603325eeb0f954a84bb46e6d154ed71 (patch)
tree: 2023905253d8d2d170610195614dc47c79b9e617 /sqlplain
parent: b55d287d6244eabcc952866be5c6673bfa209cb1 (diff)
download: micheles-da5a44c9e603325eeb0f954a84bb46e6d154ed71.tar.gz
5 files changed, 105 insertions, 178 deletions
diff --git a/sqlplain/connection.py b/sqlplain/connection.py
index 857beeb..7609e5f 100644
--- a/sqlplain/connection.py
+++ b/sqlplain/connection.py
@@ -17,18 +17,15 @@ def retry(func, conn, *args, **kw):
     A decorator making a function taking a connection as first argument
     to retry in case of errors
     """
-    if conn.retry:
-        try:
-            return func(conn, *args, **kw)
-        except conn.driver.Error, e:            
-            conn.close() # retry with a fresh connection
-            return func(conn, *args, **kw)
-    else:
+    try:
+        return func(conn, *args, **kw)
+    except conn.driver.Error, e:            
+        conn.close() # retry with a fresh connection
         return func(conn, *args, **kw)
 
 Field = namedtuple(
     'Field',
-    'name type_code display_size internal_size precision scale null_ok')
+    'name type_code display_size internal_size precision scale null_ok'.split())
 
 counter = itertools.count(1)
 
@@ -121,7 +118,11 @@ class LazyConnection(object):
     There is however a chatty method for easy of debugging.
     """    
     def __init__(self, uri, isolation_level=None, threadlocal=False,
-                 retry=False):
+                 params='SEQUENCE'):
+        if params not in ('SEQUENCE', 'MAPPING'):
+            raise TypeError("params must be 'SEQUENCE' or 'MAPPING', you "
+                            "passed %s" % params)
+        self.params = params
         self.uri = URI(uri)
         self.name = self.uri['database']
         self.dbtype = self.uri['dbtype']
@@ -135,9 +136,7 @@ class LazyConnection(object):
             self._storage = _ThreadLocalStorage.new(connect, args)
         else:
             self._storage = _Storage.new(connect, args)
-        self.retry = retry
 
-    @retry
     def _raw_execute(self, templ, args):
         """
         Call a dbapi2 cursor; return the rowcount or a list of tuples,
@@ -160,6 +159,7 @@ class LazyConnection(object):
             return descr, cursor.fetchall()
 
     def execute(self, templ, args=(), ntuple=None, scalar=False):
+        "args must be a sequence, not a dictionary"
         if self.dbtype == 'mssql':
             # converts unicode arguments to utf8
             lst = []
@@ -175,6 +175,9 @@ class LazyConnection(object):
                 raise TypeError(
          "TypeError when executing %s\nExpected %d arguments (%s), got %d %s" %
          (templ, len(argnames), ', '.join(argnames), len(args), args))
+            if self.params == 'MAPPING':
+                # replace the passed dictionary with the corresponding tuple
+                args = tuple(args[name] for name in argnames)
         descr, res = self._raw_execute(templ, args)
         cursor = self._storage.curs # needed to make the reset work
         if self.chatty:
@@ -189,7 +192,7 @@ class LazyConnection(object):
                     % (res, templ, args))
             return res[0][0]
         if descr: # the query was a SELECT
-            fields = [Field(*d) for d in descr]
+            fields = [Field(d) for d in descr]
             header = [f.name or noname() for f in fields]
             if ntuple is None:
                 Ntuple = namedtuple('DBTuple', header)
@@ -197,9 +200,9 @@ class LazyConnection(object):
                 Ntuple = namedtuple(ntuple, header)
             else:
                 Ntuple = ntuple
-            res = TupleList(Ntuple(*row) for row in res)
+            res = TupleList(Ntuple(row) for row in res)
             res.descr = fields
-            res.header = Ntuple(*header)
+            res.header = Ntuple(header)
         return res
 
     def executescript(self, sql, *dicts, **kw):
@@ -251,6 +254,11 @@ class LazyConnection(object):
     def rowcount(self):
         return self._storage.curs.rowcount
 
+class RetryingConnection(LazyConnection):
+    """A LazyConnection which retries once in case of execution errors.
+    It makes sense for long running applications in autocommit mode."""
+    _raw_execute = retry(LazyConnection._raw_execute.im_func)
+
 class NullObject(object):
     '''Implements the NullObject pattern.
     
diff --git a/sqlplain/doc/doc.py b/sqlplain/doc/doc.py
index 65b0a07..d90c337 100644
--- a/sqlplain/doc/doc.py
+++ b/sqlplain/doc/doc.py
@@ -112,10 +112,10 @@ parsing the connection string or uri (in this case
 corresponding database driver (in this case ``pymssql``) which must be
 installed in your system, otherwise you get an ``ImportError``.
 
-The syntax of the URI is the same as in SQLAlchemy (I did copy from
-SQLAlchemy; even Storm uses the same convention and I see no reason
-to change it). Internally ``LazyConn`` instantiates an URI object
-which is a dictionary:
+The syntax of the URI is the same as in SQLAlchemy (I did copy it
+directly from SQLAlchemy; even Storm uses the same convention and I
+see no reason to change it). Internally ``LazyConn`` instantiates an
+URI object which is a dictionary:
 
 .. code-block:: python
 
@@ -131,25 +131,38 @@ which is a dictionary:
 The port is None here, therefore the low level driver ``pymssql`` will open
 the connection by using the default port number for MS SQL, i.e. 1433.
 
-The ``execute`` method of the lazy connection object is the one performing
-the real job: it opens a low level connection, instantiates a DB API 2
-cursor and it runs the ``SELECT`` query: the result is returned as a list
-of named tuples. Named tuples are a Python 2.6 construct, however ``sqlplain``
-ships with its own version of namedtuples (I have just copied Raymond
-Hettinger's recipe from the Python Cookbook site) which is used if
-you are running an early version of Python.
+The ``execute`` method of the lazy connection object is the one
+performing the real job: it opens a low level connection, instantiates
+a DB API 2 cursor and it runs the ``SELECT`` query: the result is
+returned as a list of named tuples. Named tuples are a Python 2.6
+construct, however ``sqlplain`` ships with its own version of
+namedtuples (I have just copied Raymond Hettinger's recipe from the
+Python Cookbook site, with a few tweaks) which is used if you are
+running an early version of Python.
 
 An important thing to notice is that ``sqlplain`` uses named arguments
-for *all* supported database drivers, even
-if the underlying low level driver uses qmark paramstyle (like SQLite)
-or (py)format paramstyle
-(like pymssql and psycogpg). BTW, the (py)format paramstyle, is really
-a terrible choice, since it collides for the usage of ``%s`` in Python
-string templates :-(.
+in the templates for *all* supported database drivers, even if the
+underlying low level driver uses qmark paramstyle - like SQLite - or
+(py)format paramstyle - like pymssql and psycogpg. BTW, the (py)format
+paramstyle, is really a terrible choice, since it collides for the
+usage of ``%s`` in Python string templates :-(. 
+Also notice that by default ``execute`` does not accept a mapping as 
+arguments: it expects an integer-indexed sequence. However, the
+default behavior can be changed by setting the option ``params='MAPPING'``
+at initialization time. Here is an example:
+
+ >> bookdb = connect('mssql://pyadmin:secret@localhost/bookdb', 
+                     params='MAPPING')
+ >> bookdb.execute('SELECT * FROM book WHERE author=:author', 
+                    dict(author='Asimov'))
 
 The ``execute`` method is smart enough: if you run it again,
 the previously instantiated DB API2 connection and cursors are
 re-used, i.e. it does not recreate a connection for each query.
+Moreover, the template is parsed only once and then cached,
+so that there is not big penalty is you execute twice the same
+template with different parameters.
+
 You can access the low level connection and cursor via the
 properties ``._conn`` and ``._curs``:
 
diff --git a/sqlplain/namedtuple.py b/sqlplain/namedtuple.py
index ad067ef..8163625 100644
--- a/sqlplain/namedtuple.py
+++ b/sqlplain/namedtuple.py
@@ -1,134 +1,38 @@
-## Raymond Hettinger's recipe
-from operator import itemgetter as _itemgetter
-from keyword import iskeyword as _iskeyword
-import sys as _sys
-
-def namedtuple(typename, field_names, verbose=False):
-    """Returns a new subclass of tuple with named fields.
-
-    >>> Point = namedtuple('Point', 'x y')
-    >>> Point.__doc__                   # docstring for the new class
-    'Point(x, y)'
-    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
-    >>> p[0] + p[1]                     # indexable like a plain tuple
-    33
-    >>> x, y = p                        # unpack like a regular tuple
-    >>> x, y
-    (11, 22)
-    >>> p.x + p.y                       # fields also accessable by name
-    33
-    >>> d = p._asdict()                 # convert to a dictionary
-    >>> d['x']
-    11
-    >>> Point(**d)                      # convert from a dictionary
-    Point(x=11, y=22)
-    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
-    Point(x=100, y=22)
-
-    """
-
-    # Parse and validate the field names.  Validation serves two purposes,
-    # generating informative error messages and preventing template injection attacks.
-    if isinstance(field_names, basestring):
-        field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
-    field_names = tuple(field_names)
-    for name in (typename,) + field_names:
-        if not min(c.isalnum() or c=='_' for c in name):
-            raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
-        if _iskeyword(name):
-            raise ValueError('Type names and field names cannot be a keyword: %r' % name)
-        if name[0].isdigit():
-            raise ValueError('Type names and field names cannot start with a number: %r' % name)
-    seen_names = set()
-    for name in field_names:
-        if name.startswith('_'):
-            raise ValueError('Field names cannot start with an underscore: %r' % name)
-        if name in seen_names:
-            raise ValueError('Encountered duplicate field name: %r' % name)
-        seen_names.add(name)
-
-    # Create and fill-in the class template
-    numfields = len(field_names)
-    argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
-    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
-    dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
-    template = '''class %(typename)s(tuple):
-        '%(typename)s(%(argtxt)s)' \n
-        __slots__ = () \n
-        _fields = %(field_names)r \n
-        def __new__(cls, %(argtxt)s):
-            return tuple.__new__(cls, (%(argtxt)s)) \n
-        @classmethod
-        def _make(cls, iterable, new=tuple.__new__, len=len):
-            'Make a new %(typename)s object from a sequence or iterable'
-            result = new(cls, iterable)
-            if len(result) != %(numfields)d:
-                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
-            return result \n
-        def __repr__(self):
-            return '%(typename)s(%(reprtxt)s)' %% self \n
-        def _asdict(t):
-            'Return a new dict which maps field names to their values'
-            return {%(dicttxt)s} \n
-        def _replace(self, **kwds):
-            'Return a new %(typename)s object replacing specified fields with new values'
-            result = self._make(map(kwds.pop, %(field_names)r, self))
-            if kwds:
-                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
-            return result \n\n''' % locals()
-    for i, name in enumerate(field_names):
-        template += "setattr(%s, '%s', property(itemgetter(%d)))\n" % (
-            typename, name, i)
-    if verbose:
-        print template
-
-    # Execute the template string in a temporary namespace
-    namespace = dict(itemgetter=_itemgetter)
-    try:
-        exec template in namespace
-    except SyntaxError, e:
-        raise SyntaxError(e.message + ':\n' + template)
-    result = namespace[typename]
-
-    # For pickling to work, the __module__ variable needs to be set to the frame
-    # where the named tuple is created.  Bypass this step in enviroments where
-    # sys._getframe is not defined (Jython for example).
-    if hasattr(_sys, '_getframe'):
-        result.__module__ = _sys._getframe(1).f_globals['__name__']
-
-    return result
-
-
-
-
-
-
-if __name__ == '__main__':
-    # verify that instances can be pickled
-    from cPickle import loads, dumps
-    Point = namedtuple('Point', 'x, y', True)
-    p = Point(x=10, y=20)
-    assert p == loads(dumps(p))
-
-    # test and demonstrate ability to override methods
-    class Point(namedtuple('Point', 'x y')):
-        @property
-        def hypot(self):
-            return (self.x ** 2 + self.y ** 2) ** 0.5
-        def __str__(self):
-            return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
-
-    for p in Point(3,4), Point(14,5), Point(9./7,6):
-        print p
-
-    class Point(namedtuple('Point', 'x y')):
-        'Point class with optimized _make() and _replace() without error-checking'
-        _make = classmethod(tuple.__new__)
-        def _replace(self, _map=map, **kwds):
-            return self._make(_map(kwds.get, ('x', 'y'), self))
-
-    print Point(11, 22)._replace(x=100)
-
-    import doctest
-    TestResults = namedtuple('TestResults', 'failed attempted')
-    print TestResults(*doctest.testmod())
+"""
+The namedtuple class here is inspired from the namedtuple class
+used in Python 2.6. Unfortunately, I cannot use that class,
+since it does not work for field names which are Python keywords, and names
+coming from a database are uncontrollable. There is still an issue
+for names starting and ending with a double underscore, and for the
+following names: _fields, _fromdict and _asdict.
+Conflict with such names however is pretty rare and it is ignored.
+"""
+
+from operator import itemgetter
+
+RESERVED_NAMES = set('_asdict _fields _fromdict'.split())
+
+class Namedtuple(tuple):
+    _fields = ()
+    __slots__ = ()
+
+    @classmethod
+    def _fromdict(cls, dic):
+        return cls(dic[name] for name in self._fields)
+
+    def _asdict(self):
+        return dict(zip(self._fields, self))
+
+    def __repr__(self):
+        s = ', '.join('%r: %r' % (n, v) for (n, v) in zip(self._fields, self))
+        return '%s(%s)' % (self.__class__.__name__, s)
+
+def namedtuple(typename, fields):
+    "Return a Namedtuple subclass without slots and with the right properties"
+    dic = dict(_fields=fields, __slots__=())
+    for i, f in enumerate(fields):
+        if f.startswith('__') and f.endswith('__') or f in RESERVED_NAMES:
+            raise NameError('%s is not a valid field name' % f)
+        dic[f] = property(
+            itemgetter(i), doc="Property for the field #%d, %r" % (i, f))
+    return type(typename, (Namedtuple,), dic)
diff --git a/sqlplain/sql_support.py b/sqlplain/sql_support.py
index 6ed8798..41ebd86 100644
--- a/sqlplain/sql_support.py
+++ b/sqlplain/sql_support.py
@@ -8,7 +8,7 @@ class _SymbolReplacer(object):
     a placeholder. Used by get_args_templ.
     """
     STRING_OR_COMMENT = re.compile(r"('[^']*'|--.*\n)")
-    SYMBOL = re.compile(r"(?<!:):(\w+)")
+    SYMBOL = re.compile(r"(?<!:):(\w+)") # a name prefixed by colons
     
     def __init__(self, placeholder):
         self.placeholder = placeholder
@@ -18,22 +18,23 @@ class _SymbolReplacer(object):
     def get_args_templ(self, templ):
         argnames = []
         def repl(mo):
+            "Replace named args with placeholders"
             argname = mo.group(1)
             if argname in argnames:
                 raise NameError('Duplicate argument %r in SQL template'
                                 % argname)
             argnames.append(argname)
-            return self.placeholder or mo.group()
+            return self.placeholder
         out = []
         for i, chunk in enumerate(self.STRING_OR_COMMENT.split(templ)):
-            if i % 2 == 0: # real sql code
+            if i % 2 == 0 and self.placeholder: # real sql code
                 chunk = self.SYMBOL.sub(repl, chunk)
             out.append(chunk)
         return argnames, ''.join(out)
 
 templ_cache = {}
 
-# used in .execute
+# used in .execute and do
 def get_args_templ(templ, repl=None):
     # this is small hack instead of a full featured SQL parser
     """
diff --git a/sqlplain/util.py b/sqlplain/util.py
index 8f571fc..6ffd2aa 100644
--- a/sqlplain/util.py
+++ b/sqlplain/util.py
@@ -4,6 +4,7 @@ Notice: create_db and drop_db are not transactional.
 
 import os, sys, re, subprocess, tempfile
 from sqlplain.uri import URI, CODEMAP
+from sqlplain.sql_support import get_args_templ
 from sqlplain import connect, do
 from sqlplain.connection import Transaction
 from sqlplain.namedtuple import namedtuple
@@ -157,7 +158,7 @@ def _make_clause(dic, sep):
         vals.append(v)
     return sep.join(clauses), tuple(vals)
 
-def update_table(conn, tname, kdict, vdict):
+def update_table(conn, tname, vdict, **kdict):
     "A low-level utility to update a table"
     where, kvals = _make_clause(kdict, ' AND ')
     set_, vvals = _make_clause(vdict, ', ')
@@ -200,19 +201,19 @@ def truncate_table(conn, tname):
         return conn.execute('TRUNCATE TABLE %s' % tname)
 
 def insert_rows(conn, tname, rows):
-    "Insert an iterable sequence of rows into a table; useful for unit tests"
-    it = iter(rows)
+    """Insert an iterable sequence of rows into a table; 
+    useful for unit tests. Notice that it invokes executemany
+    on the underlying low-level connection"""
+    lst = list(rows)
     n = 0 # number of inserted lines
     try:
-        row = it.next()
-    except StopIteration: # nothing to insert
+        row = lst[0]
+    except IndexError: # nothing to insert
         return n
     dummies = [':%s' % (i + 1) for i in range(len(row))]
     templ = 'INSERT INTO %s VALUES (%s)' % (tname, ', '.join(dummies))
-    n = conn.execute(templ, row)
-    for row in it:
-        n += conn.execute(templ, row)
-    return 
+    argnames, templ = get_args_templ(templ)
+    return conn._conn.executemany(templ, rows)
     
 def load_file(uri, tname, fname, mode, **kwargs):
     "Bulk insert a (binary or csv) file into a table"""
author	michele.simionato <devnull@localhost>	2009-06-10 09:49:57 +0000
committer	michele.simionato <devnull@localhost>	2009-06-10 09:49:57 +0000
commit	da5a44c9e603325eeb0f954a84bb46e6d154ed71 (patch)
tree	2023905253d8d2d170610195614dc47c79b9e617 /sqlplain
parent	b55d287d6244eabcc952866be5c6673bfa209cb1 (diff)
download	micheles-da5a44c9e603325eeb0f954a84bb46e6d154ed71.tar.gz