# postgres.py # Copyright (C) 2005, 2006, 2007 Michael Bayer mike_mp@zzzcomputing.com # # This module is part of SQLAlchemy and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php import re, random, warnings, operator from sqlalchemy import sql, schema, ansisql, exceptions from sqlalchemy.engine import base, default import sqlalchemy.types as sqltypes from decimal import Decimal try: import mx.DateTime.DateTime as mxDateTime except: mxDateTime = None class PGInet(sqltypes.TypeEngine): def get_col_spec(self): return "INET" class PGNumeric(sqltypes.Numeric): def get_col_spec(self): if not self.precision: return "NUMERIC" else: return "NUMERIC(%(precision)s, %(length)s)" % {'precision': self.precision, 'length' : self.length} def convert_bind_param(self, value, dialect): return value def convert_result_value(self, value, dialect): if not self.asdecimal and isinstance(value, Decimal): return float(value) else: return value class PGFloat(sqltypes.Float): def get_col_spec(self): if not self.precision: return "FLOAT" else: return "FLOAT(%(precision)s)" % {'precision': self.precision} class PGInteger(sqltypes.Integer): def get_col_spec(self): return "INTEGER" class PGSmallInteger(sqltypes.Smallinteger): def get_col_spec(self): return "SMALLINT" class PGBigInteger(PGInteger): def get_col_spec(self): return "BIGINT" class PGDateTime(sqltypes.DateTime): def get_col_spec(self): return "TIMESTAMP " + (self.timezone and "WITH" or "WITHOUT") + " TIME ZONE" class PGDate(sqltypes.Date): def get_col_spec(self): return "DATE" class PGTime(sqltypes.Time): def get_col_spec(self): return "TIME " + (self.timezone and "WITH" or "WITHOUT") + " TIME ZONE" class PGInterval(sqltypes.TypeEngine): def get_col_spec(self): return "INTERVAL" class PGText(sqltypes.TEXT): def get_col_spec(self): return "TEXT" class PGString(sqltypes.String): def get_col_spec(self): return "VARCHAR(%(length)s)" % {'length' : self.length} class PGChar(sqltypes.CHAR): def get_col_spec(self): return "CHAR(%(length)s)" % {'length' : self.length} class PGBinary(sqltypes.Binary): def get_col_spec(self): return "BYTEA" class PGBoolean(sqltypes.Boolean): def get_col_spec(self): return "BOOLEAN" class PGArray(sqltypes.TypeEngine, sqltypes.Concatenable): def __init__(self, item_type): if isinstance(item_type, type): item_type = item_type() self.item_type = item_type def dialect_impl(self, dialect): impl = self.__class__.__new__(self.__class__) impl.__dict__.update(self.__dict__) impl.item_type = self.item_type.dialect_impl(dialect) return impl def convert_bind_param(self, value, dialect): if value is None: return value def convert_item(item): if isinstance(item, (list,tuple)): return [convert_item(child) for child in item] else: return self.item_type.convert_bind_param(item, dialect) return [convert_item(item) for item in value] def convert_result_value(self, value, dialect): if value is None: return value def convert_item(item): if isinstance(item, list): return [convert_item(child) for child in item] else: return self.item_type.convert_result_value(item, dialect) # Could specialcase when item_type.convert_result_value is the default identity func return [convert_item(item) for item in value] def get_col_spec(self): return self.item_type.get_col_spec() + '[]' colspecs = { sqltypes.Integer : PGInteger, sqltypes.Smallinteger : PGSmallInteger, sqltypes.Numeric : PGNumeric, sqltypes.Float : PGFloat, sqltypes.DateTime : PGDateTime, sqltypes.Date : PGDate, sqltypes.Time : PGTime, sqltypes.String : PGString, sqltypes.Binary : PGBinary, sqltypes.Boolean : PGBoolean, sqltypes.TEXT : PGText, sqltypes.CHAR: PGChar, } ischema_names = { 'integer' : PGInteger, 'bigint' : PGBigInteger, 'smallint' : PGSmallInteger, 'character varying' : PGString, 'character' : PGChar, 'text' : PGText, 'numeric' : PGNumeric, 'float' : PGFloat, 'real' : PGFloat, 'inet': PGInet, 'double precision' : PGFloat, 'timestamp' : PGDateTime, 'timestamp with time zone' : PGDateTime, 'timestamp without time zone' : PGDateTime, 'time with time zone' : PGTime, 'time without time zone' : PGTime, 'date' : PGDate, 'time': PGTime, 'bytea' : PGBinary, 'boolean' : PGBoolean, 'interval':PGInterval, } def descriptor(): return {'name':'postgres', 'description':'PostGres', 'arguments':[ ('username',"Database Username",None), ('password',"Database Password",None), ('database',"Database Name",None), ('host',"Hostname", None), ]} class PGExecutionContext(default.DefaultExecutionContext): def _is_server_side(self): return self.dialect.server_side_cursors and self.is_select() and not re.search(r'FOR UPDATE(?: NOWAIT)?\s*$', self.statement, re.I) def create_cursor(self): if self._is_server_side(): # use server-side cursors: # http://lists.initd.org/pipermail/psycopg/2007-January/005251.html ident = "c" + hex(random.randint(0, 65535))[2:] return self.connection.connection.cursor(ident) else: return self.connection.connection.cursor() def get_result_proxy(self): if self._is_server_side(): return base.BufferedRowResultProxy(self) else: return base.ResultProxy(self) def post_exec(self): if self.compiled.isinsert and self.last_inserted_ids is None: if not self.dialect.use_oids: pass # will raise invalid error when they go to get them else: table = self.compiled.statement.table if self.cursor.lastrowid is not None and table is not None and len(table.primary_key): s = sql.select(table.primary_key, table.oid_column == self.cursor.lastrowid) row = self.connection.execute(s).fetchone() self._last_inserted_ids = [v for v in row] super(PGExecutionContext, self).post_exec() class PGDialect(ansisql.ANSIDialect): def __init__(self, use_oids=False, server_side_cursors=False, **kwargs): ansisql.ANSIDialect.__init__(self, default_paramstyle='pyformat', **kwargs) self.use_oids = use_oids self.server_side_cursors = server_side_cursors self.paramstyle = 'pyformat' def dbapi(cls): import psycopg2 as psycopg return psycopg dbapi = classmethod(dbapi) def create_connect_args(self, url): opts = url.translate_connect_args(['host', 'database', 'user', 'password', 'port']) if opts.has_key('port'): opts['port'] = int(opts['port']) opts.update(url.query) return ([], opts) def create_execution_context(self, *args, **kwargs): return PGExecutionContext(self, *args, **kwargs) def max_identifier_length(self): return 63 def type_descriptor(self, typeobj): return sqltypes.adapt_type(typeobj, colspecs) def compiler(self, statement, bindparams, **kwargs): return PGCompiler(self, statement, bindparams, **kwargs) def schemagenerator(self, *args, **kwargs): return PGSchemaGenerator(self, *args, **kwargs) def schemadropper(self, *args, **kwargs): return PGSchemaDropper(self, *args, **kwargs) def do_begin_twophase(self, connection, xid): self.do_begin(connection.connection) def do_prepare_twophase(self, connection, xid): connection.execute(sql.text("PREPARE TRANSACTION %(tid)s", bindparams=[sql.bindparam('tid', xid)])) def do_rollback_twophase(self, connection, xid, is_prepared=True, recover=False): if is_prepared: if recover: #FIXME: ugly hack to get out of transaction context when commiting recoverable transactions # Must find out a way how to make the dbapi not open a transaction. connection.execute(sql.text("ROLLBACK")) connection.execute(sql.text("ROLLBACK PREPARED %(tid)s", bindparams=[sql.bindparam('tid', xid)])) else: self.do_rollback(connection.connection) def do_commit_twophase(self, connection, xid, is_prepared=True, recover=False): if is_prepared: if recover: connection.execute(sql.text("ROLLBACK")) connection.execute(sql.text("COMMIT PREPARED %(tid)s", bindparams=[sql.bindparam('tid', xid)])) else: self.do_commit(connection.connection) def do_recover_twophase(self, connection): resultset = connection.execute(sql.text("SELECT gid FROM pg_prepared_xacts")) return [row[0] for row in resultset] def defaultrunner(self, context, **kwargs): return PGDefaultRunner(context, **kwargs) def preparer(self): return PGIdentifierPreparer(self) def get_default_schema_name(self, connection): if not hasattr(self, '_default_schema_name'): self._default_schema_name = connection.scalar("select current_schema()", None) return self._default_schema_name def last_inserted_ids(self): if self.context.last_inserted_ids is None: raise exceptions.InvalidRequestError("no INSERT executed, or can't use cursor.lastrowid without Postgres OIDs enabled") else: return self.context.last_inserted_ids def oid_column_name(self, column): if self.use_oids: return "oid" else: return None def do_executemany(self, c, statement, parameters, context=None): """We need accurate rowcounts for updates, inserts and deletes. ``psycopg2`` is not nice enough to produce this correctly for an executemany, so we do our own executemany here. """ rowcount = 0 for param in parameters: c.execute(statement, param) rowcount += c.rowcount if context is not None: context._rowcount = rowcount def has_table(self, connection, table_name, schema=None): # seems like case gets folded in pg_class... if schema is None: cursor = connection.execute("""select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where n.nspname=current_schema() and lower(relname)=%(name)s""", {'name':table_name.lower().encode(self.encoding)}); else: cursor = connection.execute("""select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where n.nspname=%(schema)s and lower(relname)=%(name)s""", {'name':table_name.lower().encode(self.encoding), 'schema':schema}); return bool( not not cursor.rowcount ) def has_sequence(self, connection, sequence_name): cursor = connection.execute('''SELECT relname FROM pg_class WHERE relkind = 'S' AND relnamespace IN ( SELECT oid FROM pg_namespace WHERE nspname NOT LIKE 'pg_%%' AND nspname != 'information_schema' AND relname = %(seqname)s);''', {'seqname': sequence_name}) return bool(not not cursor.rowcount) def is_disconnect(self, e): if isinstance(e, self.dbapi.OperationalError): return 'closed the connection' in str(e) or 'connection not open' in str(e) elif isinstance(e, self.dbapi.InterfaceError): return 'connection already closed' in str(e) elif isinstance(e, self.dbapi.ProgrammingError): # yes, it really says "losed", not "closed" return "losed the connection unexpectedly" in str(e) else: return False def table_names(self, connection, schema): s = """ SELECT relname FROM pg_class c WHERE relkind = 'r' AND '%(schema)s' = (select nspname from pg_namespace n where n.oid = c.relnamespace) """ % locals() return [row[0] for row in connection.execute(s)] def reflecttable(self, connection, table, include_columns): preparer = self.identifier_preparer if table.schema is not None: schema_where_clause = "n.nspname = :schema" else: schema_where_clause = "pg_catalog.pg_table_is_visible(c.oid)" ## information schema in pg suffers from too many permissions' restrictions ## let us find out at the pg way what is needed... SQL_COLS = """ SELECT a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod), (SELECT substring(d.adsrc for 128) FROM pg_catalog.pg_attrdef d WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef) AS DEFAULT, a.attnotnull, a.attnum, a.attrelid as table_oid FROM pg_catalog.pg_attribute a WHERE a.attrelid = ( SELECT c.oid FROM pg_catalog.pg_class c LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace WHERE (%s) AND c.relname = :table_name AND c.relkind in ('r','v') ) AND a.attnum > 0 AND NOT a.attisdropped ORDER BY a.attnum """ % schema_where_clause s = sql.text(SQL_COLS, bindparams=[sql.bindparam('table_name', type_=sqltypes.Unicode), sql.bindparam('schema', type_=sqltypes.Unicode)], typemap={'attname':sqltypes.Unicode}) c = connection.execute(s, table_name=table.name, schema=table.schema) rows = c.fetchall() if not rows: raise exceptions.NoSuchTableError(table.name) domains = self._load_domains(connection) for name, format_type, default, notnull, attnum, table_oid in rows: if include_columns and name not in include_columns: continue ## strip (30) from character varying(30) attype = re.search('([^\([]+)', format_type).group(1) nullable = not notnull is_array = format_type.endswith('[]') try: charlen = re.search('\(([\d,]+)\)', format_type).group(1) except: charlen = False numericprec = False numericscale = False if attype == 'numeric': if charlen is False: numericprec, numericscale = (None, None) else: numericprec, numericscale = charlen.split(',') charlen = False if attype == 'double precision': numericprec, numericscale = (53, False) charlen = False if attype == 'integer': numericprec, numericscale = (32, 0) charlen = False args = [] for a in (charlen, numericprec, numericscale): if a is None: args.append(None) elif a is not False: args.append(int(a)) kwargs = {} if attype == 'timestamp with time zone': kwargs['timezone'] = True elif attype == 'timestamp without time zone': kwargs['timezone'] = False if attype in ischema_names: coltype = ischema_names[attype] else: if attype in domains: domain = domains[attype] if domain['attype'] in ischema_names: # A table can't override whether the domain is nullable. nullable = domain['nullable'] if domain['default'] and not default: # It can, however, override the default value, but can't set it to null. default = domain['default'] coltype = ischema_names[domain['attype']] else: coltype=None if coltype: coltype = coltype(*args, **kwargs) if is_array: coltype = PGArray(coltype) else: warnings.warn(RuntimeWarning("Did not recognize type '%s' of column '%s'" % (attype, name))) coltype = sqltypes.NULLTYPE colargs= [] if default is not None: match = re.search(r"""(nextval\(')([^']+)('.*$)""", default) if match is not None: # the default is related to a Sequence sch = table.schema if '.' not in match.group(2) and sch is not None: default = match.group(1) + sch + '.' + match.group(2) + match.group(3) colargs.append(schema.PassiveDefault(sql.text(default))) table.append_column(schema.Column(name, coltype, nullable=nullable, *colargs)) # Primary keys PK_SQL = """ SELECT attname FROM pg_attribute WHERE attrelid = ( SELECT indexrelid FROM pg_index i WHERE i.indrelid = :table AND i.indisprimary = 't') ORDER BY attnum """ t = sql.text(PK_SQL, typemap={'attname':sqltypes.Unicode}) c = connection.execute(t, table=table_oid) for row in c.fetchall(): pk = row[0] table.primary_key.add(table.c[pk]) # Foreign keys FK_SQL = """ SELECT conname, pg_catalog.pg_get_constraintdef(oid, true) as condef FROM pg_catalog.pg_constraint r WHERE r.conrelid = :table AND r.contype = 'f' ORDER BY 1 """ t = sql.text(FK_SQL, typemap={'conname':sqltypes.Unicode, 'condef':sqltypes.Unicode}) c = connection.execute(t, table=table_oid) for conname, condef in c.fetchall(): m = re.search('FOREIGN KEY \((.*?)\) REFERENCES (?:(.*?)\.)?(.*?)\((.*?)\)', condef).groups() (constrained_columns, referred_schema, referred_table, referred_columns) = m constrained_columns = [preparer._unquote_identifier(x) for x in re.split(r'\s*,\s*', constrained_columns)] if referred_schema: referred_schema = preparer._unquote_identifier(referred_schema) referred_table = preparer._unquote_identifier(referred_table) referred_columns = [preparer._unquote_identifier(x) for x in re.split(r'\s*,\s', referred_columns)] refspec = [] if referred_schema is not None: schema.Table(referred_table, table.metadata, autoload=True, schema=referred_schema, autoload_with=connection) for column in referred_columns: refspec.append(".".join([referred_schema, referred_table, column])) else: schema.Table(referred_table, table.metadata, autoload=True, autoload_with=connection) for column in referred_columns: refspec.append(".".join([referred_table, column])) table.append_constraint(schema.ForeignKeyConstraint(constrained_columns, refspec, conname)) def _load_domains(self, connection): ## Load data types for domains: SQL_DOMAINS = """ SELECT t.typname as "name", pg_catalog.format_type(t.typbasetype, t.typtypmod) as "attype", not t.typnotnull as "nullable", t.typdefault as "default", pg_catalog.pg_type_is_visible(t.oid) as "visible", n.nspname as "schema" FROM pg_catalog.pg_type t LEFT JOIN pg_catalog.pg_namespace n ON n.oid = t.typnamespace LEFT JOIN pg_catalog.pg_constraint r ON t.oid = r.contypid WHERE t.typtype = 'd' """ s = sql.text(SQL_DOMAINS, typemap={'attname':sqltypes.Unicode}) c = connection.execute(s) domains = {} for domain in c.fetchall(): ## strip (30) from character varying(30) attype = re.search('([^\(]+)', domain['attype']).group(1) if domain['visible']: # 'visible' just means whether or not the domain is in a # schema that's on the search path -- or not overriden by # a schema with higher presedence. If it's not visible, # it will be prefixed with the schema-name when it's used. name = domain['name'] else: name = "%s.%s" % (domain['schema'], domain['name']) domains[name] = {'attype':attype, 'nullable': domain['nullable'], 'default': domain['default']} return domains class PGCompiler(ansisql.ANSICompiler): operators = ansisql.ANSICompiler.operators.copy() operators.update( { operator.mod : '%%' } ) def uses_sequences_for_inserts(self): return True def limit_clause(self, select): text = "" if select._limit is not None: text += " \n LIMIT " + str(select._limit) if select._offset is not None: if select._limit is None: text += " \n LIMIT ALL" text += " OFFSET " + str(select._offset) return text def get_select_precolumns(self, select): if select._distinct: if type(select._distinct) == bool: return "DISTINCT " if type(select._distinct) == list: dist_set = "DISTINCT ON (" for col in select._distinct: dist_set += self.strings[col] + ", " dist_set = dist_set[:-2] + ") " return dist_set return "DISTINCT ON (" + str(select._distinct) + ") " else: return "" def for_update_clause(self, select): if select.for_update == 'nowait': return " FOR UPDATE NOWAIT" else: return super(PGCompiler, self).for_update_clause(select) class PGSchemaGenerator(ansisql.ANSISchemaGenerator): def get_column_specification(self, column, **kwargs): colspec = self.preparer.format_column(column) if column.primary_key and len(column.foreign_keys)==0 and column.autoincrement and isinstance(column.type, sqltypes.Integer) and not isinstance(column.type, sqltypes.SmallInteger) and (column.default is None or (isinstance(column.default, schema.Sequence) and column.default.optional)): if isinstance(column.type, PGBigInteger): colspec += " BIGSERIAL" else: colspec += " SERIAL" else: colspec += " " + column.type.dialect_impl(self.dialect).get_col_spec() default = self.get_column_default_string(column) if default is not None: colspec += " DEFAULT " + default if not column.nullable: colspec += " NOT NULL" return colspec def visit_sequence(self, sequence): if not sequence.optional and (not self.checkfirst or not self.dialect.has_sequence(self.connection, sequence.name)): self.append("CREATE SEQUENCE %s" % self.preparer.format_sequence(sequence)) self.execute() class PGSchemaDropper(ansisql.ANSISchemaDropper): def visit_sequence(self, sequence): if not sequence.optional and (not self.checkfirst or self.dialect.has_sequence(self.connection, sequence.name)): self.append("DROP SEQUENCE %s" % sequence.name) self.execute() class PGDefaultRunner(ansisql.ANSIDefaultRunner): def get_column_default(self, column, isinsert=True): if column.primary_key: # passive defaults on primary keys have to be overridden if isinstance(column.default, schema.PassiveDefault): return self.connection.execute("select %s" % column.default.arg).scalar() elif (isinstance(column.type, sqltypes.Integer) and column.autoincrement) and (column.default is None or (isinstance(column.default, schema.Sequence) and column.default.optional)): sch = column.table.schema # TODO: this has to build into the Sequence object so we can get the quoting # logic from it if sch is not None: exc = "select nextval('\"%s\".\"%s_%s_seq\"')" % (sch, column.table.name, column.name) else: exc = "select nextval('\"%s_%s_seq\"')" % (column.table.name, column.name) return self.connection.execute(exc).scalar() return super(ansisql.ANSIDefaultRunner, self).get_column_default(column) def visit_sequence(self, seq): if not seq.optional: return self.connection.execute("select nextval('%s')" % self.dialect.identifier_preparer.format_sequence(seq)).scalar() else: return None class PGIdentifierPreparer(ansisql.ANSIIdentifierPreparer): def _fold_identifier_case(self, value): return value.lower() def _unquote_identifier(self, value): if value[0] == self.initial_quote: value = value[1:-1].replace('""','"') return value dialect = PGDialect