summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/sql/schema.py
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2023-04-05 11:58:52 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2023-04-21 11:30:40 -0400
commitcf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/sql/schema.py
parent63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
downloadsqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz
add deterministic imv returning ordering using sentinel columns
Repaired a major shortcoming which was identified in the :ref:`engine_insertmanyvalues` performance optimization feature first introduced in the 2.0 series. This was a continuation of the change in 2.0.9 which disabled the SQL Server version of the feature due to a reliance in the ORM on apparent row ordering that is not guaranteed to take place. The fix applies new logic to all "insertmanyvalues" operations, which takes effect when a new parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` on the :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` methods, that through a combination of alternate SQL forms, direct correspondence of client side parameters, and in some cases downgrading to running row-at-a-time, will apply sorting to each batch of returned rows using correspondence to primary key or other unique values in each row which can be correlated to the input data. Performance impact is expected to be minimal as nearly all common primary key scenarios are suitable for parameter-ordered batching to be achieved for all backends other than SQLite, while "row-at-a-time" mode operates with a bare minimum of Python overhead compared to the very heavyweight approaches used in the 1.x series. For SQLite, there is no difference in performance when "row-at-a-time" mode is used. It's anticipated that with an efficient "row-at-a-time" INSERT with RETURNING batching capability, the "insertmanyvalues" feature can be later be more easily generalized to third party backends that include RETURNING support but not necessarily easy ways to guarantee a correspondence with parameter order. Fixes: #9618 References: #9603 Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
Diffstat (limited to 'lib/sqlalchemy/sql/schema.py')
-rw-r--r--lib/sqlalchemy/sql/schema.py324
1 files changed, 308 insertions, 16 deletions
diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py
index 7d964ad05..192096469 100644
--- a/lib/sqlalchemy/sql/schema.py
+++ b/lib/sqlalchemy/sql/schema.py
@@ -58,7 +58,10 @@ from . import ddl
from . import roles
from . import type_api
from . import visitors
+from .base import _DefaultDescriptionTuple
from .base import _NoneName
+from .base import _SentinelColumnCharacterization
+from .base import _SentinelDefaultCharacterization
from .base import DedupeColumnCollection
from .base import DialectKWArgs
from .base import Executable
@@ -77,6 +80,7 @@ from .. import event
from .. import exc
from .. import inspection
from .. import util
+from ..util import HasMemoized
from ..util.typing import Final
from ..util.typing import Literal
from ..util.typing import Protocol
@@ -107,7 +111,6 @@ if typing.TYPE_CHECKING:
_T = TypeVar("_T", bound="Any")
_SI = TypeVar("_SI", bound="SchemaItem")
-_ServerDefaultType = Union["FetchedValue", str, TextClause, ColumnElement[Any]]
_TAB = TypeVar("_TAB", bound="Table")
@@ -115,6 +118,10 @@ _CreateDropBind = Union["Engine", "Connection", "MockConnection"]
_ConstraintNameArgument = Optional[Union[str, _NoneName]]
+_ServerDefaultArgument = Union[
+ "FetchedValue", str, TextClause, ColumnElement[Any]
+]
+
class SchemaConst(Enum):
@@ -345,6 +352,8 @@ class Table(
_columns: DedupeColumnCollection[Column[Any]]
+ _sentinel_column: Optional[Column[Any]]
+
constraints: Set[Constraint]
"""A collection of all :class:`_schema.Constraint` objects associated with
this :class:`_schema.Table`.
@@ -819,6 +828,8 @@ class Table(
assert isinstance(schema, str)
self.schema = quoted_name(schema, quote_schema)
+ self._sentinel_column = None
+
self.indexes = set()
self.constraints = set()
PrimaryKeyConstraint(
@@ -1005,6 +1016,140 @@ class Table(
def _autoincrement_column(self) -> Optional[Column[int]]:
return self.primary_key._autoincrement_column
+ @util.ro_memoized_property
+ def _sentinel_column_characteristics(
+ self,
+ ) -> _SentinelColumnCharacterization:
+ """determine a candidate column (or columns, in case of a client
+ generated composite primary key) which can be used as an
+ "insert sentinel" for an INSERT statement.
+
+ The returned structure, :class:`_SentinelColumnCharacterization`,
+ includes all the details needed by :class:`.Dialect` and
+ :class:`.SQLCompiler` to determine if these column(s) can be used
+ as an INSERT..RETURNING sentinel for a particular database
+ dialect.
+
+ .. versionadded:: 2.0.10
+
+ """
+
+ sentinel_is_explicit = False
+ sentinel_is_autoinc = False
+ the_sentinel: Optional[_typing_Sequence[Column[Any]]] = None
+
+ # see if a column was explicitly marked "insert_sentinel=True".
+ explicit_sentinel_col = self._sentinel_column
+
+ if explicit_sentinel_col is not None:
+ the_sentinel = (explicit_sentinel_col,)
+ sentinel_is_explicit = True
+
+ autoinc_col = self._autoincrement_column
+ if sentinel_is_explicit and explicit_sentinel_col is autoinc_col:
+ assert autoinc_col is not None
+ sentinel_is_autoinc = True
+ elif explicit_sentinel_col is None and autoinc_col is not None:
+ the_sentinel = (autoinc_col,)
+ sentinel_is_autoinc = True
+
+ default_characterization = _SentinelDefaultCharacterization.UNKNOWN
+
+ if the_sentinel:
+ the_sentinel_zero = the_sentinel[0]
+ if the_sentinel_zero.identity:
+
+ if the_sentinel_zero.identity._increment_is_negative:
+ if sentinel_is_explicit:
+ raise exc.InvalidRequestError(
+ "Can't use IDENTITY default with negative "
+ "increment as an explicit sentinel column"
+ )
+ else:
+ if sentinel_is_autoinc:
+ autoinc_col = None
+ sentinel_is_autoinc = False
+ the_sentinel = None
+ else:
+ default_characterization = (
+ _SentinelDefaultCharacterization.IDENTITY
+ )
+ elif (
+ the_sentinel_zero.default is None
+ and the_sentinel_zero.server_default is None
+ ):
+ if the_sentinel_zero.nullable:
+ raise exc.InvalidRequestError(
+ f"Column {the_sentinel_zero} has been marked as a "
+ "sentinel "
+ "column with no default generation function; it "
+ "at least needs to be marked nullable=False assuming "
+ "user-populated sentinel values will be used."
+ )
+ default_characterization = (
+ _SentinelDefaultCharacterization.NONE
+ )
+ elif the_sentinel_zero.default is not None:
+ if the_sentinel_zero.default.is_sentinel:
+ default_characterization = (
+ _SentinelDefaultCharacterization.SENTINEL_DEFAULT
+ )
+ elif default_is_sequence(the_sentinel_zero.default):
+
+ if the_sentinel_zero.default._increment_is_negative:
+ if sentinel_is_explicit:
+ raise exc.InvalidRequestError(
+ "Can't use SEQUENCE default with negative "
+ "increment as an explicit sentinel column"
+ )
+ else:
+ if sentinel_is_autoinc:
+ autoinc_col = None
+ sentinel_is_autoinc = False
+ the_sentinel = None
+
+ default_characterization = (
+ _SentinelDefaultCharacterization.SEQUENCE
+ )
+ elif the_sentinel_zero.default.is_callable:
+ default_characterization = (
+ _SentinelDefaultCharacterization.CLIENTSIDE
+ )
+ elif the_sentinel_zero.server_default is not None:
+ if sentinel_is_explicit:
+ raise exc.InvalidRequestError(
+ f"Column {the_sentinel[0]} can't be a sentinel column "
+ "because it uses an explicit server side default "
+ "that's not the Identity() default."
+ )
+
+ default_characterization = (
+ _SentinelDefaultCharacterization.SERVERSIDE
+ )
+
+ if the_sentinel is None and self.primary_key:
+ assert autoinc_col is None
+
+ # determine for non-autoincrement pk if all elements are
+ # client side
+ for _pkc in self.primary_key:
+ if _pkc.server_default is not None or (
+ _pkc.default and not _pkc.default.is_callable
+ ):
+ break
+ else:
+ the_sentinel = tuple(self.primary_key)
+ default_characterization = (
+ _SentinelDefaultCharacterization.CLIENTSIDE
+ )
+
+ return _SentinelColumnCharacterization(
+ the_sentinel,
+ sentinel_is_explicit,
+ sentinel_is_autoinc,
+ default_characterization,
+ )
+
@property
def autoincrement_column(self) -> Optional[Column[int]]:
"""Returns the :class:`.Column` object which currently represents
@@ -1361,6 +1506,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
inherit_cache = True
key: str
+ server_default: Optional[FetchedValue]
+
def __init__(
self,
__name_pos: Optional[
@@ -1384,11 +1531,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
] = SchemaConst.NULL_UNSPECIFIED,
onupdate: Optional[Any] = None,
primary_key: bool = False,
- server_default: Optional[_ServerDefaultType] = None,
+ server_default: Optional[_ServerDefaultArgument] = None,
server_onupdate: Optional[FetchedValue] = None,
quote: Optional[bool] = None,
system: bool = False,
comment: Optional[str] = None,
+ insert_sentinel: bool = False,
+ _omit_from_statements: bool = False,
_proxies: Optional[Any] = None,
**dialect_kwargs: Any,
):
@@ -1873,6 +2022,22 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
:paramref:`_schema.Column.comment`
parameter to :class:`_schema.Column`.
+ :param insert_sentinel: Marks this :class:`_schema.Column` as an
+ :term:`insert sentinel` used for optimizing the performance of the
+ :term:`insertmanyvalues` feature for tables that don't
+ otherwise have qualifying primary key configurations.
+
+ .. versionadded:: 2.0.10
+
+ .. seealso::
+
+ :func:`_schema.insert_sentinel` - all in one helper for declaring
+ sentinel columns
+
+ :ref:`engine_insertmanyvalues`
+
+ :ref:`engine_insertmanyvalues_sentinel_columns`
+
""" # noqa: E501, RST201, RST202
@@ -1914,7 +2079,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
self.key = key if key is not None else name # type: ignore
self.primary_key = primary_key
-
+ self._insert_sentinel = insert_sentinel
+ self._omit_from_statements = _omit_from_statements
self._user_defined_nullable = udn = nullable
if udn is not NULL_UNSPECIFIED:
self.nullable = udn
@@ -1962,22 +2128,26 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
else:
self.onpudate = None
- self.server_default = server_default
- self.server_onupdate = server_onupdate
-
- if self.server_default is not None:
- if isinstance(self.server_default, FetchedValue):
- l_args.append(self.server_default._as_for_update(False))
+ if server_default is not None:
+ if isinstance(server_default, FetchedValue):
+ server_default = server_default._as_for_update(False)
+ l_args.append(server_default)
else:
- l_args.append(DefaultClause(self.server_default))
+ server_default = DefaultClause(server_default)
+ l_args.append(server_default)
+ self.server_default = server_default
- if self.server_onupdate is not None:
- if isinstance(self.server_onupdate, FetchedValue):
- l_args.append(self.server_onupdate._as_for_update(True))
+ if server_onupdate is not None:
+ if isinstance(server_onupdate, FetchedValue):
+ server_onupdate = server_onupdate._as_for_update(True)
+ l_args.append(server_onupdate)
else:
- l_args.append(
- DefaultClause(self.server_onupdate, for_update=True)
+ server_onupdate = DefaultClause(
+ server_onupdate, for_update=True
)
+ l_args.append(server_onupdate)
+ self.server_onupdate = server_onupdate
+
self._init_items(*cast(_typing_Sequence[SchemaItem], l_args))
util.set_creation_order(self)
@@ -2042,6 +2212,17 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
if isinstance(impl, SchemaEventTarget):
impl._set_parent_with_dispatch(self)
+ @HasMemoized.memoized_attribute
+ def _default_description_tuple(self) -> _DefaultDescriptionTuple:
+ """used by default.py -> _process_execute_defaults()"""
+
+ return _DefaultDescriptionTuple._from_column_default(self.default)
+
+ @HasMemoized.memoized_attribute
+ def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple:
+ """used by default.py -> _process_execute_defaults()"""
+ return _DefaultDescriptionTuple._from_column_default(self.onupdate)
+
@util.memoized_property
def _gen_static_annotations_cache_key(self) -> bool: # type: ignore
"""special attribute used by cache key gen, if true, we will
@@ -2185,6 +2366,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
all_names[self.name] = self
self.table = table
+ if self._insert_sentinel:
+ if self.table._sentinel_column is not None:
+ raise exc.ArgumentError(
+ "a Table may have only one explicit sentinel column"
+ )
+ self.table._sentinel_column = self
+
if self.primary_key:
table.primary_key._replace(self)
elif self.key in table.primary_key:
@@ -2316,6 +2504,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
server_onupdate=server_onupdate,
doc=self.doc,
comment=self.comment,
+ _omit_from_statements=self._omit_from_statements,
+ insert_sentinel=self._insert_sentinel,
*args,
**column_kwargs,
)
@@ -2472,6 +2662,56 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]):
return c.key, c
+def insert_sentinel(
+ name: Optional[str] = None,
+ type_: Optional[_TypeEngineArgument[_T]] = None,
+ *,
+ default: Optional[Any] = None,
+ omit_from_statements: bool = True,
+) -> Column[Any]:
+ """Provides a surrogate :class:`_schema.Column` that will act as a
+ dedicated insert :term:`sentinel` column, allowing efficient bulk
+ inserts with deterministic RETURNING sorting for tables that
+ don't otherwise have qualifying primary key configurations.
+
+ Adding this column to a :class:`.Table` object requires that a
+ corresponding database table actually has this column present, so if adding
+ it to an existing model, existing database tables would need to be migrated
+ (e.g. using ALTER TABLE or similar) to include this column.
+
+ For background on how this object is used, see the section
+ :ref:`engine_insertmanyvalues_sentinel_columns` as part of the
+ section :ref:`engine_insertmanyvalues`.
+
+ The :class:`_schema.Column` returned will be a nullable integer column by
+ default and make use of a sentinel-specific default generator used only in
+ "insertmanyvalues" operations.
+
+ .. seealso::
+
+ :func:`_orm.orm_insert_sentinel`
+
+ :paramref:`_schema.Column.insert_sentinel`
+
+ :ref:`engine_insertmanyvalues`
+
+ :ref:`engine_insertmanyvalues_sentinel_columns`
+
+
+ .. versionadded:: 2.0.10
+
+ """
+ return Column(
+ name=name,
+ type_=type_api.INTEGERTYPE if type_ is None else type_,
+ default=default
+ if default is not None
+ else _InsertSentinelColumnDefault(),
+ _omit_from_statements=omit_from_statements,
+ insert_sentinel=True,
+ )
+
+
class ForeignKey(DialectKWArgs, SchemaItem):
"""Defines a dependency between two columns.
@@ -3058,17 +3298,24 @@ else:
class DefaultGenerator(Executable, SchemaItem):
- """Base class for column *default* values."""
+ """Base class for column *default* values.
+
+ This object is only present on column.default or column.onupdate.
+ It's not valid as a server default.
+
+ """
__visit_name__ = "default_generator"
_is_default_generator = True
is_sequence = False
+ is_identity = False
is_server_default = False
is_clause_element = False
is_callable = False
is_scalar = False
has_arg = False
+ is_sentinel = False
column: Optional[Column[Any]]
def __init__(self, for_update: bool = False) -> None:
@@ -3222,6 +3469,44 @@ class ScalarElementColumnDefault(ColumnDefault):
)
+class _InsertSentinelColumnDefault(ColumnDefault):
+ """Default generator that's specific to the use of a "sentinel" column
+ when using the insertmanyvalues feature.
+
+ This default is used as part of the :func:`_schema.insert_sentinel`
+ construct.
+
+ """
+
+ is_sentinel = True
+ for_update = False
+ arg = None
+
+ def __new__(cls) -> _InsertSentinelColumnDefault:
+ return object.__new__(cls)
+
+ def __init__(self) -> None:
+ pass
+
+ def _set_parent(self, parent: SchemaEventTarget, **kw: Any) -> None:
+ col = cast("Column[Any]", parent)
+ if not col._insert_sentinel:
+ raise exc.ArgumentError(
+ "The _InsertSentinelColumnDefault may only be applied to a "
+ "Column marked as insert_sentinel=True"
+ )
+ elif not col.nullable:
+ raise exc.ArgumentError(
+ "The _InsertSentinelColumnDefault may only be applied to a "
+ "Column that is nullable"
+ )
+
+ super()._set_parent(parent, **kw)
+
+ def _copy(self) -> _InsertSentinelColumnDefault:
+ return _InsertSentinelColumnDefault()
+
+
_SQLExprDefault = Union["ColumnElement[Any]", "TextClause"]
@@ -3366,6 +3651,10 @@ class IdentityOptions:
self.cache = cache
self.order = order
+ @property
+ def _increment_is_negative(self) -> bool:
+ return self.increment is not None and self.increment < 0
+
class Sequence(HasSchemaAttr, IdentityOptions, DefaultGenerator):
"""Represents a named database sequence.
@@ -3674,6 +3963,7 @@ class FetchedValue(SchemaEventTarget):
reflected = False
has_argument = False
is_clause_element = False
+ is_identity = False
column: Optional[Column[Any]]
@@ -5668,6 +5958,8 @@ class Identity(IdentityOptions, FetchedValue, SchemaItem):
__visit_name__ = "identity_column"
+ is_identity = True
+
def __init__(
self,
always: bool = False,