summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/sql/dml.py
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2023-04-05 11:58:52 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2023-04-21 11:30:40 -0400
commitcf6872d3bdf1a8a9613e853694acc2b1e6f06f51 (patch)
tree3a4ee41ab8b48aea7ac1e275c2f553763ec28dad /lib/sqlalchemy/sql/dml.py
parent63f51491c5f0cb22883c800a065d7c4b4c54774e (diff)
downloadsqlalchemy-cf6872d3bdf1a8a9613e853694acc2b1e6f06f51.tar.gz
add deterministic imv returning ordering using sentinel columns
Repaired a major shortcoming which was identified in the :ref:`engine_insertmanyvalues` performance optimization feature first introduced in the 2.0 series. This was a continuation of the change in 2.0.9 which disabled the SQL Server version of the feature due to a reliance in the ORM on apparent row ordering that is not guaranteed to take place. The fix applies new logic to all "insertmanyvalues" operations, which takes effect when a new parameter :paramref:`_dml.Insert.returning.sort_by_parameter_order` on the :meth:`_dml.Insert.returning` or :meth:`_dml.UpdateBase.return_defaults` methods, that through a combination of alternate SQL forms, direct correspondence of client side parameters, and in some cases downgrading to running row-at-a-time, will apply sorting to each batch of returned rows using correspondence to primary key or other unique values in each row which can be correlated to the input data. Performance impact is expected to be minimal as nearly all common primary key scenarios are suitable for parameter-ordered batching to be achieved for all backends other than SQLite, while "row-at-a-time" mode operates with a bare minimum of Python overhead compared to the very heavyweight approaches used in the 1.x series. For SQLite, there is no difference in performance when "row-at-a-time" mode is used. It's anticipated that with an efficient "row-at-a-time" INSERT with RETURNING batching capability, the "insertmanyvalues" feature can be later be more easily generalized to third party backends that include RETURNING support but not necessarily easy ways to guarantee a correspondence with parameter order. Fixes: #9618 References: #9603 Change-Id: I1d79353f5f19638f752936ba1c35e4dc235a8b7c
Diffstat (limited to 'lib/sqlalchemy/sql/dml.py')
-rw-r--r--lib/sqlalchemy/sql/dml.py100
1 files changed, 90 insertions, 10 deletions
diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py
index 830f845b4..911061640 100644
--- a/lib/sqlalchemy/sql/dml.py
+++ b/lib/sqlalchemy/sql/dml.py
@@ -32,8 +32,8 @@ from typing import Union
from . import coercions
from . import roles
from . import util as sql_util
-from ._typing import _no_kw
from ._typing import _TP
+from ._typing import _unexpected_kw
from ._typing import is_column_element
from ._typing import is_named_from_clause
from .base import _entity_namespace_key
@@ -455,6 +455,7 @@ class UpdateBase(
self,
*cols: _DMLColumnArgument,
supplemental_cols: Optional[Iterable[_DMLColumnArgument]] = None,
+ sort_by_parameter_order: bool = False,
) -> Self:
"""Make use of a :term:`RETURNING` clause for the purpose
of fetching server-side expressions and defaults, for supporting
@@ -603,6 +604,20 @@ class UpdateBase(
.. versionadded:: 2.0
+ :param sort_by_parameter_order: for a batch INSERT that is being
+ executed against multiple parameter sets, organize the results of
+ RETURNING so that the returned rows correspond to the order of
+ parameter sets passed in. This applies only to an :term:`executemany`
+ execution for supporting dialects and typically makes use of the
+ :term:`insertmanyvalues` feature.
+
+ .. versionadded:: 2.0.10
+
+ .. seealso::
+
+ :ref:`engine_insertmanyvalues_returning_order` - background on
+ sorting of RETURNING rows for bulk INSERT
+
.. seealso::
:meth:`.UpdateBase.returning`
@@ -636,7 +651,13 @@ class UpdateBase(
coercions.expect(roles.ColumnsClauseRole, c) for c in cols
)
self._return_defaults = True
-
+ if sort_by_parameter_order:
+ if not self.is_insert:
+ raise exc.ArgumentError(
+ "The 'sort_by_parameter_order' argument to "
+ "return_defaults() only applies to INSERT statements"
+ )
+ self._sort_by_parameter_order = True
if supplemental_cols:
# uniquifying while also maintaining order (the maintain of order
# is for test suites but also for vertical splicing
@@ -661,7 +682,10 @@ class UpdateBase(
@_generative
def returning(
- self, *cols: _ColumnsClauseArgument[Any], **__kw: Any
+ self,
+ *cols: _ColumnsClauseArgument[Any],
+ sort_by_parameter_order: bool = False,
+ **__kw: Any,
) -> UpdateBase:
r"""Add a :term:`RETURNING` or equivalent clause to this statement.
@@ -723,6 +747,25 @@ class UpdateBase(
read the documentation notes for the database in use in
order to determine the availability of RETURNING.
+ :param \*cols: series of columns, SQL expressions, or whole tables
+ entities to be returned.
+ :param sort_by_parameter_order: for a batch INSERT that is being
+ executed against multiple parameter sets, organize the results of
+ RETURNING so that the returned rows correspond to the order of
+ parameter sets passed in. This applies only to an :term:`executemany`
+ execution for supporting dialects and typically makes use of the
+ :term:`insertmanyvalues` feature.
+
+ .. versionadded:: 2.0.10
+
+ .. seealso::
+
+ :ref:`engine_insertmanyvalues_returning_order` - background on
+ sorting of RETURNING rows for bulk INSERT (Core level discussion)
+
+ :ref:`orm_queryguide_bulk_insert_returning_ordered` - example of
+ use with :ref:`orm_queryguide_bulk_insert` (ORM level discussion)
+
.. seealso::
:meth:`.UpdateBase.return_defaults` - an alternative method tailored
@@ -733,7 +776,7 @@ class UpdateBase(
""" # noqa: E501
if __kw:
- raise _no_kw()
+ raise _unexpected_kw("UpdateBase.returning()", __kw)
if self._return_defaults:
raise exc.InvalidRequestError(
"return_defaults() is already configured on this statement"
@@ -741,6 +784,13 @@ class UpdateBase(
self._returning += tuple(
coercions.expect(roles.ColumnsClauseRole, c) for c in cols
)
+ if sort_by_parameter_order:
+ if not self.is_insert:
+ raise exc.ArgumentError(
+ "The 'sort_by_parameter_order' argument to returning() "
+ "only applies to INSERT statements"
+ )
+ self._sort_by_parameter_order = True
return self
def corresponding_column(
@@ -1123,6 +1173,8 @@ class Insert(ValuesBase):
select = None
include_insert_from_select_defaults = False
+ _sort_by_parameter_order: bool = False
+
is_insert = True
table: TableClause
@@ -1143,6 +1195,7 @@ class Insert(ValuesBase):
"_return_defaults_columns",
InternalTraversal.dp_clauseelement_tuple,
),
+ ("_sort_by_parameter_order", InternalTraversal.dp_boolean),
]
+ HasPrefixes._has_prefixes_traverse_internals
+ DialectKWArgs._dialect_kwargs_traverse_internals
@@ -1231,24 +1284,35 @@ class Insert(ValuesBase):
if TYPE_CHECKING:
- # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8
+ # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 ", *, sort_by_parameter_order: bool = False" # noqa: E501
# code within this block is **programmatically,
# statically generated** by tools/generate_tuple_map_overloads.py
@overload
- def returning(self, __ent0: _TCCA[_T0]) -> ReturningInsert[Tuple[_T0]]:
+ def returning(
+ self, __ent0: _TCCA[_T0], *, sort_by_parameter_order: bool = False
+ ) -> ReturningInsert[Tuple[_T0]]:
...
@overload
def returning(
- self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1]
+ self,
+ __ent0: _TCCA[_T0],
+ __ent1: _TCCA[_T1],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1]]:
...
@overload
def returning(
- self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1], __ent2: _TCCA[_T2]
+ self,
+ __ent0: _TCCA[_T0],
+ __ent1: _TCCA[_T1],
+ __ent2: _TCCA[_T2],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1, _T2]]:
...
@@ -1259,6 +1323,8 @@ class Insert(ValuesBase):
__ent1: _TCCA[_T1],
__ent2: _TCCA[_T2],
__ent3: _TCCA[_T3],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3]]:
...
@@ -1270,6 +1336,8 @@ class Insert(ValuesBase):
__ent2: _TCCA[_T2],
__ent3: _TCCA[_T3],
__ent4: _TCCA[_T4],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4]]:
...
@@ -1282,6 +1350,8 @@ class Insert(ValuesBase):
__ent3: _TCCA[_T3],
__ent4: _TCCA[_T4],
__ent5: _TCCA[_T5],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5]]:
...
@@ -1295,6 +1365,8 @@ class Insert(ValuesBase):
__ent4: _TCCA[_T4],
__ent5: _TCCA[_T5],
__ent6: _TCCA[_T6],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6]]:
...
@@ -1309,6 +1381,8 @@ class Insert(ValuesBase):
__ent5: _TCCA[_T5],
__ent6: _TCCA[_T6],
__ent7: _TCCA[_T7],
+ *,
+ sort_by_parameter_order: bool = False,
) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7]]:
...
@@ -1316,12 +1390,18 @@ class Insert(ValuesBase):
@overload
def returning(
- self, *cols: _ColumnsClauseArgument[Any], **__kw: Any
+ self,
+ *cols: _ColumnsClauseArgument[Any],
+ sort_by_parameter_order: bool = False,
+ **__kw: Any,
) -> ReturningInsert[Any]:
...
def returning(
- self, *cols: _ColumnsClauseArgument[Any], **__kw: Any
+ self,
+ *cols: _ColumnsClauseArgument[Any],
+ sort_by_parameter_order: bool = False,
+ **__kw: Any,
) -> ReturningInsert[Any]:
...