diff options
Diffstat (limited to 'lib/sqlalchemy/sql')
-rw-r--r-- | lib/sqlalchemy/sql/_typing.py | 6 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/base.py | 51 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/compiler.py | 597 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/crud.py | 129 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/dml.py | 100 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/elements.py | 7 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/schema.py | 324 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/selectable.py | 3 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/type_api.py | 31 |
9 files changed, 1133 insertions, 115 deletions
diff --git a/lib/sqlalchemy/sql/_typing.py b/lib/sqlalchemy/sql/_typing.py index 14b1b9594..596493b7c 100644 --- a/lib/sqlalchemy/sql/_typing.py +++ b/lib/sqlalchemy/sql/_typing.py @@ -12,6 +12,7 @@ from typing import Any from typing import Callable from typing import Dict from typing import Mapping +from typing import NoReturn from typing import Set from typing import Tuple from typing import Type @@ -364,3 +365,8 @@ def _no_kw() -> exc.ArgumentError: "Additional keyword arguments are not accepted by this " "function/method. The presence of **kw is for pep-484 typing purposes" ) + + +def _unexpected_kw(methname: str, kw: Dict[str, Any]) -> NoReturn: + k = list(kw)[0] + raise TypeError(f"{methname} got an unexpected keyword argument '{k}'") diff --git a/lib/sqlalchemy/sql/base.py b/lib/sqlalchemy/sql/base.py index 6267fd814..253927770 100644 --- a/lib/sqlalchemy/sql/base.py +++ b/lib/sqlalchemy/sql/base.py @@ -30,6 +30,7 @@ from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping +from typing import NamedTuple from typing import NoReturn from typing import Optional from typing import overload @@ -75,6 +76,8 @@ if TYPE_CHECKING: from .elements import NamedColumn from .elements import SQLCoreOperations from .elements import TextClause + from .schema import Column + from .schema import DefaultGenerator from .selectable import _JoinTargetElement from .selectable import _SelectIterable from .selectable import FromClause @@ -122,6 +125,35 @@ _Fn = TypeVar("_Fn", bound=Callable[..., Any]) _AmbiguousTableNameMap = MutableMapping[str, str] +class _DefaultDescriptionTuple(NamedTuple): + arg: Any + is_scalar: Optional[bool] + is_callable: Optional[bool] + is_sentinel: Optional[bool] + + @classmethod + def _from_column_default( + cls, default: Optional[DefaultGenerator] + ) -> _DefaultDescriptionTuple: + return ( + _DefaultDescriptionTuple( + default.arg, # type: ignore + default.is_scalar, + default.is_callable, + default.is_sentinel, + ) + if default + and ( + default.has_arg + or (not default.for_update and default.is_sentinel) + ) + else _DefaultDescriptionTuple(None, None, None, None) + ) + + +_never_select_column = operator.attrgetter("_omit_from_statements") + + class _EntityNamespace(Protocol): def __getattr__(self, key: str) -> SQLCoreOperations[Any]: ... @@ -1303,6 +1335,25 @@ class SchemaVisitor(ClauseVisitor): __traverse_options__ = {"schema_visitor": True} +class _SentinelDefaultCharacterization(Enum): + NONE = "none" + UNKNOWN = "unknown" + CLIENTSIDE = "clientside" + SENTINEL_DEFAULT = "sentinel_default" + SERVERSIDE = "serverside" + IDENTITY = "identity" + SEQUENCE = "sequence" + + +class _SentinelColumnCharacterization(NamedTuple): + columns: Optional[Sequence[Column[Any]]] = None + is_explicit: bool = False + is_autoinc: bool = False + default_characterization: _SentinelDefaultCharacterization = ( + _SentinelDefaultCharacterization.NONE + ) + + _COLKEY = TypeVar("_COLKEY", Union[None, str], str) _COL_co = TypeVar("_COL_co", bound="ColumnElement[Any]", covariant=True) diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 9c68b311a..554a84112 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -41,6 +41,7 @@ from typing import ClassVar from typing import Dict from typing import FrozenSet from typing import Iterable +from typing import Iterator from typing import List from typing import Mapping from typing import MutableMapping @@ -70,6 +71,7 @@ from ._typing import is_column_element from ._typing import is_dml from .base import _from_objects from .base import _NONE_NAME +from .base import _SentinelDefaultCharacterization from .base import Executable from .base import NO_ARG from .elements import ClauseElement @@ -81,6 +83,7 @@ from .visitors import prefix_anon_map from .visitors import Visitable from .. import exc from .. import util +from ..util import FastIntFlag from ..util.typing import Literal from ..util.typing import Protocol from ..util.typing import TypedDict @@ -100,6 +103,7 @@ if typing.TYPE_CHECKING: from .elements import ColumnElement from .elements import Label from .functions import Function + from .schema import Table from .selectable import AliasedReturnsRows from .selectable import CompoundSelectState from .selectable import CTE @@ -109,9 +113,14 @@ if typing.TYPE_CHECKING: from .selectable import Select from .selectable import SelectState from .type_api import _BindProcessorType + from .type_api import _SentinelProcessorType from ..engine.cursor import CursorResultMetaData from ..engine.interfaces import _CoreSingleExecuteParams + from ..engine.interfaces import _DBAPIAnyExecuteParams + from ..engine.interfaces import _DBAPIMultiExecuteParams + from ..engine.interfaces import _DBAPISingleExecuteParams from ..engine.interfaces import _ExecuteOptions + from ..engine.interfaces import _GenericSetInputSizesType from ..engine.interfaces import _MutableCoreSingleExecuteParams from ..engine.interfaces import Dialect from ..engine.interfaces import SchemaTranslateMapType @@ -460,12 +469,160 @@ class ExpandedState(NamedTuple): class _InsertManyValues(NamedTuple): - """represents state to use for executing an "insertmanyvalues" statement""" + """represents state to use for executing an "insertmanyvalues" statement. + + The primary consumers of this object are the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods. + + .. versionadded:: 2.0 + + """ is_default_expr: bool + """if True, the statement is of the form + ``INSERT INTO TABLE DEFAULT VALUES``, and can't be rewritten as a "batch" + + """ + single_values_expr: str + """The rendered "values" clause of the INSERT statement. + + This is typically the parenthesized section e.g. "(?, ?, ?)" or similar. + The insertmanyvalues logic uses this string as a search and replace + target. + + """ + insert_crud_params: List[crud._CrudParamElementStr] + """List of Column / bind names etc. used while rewriting the statement""" + num_positional_params_counted: int + """the number of bound parameters in a single-row statement. + + This count may be larger or smaller than the actual number of columns + targeted in the INSERT, as it accommodates for SQL expressions + in the values list that may have zero or more parameters embedded + within them. + + This count is part of what's used to organize rewritten parameter lists + when batching. + + """ + + sort_by_parameter_order: bool = False + """if the deterministic_returnined_order parameter were used on the + insert. + + All of the attributes following this will only be used if this is True. + + """ + + includes_upsert_behaviors: bool = False + """if True, we have to accommodate for upsert behaviors. + + This will in some cases downgrade "insertmanyvalues" that requests + deterministic ordering. + + """ + + sentinel_columns: Optional[Sequence[Column[Any]]] = None + """List of sentinel columns that were located. + + This list is only here if the INSERT asked for + sort_by_parameter_order=True, + and dialect-appropriate sentinel columns were located. + + .. versionadded:: 2.0.10 + + """ + + num_sentinel_columns: int = 0 + """how many sentinel columns are in the above list, if any. + + This is the same as + ``len(sentinel_columns) if sentinel_columns is not None else 0`` + + """ + + sentinel_param_keys: Optional[Sequence[Union[str, int]]] = None + """parameter str keys / int indexes in each param dictionary / tuple + that would link to the client side "sentinel" values for that row, which + we can use to match up parameter sets to result rows. + + This is only present if sentinel_columns is present and the INSERT + statement actually refers to client side values for these sentinel + columns. + + .. versionadded:: 2.0.10 + + """ + + implicit_sentinel: bool = False + """if True, we have exactly one sentinel column and it uses a server side + value, currently has to generate an incrementing integer value. + + The dialect in question would have asserted that it supports receiving + these values back and sorting on that value as a means of guaranteeing + correlation with the incoming parameter list. + + .. versionadded:: 2.0.10 + + """ + + embed_values_counter: bool = False + """Whether to embed an incrementing integer counter in each parameter + set within the VALUES clause as parameters are batched over. + + This is only used for a specific INSERT..SELECT..VALUES..RETURNING syntax + where a subquery is used to produce value tuples. Current support + includes PostgreSQL, Microsoft SQL Server. + + .. versionadded:: 2.0.10 + + """ + + +class _InsertManyValuesBatch(NamedTuple): + """represents an individual batch SQL statement for insertmanyvalues. + + This is passed through the + :meth:`.SQLCompiler._deliver_insertmanyvalues_batches` and + :meth:`.DefaultDialect._deliver_insertmanyvalues_batches` methods out + to the :class:`.Connection` within the + :meth:`.Connection._exec_insertmany_context` method. + + .. versionadded:: 2.0.10 + + """ + + replaced_statement: str + replaced_parameters: _DBAPIAnyExecuteParams + processed_setinputsizes: Optional[_GenericSetInputSizesType] + batch: Sequence[_DBAPISingleExecuteParams] + batch_size: int + batchnum: int + total_batches: int + rows_sorted: bool + is_downgraded: bool + + +class InsertmanyvaluesSentinelOpts(FastIntFlag): + """bitflag enum indicating styles of PK defaults + which can work as implicit sentinel columns + + """ + + NOT_SUPPORTED = 1 + AUTOINCREMENT = 2 + IDENTITY = 4 + SEQUENCE = 8 + + ANY_AUTOINCREMENT = AUTOINCREMENT | IDENTITY | SEQUENCE + _SUPPORTED_OR_NOT = NOT_SUPPORTED | ANY_AUTOINCREMENT + + USE_INSERT_FROM_SELECT = 16 + RENDER_SELECT_COL_CASTS = 64 class CompilerState(IntEnum): @@ -1484,6 +1641,7 @@ class SQLCompiler(Compiled): if self._insertmanyvalues: positions = [] + single_values_expr = re.sub( self._positional_pattern, find_position, @@ -1499,13 +1657,19 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( single_values_expr=single_values_expr, insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) def _process_numeric(self): @@ -1574,15 +1738,21 @@ class SQLCompiler(Compiled): for v in self._insertmanyvalues.insert_crud_params ] - self._insertmanyvalues = _InsertManyValues( - is_default_expr=self._insertmanyvalues.is_default_expr, + sentinel_param_int_idxs = ( + [ + self.positiontup.index(cast(str, _param_key)) + for _param_key in self._insertmanyvalues.sentinel_param_keys # noqa: E501 + ] + if self._insertmanyvalues.sentinel_param_keys is not None + else None + ) + + self._insertmanyvalues = self._insertmanyvalues._replace( # This has the numbers (:1, :2) single_values_expr=single_values_expr, # The single binds are instead %s so they can be formatted insert_crud_params=insert_crud_params, - num_positional_params_counted=( - self._insertmanyvalues.num_positional_params_counted - ), + sentinel_param_keys=sentinel_param_int_idxs, ) @util.memoized_property @@ -1611,6 +1781,23 @@ class SQLCompiler(Compiled): if value is not None } + @util.memoized_property + def _imv_sentinel_value_resolvers( + self, + ) -> Optional[Sequence[Optional[_SentinelProcessorType[Any]]]]: + imv = self._insertmanyvalues + if imv is None or imv.sentinel_columns is None: + return None + + sentinel_value_resolvers = [ + _scol.type._cached_sentinel_value_processor(self.dialect) + for _scol in imv.sentinel_columns + ] + if util.NONE_SET.issuperset(sentinel_value_resolvers): + return None + else: + return sentinel_value_resolvers + def is_subquery(self): return len(self.stack) > 1 @@ -5023,27 +5210,111 @@ class SQLCompiler(Compiled): ) return dialect_hints, table_text - def _insert_stmt_should_use_insertmanyvalues(self, statement): - return ( - self.dialect.supports_multivalues_insert - and self.dialect.use_insertmanyvalues - # note self.implicit_returning or self._result_columns - # implies self.dialect.insert_returning capability - and ( - self.dialect.use_insertmanyvalues_wo_returning - or self.implicit_returning - or self._result_columns + # within the realm of "insertmanyvalues sentinel columns", + # these lookups match different kinds of Column() configurations + # to specific backend capabilities. they are broken into two + # lookups, one for autoincrement columns and the other for non + # autoincrement columns + _sentinel_col_non_autoinc_lookup = util.immutabledict( + { + _SentinelDefaultCharacterization.CLIENTSIDE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.SENTINEL_DEFAULT: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts._SUPPORTED_OR_NOT + ), + _SentinelDefaultCharacterization.IDENTITY: ( + InsertmanyvaluesSentinelOpts.IDENTITY + ), + _SentinelDefaultCharacterization.SEQUENCE: ( + InsertmanyvaluesSentinelOpts.SEQUENCE + ), + } + ) + _sentinel_col_autoinc_lookup = _sentinel_col_non_autoinc_lookup.union( + { + _SentinelDefaultCharacterization.NONE: ( + InsertmanyvaluesSentinelOpts.AUTOINCREMENT + ), + } + ) + + def _get_sentinel_column_for_table( + self, table: Table + ) -> Optional[Sequence[Column[Any]]]: + """given a :class:`.Table`, return a usable sentinel column or + columns for this dialect if any. + + Return None if no sentinel columns could be identified, or raise an + error if a column was marked as a sentinel explicitly but isn't + compatible with this dialect. + + """ + + sentinel_opts = self.dialect.insertmanyvalues_implicit_sentinel + sentinel_characteristics = table._sentinel_column_characteristics + + sent_cols = sentinel_characteristics.columns + + if sent_cols is None: + return None + + if sentinel_characteristics.is_autoinc: + bitmask = self._sentinel_col_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 ) - ) + else: + bitmask = self._sentinel_col_non_autoinc_lookup.get( + sentinel_characteristics.default_characterization, 0 + ) + + if sentinel_opts & bitmask: + return sent_cols + + if sentinel_characteristics.is_explicit: + # a column was explicitly marked as insert_sentinel=True, + # however it is not compatible with this dialect. they should + # not indicate this column as a sentinel if they need to include + # this dialect. + + # TODO: do we want non-primary key explicit sentinel cols + # that can gracefully degrade for some backends? + # insert_sentinel="degrade" perhaps. not for the initial release. + # I am hoping people are generally not dealing with this sentinel + # business at all. + + # if is_explicit is True, there will be only one sentinel column. + + raise exc.InvalidRequestError( + f"Column {sent_cols[0]} can't be explicitly " + "marked as a sentinel column when using the " + f"{self.dialect.name} dialect, as the " + "particular type of default generation on this column is " + "not currently compatible with this dialect's specific " + f"INSERT..RETURNING syntax which can receive the " + "server-generated value in " + "a deterministic way. To remove this error, remove " + "insert_sentinel=True from primary key autoincrement " + "columns; these columns are automatically used as " + "sentinels for supported dialects in any case." + ) + + return None def _deliver_insertmanyvalues_batches( - self, statement, parameters, generic_setinputsizes, batch_size - ): + self, + statement: str, + parameters: _DBAPIMultiExecuteParams, + generic_setinputsizes: Optional[_GenericSetInputSizesType], + batch_size: int, + sort_by_parameter_order: bool, + ) -> Iterator[_InsertManyValuesBatch]: imv = self._insertmanyvalues assert imv is not None - executemany_values = f"({imv.single_values_expr})" - lenparams = len(parameters) if imv.is_default_expr and not self.dialect.supports_default_metavalue: # backend doesn't support @@ -5058,19 +5329,41 @@ class SQLCompiler(Compiled): # cursor.lastrowid etc. still goes through the more heavyweight # "ExecutionContext per statement" system as it isn't usable # as a generic "RETURNING" approach - for batchnum, param in enumerate(parameters, 1): - yield ( + use_row_at_a_time = True + downgraded = False + elif not self.dialect.supports_multivalues_insert or ( + sort_by_parameter_order + and self._result_columns + and (imv.sentinel_columns is None or imv.includes_upsert_behaviors) + ): + # deterministic order was requested and the compiler could + # not organize sentinel columns for this dialect/statement. + # use row at a time + use_row_at_a_time = True + downgraded = True + else: + use_row_at_a_time = False + downgraded = False + + if use_row_at_a_time: + for batchnum, param in enumerate( + cast("Sequence[_DBAPISingleExecuteParams]", parameters), 1 + ): + yield _InsertManyValuesBatch( statement, param, generic_setinputsizes, + [param], + batch_size, batchnum, lenparams, + sort_by_parameter_order, + downgraded, ) return - else: - statement = statement.replace( - executemany_values, "__EXECMANY_TOKEN__" - ) + + executemany_values = f"({imv.single_values_expr})" + statement = statement.replace(executemany_values, "__EXECMANY_TOKEN__") # Use optional insertmanyvalues_max_parameters # to further shrink the batch size so that there are no more than @@ -5094,7 +5387,7 @@ class SQLCompiler(Compiled): batches = list(parameters) - processed_setinputsizes = None + processed_setinputsizes: Optional[_GenericSetInputSizesType] = None batchnum = 1 total_batches = lenparams // batch_size + ( 1 if lenparams % batch_size else 0 @@ -5124,10 +5417,14 @@ class SQLCompiler(Compiled): ) return formatted + if imv.embed_values_counter: + imv_values_counter = ", _IMV_VALUES_COUNTER" + else: + imv_values_counter = "" formatted_values_clause = f"""({', '.join( apply_placeholders(bind_keys, formatted) for _, _, formatted, bind_keys in insert_crud_params - )})""" + )}{imv_values_counter})""" keys_to_replace = all_keys.intersection( escaped_bind_names.get(key, key) @@ -5143,7 +5440,13 @@ class SQLCompiler(Compiled): formatted_values_clause = "" keys_to_replace = set() base_parameters = {} - executemany_values_w_comma = f"({imv.single_values_expr}), " + + if imv.embed_values_counter: + executemany_values_w_comma = ( + f"({imv.single_values_expr}, _IMV_VALUES_COUNTER), " + ) + else: + executemany_values_w_comma = f"({imv.single_values_expr}), " all_names_we_will_expand: Set[str] = set() for elem in imv.insert_crud_params: @@ -5176,7 +5479,7 @@ class SQLCompiler(Compiled): ) while batches: - batch = batches[0:batch_size] + batch = cast("Sequence[Any]", batches[0:batch_size]) batches[0:batch_size] = [] if generic_setinputsizes: @@ -5196,7 +5499,7 @@ class SQLCompiler(Compiled): if self.positional: num_ins_params = imv.num_positional_params_counted - batch_iterator: Iterable[Tuple[Any, ...]] + batch_iterator: Iterable[Sequence[Any]] if num_ins_params == len(batch[0]): extra_params_left = extra_params_right = () batch_iterator = batch @@ -5208,9 +5511,19 @@ class SQLCompiler(Compiled): for b in batch ) - expanded_values_string = ( - executemany_values_w_comma * len(batch) - )[:-2] + if imv.embed_values_counter: + expanded_values_string = ( + "".join( + executemany_values_w_comma.replace( + "_IMV_VALUES_COUNTER", str(i) + ) + for i, _ in enumerate(batch) + ) + )[:-2] + else: + expanded_values_string = ( + (executemany_values_w_comma * len(batch)) + )[:-2] if self._numeric_binds and num_ins_params > 0: # numeric will always number the parameters inside of @@ -5254,12 +5567,14 @@ class SQLCompiler(Compiled): replaced_parameters = base_parameters.copy() for i, param in enumerate(batch): - replaced_values_clauses.append( - formatted_values_clause.replace( - "EXECMANY_INDEX__", str(i) - ) + + fmv = formatted_values_clause.replace( + "EXECMANY_INDEX__", str(i) ) + if imv.embed_values_counter: + fmv = fmv.replace("_IMV_VALUES_COUNTER", str(i)) + replaced_values_clauses.append(fmv) replaced_parameters.update( {f"{key}__{i}": param[key] for key in keys_to_replace} ) @@ -5269,12 +5584,16 @@ class SQLCompiler(Compiled): ", ".join(replaced_values_clauses), ) - yield ( + yield _InsertManyValuesBatch( replaced_statement, replaced_parameters, processed_setinputsizes, + batch, + batch_size, batchnum, total_batches, + sort_by_parameter_order, + False, ) batchnum += 1 @@ -5360,6 +5679,13 @@ class SQLCompiler(Compiled): "version settings does not support " "in-place multirow inserts." % self.dialect.name ) + elif ( + self.implicit_returning or insert_stmt._returning + ) and insert_stmt._sort_by_parameter_order: + raise exc.CompileError( + "RETURNING cannot be determinstically sorted when " + "using an INSERT which includes multi-row values()." + ) crud_params_single = crud_params_struct.single_params else: crud_params_single = crud_params_struct.single_params @@ -5390,11 +5716,82 @@ class SQLCompiler(Compiled): [expr for _, expr, _, _ in crud_params_single] ) - if self.implicit_returning or insert_stmt._returning: + # look for insertmanyvalues attributes that would have been configured + # by crud.py as it scanned through the columns to be part of the + # INSERT + use_insertmanyvalues = crud_params_struct.use_insertmanyvalues + named_sentinel_params: Optional[Sequence[str]] = None + add_sentinel_cols = None + implicit_sentinel = False + + returning_cols = self.implicit_returning or insert_stmt._returning + if returning_cols: + + add_sentinel_cols = crud_params_struct.use_sentinel_columns + + if add_sentinel_cols is not None: + assert use_insertmanyvalues + + # search for the sentinel column explicitly present + # in the INSERT columns list, and additionally check that + # this column has a bound parameter name set up that's in the + # parameter list. If both of these cases are present, it means + # we will have a client side value for the sentinel in each + # parameter set. + + _params_by_col = { + col: param_names + for col, _, _, param_names in crud_params_single + } + named_sentinel_params = [] + for _add_sentinel_col in add_sentinel_cols: + if _add_sentinel_col not in _params_by_col: + named_sentinel_params = None + break + param_name = self._within_exec_param_key_getter( + _add_sentinel_col + ) + if param_name not in _params_by_col[_add_sentinel_col]: + named_sentinel_params = None + break + named_sentinel_params.append(param_name) + + if named_sentinel_params is None: + # if we are not going to have a client side value for + # the sentinel in the parameter set, that means it's + # an autoincrement, an IDENTITY, or a server-side SQL + # expression like nextval('seqname'). So this is + # an "implicit" sentinel; we will look for it in + # RETURNING + # only, and then sort on it. For this case on PG, + # SQL Server we have to use a special INSERT form + # that guarantees the server side function lines up with + # the entries in the VALUES. + if ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.ANY_AUTOINCREMENT + ): + implicit_sentinel = True + else: + # here, we are not using a sentinel at all + # and we are likely the SQLite dialect. + # The first add_sentinel_col that we have should not + # be marked as "insert_sentinel=True". if it was, + # an error should have been raised in + # _get_sentinel_column_for_table. + assert not add_sentinel_cols[0]._insert_sentinel, ( + "sentinel selection rules should have prevented " + "us from getting here for this dialect" + ) + + # always put the sentinel columns last. even if they are + # in the returning list already, they will be there twice + # then. + returning_cols = list(returning_cols) + list(add_sentinel_cols) returning_clause = self.returning_clause( insert_stmt, - self.implicit_returning or insert_stmt._returning, + returning_cols, populate_result_map=toplevel, ) @@ -5423,9 +5820,8 @@ class SQLCompiler(Compiled): text += " %s" % select_text elif not crud_params_single and supports_default_values: text += " DEFAULT VALUES" - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + self._insertmanyvalues = _InsertManyValues( True, self.dialect.default_metavalue_token, @@ -5433,6 +5829,17 @@ class SQLCompiler(Compiled): "List[crud._CrudParamElementStr]", crud_params_single ), counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + implicit_sentinel=implicit_sentinel, ) elif compile_state._has_multi_parameters: text += " VALUES %s" % ( @@ -5440,11 +5847,9 @@ class SQLCompiler(Compiled): "(%s)" % (", ".join(value for _, _, value, _ in crud_param_set)) for crud_param_set in crud_params_struct.all_multi_params - ) + ), ) else: - # TODO: why is third element of crud_params_single not str - # already? insert_single_values_expr = ", ".join( [ value @@ -5455,20 +5860,90 @@ class SQLCompiler(Compiled): ] ) - text += " VALUES (%s)" % insert_single_values_expr - if toplevel and self._insert_stmt_should_use_insertmanyvalues( - insert_stmt - ): + if use_insertmanyvalues: + + if ( + implicit_sentinel + and ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.USE_INSERT_FROM_SELECT + ) + # this is checking if we have + # INSERT INTO table (id) VALUES (DEFAULT). + and not (crud_params_struct.is_default_metavalue_only) + ): + # if we have a sentinel column that is server generated, + # then for selected backends render the VALUES list as a + # subquery. This is the orderable form supported by + # PostgreSQL and SQL Server. + embed_sentinel_value = True + + render_bind_casts = ( + self.dialect.insertmanyvalues_implicit_sentinel + & InsertmanyvaluesSentinelOpts.RENDER_SELECT_COL_CASTS + ) + + colnames = ", ".join( + f"p{i}" for i, _ in enumerate(crud_params_single) + ) + + if render_bind_casts: + # render casts for the SELECT list. For PG, we are + # already rendering bind casts in the parameter list, + # selectively for the more "tricky" types like ARRAY. + # however, even for the "easy" types, if the parameter + # is NULL for every entry, PG gives up and says + # "it must be TEXT", which fails for other easy types + # like ints. So we cast on this side too. + colnames_w_cast = ", ".join( + self.render_bind_cast( + col.type, + col.type._unwrapped_dialect_impl(self.dialect), + f"p{i}", + ) + for i, (col, *_) in enumerate(crud_params_single) + ) + else: + colnames_w_cast = colnames + + text += ( + f" SELECT {colnames_w_cast} FROM " + f"(VALUES ({insert_single_values_expr})) " + f"AS imp_sen({colnames}, sen_counter) " + "ORDER BY sen_counter" + ) + else: + # otherwise, if no sentinel or backend doesn't support + # orderable subquery form, use a plain VALUES list + embed_sentinel_value = False + text += f" VALUES ({insert_single_values_expr})" + self._insertmanyvalues = _InsertManyValues( - False, - insert_single_values_expr, - cast( + is_default_expr=False, + single_values_expr=insert_single_values_expr, + insert_crud_params=cast( "List[crud._CrudParamElementStr]", crud_params_single, ), - counted_bindparam, + num_positional_params_counted=counted_bindparam, + sort_by_parameter_order=( + insert_stmt._sort_by_parameter_order + ), + includes_upsert_behaviors=( + insert_stmt._post_values_clause is not None + ), + sentinel_columns=add_sentinel_cols, + num_sentinel_columns=len(add_sentinel_cols) + if add_sentinel_cols + else 0, + sentinel_param_keys=named_sentinel_params, + implicit_sentinel=implicit_sentinel, + embed_values_counter=embed_sentinel_value, ) + else: + text += f" VALUES ({insert_single_values_expr})" + if insert_stmt._post_values_clause is not None: post_values_clause = self.process( insert_stmt._post_values_clause, **kw diff --git a/lib/sqlalchemy/sql/crud.py b/lib/sqlalchemy/sql/crud.py index 04b62d1ff..563f61c04 100644 --- a/lib/sqlalchemy/sql/crud.py +++ b/lib/sqlalchemy/sql/crud.py @@ -34,6 +34,7 @@ from . import coercions from . import dml from . import elements from . import roles +from .base import _DefaultDescriptionTuple from .dml import isinsert as _compile_state_isinsert from .elements import ColumnClause from .schema import default_is_clause_element @@ -53,6 +54,7 @@ if TYPE_CHECKING: from .elements import ColumnElement from .elements import KeyedColumnElement from .schema import _SQLExprDefault + from .schema import Column REQUIRED = util.symbol( "REQUIRED", @@ -79,20 +81,22 @@ def _as_dml_column(c: ColumnElement[Any]) -> ColumnClause[Any]: _CrudParamElement = Tuple[ "ColumnElement[Any]", - str, - Optional[Union[str, "_SQLExprDefault"]], + str, # column name + Optional[ + Union[str, "_SQLExprDefault"] + ], # bound parameter string or SQL expression to apply Iterable[str], ] _CrudParamElementStr = Tuple[ "KeyedColumnElement[Any]", str, # column name - str, # placeholder + str, # bound parameter string Iterable[str], ] _CrudParamElementSQLExpr = Tuple[ "ColumnClause[Any]", str, - "_SQLExprDefault", + "_SQLExprDefault", # SQL expression to apply Iterable[str], ] @@ -101,8 +105,10 @@ _CrudParamSequence = List[_CrudParamElement] class _CrudParams(NamedTuple): single_params: _CrudParamSequence - all_multi_params: List[Sequence[_CrudParamElementStr]] + is_default_metavalue_only: bool = False + use_insertmanyvalues: bool = False + use_sentinel_columns: Optional[Sequence[Column[Any]]] = None def _get_crud_params( @@ -206,6 +212,7 @@ def _get_crud_params( (c.key,), ) for c in stmt.table.columns + if not c._omit_from_statements ], [], ) @@ -301,8 +308,10 @@ def _get_crud_params( toplevel, kw, ) + use_insertmanyvalues = False + use_sentinel_columns = None else: - _scan_cols( + use_insertmanyvalues, use_sentinel_columns = _scan_cols( compiler, stmt, compile_state, @@ -328,6 +337,8 @@ def _get_crud_params( % (", ".join("%s" % (c,) for c in check)) ) + is_default_metavalue_only = False + if ( _compile_state_isinsert(compile_state) and compile_state._has_multi_parameters @@ -363,8 +374,15 @@ def _get_crud_params( (), ) ] - - return _CrudParams(values, []) + is_default_metavalue_only = True + + return _CrudParams( + values, + [], + is_default_metavalue_only=is_default_metavalue_only, + use_insertmanyvalues=use_insertmanyvalues, + use_sentinel_columns=use_sentinel_columns, + ) @overload @@ -527,7 +545,19 @@ def _scan_insert_from_select_cols( if stmt.include_insert_from_select_defaults: col_set = set(cols) for col in stmt.table.columns: - if col not in col_set and col.default: + # omit columns that were not in the SELECT statement. + # this will omit columns marked as omit_from_statements naturally, + # as long as that col was not explicit in the SELECT. + # if an omit_from_statements col has a "default" on it, then + # we need to include it, as these defaults should still fire off. + # but, if it has that default and it's the "sentinel" default, + # we don't do sentinel default operations for insert_from_select + # here so we again omit it. + if ( + col not in col_set + and col.default + and not col.default.is_sentinel + ): cols.append(col) for c in cols: @@ -579,6 +609,8 @@ def _scan_cols( implicit_returning, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) = _get_returning_modifiers(compiler, stmt, compile_state, toplevel) assert compile_state.isupdate or compile_state.isinsert @@ -672,9 +704,12 @@ def _scan_cols( elif c.default is not None: # column has a default, but it's not a pk column, or it is but # we don't need to get the pk back. - _append_param_insert_hasdefault( - compiler, stmt, c, implicit_return_defaults, values, kw - ) + if not c.default.is_sentinel or ( + use_sentinel_columns is not None + ): + _append_param_insert_hasdefault( + compiler, stmt, c, implicit_return_defaults, values, kw + ) elif c.server_default is not None: # column has a DDL-level default, and is either not a pk @@ -730,6 +765,8 @@ def _scan_cols( if c in remaining_supplemental ) + return (use_insertmanyvalues, use_sentinel_columns) + def _setup_delete_return_defaults( compiler, @@ -744,7 +781,7 @@ def _setup_delete_return_defaults( toplevel, kw, ): - (_, _, implicit_return_defaults, _) = _get_returning_modifiers( + (_, _, implicit_return_defaults, *_) = _get_returning_modifiers( compiler, stmt, compile_state, toplevel ) @@ -1248,6 +1285,18 @@ class _multiparam_column(elements.ColumnElement[Any]): and other.original == self.original ) + @util.memoized_property + def _default_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.default) + + @util.memoized_property + def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.onupdate) + def _process_multiparam_default_bind( compiler: SQLCompiler, @@ -1459,16 +1508,15 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): """ + dialect = compiler.dialect + need_pks = ( toplevel and _compile_state_isinsert(compile_state) and not stmt._inline and ( not compiler.for_executemany - or ( - compiler.dialect.insert_executemany_returning - and stmt._return_defaults - ) + or (dialect.insert_executemany_returning and stmt._return_defaults) ) and not stmt._returning # and (not stmt._returning or stmt._return_defaults) @@ -1479,7 +1527,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): # after the INSERT if that's all we need. postfetch_lastrowid = ( need_pks - and compiler.dialect.postfetch_lastrowid + and dialect.postfetch_lastrowid and stmt.table._autoincrement_column is not None ) @@ -1491,7 +1539,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): need_pks # the dialect can veto it if it just doesnt support RETURNING # with INSERT - and compiler.dialect.insert_returning + and dialect.insert_returning # user-defined implicit_returning on Table can veto it and compile_state._primary_table.implicit_returning # the compile_state can veto it (SQlite uses this to disable @@ -1506,10 +1554,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): # and a lot of weird use cases are supported by it. # SQLite lastrowid times 3x faster than returning, # Mariadb lastrowid 2x faster than returning - ( - not postfetch_lastrowid - or compiler.dialect.favor_returning_over_lastrowid - ) + (not postfetch_lastrowid or dialect.favor_returning_over_lastrowid) or compile_state._has_multi_parameters or stmt._return_defaults ) @@ -1521,25 +1566,57 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): should_implicit_return_defaults = ( implicit_returning and stmt._return_defaults ) + explicit_returning = should_implicit_return_defaults or stmt._returning + use_insertmanyvalues = ( + toplevel + and compiler.for_executemany + and dialect.use_insertmanyvalues + and ( + explicit_returning or dialect.use_insertmanyvalues_wo_returning + ) + ) + + use_sentinel_columns = None + if ( + use_insertmanyvalues + and explicit_returning + and stmt._sort_by_parameter_order + ): + use_sentinel_columns = compiler._get_sentinel_column_for_table( + stmt.table + ) + elif compile_state.isupdate: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.update_returning + and dialect.update_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None elif compile_state.isdelete: should_implicit_return_defaults = ( stmt._return_defaults and compile_state._primary_table.implicit_returning and compile_state._supports_implicit_returning - and compiler.dialect.delete_returning + and dialect.delete_returning ) + use_insertmanyvalues = False + use_sentinel_columns = None else: should_implicit_return_defaults = False # pragma: no cover + use_insertmanyvalues = False + use_sentinel_columns = None if should_implicit_return_defaults: if not stmt._return_defaults_columns: + # TODO: this is weird. See #9685 where we have to + # take an extra step to prevent this from happening. why + # would this ever be *all* columns? but if we set to blank, then + # that seems to break things also in the ORM. So we should + # try to clean this up and figure out what return_defaults + # needs to do w/ the ORM etc. here implicit_return_defaults = set(stmt.table.c) else: implicit_return_defaults = set(stmt._return_defaults_columns) @@ -1551,6 +1628,8 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): implicit_returning or should_implicit_return_defaults, implicit_return_defaults, postfetch_lastrowid, + use_insertmanyvalues, + use_sentinel_columns, ) diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py index 830f845b4..911061640 100644 --- a/lib/sqlalchemy/sql/dml.py +++ b/lib/sqlalchemy/sql/dml.py @@ -32,8 +32,8 @@ from typing import Union from . import coercions from . import roles from . import util as sql_util -from ._typing import _no_kw from ._typing import _TP +from ._typing import _unexpected_kw from ._typing import is_column_element from ._typing import is_named_from_clause from .base import _entity_namespace_key @@ -455,6 +455,7 @@ class UpdateBase( self, *cols: _DMLColumnArgument, supplemental_cols: Optional[Iterable[_DMLColumnArgument]] = None, + sort_by_parameter_order: bool = False, ) -> Self: """Make use of a :term:`RETURNING` clause for the purpose of fetching server-side expressions and defaults, for supporting @@ -603,6 +604,20 @@ class UpdateBase( .. versionadded:: 2.0 + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT + .. seealso:: :meth:`.UpdateBase.returning` @@ -636,7 +651,13 @@ class UpdateBase( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) self._return_defaults = True - + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to " + "return_defaults() only applies to INSERT statements" + ) + self._sort_by_parameter_order = True if supplemental_cols: # uniquifying while also maintaining order (the maintain of order # is for test suites but also for vertical splicing @@ -661,7 +682,10 @@ class UpdateBase( @_generative def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> UpdateBase: r"""Add a :term:`RETURNING` or equivalent clause to this statement. @@ -723,6 +747,25 @@ class UpdateBase( read the documentation notes for the database in use in order to determine the availability of RETURNING. + :param \*cols: series of columns, SQL expressions, or whole tables + entities to be returned. + :param sort_by_parameter_order: for a batch INSERT that is being + executed against multiple parameter sets, organize the results of + RETURNING so that the returned rows correspond to the order of + parameter sets passed in. This applies only to an :term:`executemany` + execution for supporting dialects and typically makes use of the + :term:`insertmanyvalues` feature. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :ref:`engine_insertmanyvalues_returning_order` - background on + sorting of RETURNING rows for bulk INSERT (Core level discussion) + + :ref:`orm_queryguide_bulk_insert_returning_ordered` - example of + use with :ref:`orm_queryguide_bulk_insert` (ORM level discussion) + .. seealso:: :meth:`.UpdateBase.return_defaults` - an alternative method tailored @@ -733,7 +776,7 @@ class UpdateBase( """ # noqa: E501 if __kw: - raise _no_kw() + raise _unexpected_kw("UpdateBase.returning()", __kw) if self._return_defaults: raise exc.InvalidRequestError( "return_defaults() is already configured on this statement" @@ -741,6 +784,13 @@ class UpdateBase( self._returning += tuple( coercions.expect(roles.ColumnsClauseRole, c) for c in cols ) + if sort_by_parameter_order: + if not self.is_insert: + raise exc.ArgumentError( + "The 'sort_by_parameter_order' argument to returning() " + "only applies to INSERT statements" + ) + self._sort_by_parameter_order = True return self def corresponding_column( @@ -1123,6 +1173,8 @@ class Insert(ValuesBase): select = None include_insert_from_select_defaults = False + _sort_by_parameter_order: bool = False + is_insert = True table: TableClause @@ -1143,6 +1195,7 @@ class Insert(ValuesBase): "_return_defaults_columns", InternalTraversal.dp_clauseelement_tuple, ), + ("_sort_by_parameter_order", InternalTraversal.dp_boolean), ] + HasPrefixes._has_prefixes_traverse_internals + DialectKWArgs._dialect_kwargs_traverse_internals @@ -1231,24 +1284,35 @@ class Insert(ValuesBase): if TYPE_CHECKING: - # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 + # START OVERLOADED FUNCTIONS self.returning ReturningInsert 1-8 ", *, sort_by_parameter_order: bool = False" # noqa: E501 # code within this block is **programmatically, # statically generated** by tools/generate_tuple_map_overloads.py @overload - def returning(self, __ent0: _TCCA[_T0]) -> ReturningInsert[Tuple[_T0]]: + def returning( + self, __ent0: _TCCA[_T0], *, sort_by_parameter_order: bool = False + ) -> ReturningInsert[Tuple[_T0]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1]]: ... @overload def returning( - self, __ent0: _TCCA[_T0], __ent1: _TCCA[_T1], __ent2: _TCCA[_T2] + self, + __ent0: _TCCA[_T0], + __ent1: _TCCA[_T1], + __ent2: _TCCA[_T2], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2]]: ... @@ -1259,6 +1323,8 @@ class Insert(ValuesBase): __ent1: _TCCA[_T1], __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3]]: ... @@ -1270,6 +1336,8 @@ class Insert(ValuesBase): __ent2: _TCCA[_T2], __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4]]: ... @@ -1282,6 +1350,8 @@ class Insert(ValuesBase): __ent3: _TCCA[_T3], __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5]]: ... @@ -1295,6 +1365,8 @@ class Insert(ValuesBase): __ent4: _TCCA[_T4], __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6]]: ... @@ -1309,6 +1381,8 @@ class Insert(ValuesBase): __ent5: _TCCA[_T5], __ent6: _TCCA[_T6], __ent7: _TCCA[_T7], + *, + sort_by_parameter_order: bool = False, ) -> ReturningInsert[Tuple[_T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7]]: ... @@ -1316,12 +1390,18 @@ class Insert(ValuesBase): @overload def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... def returning( - self, *cols: _ColumnsClauseArgument[Any], **__kw: Any + self, + *cols: _ColumnsClauseArgument[Any], + sort_by_parameter_order: bool = False, + **__kw: Any, ) -> ReturningInsert[Any]: ... diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 0f356ae27..694faee5f 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -91,7 +91,6 @@ if typing.TYPE_CHECKING: from .compiler import SQLCompiler from .functions import FunctionElement from .operators import OperatorType - from .schema import _ServerDefaultType from .schema import Column from .schema import DefaultGenerator from .schema import FetchedValue @@ -1263,6 +1262,8 @@ class ColumnElement( primary_key: bool = False _is_clone_of: Optional[ColumnElement[_T]] _is_column_element = True + _insert_sentinel: bool = False + _omit_from_statements = False foreign_keys: AbstractSet[ForeignKey] = frozenset() @@ -2212,6 +2213,8 @@ class TextClause( _render_label_in_columns_clause = False + _omit_from_statements = False + @property def _hide_froms(self) -> Iterable[FromClause]: return () @@ -4667,7 +4670,7 @@ class ColumnClause( onupdate: Optional[DefaultGenerator] = None default: Optional[DefaultGenerator] = None - server_default: Optional[_ServerDefaultType] = None + server_default: Optional[FetchedValue] = None server_onupdate: Optional[FetchedValue] = None _is_multiparam_column = False diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index 7d964ad05..192096469 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -58,7 +58,10 @@ from . import ddl from . import roles from . import type_api from . import visitors +from .base import _DefaultDescriptionTuple from .base import _NoneName +from .base import _SentinelColumnCharacterization +from .base import _SentinelDefaultCharacterization from .base import DedupeColumnCollection from .base import DialectKWArgs from .base import Executable @@ -77,6 +80,7 @@ from .. import event from .. import exc from .. import inspection from .. import util +from ..util import HasMemoized from ..util.typing import Final from ..util.typing import Literal from ..util.typing import Protocol @@ -107,7 +111,6 @@ if typing.TYPE_CHECKING: _T = TypeVar("_T", bound="Any") _SI = TypeVar("_SI", bound="SchemaItem") -_ServerDefaultType = Union["FetchedValue", str, TextClause, ColumnElement[Any]] _TAB = TypeVar("_TAB", bound="Table") @@ -115,6 +118,10 @@ _CreateDropBind = Union["Engine", "Connection", "MockConnection"] _ConstraintNameArgument = Optional[Union[str, _NoneName]] +_ServerDefaultArgument = Union[ + "FetchedValue", str, TextClause, ColumnElement[Any] +] + class SchemaConst(Enum): @@ -345,6 +352,8 @@ class Table( _columns: DedupeColumnCollection[Column[Any]] + _sentinel_column: Optional[Column[Any]] + constraints: Set[Constraint] """A collection of all :class:`_schema.Constraint` objects associated with this :class:`_schema.Table`. @@ -819,6 +828,8 @@ class Table( assert isinstance(schema, str) self.schema = quoted_name(schema, quote_schema) + self._sentinel_column = None + self.indexes = set() self.constraints = set() PrimaryKeyConstraint( @@ -1005,6 +1016,140 @@ class Table( def _autoincrement_column(self) -> Optional[Column[int]]: return self.primary_key._autoincrement_column + @util.ro_memoized_property + def _sentinel_column_characteristics( + self, + ) -> _SentinelColumnCharacterization: + """determine a candidate column (or columns, in case of a client + generated composite primary key) which can be used as an + "insert sentinel" for an INSERT statement. + + The returned structure, :class:`_SentinelColumnCharacterization`, + includes all the details needed by :class:`.Dialect` and + :class:`.SQLCompiler` to determine if these column(s) can be used + as an INSERT..RETURNING sentinel for a particular database + dialect. + + .. versionadded:: 2.0.10 + + """ + + sentinel_is_explicit = False + sentinel_is_autoinc = False + the_sentinel: Optional[_typing_Sequence[Column[Any]]] = None + + # see if a column was explicitly marked "insert_sentinel=True". + explicit_sentinel_col = self._sentinel_column + + if explicit_sentinel_col is not None: + the_sentinel = (explicit_sentinel_col,) + sentinel_is_explicit = True + + autoinc_col = self._autoincrement_column + if sentinel_is_explicit and explicit_sentinel_col is autoinc_col: + assert autoinc_col is not None + sentinel_is_autoinc = True + elif explicit_sentinel_col is None and autoinc_col is not None: + the_sentinel = (autoinc_col,) + sentinel_is_autoinc = True + + default_characterization = _SentinelDefaultCharacterization.UNKNOWN + + if the_sentinel: + the_sentinel_zero = the_sentinel[0] + if the_sentinel_zero.identity: + + if the_sentinel_zero.identity._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use IDENTITY default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + else: + default_characterization = ( + _SentinelDefaultCharacterization.IDENTITY + ) + elif ( + the_sentinel_zero.default is None + and the_sentinel_zero.server_default is None + ): + if the_sentinel_zero.nullable: + raise exc.InvalidRequestError( + f"Column {the_sentinel_zero} has been marked as a " + "sentinel " + "column with no default generation function; it " + "at least needs to be marked nullable=False assuming " + "user-populated sentinel values will be used." + ) + default_characterization = ( + _SentinelDefaultCharacterization.NONE + ) + elif the_sentinel_zero.default is not None: + if the_sentinel_zero.default.is_sentinel: + default_characterization = ( + _SentinelDefaultCharacterization.SENTINEL_DEFAULT + ) + elif default_is_sequence(the_sentinel_zero.default): + + if the_sentinel_zero.default._increment_is_negative: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + "Can't use SEQUENCE default with negative " + "increment as an explicit sentinel column" + ) + else: + if sentinel_is_autoinc: + autoinc_col = None + sentinel_is_autoinc = False + the_sentinel = None + + default_characterization = ( + _SentinelDefaultCharacterization.SEQUENCE + ) + elif the_sentinel_zero.default.is_callable: + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + elif the_sentinel_zero.server_default is not None: + if sentinel_is_explicit: + raise exc.InvalidRequestError( + f"Column {the_sentinel[0]} can't be a sentinel column " + "because it uses an explicit server side default " + "that's not the Identity() default." + ) + + default_characterization = ( + _SentinelDefaultCharacterization.SERVERSIDE + ) + + if the_sentinel is None and self.primary_key: + assert autoinc_col is None + + # determine for non-autoincrement pk if all elements are + # client side + for _pkc in self.primary_key: + if _pkc.server_default is not None or ( + _pkc.default and not _pkc.default.is_callable + ): + break + else: + the_sentinel = tuple(self.primary_key) + default_characterization = ( + _SentinelDefaultCharacterization.CLIENTSIDE + ) + + return _SentinelColumnCharacterization( + the_sentinel, + sentinel_is_explicit, + sentinel_is_autoinc, + default_characterization, + ) + @property def autoincrement_column(self) -> Optional[Column[int]]: """Returns the :class:`.Column` object which currently represents @@ -1361,6 +1506,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): inherit_cache = True key: str + server_default: Optional[FetchedValue] + def __init__( self, __name_pos: Optional[ @@ -1384,11 +1531,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): ] = SchemaConst.NULL_UNSPECIFIED, onupdate: Optional[Any] = None, primary_key: bool = False, - server_default: Optional[_ServerDefaultType] = None, + server_default: Optional[_ServerDefaultArgument] = None, server_onupdate: Optional[FetchedValue] = None, quote: Optional[bool] = None, system: bool = False, comment: Optional[str] = None, + insert_sentinel: bool = False, + _omit_from_statements: bool = False, _proxies: Optional[Any] = None, **dialect_kwargs: Any, ): @@ -1873,6 +2022,22 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): :paramref:`_schema.Column.comment` parameter to :class:`_schema.Column`. + :param insert_sentinel: Marks this :class:`_schema.Column` as an + :term:`insert sentinel` used for optimizing the performance of the + :term:`insertmanyvalues` feature for tables that don't + otherwise have qualifying primary key configurations. + + .. versionadded:: 2.0.10 + + .. seealso:: + + :func:`_schema.insert_sentinel` - all in one helper for declaring + sentinel columns + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + """ # noqa: E501, RST201, RST202 @@ -1914,7 +2079,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): self.key = key if key is not None else name # type: ignore self.primary_key = primary_key - + self._insert_sentinel = insert_sentinel + self._omit_from_statements = _omit_from_statements self._user_defined_nullable = udn = nullable if udn is not NULL_UNSPECIFIED: self.nullable = udn @@ -1962,22 +2128,26 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): else: self.onpudate = None - self.server_default = server_default - self.server_onupdate = server_onupdate - - if self.server_default is not None: - if isinstance(self.server_default, FetchedValue): - l_args.append(self.server_default._as_for_update(False)) + if server_default is not None: + if isinstance(server_default, FetchedValue): + server_default = server_default._as_for_update(False) + l_args.append(server_default) else: - l_args.append(DefaultClause(self.server_default)) + server_default = DefaultClause(server_default) + l_args.append(server_default) + self.server_default = server_default - if self.server_onupdate is not None: - if isinstance(self.server_onupdate, FetchedValue): - l_args.append(self.server_onupdate._as_for_update(True)) + if server_onupdate is not None: + if isinstance(server_onupdate, FetchedValue): + server_onupdate = server_onupdate._as_for_update(True) + l_args.append(server_onupdate) else: - l_args.append( - DefaultClause(self.server_onupdate, for_update=True) + server_onupdate = DefaultClause( + server_onupdate, for_update=True ) + l_args.append(server_onupdate) + self.server_onupdate = server_onupdate + self._init_items(*cast(_typing_Sequence[SchemaItem], l_args)) util.set_creation_order(self) @@ -2042,6 +2212,17 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): if isinstance(impl, SchemaEventTarget): impl._set_parent_with_dispatch(self) + @HasMemoized.memoized_attribute + def _default_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + + return _DefaultDescriptionTuple._from_column_default(self.default) + + @HasMemoized.memoized_attribute + def _onupdate_description_tuple(self) -> _DefaultDescriptionTuple: + """used by default.py -> _process_execute_defaults()""" + return _DefaultDescriptionTuple._from_column_default(self.onupdate) + @util.memoized_property def _gen_static_annotations_cache_key(self) -> bool: # type: ignore """special attribute used by cache key gen, if true, we will @@ -2185,6 +2366,13 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): all_names[self.name] = self self.table = table + if self._insert_sentinel: + if self.table._sentinel_column is not None: + raise exc.ArgumentError( + "a Table may have only one explicit sentinel column" + ) + self.table._sentinel_column = self + if self.primary_key: table.primary_key._replace(self) elif self.key in table.primary_key: @@ -2316,6 +2504,8 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): server_onupdate=server_onupdate, doc=self.doc, comment=self.comment, + _omit_from_statements=self._omit_from_statements, + insert_sentinel=self._insert_sentinel, *args, **column_kwargs, ) @@ -2472,6 +2662,56 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause[_T]): return c.key, c +def insert_sentinel( + name: Optional[str] = None, + type_: Optional[_TypeEngineArgument[_T]] = None, + *, + default: Optional[Any] = None, + omit_from_statements: bool = True, +) -> Column[Any]: + """Provides a surrogate :class:`_schema.Column` that will act as a + dedicated insert :term:`sentinel` column, allowing efficient bulk + inserts with deterministic RETURNING sorting for tables that + don't otherwise have qualifying primary key configurations. + + Adding this column to a :class:`.Table` object requires that a + corresponding database table actually has this column present, so if adding + it to an existing model, existing database tables would need to be migrated + (e.g. using ALTER TABLE or similar) to include this column. + + For background on how this object is used, see the section + :ref:`engine_insertmanyvalues_sentinel_columns` as part of the + section :ref:`engine_insertmanyvalues`. + + The :class:`_schema.Column` returned will be a nullable integer column by + default and make use of a sentinel-specific default generator used only in + "insertmanyvalues" operations. + + .. seealso:: + + :func:`_orm.orm_insert_sentinel` + + :paramref:`_schema.Column.insert_sentinel` + + :ref:`engine_insertmanyvalues` + + :ref:`engine_insertmanyvalues_sentinel_columns` + + + .. versionadded:: 2.0.10 + + """ + return Column( + name=name, + type_=type_api.INTEGERTYPE if type_ is None else type_, + default=default + if default is not None + else _InsertSentinelColumnDefault(), + _omit_from_statements=omit_from_statements, + insert_sentinel=True, + ) + + class ForeignKey(DialectKWArgs, SchemaItem): """Defines a dependency between two columns. @@ -3058,17 +3298,24 @@ else: class DefaultGenerator(Executable, SchemaItem): - """Base class for column *default* values.""" + """Base class for column *default* values. + + This object is only present on column.default or column.onupdate. + It's not valid as a server default. + + """ __visit_name__ = "default_generator" _is_default_generator = True is_sequence = False + is_identity = False is_server_default = False is_clause_element = False is_callable = False is_scalar = False has_arg = False + is_sentinel = False column: Optional[Column[Any]] def __init__(self, for_update: bool = False) -> None: @@ -3222,6 +3469,44 @@ class ScalarElementColumnDefault(ColumnDefault): ) +class _InsertSentinelColumnDefault(ColumnDefault): + """Default generator that's specific to the use of a "sentinel" column + when using the insertmanyvalues feature. + + This default is used as part of the :func:`_schema.insert_sentinel` + construct. + + """ + + is_sentinel = True + for_update = False + arg = None + + def __new__(cls) -> _InsertSentinelColumnDefault: + return object.__new__(cls) + + def __init__(self) -> None: + pass + + def _set_parent(self, parent: SchemaEventTarget, **kw: Any) -> None: + col = cast("Column[Any]", parent) + if not col._insert_sentinel: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column marked as insert_sentinel=True" + ) + elif not col.nullable: + raise exc.ArgumentError( + "The _InsertSentinelColumnDefault may only be applied to a " + "Column that is nullable" + ) + + super()._set_parent(parent, **kw) + + def _copy(self) -> _InsertSentinelColumnDefault: + return _InsertSentinelColumnDefault() + + _SQLExprDefault = Union["ColumnElement[Any]", "TextClause"] @@ -3366,6 +3651,10 @@ class IdentityOptions: self.cache = cache self.order = order + @property + def _increment_is_negative(self) -> bool: + return self.increment is not None and self.increment < 0 + class Sequence(HasSchemaAttr, IdentityOptions, DefaultGenerator): """Represents a named database sequence. @@ -3674,6 +3963,7 @@ class FetchedValue(SchemaEventTarget): reflected = False has_argument = False is_clause_element = False + is_identity = False column: Optional[Column[Any]] @@ -5668,6 +5958,8 @@ class Identity(IdentityOptions, FetchedValue, SchemaItem): __visit_name__ = "identity_column" + is_identity = True + def __init__( self, always: bool = False, diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 5f530ba7d..8a371951e 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -63,6 +63,7 @@ from .base import _EntityNamespace from .base import _expand_cloned from .base import _from_objects from .base import _generative +from .base import _never_select_column from .base import _NoArg from .base import _select_iterables from .base import CacheableOptions @@ -930,7 +931,7 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): @util.ro_non_memoized_property def _select_iterable(self) -> _SelectIterable: - return self.c + return (c for c in self.c if not _never_select_column(c)) def _init_collections(self) -> None: assert "_columns" not in self.__dict__ diff --git a/lib/sqlalchemy/sql/type_api.py b/lib/sqlalchemy/sql/type_api.py index ddbdb49a0..5af12cb93 100644 --- a/lib/sqlalchemy/sql/type_api.py +++ b/lib/sqlalchemy/sql/type_api.py @@ -94,6 +94,11 @@ class _ResultProcessorType(Protocol[_T_co]): ... +class _SentinelProcessorType(Protocol[_T_co]): + def __call__(self, value: Any) -> Optional[_T_co]: + ... + + class _BaseTypeMemoDict(TypedDict): impl: TypeEngine[Any] result: Dict[Any, Optional[_ResultProcessorType[Any]]] @@ -102,6 +107,7 @@ class _BaseTypeMemoDict(TypedDict): class _TypeMemoDict(_BaseTypeMemoDict, total=False): literal: Optional[_LiteralProcessorType[Any]] bind: Optional[_BindProcessorType[Any]] + sentinel: Optional[_SentinelProcessorType[Any]] custom: Dict[Any, object] @@ -598,6 +604,18 @@ class TypeEngine(Visitable, Generic[_T]): """ return None + def _sentinel_value_resolver( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + """Return an optional callable that will match parameter values + (post-bind processing) to result values + (pre-result-processing), for use in the "sentinel" feature. + + .. versionadded:: 2.0.10 + + """ + return None + @util.memoized_property def _has_bind_expression(self) -> bool: """memoized boolean, check if bind_expression is implemented. @@ -945,6 +963,19 @@ class TypeEngine(Visitable, Generic[_T]): d["result"][coltype] = rp return rp + def _cached_sentinel_value_processor( + self, dialect: Dialect + ) -> Optional[_SentinelProcessorType[_T]]: + + try: + return dialect._type_memos[self]["sentinel"] + except KeyError: + pass + + d = self._dialect_info(dialect) + d["sentinel"] = bp = d["impl"]._sentinel_value_resolver(dialect) + return bp + def _cached_custom_processor( self, dialect: Dialect, key: str, fn: Callable[[TypeEngine[_T]], _O] ) -> _O: |