diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-07-18 15:08:37 -0400 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-09-24 11:15:32 -0400 |
commit | 2bcc97da424eef7db9a5d02f81d02344925415ee (patch) | |
tree | 13d4f04bc7dd40a0207f86aa2fc3a3b49e065674 /lib/sqlalchemy/sql | |
parent | 332188e5680574368001ded52eb0a9d259ecdef5 (diff) | |
download | sqlalchemy-2bcc97da424eef7db9a5d02f81d02344925415ee.tar.gz |
implement batched INSERT..VALUES () () for executemany
the feature is enabled for all built-in backends
when RETURNING is used,
except for Oracle that doesn't need it, and on
psycopg2 and mssql+pyodbc it is used for all INSERT statements,
not just those that use RETURNING.
third party dialects would need to opt in to the new feature
by setting use_insertmanyvalues to True.
Also adds dialect-level guards against using returning
with executemany where we don't have an implementation to
suit it. execute single with returning still defers to the
server without us checking.
Fixes: #6047
Fixes: #7907
Change-Id: I3936d3c00003f02e322f2e43fb949d0e6e568304
Diffstat (limited to 'lib/sqlalchemy/sql')
-rw-r--r-- | lib/sqlalchemy/sql/compiler.py | 350 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/crud.py | 93 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/dml.py | 267 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/schema.py | 6 | ||||
-rw-r--r-- | lib/sqlalchemy/sql/util.py | 124 |
5 files changed, 650 insertions, 190 deletions
diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 1d13ffa9a..201324a2a 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -94,6 +94,7 @@ if typing.TYPE_CHECKING: from .elements import BindParameter from .elements import ColumnClause from .elements import ColumnElement + from .elements import KeyedColumnElement from .elements import Label from .functions import Function from .selectable import AliasedReturnsRows @@ -390,6 +391,19 @@ class _CompilerStackEntry(_BaseCompilerStackEntry, total=False): class ExpandedState(NamedTuple): + """represents state to use when producing "expanded" and + "post compile" bound parameters for a statement. + + "expanded" parameters are parameters that are generated at + statement execution time to suit a number of parameters passed, the most + prominent example being the individual elements inside of an IN expression. + + "post compile" parameters are parameters where the SQL literal value + will be rendered into the SQL statement at execution time, rather than + being passed as separate parameters to the driver. + + """ + statement: str additional_parameters: _CoreSingleExecuteParams processors: Mapping[str, _BindProcessorType[Any]] @@ -397,7 +411,23 @@ class ExpandedState(NamedTuple): parameter_expansion: Mapping[str, List[str]] +class _InsertManyValues(NamedTuple): + """represents state to use for executing an "insertmanyvalues" statement""" + + is_default_expr: bool + single_values_expr: str + insert_crud_params: List[Tuple[KeyedColumnElement[Any], str, str]] + num_positional_params_counted: int + + class Linting(IntEnum): + """represent preferences for the 'SQL linting' feature. + + this feature currently includes support for flagging cartesian products + in SQL statements. + + """ + NO_LINTING = 0 "Disable all linting." 
@@ -419,6 +449,9 @@ NO_LINTING, COLLECT_CARTESIAN_PRODUCTS, WARN_LINTING, FROM_LINTING = tuple( class FromLinter(collections.namedtuple("FromLinter", ["froms", "edges"])): + """represents current state for the "cartesian product" detection + feature.""" + def lint(self, start=None): froms = self.froms if not froms: @@ -762,8 +795,6 @@ class SQLCompiler(Compiled): is_sql = True - _result_columns: List[ResultColumnsEntry] - compound_keywords = COMPOUND_KEYWORDS isdelete: bool = False @@ -810,12 +841,6 @@ class SQLCompiler(Compiled): """major statements such as SELECT, INSERT, UPDATE, DELETE are tracked in this stack using an entry format.""" - result_columns: List[ResultColumnsEntry] - """relates label names in the final SQL to a tuple of local - column/label name, ColumnElement object (if any) and - TypeEngine. CursorResult uses this for type processing and - column targeting""" - returning_precedes_values: bool = False """set to True classwide to generate RETURNING clauses before the VALUES or WHERE clause (i.e. MSSQL) @@ -835,6 +860,12 @@ class SQLCompiler(Compiled): driver/DB enforces this """ + _result_columns: List[ResultColumnsEntry] + """relates label names in the final SQL to a tuple of local + column/label name, ColumnElement object (if any) and + TypeEngine. CursorResult uses this for type processing and + column targeting""" + _textual_ordered_columns: bool = False """tell the result object that the column names as rendered are important, but they are also "ordered" vs. what is in the compiled object here. @@ -881,14 +912,9 @@ class SQLCompiler(Compiled): """ - insert_single_values_expr: Optional[str] = None - """When an INSERT is compiled with a single set of parameters inside - a VALUES expression, the string is assigned here, where it can be - used for insert batching schemes to rewrite the VALUES expression. + _insertmanyvalues: Optional[_InsertManyValues] = None - .. 
versionadded:: 1.3.8 - - """ + _insert_crud_params: Optional[crud._CrudParamSequence] = None literal_execute_params: FrozenSet[BindParameter[Any]] = frozenset() """bindparameter objects that are rendered as literal values at statement @@ -1072,6 +1098,25 @@ class SQLCompiler(Compiled): if self._render_postcompile: self._process_parameters_for_postcompile(_populate_self=True) + @property + def insert_single_values_expr(self) -> Optional[str]: + """When an INSERT is compiled with a single set of parameters inside + a VALUES expression, the string is assigned here, where it can be + used for insert batching schemes to rewrite the VALUES expression. + + .. versionadded:: 1.3.8 + + .. versionchanged:: 2.0 This collection is no longer used by + SQLAlchemy's built-in dialects, in favor of the currently + internal ``_insertmanyvalues`` collection that is used only by + :class:`.SQLCompiler`. + + """ + if self._insertmanyvalues is None: + return None + else: + return self._insertmanyvalues.single_values_expr + @util.ro_memoized_property def effective_returning(self) -> Optional[Sequence[ColumnElement[Any]]]: """The effective "returning" columns for INSERT, UPDATE or DELETE. 
@@ -1620,10 +1665,13 @@ class SQLCompiler(Compiled): param_key_getter = self._within_exec_param_key_getter + assert self.compile_state is not None + statement = self.compile_state.statement + if TYPE_CHECKING: - assert isinstance(self.statement, Insert) + assert isinstance(statement, Insert) - table = self.statement.table + table = statement.table getters = [ (operator.methodcaller("get", param_key_getter(col), None), col) @@ -1697,11 +1745,14 @@ class SQLCompiler(Compiled): else: result = util.preloaded.engine_result + assert self.compile_state is not None + statement = self.compile_state.statement + if TYPE_CHECKING: - assert isinstance(self.statement, Insert) + assert isinstance(statement, Insert) param_key_getter = self._within_exec_param_key_getter - table = self.statement.table + table = statement.table returning = self.implicit_returning assert returning is not None @@ -4506,7 +4557,202 @@ class SQLCompiler(Compiled): ) return dialect_hints, table_text + def _insert_stmt_should_use_insertmanyvalues(self, statement): + return ( + self.dialect.supports_multivalues_insert + and self.dialect.use_insertmanyvalues + # note self.implicit_returning or self._result_columns + # implies self.dialect.insert_returning capability + and ( + self.dialect.use_insertmanyvalues_wo_returning + or self.implicit_returning + or self._result_columns + ) + ) + + def _deliver_insertmanyvalues_batches( + self, statement, parameters, generic_setinputsizes, batch_size + ): + imv = self._insertmanyvalues + assert imv is not None + + executemany_values = f"({imv.single_values_expr})" + + lenparams = len(parameters) + if imv.is_default_expr and not self.dialect.supports_default_metavalue: + # backend doesn't support + # INSERT INTO table (pk_col) VALUES (DEFAULT), (DEFAULT), ... + # at the moment this is basically SQL Server due to + # not being able to use DEFAULT for identity column + # just yield out that many single statements! 
still + # faster than a whole connection.execute() call ;) + # + # note we still are taking advantage of the fact that we know + # we are using RETURNING. The generalized approach of fetching + # cursor.lastrowid etc. still goes through the more heavyweight + # "ExecutionContext per statement" system as it isn't usable + # as a generic "RETURNING" approach + for batchnum, param in enumerate(parameters, 1): + yield ( + statement, + param, + generic_setinputsizes, + batchnum, + lenparams, + ) + return + else: + statement = statement.replace( + executemany_values, "__EXECMANY_TOKEN__" + ) + + # Use optional insertmanyvalues_max_parameters + # to further shrink the batch size so that there are no more than + # insertmanyvalues_max_parameters params. + # Currently used by SQL Server, which limits statements to 2100 bound + # parameters (actually 2099). + max_params = self.dialect.insertmanyvalues_max_parameters + if max_params: + total_num_of_params = len(self.bind_names) + num_params_per_batch = len(imv.insert_crud_params) + num_params_outside_of_batch = ( + total_num_of_params - num_params_per_batch + ) + batch_size = min( + batch_size, + ( + (max_params - num_params_outside_of_batch) + // num_params_per_batch + ), + ) + + batches = list(parameters) + + processed_setinputsizes = None + batchnum = 1 + total_batches = lenparams // batch_size + ( + 1 if lenparams % batch_size else 0 + ) + + insert_crud_params = imv.insert_crud_params + assert insert_crud_params is not None + + escaped_bind_names: Mapping[str, str] + if not self.positional: + if self.escaped_bind_names: + escaped_bind_names = self.escaped_bind_names + else: + escaped_bind_names = {} + + all_keys = set(parameters[0]) + + escaped_insert_crud_params: Sequence[Any] = [ + (escaped_bind_names.get(col.key, col.key), formatted) + for col, _, formatted in insert_crud_params + ] + + keys_to_replace = all_keys.intersection( + key for key, _ in escaped_insert_crud_params + ) + base_parameters = { + key: 
parameters[0][key] + for key in all_keys.difference(keys_to_replace) + } + executemany_values_w_comma = "" + else: + escaped_insert_crud_params = () + keys_to_replace = set() + base_parameters = {} + executemany_values_w_comma = f"({imv.single_values_expr}), " + + while batches: + batch = batches[0:batch_size] + batches[0:batch_size] = [] + + if generic_setinputsizes: + # if setinputsizes is present, expand this collection to + # suit the batch length as well + # currently this will be mssql+pyodbc for internal dialects + processed_setinputsizes = [ + (new_key, len_, typ) + for new_key, len_, typ in ( + (f"{key}_{index}", len_, typ) + for index in range(len(batch)) + for key, len_, typ in generic_setinputsizes + ) + ] + + replaced_parameters: Any + if self.positional: + # the assumption here is that any parameters that are not + # in the VALUES clause are expected to be parameterized + # expressions in the RETURNING (or maybe ON CONFLICT) clause. + # So based on + # which sequence comes first in the compiler's INSERT + # statement tells us where to expand the parameters. + + # otherwise we probably shouldn't be doing insertmanyvalues + # on the statement. 
+ + num_ins_params = imv.num_positional_params_counted + + if num_ins_params == len(batch[0]): + extra_params = () + batch_iterator: Iterable[Tuple[Any, ...]] = batch + elif self.returning_precedes_values: + extra_params = batch[0][:-num_ins_params] + batch_iterator = (b[-num_ins_params:] for b in batch) + else: + extra_params = batch[0][num_ins_params:] + batch_iterator = (b[:num_ins_params] for b in batch) + + replaced_statement = statement.replace( + "__EXECMANY_TOKEN__", + (executemany_values_w_comma * len(batch))[:-2], + ) + + replaced_parameters = tuple( + itertools.chain.from_iterable(batch_iterator) + ) + if self.returning_precedes_values: + replaced_parameters = extra_params + replaced_parameters + else: + replaced_parameters = replaced_parameters + extra_params + else: + replaced_values_clauses = [] + replaced_parameters = base_parameters.copy() + + for i, param in enumerate(batch): + new_tokens = [ + formatted.replace(key, f"{key}__{i}") + if key in param + else formatted + for key, formatted in escaped_insert_crud_params + ] + replaced_values_clauses.append( + f"({', '.join(new_tokens)})" + ) + + replaced_parameters.update( + {f"{key}__{i}": param[key] for key in keys_to_replace} + ) + + replaced_statement = statement.replace( + "__EXECMANY_TOKEN__", + ", ".join(replaced_values_clauses), + ) + + yield ( + replaced_statement, + replaced_parameters, + processed_setinputsizes, + batchnum, + total_batches, + ) + batchnum += 1 + def visit_insert(self, insert_stmt, **kw): + compile_state = insert_stmt._compile_state_factory( insert_stmt, self, **kw ) @@ -4529,9 +4775,24 @@ class SQLCompiler(Compiled): } ) + positiontup_before = positiontup_after = 0 + + # for positional, insertmanyvalues needs to know how many + # bound parameters are in the VALUES sequence; there's no simple + # rule because default expressions etc. can have zero or more + # params inside them. 
After multiple attempts to figure this out, + # this very simplistic "count before, then count after" works and is + # likely the least amount of callcounts, though looks clumsy + if self.positiontup: + positiontup_before = len(self.positiontup) + crud_params_struct = crud._get_crud_params( self, insert_stmt, compile_state, toplevel, **kw ) + + if self.positiontup: + positiontup_after = len(self.positiontup) + crud_params_single = crud_params_struct.single_params if ( @@ -4584,14 +4845,34 @@ class SQLCompiler(Compiled): ) if self.implicit_returning or insert_stmt._returning: + + # if returning clause is rendered first, capture bound parameters + # while visiting and place them prior to the VALUES oriented + # bound parameters, when using positional parameter scheme + rpv = self.returning_precedes_values + flip_pt = rpv and self.positional + if flip_pt: + pt: Optional[List[str]] = self.positiontup + temp_pt: Optional[List[str]] + self.positiontup = temp_pt = [] + else: + temp_pt = pt = None + returning_clause = self.returning_clause( insert_stmt, self.implicit_returning or insert_stmt._returning, populate_result_map=toplevel, ) - if self.returning_precedes_values: + if flip_pt: + if TYPE_CHECKING: + assert temp_pt is not None + assert pt is not None + self.positiontup = temp_pt + pt + + if rpv: text += " " + returning_clause + else: returning_clause = None @@ -4614,6 +4895,18 @@ class SQLCompiler(Compiled): text += " %s" % select_text elif not crud_params_single and supports_default_values: text += " DEFAULT VALUES" + if toplevel and self._insert_stmt_should_use_insertmanyvalues( + insert_stmt + ): + self._insertmanyvalues = _InsertManyValues( + True, + self.dialect.default_metavalue_token, + cast( + "List[Tuple[KeyedColumnElement[Any], str, str]]", + crud_params_single, + ), + (positiontup_after - positiontup_before), + ) elif compile_state._has_multi_parameters: text += " VALUES %s" % ( ", ".join( @@ -4623,6 +4916,8 @@ class SQLCompiler(Compiled): ) ) else: + # 
TODO: why is third element of crud_params_single not str + # already? insert_single_values_expr = ", ".join( [ value @@ -4631,9 +4926,20 @@ class SQLCompiler(Compiled): ) ] ) + text += " VALUES (%s)" % insert_single_values_expr - if toplevel: - self.insert_single_values_expr = insert_single_values_expr + if toplevel and self._insert_stmt_should_use_insertmanyvalues( + insert_stmt + ): + self._insertmanyvalues = _InsertManyValues( + False, + insert_single_values_expr, + cast( + "List[Tuple[KeyedColumnElement[Any], str, str]]", + crud_params_single, + ), + positiontup_after - positiontup_before, + ) if insert_stmt._post_values_clause is not None: post_values_clause = self.process( diff --git a/lib/sqlalchemy/sql/crud.py b/lib/sqlalchemy/sql/crud.py index 81151a26b..b13377a59 100644 --- a/lib/sqlalchemy/sql/crud.py +++ b/lib/sqlalchemy/sql/crud.py @@ -32,6 +32,7 @@ from . import coercions from . import dml from . import elements from . import roles +from .dml import isinsert as _compile_state_isinsert from .elements import ColumnClause from .schema import default_is_clause_element from .schema import default_is_sequence @@ -73,19 +74,18 @@ def _as_dml_column(c: ColumnElement[Any]) -> ColumnClause[Any]: return c -class _CrudParams(NamedTuple): - single_params: Sequence[ - Tuple[ColumnElement[Any], str, Optional[Union[str, _SQLExprDefault]]] - ] - all_multi_params: List[ - Sequence[ - Tuple[ - ColumnClause[Any], - str, - str, - ] - ] +_CrudParamSequence = Sequence[ + Tuple[ + "ColumnElement[Any]", + str, + Optional[Union[str, "_SQLExprDefault"]], ] +] + + +class _CrudParams(NamedTuple): + single_params: _CrudParamSequence + all_multi_params: List[Sequence[Tuple[ColumnClause[Any], str, str]]] def _get_crud_params( @@ -144,6 +144,12 @@ def _get_crud_params( compiler._get_bind_name_for_col = _col_bind_name + if stmt._returning and stmt._return_defaults: + raise exc.CompileError( + "Can't compile statement that includes returning() and " + "return_defaults() simultaneously" 
+ ) + # no parameters in the statement, no parameters in the # compiled params - return binds for all columns if compiler.column_keys is None and compile_state._no_parameters: @@ -164,7 +170,10 @@ def _get_crud_params( ] spd: Optional[MutableMapping[_DMLColumnElement, Any]] - if compile_state._has_multi_parameters: + if ( + _compile_state_isinsert(compile_state) + and compile_state._has_multi_parameters + ): mp = compile_state._multi_parameters assert mp is not None spd = mp[0] @@ -227,7 +236,7 @@ def _get_crud_params( kw, ) - if compile_state.isinsert and stmt._select_names: + if _compile_state_isinsert(compile_state) and stmt._select_names: # is an insert from select, is not a multiparams assert not compile_state._has_multi_parameters @@ -272,7 +281,10 @@ def _get_crud_params( % (", ".join("%s" % (c,) for c in check)) ) - if compile_state._has_multi_parameters: + if ( + _compile_state_isinsert(compile_state) + and compile_state._has_multi_parameters + ): # is a multiparams, is not an insert from a select assert not stmt._select_names multi_extended_values = _extend_values_for_multiparams( @@ -297,7 +309,7 @@ def _get_crud_params( ( _as_dml_column(stmt.table.columns[0]), compiler.preparer.format_column(stmt.table.columns[0]), - "DEFAULT", + compiler.dialect.default_metavalue_token, ) ] @@ -500,7 +512,7 @@ def _scan_insert_from_select_cols( ins_from_select = ins_from_select._generate() # copy raw_columns ins_from_select._raw_columns = list(ins_from_select._raw_columns) + [ - expr for col, col_expr, expr in add_select_cols + expr for _, _, expr in add_select_cols ] compiler.stack[-1]["insert_from_select"] = ins_from_select @@ -539,7 +551,8 @@ def _scan_cols( else: cols = stmt.table.columns - if compile_state.isinsert and not compile_state._has_multi_parameters: + isinsert = _compile_state_isinsert(compile_state) + if isinsert and not compile_state._has_multi_parameters: # new rules for #7998. 
fetch lastrowid or implicit returning # for autoincrement column even if parameter is NULL, for DBs that # override NULL param for primary key (sqlite, mysql/mariadb) @@ -575,7 +588,7 @@ def _scan_cols( kw, ) - elif compile_state.isinsert: + elif isinsert: # no parameter is present and it's an insert. if c.primary_key and need_pks: @@ -683,7 +696,8 @@ def _append_param_parameter( value, required=value is REQUIRED, name=_col_bind_name(c) - if not compile_state._has_multi_parameters + if not _compile_state_isinsert(compile_state) + or not compile_state._has_multi_parameters else "%s_m0" % _col_bind_name(c), **kw, ) @@ -706,7 +720,8 @@ def _append_param_parameter( c, value, name=_col_bind_name(c) - if not compile_state._has_multi_parameters + if not _compile_state_isinsert(compile_state) + or not compile_state._has_multi_parameters else "%s_m0" % _col_bind_name(c), **kw, ) @@ -922,11 +937,19 @@ def _append_param_insert_select_hasdefault( not c.default.optional or not compiler.dialect.sequences_optional ): values.append( - (c, compiler.preparer.format_column(c), c.default.next_value()) + ( + c, + compiler.preparer.format_column(c), + c.default.next_value(), + ) ) elif default_is_clause_element(c.default): values.append( - (c, compiler.preparer.format_column(c), c.default.arg.self_group()) + ( + c, + compiler.preparer.format_column(c), + c.default.arg.self_group(), + ) ) else: values.append( @@ -1105,14 +1128,10 @@ def _process_multiparam_default_bind( return compiler.process(c.default, **kw) else: col = _multiparam_column(c, index) - if isinstance(stmt, dml.Insert): - return _create_insert_prefetch_bind_param( - compiler, col, process=True, **kw - ) - else: - return _create_update_prefetch_bind_param( - compiler, col, process=True, **kw - ) + assert isinstance(stmt, dml.Insert) + return _create_insert_prefetch_bind_param( + compiler, col, process=True, **kw + ) def _get_update_multitable_params( @@ -1205,13 +1224,7 @@ def _extend_values_for_multiparams( mp = 
compile_state._multi_parameters assert mp is not None for i, row in enumerate(mp[1:]): - extension: List[ - Tuple[ - ColumnClause[Any], - str, - str, - ] - ] = [] + extension: List[Tuple[ColumnClause[Any], str, str]] = [] row = {_column_as_key(key): v for key, v in row.items()} @@ -1292,7 +1305,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): """ need_pks = ( toplevel - and compile_state.isinsert + and _compile_state_isinsert(compile_state) and not stmt._inline and ( not compiler.for_executemany @@ -1348,7 +1361,7 @@ def _get_returning_modifiers(compiler, stmt, compile_state, toplevel): if implicit_returning: postfetch_lastrowid = False - if compile_state.isinsert: + if _compile_state_isinsert(compile_state): implicit_return_defaults = implicit_returning and stmt._return_defaults elif compile_state.isupdate: implicit_return_defaults = ( diff --git a/lib/sqlalchemy/sql/dml.py b/lib/sqlalchemy/sql/dml.py index eb612f394..a08e38800 100644 --- a/lib/sqlalchemy/sql/dml.py +++ b/lib/sqlalchemy/sql/dml.py @@ -118,7 +118,6 @@ class DMLState(CompileState): ] = None _ordered_values: Optional[List[Tuple[_DMLColumnElement, Any]]] = None _parameter_ordering: Optional[List[_DMLColumnElement]] = None - _has_multi_parameters = False _primary_table: FromClause _supports_implicit_returning = True @@ -202,64 +201,10 @@ class DMLState(CompileState): froms.extend(all_tables[1:]) return primary_table, froms - def _process_multi_values(self, statement: ValuesBase) -> None: - if not statement._supports_multi_parameters: - raise exc.InvalidRequestError( - "%s construct does not support " - "multiple parameter sets." % statement.__visit_name__.upper() - ) - else: - assert isinstance(statement, Insert) - - # which implies... 
- # assert isinstance(statement.table, TableClause) - - for parameters in statement._multi_values: - multi_parameters: List[MutableMapping[_DMLColumnElement, Any]] = [ - { - c.key: value - for c, value in zip(statement.table.c, parameter_set) - } - if isinstance(parameter_set, collections_abc.Sequence) - else parameter_set - for parameter_set in parameters - ] - - if self._no_parameters: - self._no_parameters = False - self._has_multi_parameters = True - self._multi_parameters = multi_parameters - self._dict_parameters = self._multi_parameters[0] - elif not self._has_multi_parameters: - self._cant_mix_formats_error() - else: - assert self._multi_parameters - self._multi_parameters.extend(multi_parameters) - def _process_values(self, statement: ValuesBase) -> None: if self._no_parameters: - self._has_multi_parameters = False self._dict_parameters = statement._values self._no_parameters = False - elif self._has_multi_parameters: - self._cant_mix_formats_error() - - def _process_ordered_values(self, statement: ValuesBase) -> None: - parameters = statement._ordered_values - - if self._no_parameters: - self._no_parameters = False - assert parameters is not None - self._dict_parameters = dict(parameters) - self._ordered_values = parameters - self._parameter_ordering = [key for key, value in parameters] - elif self._has_multi_parameters: - self._cant_mix_formats_error() - else: - raise exc.InvalidRequestError( - "Can only invoke ordered_values() once, and not mixed " - "with any other values() call" - ) def _process_select_values(self, statement: ValuesBase) -> None: assert statement._select_names is not None @@ -276,6 +221,12 @@ class DMLState(CompileState): # does not allow this construction to occur assert False, "This statement already has parameters" + def _no_multi_values_supported(self, statement: ValuesBase) -> NoReturn: + raise exc.InvalidRequestError( + "%s construct does not support " + "multiple parameter sets." 
% statement.__visit_name__.upper() + ) + def _cant_mix_formats_error(self) -> NoReturn: raise exc.InvalidRequestError( "Can't mix single and multiple VALUES " @@ -291,6 +242,8 @@ class InsertDMLState(DMLState): include_table_with_column_exprs = False + _has_multi_parameters = False + def __init__( self, statement: Insert, @@ -320,6 +273,37 @@ class InsertDMLState(DMLState): for col in self._dict_parameters or () ] + def _process_values(self, statement: ValuesBase) -> None: + if self._no_parameters: + self._has_multi_parameters = False + self._dict_parameters = statement._values + self._no_parameters = False + elif self._has_multi_parameters: + self._cant_mix_formats_error() + + def _process_multi_values(self, statement: ValuesBase) -> None: + for parameters in statement._multi_values: + multi_parameters: List[MutableMapping[_DMLColumnElement, Any]] = [ + { + c.key: value + for c, value in zip(statement.table.c, parameter_set) + } + if isinstance(parameter_set, collections_abc.Sequence) + else parameter_set + for parameter_set in parameters + ] + + if self._no_parameters: + self._no_parameters = False + self._has_multi_parameters = True + self._multi_parameters = multi_parameters + self._dict_parameters = self._multi_parameters[0] + elif not self._has_multi_parameters: + self._cant_mix_formats_error() + else: + assert self._multi_parameters + self._multi_parameters.extend(multi_parameters) + @CompileState.plugin_for("default", "update") class UpdateDMLState(DMLState): @@ -336,7 +320,7 @@ class UpdateDMLState(DMLState): elif statement._values is not None: self._process_values(statement) elif statement._multi_values: - self._process_multi_values(statement) + self._no_multi_values_supported(statement) t, ef = self._make_extra_froms(statement) self._primary_table = t self._extra_froms = ef @@ -347,6 +331,21 @@ class UpdateDMLState(DMLState): mt and compiler.render_table_with_column_in_update_from ) + def _process_ordered_values(self, statement: ValuesBase) -> None: + 
parameters = statement._ordered_values + + if self._no_parameters: + self._no_parameters = False + assert parameters is not None + self._dict_parameters = dict(parameters) + self._ordered_values = parameters + self._parameter_ordering = [key for key, value in parameters] + else: + raise exc.InvalidRequestError( + "Can only invoke ordered_values() once, and not mixed " + "with any other values() call" + ) + @CompileState.plugin_for("default", "delete") class DeleteDMLState(DMLState): @@ -897,18 +896,68 @@ class ValuesBase(UpdateBase): return self @_generative - @_exclusive_against( - "_returning", - msgs={ - "_returning": "RETURNING is already configured on this statement" - }, - defaults={"_returning": _returning}, - ) def return_defaults( self: SelfValuesBase, *cols: _DMLColumnArgument ) -> SelfValuesBase: """Make use of a :term:`RETURNING` clause for the purpose - of fetching server-side expressions and defaults. + of fetching server-side expressions and defaults, for supporting + backends only. + + .. tip:: + + The :meth:`.ValuesBase.return_defaults` method is used by the ORM + for its internal work in fetching newly generated primary key + and server default values, in particular to provide the underyling + implementation of the :paramref:`_orm.Mapper.eager_defaults` + ORM feature. Its behavior is fairly idiosyncratic + and is not really intended for general use. End users should + stick with using :meth:`.UpdateBase.returning` in order to + add RETURNING clauses to their INSERT, UPDATE and DELETE + statements. + + Normally, a single row INSERT statement will automatically populate the + :attr:`.CursorResult.inserted_primary_key` attribute when executed, + which stores the primary key of the row that was just inserted in the + form of a :class:`.Row` object with column names as named tuple keys + (and the :attr:`.Row._mapping` view fully populated as well). 
The + dialect in use chooses the strategy to use in order to populate this + data; if it was generated using server-side defaults and / or SQL + expressions, dialect-specific approaches such as ``cursor.lastrowid`` + or ``RETURNING`` are typically used to acquire the new primary key + value. + + However, when the statement is modified by calling + :meth:`.ValuesBase.return_defaults` before executing the statement, + additional behaviors take place **only** for backends that support + RETURNING and for :class:`.Table` objects that maintain the + :paramref:`.Table.implicit_returning` parameter at its default value of + ``True``. In these cases, when the :class:`.CursorResult` is returned + from the statement's execution, not only will + :attr:`.CursorResult.inserted_primary_key` be populated as always, the + :attr:`.CursorResult.returned_defaults` attribute will also be + populated with a :class:`.Row` named-tuple representing the full range + of server generated + values from that single row, including values for any columns that + specify :paramref:`_schema.Column.server_default` or which make use of + :paramref:`_schema.Column.default` using a SQL expression. + + When invoking INSERT statements with multiple rows using + :ref:`insertmanyvalues <engine_insertmanyvalues>`, the + :meth:`.ValuesBase.return_defaults` modifier will have the effect of + the :attr:`_engine.CursorResult.inserted_primary_key_rows` and + :attr:`_engine.CursorResult.returned_defaults_rows` attributes being + fully populated with lists of :class:`.Row` objects representing newly + inserted primary key values as well as newly inserted server generated + values for each row inserted. The + :attr:`.CursorResult.inserted_primary_key` and + :attr:`.CursorResult.returned_defaults` attributes will also continue + to be populated with the first row of these two collections. 
+ + If the backend does not support RETURNING or the :class:`.Table` in use + has disabled :paramref:`.Table.implicit_returning`, then no RETURNING + clause is added and no additional data is fetched, however the + INSERT or UPDATE statement proceeds normally. + E.g.:: @@ -918,64 +967,58 @@ class ValuesBase(UpdateBase): server_created_at = result.returned_defaults['created_at'] - When used against a backend that supports RETURNING, all column - values generated by SQL expression or server-side-default will be - added to any existing RETURNING clause, provided that - :meth:`.UpdateBase.returning` is not used simultaneously. The column - values will then be available on the result using the - :attr:`_engine.CursorResult.returned_defaults` accessor as - a dictionary, - referring to values keyed to the :class:`_schema.Column` - object as well as - its ``.key``. - - This method differs from :meth:`.UpdateBase.returning` in these ways: - - 1. :meth:`.ValuesBase.return_defaults` is only intended for use with an - INSERT or an UPDATE statement that matches exactly one row per - parameter set. While the RETURNING construct in the general sense - supports multiple rows for a multi-row UPDATE or DELETE statement, - or for special cases of INSERT that return multiple rows (e.g. - INSERT from SELECT, multi-valued VALUES clause), - :meth:`.ValuesBase.return_defaults` is intended only for an - "ORM-style" single-row INSERT/UPDATE statement. The row - returned by the statement is also consumed implicitly when - :meth:`.ValuesBase.return_defaults` is used. By contrast, - :meth:`.UpdateBase.returning` leaves the RETURNING result-set intact - with a collection of any number of rows. - - 2. It is compatible with the existing logic to fetch auto-generated - primary key values, also known as "implicit returning". 
1. The :meth:`.ValuesBase.return_defaults` method causes
feature, which is now supported
except for some backend-specific cases
@@ -606,12 +621,13 @@ class _repr_params(_repr_base): """ - __slots__ = "params", "batches", "ismulti" + __slots__ = "params", "batches", "ismulti", "max_params" def __init__( self, params: Optional[_AnyExecuteParams], batches: int, + max_params: int = 100, max_chars: int = 300, ismulti: Optional[bool] = None, ): @@ -619,6 +635,7 @@ class _repr_params(_repr_base): self.ismulti = ismulti self.batches = batches self.max_chars = max_chars + self.max_params = max_params def __repr__(self) -> str: if self.ismulti is None: @@ -693,29 +710,110 @@ class _repr_params(_repr_base): else: return "(%s)" % elements + def _get_batches(self, params: Iterable[Any]) -> Any: + + lparams = list(params) + lenparams = len(lparams) + if lenparams > self.max_params: + lleft = self.max_params // 2 + return ( + lparams[0:lleft], + lparams[-lleft:], + lenparams - self.max_params, + ) + else: + return lparams, None, None + def _repr_params( self, params: _AnySingleExecuteParams, typ: int, ) -> str: - trunc = self.trunc if typ is self._DICT: - return "{%s}" % ( + return self._repr_param_dict( + cast("_CoreSingleExecuteParams", params) + ) + elif typ is self._TUPLE: + return self._repr_param_tuple(cast("Sequence[Any]", params)) + else: + return self._repr_param_list(params) + + def _repr_param_dict(self, params: _CoreSingleExecuteParams) -> str: + trunc = self.trunc + ( + items_first_batch, + items_second_batch, + trunclen, + ) = self._get_batches(params.items()) + + if items_second_batch: + text = "{%s" % ( ", ".join( - "%r: %s" % (key, trunc(value)) - for key, value in cast( - "_CoreSingleExecuteParams", params - ).items() + f"{key!r}: {trunc(value)}" + for key, value in items_first_batch ) ) - elif typ is self._TUPLE: - seq_params = cast("Sequence[Any]", params) - return "(%s%s)" % ( - ", ".join(trunc(value) for value in seq_params), - "," if len(seq_params) == 1 else "", + text += f" ... {trunclen} parameters truncated ... 
" + text += "%s}" % ( + ", ".join( + f"{key!r}: {trunc(value)}" + for key, value in items_second_batch + ) ) else: - return "[%s]" % (", ".join(trunc(value) for value in params)) + text = "{%s}" % ( + ", ".join( + f"{key!r}: {trunc(value)}" + for key, value in items_first_batch + ) + ) + return text + + def _repr_param_tuple(self, params: "Sequence[Any]") -> str: + trunc = self.trunc + + ( + items_first_batch, + items_second_batch, + trunclen, + ) = self._get_batches(params) + + if items_second_batch: + text = "(%s" % ( + ", ".join(trunc(value) for value in items_first_batch) + ) + text += f" ... {trunclen} parameters truncated ... " + text += "%s)" % ( + ", ".join(trunc(value) for value in items_second_batch), + ) + else: + text = "(%s%s)" % ( + ", ".join(trunc(value) for value in items_first_batch), + "," if len(items_first_batch) == 1 else "", + ) + return text + + def _repr_param_list(self, params: _AnySingleExecuteParams) -> str: + trunc = self.trunc + ( + items_first_batch, + items_second_batch, + trunclen, + ) = self._get_batches(params) + + if items_second_batch: + text = "[%s" % ( + ", ".join(trunc(value) for value in items_first_batch) + ) + text += f" ... {trunclen} parameters truncated ... " + text += "%s]" % ( + ", ".join(trunc(value) for value in items_second_batch) + ) + else: + text = "[%s]" % ( + ", ".join(trunc(value) for value in items_first_batch) + ) + return text def adapt_criterion_to_null(crit: _CE, nulls: Collection[Any]) -> _CE: |