diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2019-08-29 14:45:23 -0400 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2019-11-04 13:22:43 -0500 |
commit | 29330ec1596f12462c501a65404ff52005b16b6c (patch) | |
tree | be20b85ae3939cdbc4f790fadd4f4372421891d4 /lib/sqlalchemy/sql/selectable.py | |
parent | db47859dca999b9d1679b513fe855e408d7d07c4 (diff) | |
download | sqlalchemy-29330ec1596f12462c501a65404ff52005b16b6c.tar.gz |
Add anonymizing context to cache keys, comparison; convert traversal
Created new visitor system called "internal traversal" that
applies a data driven approach to the concept of a class that
defines its own traversal steps, in contrast to the existing
style of traversal now known as "external traversal" where
the visitor class defines the traversal, i.e. the SQLCompiler.
The internal traversal system now implements get_children(),
_copy_internals(), compare() and _cache_key() for most Core elements.
Core elements with special needs like Select still implement
some of these methods directly; however, most of these methods
are no longer explicitly implemented.
The data-driven system is also applied to ORM elements that
take part in SQL expressions so that these objects, like mappers,
aliasedclass, query options, etc. can all participate in the
cache key process.
Still not considered is whether this approach to defining traversability
will be used to create some kind of generic introspection system
that works across Core / ORM. It's also not clear if
real statement caching using the _cache_key() method is feasible,
if it is shown that running _cache_key() is nearly as expensive as
compiling in any case. Because it is data driven, it is more
straightforward to optimize using inlined code, as is the case now,
as well as potentially using C code to speed it up.
In addition, the caching system now accommodates anonymous
name labels, which is essential so that constructs which have
anonymous labels can be cacheable, that is, their position
within a statement in relation to other anonymous names causes
them to generate an integer counter relative to that construct
which will be the same every time. Gathering of bound parameters
from any cache key generation is also now required as there is
no use case for a cache key that does not extract bound parameter
values.
Applies-to: #4639
Change-Id: I0660584def8627cad566719ee98d3be045db4b8d
Diffstat (limited to 'lib/sqlalchemy/sql/selectable.py')
-rw-r--r-- | lib/sqlalchemy/sql/selectable.py | 396 |
1 files changed, 148 insertions, 248 deletions
diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 6a7413fc0..4b3844eec 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -31,6 +31,7 @@ from .base import ColumnSet from .base import DedupeColumnCollection from .base import Executable from .base import Generative +from .base import HasMemoized from .base import Immutable from .coercions import _document_text_coercion from .elements import _anonymous_label @@ -39,11 +40,13 @@ from .elements import and_ from .elements import BindParameter from .elements import ClauseElement from .elements import ClauseList +from .elements import ColumnClause from .elements import GroupedElement from .elements import Grouping from .elements import literal_column from .elements import True_ from .elements import UnaryExpression +from .visitors import InternalTraversal from .. import exc from .. import util @@ -201,6 +204,8 @@ class Selectable(ReturnsRows): class HasPrefixes(object): _prefixes = () + _traverse_internals = [("_prefixes", InternalTraversal.dp_prefix_sequence)] + @_generative @_document_text_coercion( "expr", @@ -252,6 +257,8 @@ class HasPrefixes(object): class HasSuffixes(object): _suffixes = () + _traverse_internals = [("_suffixes", InternalTraversal.dp_prefix_sequence)] + @_generative @_document_text_coercion( "expr", @@ -295,7 +302,7 @@ class HasSuffixes(object): ) -class FromClause(roles.AnonymizedFromClauseRole, Selectable): +class FromClause(HasMemoized, roles.AnonymizedFromClauseRole, Selectable): """Represent an element that can be used within the ``FROM`` clause of a ``SELECT`` statement. 
@@ -529,11 +536,6 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): """ return getattr(self, "name", self.__class__.__name__ + " object") - def _reset_exported(self): - """delete memoized collections when a FromClause is cloned.""" - - self._memoized_property.expire_instance(self) - def _generate_fromclause_column_proxies(self, fromclause): fromclause._columns._populate_separate_keys( col._make_proxy(fromclause) for col in self.c @@ -668,6 +670,14 @@ class Join(FromClause): __visit_name__ = "join" + _traverse_internals = [ + ("left", InternalTraversal.dp_clauseelement), + ("right", InternalTraversal.dp_clauseelement), + ("onclause", InternalTraversal.dp_clauseelement), + ("isouter", InternalTraversal.dp_boolean), + ("full", InternalTraversal.dp_boolean), + ] + _is_join = True def __init__(self, left, right, onclause=None, isouter=False, full=False): @@ -805,25 +815,6 @@ class Join(FromClause): self.left._refresh_for_new_column(column) self.right._refresh_for_new_column(column) - def _copy_internals(self, clone=_clone, **kw): - self._reset_exported() - self.left = clone(self.left, **kw) - self.right = clone(self.right, **kw) - self.onclause = clone(self.onclause, **kw) - - def get_children(self, **kwargs): - return self.left, self.right, self.onclause - - def _cache_key(self, **kw): - return ( - Join, - self.isouter, - self.full, - self.left._cache_key(**kw), - self.right._cache_key(**kw), - self.onclause._cache_key(**kw), - ) - def _match_primaries(self, left, right): if isinstance(left, Join): left_right = left.right @@ -1175,6 +1166,11 @@ class AliasedReturnsRows(FromClause): _is_from_container = True named_with_column = True + _traverse_internals = [ + ("element", InternalTraversal.dp_clauseelement), + ("name", InternalTraversal.dp_anon_name), + ] + def __init__(self, *arg, **kw): raise NotImplementedError( "The %s class is not intended to be constructed " @@ -1243,18 +1239,13 @@ class AliasedReturnsRows(FromClause): def _copy_internals(self, 
clone=_clone, **kw): element = clone(self.element, **kw) + + # the element clone is usually against a Table that returns the + # same object. don't reset exported .c. collections and other + # memoized details if nothing changed if element is not self.element: self._reset_exported() - self.element = element - - def get_children(self, column_collections=True, **kw): - if column_collections: - for c in self.c: - yield c - yield self.element - - def _cache_key(self, **kw): - return (self.__class__, self.element._cache_key(**kw), self._orig_name) + self.element = element @property def _from_objects(self): @@ -1396,6 +1387,11 @@ class TableSample(AliasedReturnsRows): __visit_name__ = "tablesample" + _traverse_internals = AliasedReturnsRows._traverse_internals + [ + ("sampling", InternalTraversal.dp_clauseelement), + ("seed", InternalTraversal.dp_clauseelement), + ] + @classmethod def _factory(cls, selectable, sampling, name=None, seed=None): """Return a :class:`.TableSample` object. @@ -1466,6 +1462,16 @@ class CTE(Generative, HasSuffixes, AliasedReturnsRows): __visit_name__ = "cte" + _traverse_internals = ( + AliasedReturnsRows._traverse_internals + + [ + ("_cte_alias", InternalTraversal.dp_clauseelement), + ("_restates", InternalTraversal.dp_clauseelement_unordered_set), + ("recursive", InternalTraversal.dp_boolean), + ] + + HasSuffixes._traverse_internals + ) + @classmethod def _factory(cls, selectable, name=None, recursive=False): r"""Return a new :class:`.CTE`, or Common Table Expression instance. 
@@ -1495,15 +1501,13 @@ class CTE(Generative, HasSuffixes, AliasedReturnsRows): def _copy_internals(self, clone=_clone, **kw): super(CTE, self)._copy_internals(clone, **kw) + # TODO: I don't like that we can't use the traversal data here if self._cte_alias is not None: self._cte_alias = clone(self._cte_alias, **kw) self._restates = frozenset( [clone(elem, **kw) for elem in self._restates] ) - def _cache_key(self, *arg, **kw): - raise NotImplementedError("TODO") - def alias(self, name=None, flat=False): """Return an :class:`.Alias` of this :class:`.CTE`. @@ -1764,6 +1768,8 @@ class Subquery(AliasedReturnsRows): class FromGrouping(GroupedElement, FromClause): """Represent a grouping of a FROM clause""" + _traverse_internals = [("element", InternalTraversal.dp_clauseelement)] + def __init__(self, element): self.element = coercions.expect(roles.FromClauseRole, element) @@ -1792,15 +1798,6 @@ class FromGrouping(GroupedElement, FromClause): def _hide_froms(self): return self.element._hide_froms - def get_children(self, **kwargs): - return (self.element,) - - def _copy_internals(self, clone=_clone, **kw): - self.element = clone(self.element, **kw) - - def _cache_key(self, **kw): - return (FromGrouping, self.element._cache_key(**kw)) - @property def _from_objects(self): return self.element._from_objects @@ -1843,6 +1840,14 @@ class TableClause(Immutable, FromClause): __visit_name__ = "table" + _traverse_internals = [ + ( + "columns", + InternalTraversal.dp_fromclause_canonical_column_collection, + ), + ("name", InternalTraversal.dp_string), + ] + named_with_column = True implicit_returning = False @@ -1895,17 +1900,6 @@ class TableClause(Immutable, FromClause): self._columns.add(c) c.table = self - def get_children(self, column_collections=True, **kwargs): - if column_collections: - return [c for c in self.c] - else: - return [] - - def _cache_key(self, **kw): - return (TableClause, self.name) + tuple( - col._cache_key(**kw) for col in self._columns - ) - 
@util.dependencies("sqlalchemy.sql.dml") def insert(self, dml, values=None, inline=False, **kwargs): """Generate an :func:`.insert` construct against this @@ -1965,6 +1959,13 @@ class TableClause(Immutable, FromClause): class ForUpdateArg(ClauseElement): + _traverse_internals = [ + ("of", InternalTraversal.dp_clauseelement_list), + ("nowait", InternalTraversal.dp_boolean), + ("read", InternalTraversal.dp_boolean), + ("skip_locked", InternalTraversal.dp_boolean), + ] + @classmethod def parse_legacy_select(self, arg): """Parse the for_update argument of :func:`.select`. @@ -2029,19 +2030,6 @@ class ForUpdateArg(ClauseElement): def __hash__(self): return id(self) - def _copy_internals(self, clone=_clone, **kw): - if self.of is not None: - self.of = [clone(col, **kw) for col in self.of] - - def _cache_key(self, **kw): - return ( - ForUpdateArg, - self.nowait, - self.read, - self.skip_locked, - self.of._cache_key(**kw) if self.of is not None else None, - ) - def __init__( self, nowait=False, @@ -2074,6 +2062,7 @@ class SelectBase( roles.DMLSelectRole, roles.CompoundElementRole, roles.InElementRole, + HasMemoized, HasCTE, Executable, SupportsCloneAnnotations, @@ -2092,9 +2081,6 @@ class SelectBase( _memoized_property = util.group_expirable_memoized_property() - def _reset_memoizations(self): - self._memoized_property.expire_instance(self) - def _generate_fromclause_column_proxies(self, fromclause): # type: (FromClause) raise NotImplementedError() @@ -2339,6 +2325,7 @@ class SelectStatementGrouping(GroupedElement, SelectBase): """ __visit_name__ = "grouping" + _traverse_internals = [("element", InternalTraversal.dp_clauseelement)] _is_select_container = True @@ -2350,9 +2337,6 @@ class SelectStatementGrouping(GroupedElement, SelectBase): def select_statement(self): return self.element - def get_children(self, **kwargs): - return (self.element,) - def self_group(self, against=None): # type: (Optional[Any]) -> FromClause return self @@ -2377,12 +2361,6 @@ class 
SelectStatementGrouping(GroupedElement, SelectBase): """ return self.element.selected_columns - def _copy_internals(self, clone=_clone, **kw): - self.element = clone(self.element, **kw) - - def _cache_key(self, **kw): - return (SelectStatementGrouping, self.element._cache_key(**kw)) - @property def _from_objects(self): return self.element._from_objects @@ -2758,9 +2736,6 @@ class GenerativeSelect(DeprecatedSelectBaseGenerations, SelectBase): def _label_resolve_dict(self): raise NotImplementedError() - def _copy_internals(self, clone=_clone, **kw): - raise NotImplementedError() - class CompoundSelect(GenerativeSelect): """Forms the basis of ``UNION``, ``UNION ALL``, and other @@ -2785,6 +2760,16 @@ class CompoundSelect(GenerativeSelect): __visit_name__ = "compound_select" + _traverse_internals = [ + ("selects", InternalTraversal.dp_clauseelement_list), + ("_limit_clause", InternalTraversal.dp_clauseelement), + ("_offset_clause", InternalTraversal.dp_clauseelement), + ("_order_by_clause", InternalTraversal.dp_clauseelement), + ("_group_by_clause", InternalTraversal.dp_clauseelement), + ("_for_update_arg", InternalTraversal.dp_clauseelement), + ("keyword", InternalTraversal.dp_string), + ] + SupportsCloneAnnotations._traverse_internals + UNION = util.symbol("UNION") UNION_ALL = util.symbol("UNION ALL") EXCEPT = util.symbol("EXCEPT") @@ -3004,47 +2989,6 @@ class CompoundSelect(GenerativeSelect): """ return self.selects[0].selected_columns - def _copy_internals(self, clone=_clone, **kw): - self._reset_memoizations() - self.selects = [clone(s, **kw) for s in self.selects] - if hasattr(self, "_col_map"): - del self._col_map - for attr in ( - "_limit_clause", - "_offset_clause", - "_order_by_clause", - "_group_by_clause", - "_for_update_arg", - ): - if getattr(self, attr) is not None: - setattr(self, attr, clone(getattr(self, attr), **kw)) - - def get_children(self, **kwargs): - return [self._order_by_clause, self._group_by_clause] + list( - self.selects - ) - - def 
_cache_key(self, **kw): - return ( - (CompoundSelect, self.keyword) - + tuple(stmt._cache_key(**kw) for stmt in self.selects) - + ( - self._order_by_clause._cache_key(**kw) - if self._order_by_clause is not None - else None, - ) - + ( - self._group_by_clause._cache_key(**kw) - if self._group_by_clause is not None - else None, - ) - + ( - self._for_update_arg._cache_key(**kw) - if self._for_update_arg is not None - else None, - ) - ) - def bind(self): if self._bind: return self._bind @@ -3193,11 +3137,35 @@ class Select( _hints = util.immutabledict() _statement_hints = () _distinct = False - _from_cloned = None + _distinct_on = () _correlate = () _correlate_except = None _memoized_property = SelectBase._memoized_property + _traverse_internals = ( + [ + ("_from_obj", InternalTraversal.dp_fromclause_ordered_set), + ("_raw_columns", InternalTraversal.dp_clauseelement_list), + ("_whereclause", InternalTraversal.dp_clauseelement), + ("_having", InternalTraversal.dp_clauseelement), + ("_order_by_clause", InternalTraversal.dp_clauseelement_list), + ("_group_by_clause", InternalTraversal.dp_clauseelement_list), + ("_correlate", InternalTraversal.dp_clauseelement_unordered_set), + ( + "_correlate_except", + InternalTraversal.dp_clauseelement_unordered_set, + ), + ("_for_update_arg", InternalTraversal.dp_clauseelement), + ("_statement_hints", InternalTraversal.dp_statement_hint_list), + ("_hints", InternalTraversal.dp_table_hint_list), + ("_distinct", InternalTraversal.dp_boolean), + ("_distinct_on", InternalTraversal.dp_clauseelement_list), + ] + + HasPrefixes._traverse_internals + + HasSuffixes._traverse_internals + + SupportsCloneAnnotations._traverse_internals + ) + @util.deprecated_params( autocommit=( "0.6", @@ -3416,13 +3384,14 @@ class Select( """ self._auto_correlate = correlate if distinct is not False: - if distinct is True: - self._distinct = True - else: - self._distinct = [ - coercions.expect(roles.WhereHavingRole, e) - for e in util.to_list(distinct) - ] + 
self._distinct = True + if not isinstance(distinct, bool): + self._distinct_on = tuple( + [ + coercions.expect(roles.WhereHavingRole, e) + for e in util.to_list(distinct) + ] + ) if from_obj is not None: self._from_obj = util.OrderedSet( @@ -3472,15 +3441,17 @@ class Select( GenerativeSelect.__init__(self, **kwargs) + # @_memoized_property @property def _froms(self): - # would love to cache this, - # but there's just enough edge cases, particularly now that - # declarative encourages construction of SQL expressions - # without tables present, to just regen this each time. + # current roadblock to caching is two tests that test that the + # SELECT can be compiled to a string, then a Table is created against + # columns, then it can be compiled again and works. this is somewhat + # valid as people make select() against declarative class where + # columns don't have their Table yet and perhaps some operations + # call upon _froms and cache it too soon. froms = [] seen = set() - translate = self._from_cloned for item in itertools.chain( _from_objects(*self._raw_columns), @@ -3493,8 +3464,6 @@ class Select( raise exc.InvalidRequestError( "select() construct refers to itself as a FROM" ) - if translate and item in translate: - item = translate[item] if not seen.intersection(item._cloned_set): froms.append(item) seen.update(item._cloned_set) @@ -3518,15 +3487,6 @@ class Select( itertools.chain(*[_expand_cloned(f._hide_froms) for f in froms]) ) if toremove: - # if we're maintaining clones of froms, - # add the copies out to the toremove list. only include - # clones that are lexical equivalents. 
- if self._from_cloned: - toremove.update( - self._from_cloned[f] - for f in toremove.intersection(self._from_cloned) - if self._from_cloned[f]._is_lexical_equivalent(f) - ) # filter out to FROM clauses not in the list, # using a list to maintain ordering froms = [f for f in froms if f not in toremove] @@ -3707,7 +3667,6 @@ class Select( return False def _copy_internals(self, clone=_clone, **kw): - # Select() object has been cloned and probably adapted by the # given clone function. Apply the cloning function to internal # objects @@ -3719,37 +3678,42 @@ class Select( # as of 0.7.4 we also put the current version of _froms, which # gets cleared on each generation. previously we were "baking" # _froms into self._from_obj. - self._from_cloned = from_cloned = dict( - (f, clone(f, **kw)) for f in self._from_obj.union(self._froms) - ) - # 3. update persistent _from_obj with the cloned versions. - self._from_obj = util.OrderedSet( - from_cloned[f] for f in self._from_obj + all_the_froms = list( + itertools.chain( + _from_objects(*self._raw_columns), + _from_objects(self._whereclause) + if self._whereclause is not None + else (), + ) ) + new_froms = {f: clone(f, **kw) for f in all_the_froms} + # copy FROM collections - # the _correlate collection is done separately, what can happen - # here is the same item is _correlate as in _from_obj but the - # _correlate version has an annotation on it - (specifically - # RelationshipProperty.Comparator._criterion_exists() does - # this). Also keep _correlate liberally open with its previous - # contents, as this set is used for matching, not rendering. 
- self._correlate = set(clone(f) for f in self._correlate).union( - self._correlate - ) + self._from_obj = util.OrderedSet( + clone(f, **kw) for f in self._from_obj + ).union(f for f in new_froms.values() if isinstance(f, Join)) - # do something similar for _correlate_except - this is a more - # unusual case but same idea applies + self._correlate = set(clone(f) for f in self._correlate) if self._correlate_except: self._correlate_except = set( clone(f) for f in self._correlate_except - ).union(self._correlate_except) + ) # 4. clone other things. The difficulty here is that Column - # objects are not actually cloned, and refer to their original - # .table, resulting in the wrong "from" parent after a clone - # operation. Hence _from_cloned and _from_obj supersede what is - # present here. + # objects are usually not altered by a straight clone because they + # are dependent on the FROM cloning we just did above in order to + # be targeted correctly, or a new FROM we have might be a JOIN + # object which doesn't have its own columns. so give the cloner a + # hint. + def replace(obj, **kw): + if isinstance(obj, ColumnClause) and obj.table in new_froms: + newelem = new_froms[obj.table].corresponding_column(obj) + return newelem + + kw["replace"] = replace + + # TODO: I'd still like to try to leverage the traversal data self._raw_columns = [clone(c, **kw) for c in self._raw_columns] for attr in ( "_limit_clause", @@ -3763,67 +3727,12 @@ class Select( if getattr(self, attr) is not None: setattr(self, attr, clone(getattr(self, attr), **kw)) - # erase _froms collection, - # etc. 
self._reset_memoizations() def get_children(self, **kwargs): - """return child elements as per the ClauseElement specification.""" - - return ( - self._raw_columns - + list(self._froms) - + [ - x - for x in ( - self._whereclause, - self._having, - self._order_by_clause, - self._group_by_clause, - ) - if x is not None - ] - ) - - def _cache_key(self, **kw): - return ( - (Select,) - + ("raw_columns",) - + tuple(elem._cache_key(**kw) for elem in self._raw_columns) - + ("elements",) - + tuple( - elem._cache_key(**kw) if elem is not None else None - for elem in ( - self._whereclause, - self._having, - self._order_by_clause, - self._group_by_clause, - ) - ) - + ("from_obj",) - + tuple(elem._cache_key(**kw) for elem in self._from_obj) - + ("correlate",) - + tuple( - elem._cache_key(**kw) - for elem in ( - self._correlate if self._correlate is not None else () - ) - ) - + ("correlate_except",) - + tuple( - elem._cache_key(**kw) - for elem in ( - self._correlate_except - if self._correlate_except is not None - else () - ) - ) - + ("for_update",), - ( - self._for_update_arg._cache_key(**kw) - if self._for_update_arg is not None - else None, - ), + # TODO: define "get_children" traversal items separately? 
+ return self._froms + super(Select, self).get_children( + omit_attrs=["_from_obj", "_correlate", "_correlate_except"] ) @_generative @@ -3987,10 +3896,8 @@ class Select( """ if expr: expr = [coercions.expect(roles.ByOfRole, e) for e in expr] - if isinstance(self._distinct, list): - self._distinct = self._distinct + expr - else: - self._distinct = expr + self._distinct = True + self._distinct_on = self._distinct_on + tuple(expr) else: self._distinct = True @@ -4489,6 +4396,11 @@ class TextualSelect(SelectBase): __visit_name__ = "textual_select" + _traverse_internals = [ + ("element", InternalTraversal.dp_clauseelement), + ("column_args", InternalTraversal.dp_clauseelement_list), + ] + SupportsCloneAnnotations._traverse_internals + _is_textual = True def __init__(self, text, columns, positional=False): @@ -4534,18 +4446,6 @@ class TextualSelect(SelectBase): c._make_proxy(fromclause) for c in self.column_args ) - def _copy_internals(self, clone=_clone, **kw): - self._reset_memoizations() - self.element = clone(self.element, **kw) - - def get_children(self, **kw): - return [self.element] - - def _cache_key(self, **kw): - return (TextualSelect, self.element._cache_key(**kw)) + tuple( - col._cache_key(**kw) for col in self.column_args - ) - def _scalar_type(self): return self.column_args[0].type |