diff options
author | Mike Bayer <mike_mp@zzzcomputing.com> | 2019-08-29 14:45:23 -0400 |
---|---|---|
committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2019-11-04 13:22:43 -0500 |
commit | 29330ec1596f12462c501a65404ff52005b16b6c (patch) | |
tree | be20b85ae3939cdbc4f790fadd4f4372421891d4 /lib/sqlalchemy/sql/visitors.py | |
parent | db47859dca999b9d1679b513fe855e408d7d07c4 (diff) | |
download | sqlalchemy-29330ec1596f12462c501a65404ff52005b16b6c.tar.gz |
Add anonymizing context to cache keys, comparison; convert traversal
Created new visitor system called "internal traversal" that
applies a data driven approach to the concept of a class that
defines its own traversal steps, in contrast to the existing
style of traversal now known as "external traversal" where
the visitor class defines the traversal, i.e. the SQLCompiler.
The internal traversal system now implements get_children(),
_copy_internals(), compare() and _cache_key() for most Core elements.
Core elements with special needs like Select still implement
some of these methods directly however most of these methods
are no longer explicitly implemented.
The data-driven system is also applied to ORM elements that
take part in SQL expressions so that these objects, like mappers,
aliasedclass, query options, etc. can all participate in the
cache key process.
Still not considered is that this approach to defining traversibility
will be used to create some kind of generic introspection system
that works across Core / ORM. It's also not clear if
real statement caching using the _cache_key() method is feasible,
if it is shown that running _cache_key() is nearly as expensive as
compiling in any case. Because it is data driven, it is more
straightforward to optimize using inlined code, as is the case now,
as well as potentially using C code to speed it up.
In addition, the caching sytem now accommodates for anonymous
name labels, which is essential so that constructs which have
anonymous labels can be cacheable, that is, their position
within a statement in relation to other anonymous names causes
them to generate an integer counter relative to that construct
which will be the same every time. Gathering of bound parameters
from any cache key generation is also now required as there is
no use case for a cache key that does not extract bound parameter
values.
Applies-to: #4639
Change-Id: I0660584def8627cad566719ee98d3be045db4b8d
Diffstat (limited to 'lib/sqlalchemy/sql/visitors.py')
-rw-r--r-- | lib/sqlalchemy/sql/visitors.py | 447 |
1 files changed, 381 insertions, 66 deletions
diff --git a/lib/sqlalchemy/sql/visitors.py b/lib/sqlalchemy/sql/visitors.py index 7b2ac285a..8c06eb8af 100644 --- a/lib/sqlalchemy/sql/visitors.py +++ b/lib/sqlalchemy/sql/visitors.py @@ -28,14 +28,10 @@ import operator from .. import exc from .. import util - +from ..util import langhelpers +from ..util import symbol __all__ = [ - "VisitableType", - "Visitable", - "ClauseVisitor", - "CloningVisitor", - "ReplacingCloningVisitor", "iterate", "iterate_depthfirst", "traverse_using", @@ -43,85 +39,382 @@ __all__ = [ "traverse_depthfirst", "cloned_traverse", "replacement_traverse", + "Traversible", + "TraversibleType", + "ExternalTraversal", + "InternalTraversal", ] -class VisitableType(type): - """Metaclass which assigns a ``_compiler_dispatch`` method to classes - having a ``__visit_name__`` attribute. +def _generate_compiler_dispatch(cls): + """Generate a _compiler_dispatch() external traversal on classes with a + __visit_name__ attribute. + + """ + visit_name = cls.__visit_name__ + + if isinstance(visit_name, util.compat.string_types): + # There is an optimization opportunity here because the + # the string name of the class's __visit_name__ is known at + # this early stage (import time) so it can be pre-constructed. + getter = operator.attrgetter("visit_%s" % visit_name) + + def _compiler_dispatch(self, visitor, **kw): + try: + meth = getter(visitor) + except AttributeError: + raise exc.UnsupportedCompilationError(visitor, cls) + else: + return meth(self, **kw) + + else: + # The optimization opportunity is lost for this case because the + # __visit_name__ is not yet a string. As a result, the visit + # string has to be recalculated with each compilation. + def _compiler_dispatch(self, visitor, **kw): + visit_attr = "visit_%s" % self.__visit_name__ + try: + meth = getattr(visitor, visit_attr) + except AttributeError: + raise exc.UnsupportedCompilationError(visitor, cls) + else: + return meth(self, **kw) + + _compiler_dispatch.__doc__ = """Look for an attribute named "visit_" + + self.__visit_name__ on the visitor, and call it with the same + kw params. + """ + cls._compiler_dispatch = _compiler_dispatch + + +class TraversibleType(type): + """Metaclass which assigns dispatch attributes to various kinds of + "visitable" classes. - The ``_compiler_dispatch`` attribute becomes an instance method which - looks approximately like the following:: + Attributes include: - def _compiler_dispatch (self, visitor, **kw): - '''Look for an attribute named "visit_" + self.__visit_name__ - on the visitor, and call it with the same kw params.''' - visit_attr = 'visit_%s' % self.__visit_name__ - return getattr(visitor, visit_attr)(self, **kw) + * The ``_compiler_dispatch`` method, corresponding to ``__visit_name__``. + This is called "external traversal" because the caller of each visit() + method is responsible for sub-traversing the inner elements of each + object. This is appropriate for string compilers and other traversals + that need to call upon the inner elements in a specific pattern. - Classes having no ``__visit_name__`` attribute will remain unaffected. + * internal traversal collections ``_children_traversal``, + ``_cache_key_traversal``, ``_copy_internals_traversal``, generated from + an optional ``_traverse_internals`` collection of symbols which comes + from the :class:`.InternalTraversal` list of symbols. This is called + "internal traversal" MARKMARK """ def __init__(cls, clsname, bases, clsdict): - if clsname != "Visitable" and hasattr(cls, "__visit_name__"): - _generate_dispatch(cls) + if clsname != "Traversible": + if "__visit_name__" in clsdict: + _generate_compiler_dispatch(cls) + + super(TraversibleType, cls).__init__(clsname, bases, clsdict) - super(VisitableType, cls).__init__(clsname, bases, clsdict) +class Traversible(util.with_metaclass(TraversibleType)): + """Base class for visitable objects, applies the + :class:`.visitors.TraversibleType` metaclass. -def _generate_dispatch(cls): - """Return an optimized visit dispatch function for the cls - for use by the compiler. """ - if "__visit_name__" in cls.__dict__: - visit_name = cls.__visit_name__ - if isinstance(visit_name, util.compat.string_types): - # There is an optimization opportunity here because the - # the string name of the class's __visit_name__ is known at - # this early stage (import time) so it can be pre-constructed. - getter = operator.attrgetter("visit_%s" % visit_name) - def _compiler_dispatch(self, visitor, **kw): - try: - meth = getter(visitor) - except AttributeError: - raise exc.UnsupportedCompilationError(visitor, cls) - else: - return meth(self, **kw) +class _InternalTraversalType(type): + def __init__(cls, clsname, bases, clsdict): + if cls.__name__ in ("InternalTraversal", "ExtendedInternalTraversal"): + lookup = {} + for key, sym in clsdict.items(): + if key.startswith("dp_"): + visit_key = key.replace("dp_", "visit_") + sym_name = sym.name + assert sym_name not in lookup, sym_name + lookup[sym] = lookup[sym_name] = visit_key + if hasattr(cls, "_dispatch_lookup"): + lookup.update(cls._dispatch_lookup) + cls._dispatch_lookup = lookup + + super(_InternalTraversalType, cls).__init__(clsname, bases, clsdict) + + +def _generate_dispatcher(visitor, internal_dispatch, method_name): + names = [] + for attrname, visit_sym in internal_dispatch: + meth = visitor.dispatch(visit_sym) + if meth: + visit_name = ExtendedInternalTraversal._dispatch_lookup[visit_sym] + names.append((attrname, visit_name)) + + code = ( + (" return [\n") + + ( + ", \n".join( + " (%r, self.%s, visitor.%s)" + % (attrname, attrname, visit_name) + for attrname, visit_name in names + ) + ) + + ("\n ]\n") + ) + meth_text = ("def %s(self, visitor):\n" % method_name) + code + "\n" + # print(meth_text) + return langhelpers._exec_code_in_env(meth_text, {}, method_name) - else: - # The optimization opportunity is lost for this case because the - # __visit_name__ is not yet a string. As a result, the visit - # string has to be recalculated with each compilation. - def _compiler_dispatch(self, visitor, **kw): - visit_attr = "visit_%s" % self.__visit_name__ - try: - meth = getattr(visitor, visit_attr) - except AttributeError: - raise exc.UnsupportedCompilationError(visitor, cls) - else: - return meth(self, **kw) - - _compiler_dispatch.__doc__ = """Look for an attribute named "visit_" + self.__visit_name__ - on the visitor, and call it with the same kw params. - """ - cls._compiler_dispatch = _compiler_dispatch - - -class Visitable(util.with_metaclass(VisitableType, object)): - """Base class for visitable objects, applies the - :class:`.visitors.VisitableType` metaclass. - The :class:`.Visitable` class is essentially at the base of the - :class:`.ClauseElement` hierarchy. +class InternalTraversal(util.with_metaclass(_InternalTraversalType, object)): + r"""Defines visitor symbols used for internal traversal. + + The :class:`.InternalTraversal` class is used in two ways. One is that + it can serve as the superclass for an object that implements the + various visit methods of the class. The other is that the symbols + themselves of :class:`.InternalTraversal` are used within + the ``_traverse_internals`` collection. Such as, the :class:`.Case` + object defines ``_travserse_internals`` as :: + + _traverse_internals = [ + ("value", InternalTraversal.dp_clauseelement), + ("whens", InternalTraversal.dp_clauseelement_tuples), + ("else_", InternalTraversal.dp_clauseelement), + ] + + Above, the :class:`.Case` class indicates its internal state as the + attribtues named ``value``, ``whens``, and ``else\_``. They each + link to an :class:`.InternalTraversal` method which indicates the type + of datastructure referred towards. + + Using the ``_traverse_internals`` structure, objects of type + :class:`.InternalTraversible` will have the following methods automatically + implemented: + + * :meth:`.Traversible.get_children` + + * :meth:`.Traversible._copy_internals` + + * :meth:`.Traversible._gen_cache_key` + + Subclasses can also implement these methods directly, particularly for the + :meth:`.Traversible._copy_internals` method, when special steps + are needed. + + .. versionadded:: 1.4 """ + def dispatch(self, visit_symbol): + """Given a method from :class:`.InternalTraversal`, return the + corresponding method on a subclass. -class ClauseVisitor(object): - """Base class for visitor objects which can traverse using + """ + name = self._dispatch_lookup[visit_symbol] + return getattr(self, name, None) + + def run_generated_dispatch( + self, target, internal_dispatch, generate_dispatcher_name + ): + try: + dispatcher = target.__class__.__dict__[generate_dispatcher_name] + except KeyError: + dispatcher = _generate_dispatcher( + self, internal_dispatch, generate_dispatcher_name + ) + setattr(target.__class__, generate_dispatcher_name, dispatcher) + return dispatcher(target, self) + + dp_has_cache_key = symbol("HC") + """Visit a :class:`.HasCacheKey` object.""" + + dp_clauseelement = symbol("CE") + """Visit a :class:`.ClauseElement` object.""" + + dp_fromclause_canonical_column_collection = symbol("FC") + """Visit a :class:`.FromClause` object in the context of the + ``columns`` attribute. + + The column collection is "canonical", meaning it is the originally + defined location of the :class:`.ColumnClause` objects. Right now + this means that the object being visited is a :class:`.TableClause` + or :class:`.Table` object only. + + """ + + dp_clauseelement_tuples = symbol("CT") + """Visit a list of tuples which contain :class:`.ClauseElement` + objects. + + """ + + dp_clauseelement_list = symbol("CL") + """Visit a list of :class:`.ClauseElement` objects. + + """ + + dp_clauseelement_unordered_set = symbol("CU") + """Visit an unordered set of :class:`.ClauseElement` objects. """ + + dp_fromclause_ordered_set = symbol("CO") + """Visit an ordered set of :class:`.FromClause` objects. """ + + dp_string = symbol("S") + """Visit a plain string value. + + Examples include table and column names, bound parameter keys, special + keywords such as "UNION", "UNION ALL". + + The string value is considered to be significant for cache key + generation. + + """ + + dp_anon_name = symbol("AN") + """Visit a potentially "anonymized" string value. + + The string value is considered to be significant for cache key + generation. + + """ + + dp_boolean = symbol("B") + """Visit a boolean value. + + The boolean value is considered to be significant for cache key + generation. + + """ + + dp_operator = symbol("O") + """Visit an operator. + + The operator is a function from the :mod:`sqlalchemy.sql.operators` + module. + + The operator value is considered to be significant for cache key + generation. + + """ + + dp_type = symbol("T") + """Visit a :class:`.TypeEngine` object + + The type object is considered to be significant for cache key + generation. + + """ + + dp_plain_dict = symbol("PD") + """Visit a dictionary with string keys. + + The keys of the dictionary should be strings, the values should + be immutable and hashable. The dictionary is considered to be + significant for cache key generation. + + """ + + dp_string_clauseelement_dict = symbol("CD") + """Visit a dictionary of string keys to :class:`.ClauseElement` + objects. + + """ + + dp_string_multi_dict = symbol("MD") + """Visit a dictionary of string keys to values which may either be + plain immutable/hashable or :class:`.HasCacheKey` objects. + + """ + + dp_plain_obj = symbol("PO") + """Visit a plain python object. + + The value should be immutable and hashable, such as an integer. + The value is considered to be significant for cache key generation. + + """ + + dp_annotations_state = symbol("A") + """Visit the state of the :class:`.Annotatated` version of an object. + + """ + + dp_named_ddl_element = symbol("DD") + """Visit a simple named DDL element. + + The current object used by this method is the :class:`.Sequence`. + + The object is only considered to be important for cache key generation + as far as its name, but not any other aspects of it. + + """ + + dp_prefix_sequence = symbol("PS") + """Visit the sequence represented by :class:`.HasPrefixes` + or :class:`.HasSuffixes`. + + """ + + dp_table_hint_list = symbol("TH") + """Visit the ``_hints`` collection of a :class:`.Select` object. + + """ + + dp_statement_hint_list = symbol("SH") + """Visit the ``_statement_hints`` collection of a :class:`.Select` + object. + + """ + + dp_unknown_structure = symbol("UK") + """Visit an unknown structure. + + """ + + +class ExtendedInternalTraversal(InternalTraversal): + """defines additional symbols that are useful in caching applications. + + Traversals for :class:`.ClauseElement` objects only need to use + those symbols present in :class:`.InternalTraversal`. However, for + additional caching use cases within the ORM, symbols dealing with the + :class:`.HasCacheKey` class are added here. + + """ + + dp_ignore = symbol("IG") + """Specify an object that should be ignored entirely. + + This currently applies function call argument caching where some + arguments should not be considered to be part of a cache key. + + """ + + dp_inspectable = symbol("IS") + """Visit an inspectable object where the return value is a HasCacheKey` + object.""" + + dp_multi = symbol("M") + """Visit an object that may be a :class:`.HasCacheKey` or may be a + plain hashable object.""" + + dp_multi_list = symbol("MT") + """Visit a tuple containing elements that may be :class:`.HasCacheKey` or + may be a plain hashable object.""" + + dp_has_cache_key_tuples = symbol("HT") + """Visit a list of tuples which contain :class:`.HasCacheKey` + objects. + + """ + + dp_has_cache_key_list = symbol("HL") + """Visit a list of :class:`.HasCacheKey` objects.""" + + dp_inspectable_list = symbol("IL") + """Visit a list of inspectable objects which upon inspection are + HasCacheKey objects.""" + + +class ExternalTraversal(object): + """Base class for visitor objects which can traverse externally using the :func:`.visitors.traverse` function. Direct usage of the :func:`.visitors.traverse` function is usually @@ -178,7 +471,7 @@ class ClauseVisitor(object): return self -class CloningVisitor(ClauseVisitor): +class CloningExternalTraversal(ExternalTraversal): """Base class for visitor objects which can traverse using the :func:`.visitors.cloned_traverse` function. @@ -203,7 +496,7 @@ class CloningVisitor(ClauseVisitor): ) -class ReplacingCloningVisitor(CloningVisitor): +class ReplacingExternalTraversal(CloningExternalTraversal): """Base class for visitor objects which can traverse using the :func:`.visitors.replacement_traverse` function. @@ -233,6 +526,14 @@ class ReplacingCloningVisitor(CloningVisitor): return replacement_traverse(obj, self.__traverse_options__, replace) +# backwards compatibility +Visitable = Traversible +VisitableType = TraversibleType +ClauseVisitor = ExternalTraversal +CloningVisitor = CloningExternalTraversal +ReplacingCloningVisitor = ReplacingExternalTraversal + + def iterate(obj, opts): r"""traverse the given expression structure, returning an iterator. @@ -405,11 +706,18 @@ def cloned_traverse(obj, opts, visitors): cloned = {} stop_on = set(opts.get("stop_on", [])) - def clone(elem): + def clone(elem, **kw): if elem in stop_on: return elem else: if id(elem) not in cloned: + + if "replace" in kw: + newelem = kw["replace"](elem) + if newelem is not None: + cloned[id(elem)] = newelem + return newelem + cloned[id(elem)] = newelem = elem._clone() newelem._copy_internals(clone=clone) meth = visitors.get(newelem.__visit_name__, None) @@ -461,7 +769,14 @@ def replacement_traverse(obj, opts, replace): stop_on.add(id(newelem)) return newelem else: + if elem not in cloned: + if "replace" in kw: + newelem = kw["replace"](elem) + if newelem is not None: + cloned[elem] = newelem + return newelem + cloned[elem] = newelem = elem._clone() newelem._copy_internals(clone=clone, **kw) return cloned[elem] |