summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/sql/visitors.py
diff options
context:
space:
mode:
authorMike Bayer <mike_mp@zzzcomputing.com>2019-08-29 14:45:23 -0400
committerMike Bayer <mike_mp@zzzcomputing.com>2019-11-04 13:22:43 -0500
commit29330ec1596f12462c501a65404ff52005b16b6c (patch)
treebe20b85ae3939cdbc4f790fadd4f4372421891d4 /lib/sqlalchemy/sql/visitors.py
parentdb47859dca999b9d1679b513fe855e408d7d07c4 (diff)
downloadsqlalchemy-29330ec1596f12462c501a65404ff52005b16b6c.tar.gz
Add anonymizing context to cache keys, comparison; convert traversal
Created new visitor system called "internal traversal" that applies a data driven approach to the concept of a class that defines its own traversal steps, in contrast to the existing style of traversal now known as "external traversal" where the visitor class defines the traversal, i.e. the SQLCompiler. The internal traversal system now implements get_children(), _copy_internals(), compare() and _cache_key() for most Core elements. Core elements with special needs like Select still implement some of these methods directly however most of these methods are no longer explicitly implemented. The data-driven system is also applied to ORM elements that take part in SQL expressions so that these objects, like mappers, aliasedclass, query options, etc. can all participate in the cache key process. Still not considered is that this approach to defining traversibility will be used to create some kind of generic introspection system that works across Core / ORM. It's also not clear if real statement caching using the _cache_key() method is feasible, if it is shown that running _cache_key() is nearly as expensive as compiling in any case. Because it is data driven, it is more straightforward to optimize using inlined code, as is the case now, as well as potentially using C code to speed it up. In addition, the caching sytem now accommodates for anonymous name labels, which is essential so that constructs which have anonymous labels can be cacheable, that is, their position within a statement in relation to other anonymous names causes them to generate an integer counter relative to that construct which will be the same every time. Gathering of bound parameters from any cache key generation is also now required as there is no use case for a cache key that does not extract bound parameter values. Applies-to: #4639 Change-Id: I0660584def8627cad566719ee98d3be045db4b8d
Diffstat (limited to 'lib/sqlalchemy/sql/visitors.py')
-rw-r--r--lib/sqlalchemy/sql/visitors.py447
1 files changed, 381 insertions, 66 deletions
diff --git a/lib/sqlalchemy/sql/visitors.py b/lib/sqlalchemy/sql/visitors.py
index 7b2ac285a..8c06eb8af 100644
--- a/lib/sqlalchemy/sql/visitors.py
+++ b/lib/sqlalchemy/sql/visitors.py
@@ -28,14 +28,10 @@ import operator
from .. import exc
from .. import util
-
+from ..util import langhelpers
+from ..util import symbol
__all__ = [
- "VisitableType",
- "Visitable",
- "ClauseVisitor",
- "CloningVisitor",
- "ReplacingCloningVisitor",
"iterate",
"iterate_depthfirst",
"traverse_using",
@@ -43,85 +39,382 @@ __all__ = [
"traverse_depthfirst",
"cloned_traverse",
"replacement_traverse",
+ "Traversible",
+ "TraversibleType",
+ "ExternalTraversal",
+ "InternalTraversal",
]
-class VisitableType(type):
- """Metaclass which assigns a ``_compiler_dispatch`` method to classes
- having a ``__visit_name__`` attribute.
+def _generate_compiler_dispatch(cls):
+ """Generate a _compiler_dispatch() external traversal on classes with a
+ __visit_name__ attribute.
+
+ """
+ visit_name = cls.__visit_name__
+
+ if isinstance(visit_name, util.compat.string_types):
+ # There is an optimization opportunity here because the
+ # the string name of the class's __visit_name__ is known at
+ # this early stage (import time) so it can be pre-constructed.
+ getter = operator.attrgetter("visit_%s" % visit_name)
+
+ def _compiler_dispatch(self, visitor, **kw):
+ try:
+ meth = getter(visitor)
+ except AttributeError:
+ raise exc.UnsupportedCompilationError(visitor, cls)
+ else:
+ return meth(self, **kw)
+
+ else:
+ # The optimization opportunity is lost for this case because the
+ # __visit_name__ is not yet a string. As a result, the visit
+ # string has to be recalculated with each compilation.
+ def _compiler_dispatch(self, visitor, **kw):
+ visit_attr = "visit_%s" % self.__visit_name__
+ try:
+ meth = getattr(visitor, visit_attr)
+ except AttributeError:
+ raise exc.UnsupportedCompilationError(visitor, cls)
+ else:
+ return meth(self, **kw)
+
+ _compiler_dispatch.__doc__ = """Look for an attribute named "visit_"
+ + self.__visit_name__ on the visitor, and call it with the same
+ kw params.
+ """
+ cls._compiler_dispatch = _compiler_dispatch
+
+
+class TraversibleType(type):
+ """Metaclass which assigns dispatch attributes to various kinds of
+ "visitable" classes.
- The ``_compiler_dispatch`` attribute becomes an instance method which
- looks approximately like the following::
+ Attributes include:
- def _compiler_dispatch (self, visitor, **kw):
- '''Look for an attribute named "visit_" + self.__visit_name__
- on the visitor, and call it with the same kw params.'''
- visit_attr = 'visit_%s' % self.__visit_name__
- return getattr(visitor, visit_attr)(self, **kw)
+ * The ``_compiler_dispatch`` method, corresponding to ``__visit_name__``.
+ This is called "external traversal" because the caller of each visit()
+ method is responsible for sub-traversing the inner elements of each
+ object. This is appropriate for string compilers and other traversals
+ that need to call upon the inner elements in a specific pattern.
- Classes having no ``__visit_name__`` attribute will remain unaffected.
+ * internal traversal collections ``_children_traversal``,
+ ``_cache_key_traversal``, ``_copy_internals_traversal``, generated from
+ an optional ``_traverse_internals`` collection of symbols which comes
+ from the :class:`.InternalTraversal` list of symbols. This is called
+ "internal traversal" MARKMARK
"""
def __init__(cls, clsname, bases, clsdict):
- if clsname != "Visitable" and hasattr(cls, "__visit_name__"):
- _generate_dispatch(cls)
+ if clsname != "Traversible":
+ if "__visit_name__" in clsdict:
+ _generate_compiler_dispatch(cls)
+
+ super(TraversibleType, cls).__init__(clsname, bases, clsdict)
- super(VisitableType, cls).__init__(clsname, bases, clsdict)
+class Traversible(util.with_metaclass(TraversibleType)):
+ """Base class for visitable objects, applies the
+ :class:`.visitors.TraversibleType` metaclass.
-def _generate_dispatch(cls):
- """Return an optimized visit dispatch function for the cls
- for use by the compiler.
"""
- if "__visit_name__" in cls.__dict__:
- visit_name = cls.__visit_name__
- if isinstance(visit_name, util.compat.string_types):
- # There is an optimization opportunity here because the
- # the string name of the class's __visit_name__ is known at
- # this early stage (import time) so it can be pre-constructed.
- getter = operator.attrgetter("visit_%s" % visit_name)
- def _compiler_dispatch(self, visitor, **kw):
- try:
- meth = getter(visitor)
- except AttributeError:
- raise exc.UnsupportedCompilationError(visitor, cls)
- else:
- return meth(self, **kw)
+class _InternalTraversalType(type):
+ def __init__(cls, clsname, bases, clsdict):
+ if cls.__name__ in ("InternalTraversal", "ExtendedInternalTraversal"):
+ lookup = {}
+ for key, sym in clsdict.items():
+ if key.startswith("dp_"):
+ visit_key = key.replace("dp_", "visit_")
+ sym_name = sym.name
+ assert sym_name not in lookup, sym_name
+ lookup[sym] = lookup[sym_name] = visit_key
+ if hasattr(cls, "_dispatch_lookup"):
+ lookup.update(cls._dispatch_lookup)
+ cls._dispatch_lookup = lookup
+
+ super(_InternalTraversalType, cls).__init__(clsname, bases, clsdict)
+
+
+def _generate_dispatcher(visitor, internal_dispatch, method_name):
+ names = []
+ for attrname, visit_sym in internal_dispatch:
+ meth = visitor.dispatch(visit_sym)
+ if meth:
+ visit_name = ExtendedInternalTraversal._dispatch_lookup[visit_sym]
+ names.append((attrname, visit_name))
+
+ code = (
+ (" return [\n")
+ + (
+ ", \n".join(
+ " (%r, self.%s, visitor.%s)"
+ % (attrname, attrname, visit_name)
+ for attrname, visit_name in names
+ )
+ )
+ + ("\n ]\n")
+ )
+ meth_text = ("def %s(self, visitor):\n" % method_name) + code + "\n"
+ # print(meth_text)
+ return langhelpers._exec_code_in_env(meth_text, {}, method_name)
- else:
- # The optimization opportunity is lost for this case because the
- # __visit_name__ is not yet a string. As a result, the visit
- # string has to be recalculated with each compilation.
- def _compiler_dispatch(self, visitor, **kw):
- visit_attr = "visit_%s" % self.__visit_name__
- try:
- meth = getattr(visitor, visit_attr)
- except AttributeError:
- raise exc.UnsupportedCompilationError(visitor, cls)
- else:
- return meth(self, **kw)
-
- _compiler_dispatch.__doc__ = """Look for an attribute named "visit_" + self.__visit_name__
- on the visitor, and call it with the same kw params.
- """
- cls._compiler_dispatch = _compiler_dispatch
-
-
-class Visitable(util.with_metaclass(VisitableType, object)):
- """Base class for visitable objects, applies the
- :class:`.visitors.VisitableType` metaclass.
- The :class:`.Visitable` class is essentially at the base of the
- :class:`.ClauseElement` hierarchy.
+class InternalTraversal(util.with_metaclass(_InternalTraversalType, object)):
+ r"""Defines visitor symbols used for internal traversal.
+
+ The :class:`.InternalTraversal` class is used in two ways. One is that
+ it can serve as the superclass for an object that implements the
+ various visit methods of the class. The other is that the symbols
+ themselves of :class:`.InternalTraversal` are used within
+ the ``_traverse_internals`` collection. Such as, the :class:`.Case`
+ object defines ``_travserse_internals`` as ::
+
+ _traverse_internals = [
+ ("value", InternalTraversal.dp_clauseelement),
+ ("whens", InternalTraversal.dp_clauseelement_tuples),
+ ("else_", InternalTraversal.dp_clauseelement),
+ ]
+
+ Above, the :class:`.Case` class indicates its internal state as the
+ attribtues named ``value``, ``whens``, and ``else\_``. They each
+ link to an :class:`.InternalTraversal` method which indicates the type
+ of datastructure referred towards.
+
+ Using the ``_traverse_internals`` structure, objects of type
+ :class:`.InternalTraversible` will have the following methods automatically
+ implemented:
+
+ * :meth:`.Traversible.get_children`
+
+ * :meth:`.Traversible._copy_internals`
+
+ * :meth:`.Traversible._gen_cache_key`
+
+ Subclasses can also implement these methods directly, particularly for the
+ :meth:`.Traversible._copy_internals` method, when special steps
+ are needed.
+
+ .. versionadded:: 1.4
"""
+ def dispatch(self, visit_symbol):
+ """Given a method from :class:`.InternalTraversal`, return the
+ corresponding method on a subclass.
-class ClauseVisitor(object):
- """Base class for visitor objects which can traverse using
+ """
+ name = self._dispatch_lookup[visit_symbol]
+ return getattr(self, name, None)
+
+ def run_generated_dispatch(
+ self, target, internal_dispatch, generate_dispatcher_name
+ ):
+ try:
+ dispatcher = target.__class__.__dict__[generate_dispatcher_name]
+ except KeyError:
+ dispatcher = _generate_dispatcher(
+ self, internal_dispatch, generate_dispatcher_name
+ )
+ setattr(target.__class__, generate_dispatcher_name, dispatcher)
+ return dispatcher(target, self)
+
+ dp_has_cache_key = symbol("HC")
+ """Visit a :class:`.HasCacheKey` object."""
+
+ dp_clauseelement = symbol("CE")
+ """Visit a :class:`.ClauseElement` object."""
+
+ dp_fromclause_canonical_column_collection = symbol("FC")
+ """Visit a :class:`.FromClause` object in the context of the
+ ``columns`` attribute.
+
+ The column collection is "canonical", meaning it is the originally
+ defined location of the :class:`.ColumnClause` objects. Right now
+ this means that the object being visited is a :class:`.TableClause`
+ or :class:`.Table` object only.
+
+ """
+
+ dp_clauseelement_tuples = symbol("CT")
+ """Visit a list of tuples which contain :class:`.ClauseElement`
+ objects.
+
+ """
+
+ dp_clauseelement_list = symbol("CL")
+ """Visit a list of :class:`.ClauseElement` objects.
+
+ """
+
+ dp_clauseelement_unordered_set = symbol("CU")
+ """Visit an unordered set of :class:`.ClauseElement` objects. """
+
+ dp_fromclause_ordered_set = symbol("CO")
+ """Visit an ordered set of :class:`.FromClause` objects. """
+
+ dp_string = symbol("S")
+ """Visit a plain string value.
+
+ Examples include table and column names, bound parameter keys, special
+ keywords such as "UNION", "UNION ALL".
+
+ The string value is considered to be significant for cache key
+ generation.
+
+ """
+
+ dp_anon_name = symbol("AN")
+ """Visit a potentially "anonymized" string value.
+
+ The string value is considered to be significant for cache key
+ generation.
+
+ """
+
+ dp_boolean = symbol("B")
+ """Visit a boolean value.
+
+ The boolean value is considered to be significant for cache key
+ generation.
+
+ """
+
+ dp_operator = symbol("O")
+ """Visit an operator.
+
+ The operator is a function from the :mod:`sqlalchemy.sql.operators`
+ module.
+
+ The operator value is considered to be significant for cache key
+ generation.
+
+ """
+
+ dp_type = symbol("T")
+ """Visit a :class:`.TypeEngine` object
+
+ The type object is considered to be significant for cache key
+ generation.
+
+ """
+
+ dp_plain_dict = symbol("PD")
+ """Visit a dictionary with string keys.
+
+ The keys of the dictionary should be strings, the values should
+ be immutable and hashable. The dictionary is considered to be
+ significant for cache key generation.
+
+ """
+
+ dp_string_clauseelement_dict = symbol("CD")
+ """Visit a dictionary of string keys to :class:`.ClauseElement`
+ objects.
+
+ """
+
+ dp_string_multi_dict = symbol("MD")
+ """Visit a dictionary of string keys to values which may either be
+ plain immutable/hashable or :class:`.HasCacheKey` objects.
+
+ """
+
+ dp_plain_obj = symbol("PO")
+ """Visit a plain python object.
+
+ The value should be immutable and hashable, such as an integer.
+ The value is considered to be significant for cache key generation.
+
+ """
+
+ dp_annotations_state = symbol("A")
+ """Visit the state of the :class:`.Annotatated` version of an object.
+
+ """
+
+ dp_named_ddl_element = symbol("DD")
+ """Visit a simple named DDL element.
+
+ The current object used by this method is the :class:`.Sequence`.
+
+ The object is only considered to be important for cache key generation
+ as far as its name, but not any other aspects of it.
+
+ """
+
+ dp_prefix_sequence = symbol("PS")
+ """Visit the sequence represented by :class:`.HasPrefixes`
+ or :class:`.HasSuffixes`.
+
+ """
+
+ dp_table_hint_list = symbol("TH")
+ """Visit the ``_hints`` collection of a :class:`.Select` object.
+
+ """
+
+ dp_statement_hint_list = symbol("SH")
+ """Visit the ``_statement_hints`` collection of a :class:`.Select`
+ object.
+
+ """
+
+ dp_unknown_structure = symbol("UK")
+ """Visit an unknown structure.
+
+ """
+
+
+class ExtendedInternalTraversal(InternalTraversal):
+ """defines additional symbols that are useful in caching applications.
+
+ Traversals for :class:`.ClauseElement` objects only need to use
+ those symbols present in :class:`.InternalTraversal`. However, for
+ additional caching use cases within the ORM, symbols dealing with the
+ :class:`.HasCacheKey` class are added here.
+
+ """
+
+ dp_ignore = symbol("IG")
+ """Specify an object that should be ignored entirely.
+
+ This currently applies function call argument caching where some
+ arguments should not be considered to be part of a cache key.
+
+ """
+
+ dp_inspectable = symbol("IS")
+ """Visit an inspectable object where the return value is a HasCacheKey`
+ object."""
+
+ dp_multi = symbol("M")
+ """Visit an object that may be a :class:`.HasCacheKey` or may be a
+ plain hashable object."""
+
+ dp_multi_list = symbol("MT")
+ """Visit a tuple containing elements that may be :class:`.HasCacheKey` or
+ may be a plain hashable object."""
+
+ dp_has_cache_key_tuples = symbol("HT")
+ """Visit a list of tuples which contain :class:`.HasCacheKey`
+ objects.
+
+ """
+
+ dp_has_cache_key_list = symbol("HL")
+ """Visit a list of :class:`.HasCacheKey` objects."""
+
+ dp_inspectable_list = symbol("IL")
+ """Visit a list of inspectable objects which upon inspection are
+ HasCacheKey objects."""
+
+
+class ExternalTraversal(object):
+ """Base class for visitor objects which can traverse externally using
the :func:`.visitors.traverse` function.
Direct usage of the :func:`.visitors.traverse` function is usually
@@ -178,7 +471,7 @@ class ClauseVisitor(object):
return self
-class CloningVisitor(ClauseVisitor):
+class CloningExternalTraversal(ExternalTraversal):
"""Base class for visitor objects which can traverse using
the :func:`.visitors.cloned_traverse` function.
@@ -203,7 +496,7 @@ class CloningVisitor(ClauseVisitor):
)
-class ReplacingCloningVisitor(CloningVisitor):
+class ReplacingExternalTraversal(CloningExternalTraversal):
"""Base class for visitor objects which can traverse using
the :func:`.visitors.replacement_traverse` function.
@@ -233,6 +526,14 @@ class ReplacingCloningVisitor(CloningVisitor):
return replacement_traverse(obj, self.__traverse_options__, replace)
+# backwards compatibility
+Visitable = Traversible
+VisitableType = TraversibleType
+ClauseVisitor = ExternalTraversal
+CloningVisitor = CloningExternalTraversal
+ReplacingCloningVisitor = ReplacingExternalTraversal
+
+
def iterate(obj, opts):
r"""traverse the given expression structure, returning an iterator.
@@ -405,11 +706,18 @@ def cloned_traverse(obj, opts, visitors):
cloned = {}
stop_on = set(opts.get("stop_on", []))
- def clone(elem):
+ def clone(elem, **kw):
if elem in stop_on:
return elem
else:
if id(elem) not in cloned:
+
+ if "replace" in kw:
+ newelem = kw["replace"](elem)
+ if newelem is not None:
+ cloned[id(elem)] = newelem
+ return newelem
+
cloned[id(elem)] = newelem = elem._clone()
newelem._copy_internals(clone=clone)
meth = visitors.get(newelem.__visit_name__, None)
@@ -461,7 +769,14 @@ def replacement_traverse(obj, opts, replace):
stop_on.add(id(newelem))
return newelem
else:
+
if elem not in cloned:
+ if "replace" in kw:
+ newelem = kw["replace"](elem)
+ if newelem is not None:
+ cloned[elem] = newelem
+ return newelem
+
cloned[elem] = newelem = elem._clone()
newelem._copy_internals(clone=clone, **kw)
return cloned[elem]