summaryrefslogtreecommitdiff
path: root/paste/exceptions/collector.py
blob: 632ce0665aaece41cc00de58dd74d7aa3861070f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
## Originally zExceptions.ExceptionFormatter from Zope;
## Modified by Ian Bicking, Imaginary Landscape, 2005
"""
An exception collector that finds traceback information plus
supplements
"""

import sys
import traceback
import time
from six.moves import cStringIO as StringIO
import linecache
from paste.exceptions import serial_number_generator
import warnings

# When true, the traceback of any error raised while building a
# ``__traceback_supplement__`` is stored in the frame data under
# ``supplement_exception``; when false such errors are silently dropped.
DEBUG_EXCEPTION_FORMATTER = True
# Prefix passed to serial_number_generator.hash_identifier() when the
# identification code of a collected exception is computed.
DEBUG_IDENT_PREFIX = 'E-'
# Encoding used by ExceptionCollector.safeStr() when str() of an exception
# value fails with UnicodeEncodeError.
FALLBACK_ENCODING = 'UTF-8'

# Names exported by ``from paste.exceptions.collector import *``.
__all__ = ['collect_exception', 'ExceptionCollector']

class ExceptionCollector(object):

    """
    Produces a data structure that can be used by formatters to
    display exception reports.

    Magic variables:

    If you define one of these variables in your local scope, you can
    add information to tracebacks that happen in that context.  This
    allows applications to add all sorts of extra information about
    the context of the error, including URLs, environmental variables,
    users, hostnames, etc.  These are the variables we look for:

    ``__traceback_supplement__``:
        You can define this locally or globally (unlike all the other
        variables, which must be defined locally).

        ``__traceback_supplement__`` is a tuple of ``(factory, arg1,
        arg2...)``.  When there is an exception, ``factory(arg1, arg2,
        ...)`` is called, and the resulting object is inspected for
        supplemental information.

    ``__traceback_info__``:
        This information is added to the traceback, usually fairly
        literally.

    ``__traceback_hide__``:
        If set and true, this indicates that the frame should be
        hidden from abbreviated tracebacks.  This way you can hide
        some of the complexity of the larger framework and let the
        user focus on their own errors.

        By setting it to ``'before'``, all frames before this one will
        be thrown away.  By setting it to ``'after'`` then all frames
        after this will be thrown away until ``'reset'`` is found.  In
        each case the frame where it is set is included, unless you
        append ``'_and_this'`` to the value (e.g.,
        ``'before_and_this'``).

        Note that formatters will ignore this entirely if the frame
        that contains the error wouldn't normally be shown according
        to these rules.

    ``__traceback_reporter__``:
        This should be a reporter object (see the reporter module),
        or a list/tuple of reporter objects.  All reporters found this
        way will be given the exception, innermost first.

    ``__traceback_decorator__``:
        This object (defined in a local or global scope) will get the
        result of this function (the CollectedException defined
        below).  It may modify this object in place, or return an
        entirely new object.  This gives the object the ability to
        manipulate the traceback arbitrarily.

    The actual interpretation of these values is largely up to the
    reporters and formatters.

    ``collect_exception(*sys.exc_info())`` will return an object with
    several attributes:

    ``frames``:
        A list of frames
    ``exception_formatted``:
        The formatted exception, as returned by
        ``traceback.format_exception_only``
    ``exception_type``:
        The type of the exception, like ``ValueError``
    ``exception_value``:
        The string value of the exception, like ``'x not in list'``
    ``identification_code``:
        A hash of the exception data meant to identify the general
        exception, so that it shares this code with other exceptions
        that derive from the same problem.  The code is a hash of
        all the module names and function names in the traceback,
        plus exception_type.  This should be shown to users so they
        can refer to the exception later. (@@: should it include a
        portion that allows identification of the specific instance
        of the exception as well?)

    The list of frames goes outermost first (the frame where the
    exception was raised comes last), following the order of the
    traceback.  Each frame has these attributes; some values may be
    None if they could not be determined.

    ``modname``:
        the name of the module
    ``filename``:
        the filename of the module
    ``lineno``:
        the line of the error
    ``revision``:
        the contents of __version__ or __revision__
    ``name``:
        the function name
    ``tbid``:
        the ``id()`` of the traceback object the frame was built from
    ``supplement``:
        an object created from ``__traceback_supplement__``
    ``supplement_exception``:
        a simple traceback of any exception ``__traceback_supplement__``
        created
    ``traceback_info``:
        the str() of any ``__traceback_info__`` variable found in the local
        scope (@@: should it str()-ify it or not?)
    ``traceback_hide``:
        the value of any ``__traceback_hide__`` variable
    ``traceback_log``:
        the value of any ``__traceback_log__`` variable
    ``traceback_decorator``:
        the value of any ``__traceback_decorator__`` variable


    ``__traceback_supplement__`` is thrown away, but a fixed
    set of attributes are captured; each of these attributes is
    optional.

    ``object``:
        the name of the object being visited
    ``source_url``:
        the original URL requested
    ``line``:
        the line of source being executed (for interpreters, like ZPT)
    ``column``:
        the column of source being executed
    ``expression``:
        the expression being evaluated (also for interpreters)
    ``warnings``:
        a list of (string) warnings to be displayed
    ``getInfo``:
        a function/method that takes no arguments, and returns a string
        describing any extra information
    ``extraData``:
        a function/method that takes no arguments, and returns a
        dictionary.  The contents of this dictionary will not be
        displayed in the context of the traceback, but globally for
        the exception.  Results will be grouped by the keys in the
        dictionaries (which also serve as titles).  The keys can also
        be tuples of (importance, title); in this case the importance
        should be ``important`` (shows up at top), ``normal`` (shows
        up somewhere; unspecified), ``supplemental`` (shows up at
        bottom), or ``extra`` (shows up hidden or not at all).

    These are used to create an object with attributes of the same
    names, except that the return value of ``getInfo()`` is stored as
    the ``info`` attribute and the return value of ``extraData()`` as
    the ``extra`` attribute.
    ``__traceback_supplement__`` implementations should be careful to
    produce values that are relatively static and unlikely to cause
    further errors in the reporting system -- any complex
    introspection should go in ``getInfo()`` and should ultimately
    return a string.

    Note that all attributes are optional, and under certain
    circumstances may be None or may not exist at all -- the collector
    can only do a best effort, but must avoid creating any exceptions
    itself.

    Formatters may want to use ``__traceback_hide__`` as a hint to
    hide frames that are part of the 'framework' or underlying system.
    There are a variety of rules about special values for this
    variable that formatters should be aware of.

    TODO:

    More attributes in __traceback_supplement__?  Maybe an attribute
    that gives a list of local variables that should also be
    collected?  Also, attributes that would be explicitly meant for
    the entire request, not just a single frame.  Right now some of
    the fixed set of attributes (e.g., source_url) are meant for this
    use, but there's no explicit way for the supplement to indicate
    new values, e.g., logged-in user, HTTP referrer, environment, etc.
    Also, the attributes that do exist are Zope/Web oriented.

    More information on frames?  cgitb, for instance, produces
    extensive information on local variables.  There exists the
    possibility that getting this information may cause side effects,
    which can make debugging more difficult; but it also provides
    fodder for post-mortem debugging.  However, the collector is not
    meant to be configurable, but to capture everything it can and let
    the formatters be configurable.  Maybe this would have to be a
    configuration value, or maybe it could be indicated by another
    magical variable (which would probably mean 'show all local
    variables below this frame')
    """

    # When true, getRevision() reports __revision__/__version__ of the
    # module each frame belongs to; when false it always returns None.
    show_revisions = 0

    def __init__(self, limit=None):
        """
        ``limit`` is the maximum number of traceback entries to
        collect; ``None`` defers to ``sys.tracebacklimit`` (see
        ``getLimit``).
        """
        self.limit = limit

    def getLimit(self):
        """
        Return the configured limit, falling back to
        ``sys.tracebacklimit`` and finally to ``None`` (no limit).
        """
        limit = self.limit
        if limit is None:
            limit = getattr(sys, 'tracebacklimit', None)
        return limit

    def getRevision(self, globals):
        """
        Return the stripped ``str()`` of ``__revision__`` (or, failing
        that, ``__version__``) found in the ``globals`` mapping.

        Returns ``None`` when ``show_revisions`` is false or neither
        name is present, and ``'???'`` when the value cannot be
        converted to a string.
        """
        if not self.show_revisions:
            return None
        revision = globals.get('__revision__', None)
        if revision is None:
            # Incorrect but commonly used spelling
            revision = globals.get('__version__', None)

        if revision is not None:
            try:
                revision = str(revision).strip()
            except Exception:
                revision = '???'
        return revision

    def collectSupplement(self, supplement, tb):
        """
        Copy the fixed set of attributes from ``supplement`` into a new
        ``SupplementaryData`` object.

        Missing attributes become ``None``.  ``getInfo()`` and
        ``extraData()`` are called, when present, and their results are
        stored as ``info`` and ``extra``.  ``tb`` is accepted but not
        used.  Exceptions raised by the supplement propagate to the
        caller.
        """
        result = {}

        for name in ('object', 'source_url', 'line', 'column',
                     'expression', 'warnings'):
            result[name] = getattr(supplement, name, None)

        func = getattr(supplement, 'getInfo', None)
        result['info'] = func() if func else None
        func = getattr(supplement, 'extraData', None)
        result['extra'] = func() if func else None
        return SupplementaryData(**result)

    def collectLine(self, tb, extra_data):
        """
        Collect the data for the single traceback entry ``tb``.

        Returns a dictionary suitable for ``ExceptionFrame(**data)``.
        Any ``extraData()`` supplied by a traceback supplement is
        appended, per key, to the lists in the ``extra_data``
        dictionary, which is modified in place.
        """
        f = tb.tb_frame
        lineno = tb.tb_lineno
        co = f.f_code
        filename = co.co_filename
        name = co.co_name
        globals = f.f_globals
        locals = f.f_locals
        if not hasattr(locals, 'keys'):
            # Something weird about this frame; it's not a real dict
            warnings.warn(
                "Frame %s has an invalid locals(): %r" % (
                globals.get('__name__', 'unknown'), locals))
            locals = {}
        data = {}
        data['modname'] = globals.get('__name__', None)
        data['filename'] = filename
        data['lineno'] = lineno
        data['revision'] = self.getRevision(globals)
        data['name'] = name
        data['tbid'] = id(tb)

        # Output a traceback supplement, if any.
        if '__traceback_supplement__' in locals:
            # Use the supplement defined in the function.
            tbs = locals['__traceback_supplement__']
        elif '__traceback_supplement__' in globals:
            # Use the supplement defined in the module.
            # This is used by Scripts (Python).
            tbs = globals['__traceback_supplement__']
        else:
            tbs = None
        if tbs is not None:
            try:
                # Unpacking happens inside the try block so that a
                # malformed supplement (empty or not a sequence) is
                # reported like any other supplement failure instead
                # of raising out of the collector.
                factory = tbs[0]
                args = tbs[1:]
                supp = factory(*args)
                data['supplement'] = self.collectSupplement(supp, tb)
                if data['supplement'].extra:
                    for key, value in data['supplement'].extra.items():
                        extra_data.setdefault(key, []).append(value)
            except Exception:
                if DEBUG_EXCEPTION_FORMATTER:
                    out = StringIO()
                    traceback.print_exc(file=out)
                    text = out.getvalue()
                    data['supplement_exception'] = text
                # else just swallow the exception.

        try:
            tbi = locals.get('__traceback_info__', None)
            if tbi is not None:
                data['traceback_info'] = str(tbi)
        except Exception:
            # Best effort only: an unprintable __traceback_info__ is
            # simply left out of the frame data.
            pass

        # Unique sentinel, so that a magic variable explicitly set to
        # None can be told apart from one that is absent.
        marker = []
        for magic_name in ('__traceback_hide__', '__traceback_log__',
                           '__traceback_decorator__'):
            try:
                tbh = locals.get(magic_name,
                                 globals.get(magic_name, marker))
                if tbh is not marker:
                    # Strip the surrounding double underscores:
                    # '__traceback_hide__' -> 'traceback_hide'
                    data[magic_name[2:-2]] = tbh
            except Exception:
                pass

        return data

    def collectExceptionOnly(self, etype, value):
        """
        Return ``traceback.format_exception_only(etype, value)``.
        """
        return traceback.format_exception_only(etype, value)

    def collectException(self, etype, value, tb, limit=None):
        """
        Walk the traceback ``tb`` and return a ``CollectedException``
        describing the exception ``(etype, value)``.

        At most ``limit`` traceback entries are collected; ``None``
        defers to ``getLimit()``.  Every ``__traceback_decorator__``
        found along the way is called with the result, in traceback
        order, and may replace it by returning a non-None value;
        errors raised by decorators are ignored.
        """
        # The next line provides a way to detect recursion.
        __exception_formatter__ = 1
        frames = []
        ident_data = []
        traceback_decorators = []
        if limit is None:
            limit = self.getLimit()
        n = 0
        extra_data = {}
        while tb is not None and (limit is None or n < limit):
            frame_locals = tb.tb_frame.f_locals
            # Same caution as collectLine(): a frame's locals may not
            # be a real mapping.
            if (hasattr(frame_locals, 'get')
                    and frame_locals.get('__exception_formatter__')):
                # Stop recursion. @@: should make a fake ExceptionFrame
                frames.append('(Recursive formatException() stopped)\n')
                break
            data = self.collectLine(tb, extra_data)
            frame = ExceptionFrame(**data)
            frames.append(frame)
            if frame.traceback_decorator is not None:
                traceback_decorators.append(frame.traceback_decorator)
            ident_data.append(frame.modname or '?')
            ident_data.append(frame.name or '?')
            tb = tb.tb_next
            n = n + 1
        ident_data.append(str(etype))
        ident = serial_number_generator.hash_identifier(
            ' '.join(ident_data), length=5, upper=True,
            prefix=DEBUG_IDENT_PREFIX)

        # Import failures are usually path problems, so record sys.path.
        # issubclass() also covers subclasses such as ModuleNotFoundError.
        if isinstance(etype, type) and issubclass(etype, ImportError):
            extra_data[('important', 'sys.path')] = [sys.path]

        result = CollectedException(
            frames=frames,
            exception_formatted=self.collectExceptionOnly(etype, value),
            exception_type=etype,
            exception_value=self.safeStr(value),
            identification_code=ident,
            date=time.localtime(),
            extra_data=extra_data)
        for decorator in traceback_decorators:
            try:
                new_result = decorator(result)
                if new_result is not None:
                    result = new_result
            except Exception:
                # A broken decorator must not prevent the report.
                pass
        return result

    def safeStr(self, obj):
        """
        Return a string for ``obj`` without raising
        ``UnicodeEncodeError``.

        ``str(obj)`` is tried first, then the text type encoded with
        ``FALLBACK_ENCODING`` (errors replaced), then ``repr(obj)``.
        """
        try:
            return str(obj)
        except UnicodeEncodeError:
            # ``unicode`` on Python 2, ``str`` on Python 3; the bare
            # name ``unicode`` would be a NameError on Python 3.
            text_type = type(u'')
            try:
                return text_type(obj).encode(FALLBACK_ENCODING, 'replace')
            except UnicodeEncodeError:
                # This is when something is really messed up, but this can
                # happen when the __str__ of an object has to handle unicode
                return repr(obj)

# Module-level traceback limit; it is lowered further down in this module
# when sys.tracebacklimit is set.  NOTE(review): nothing visible in this
# file reads it -- ExceptionCollector uses getLimit() instead -- confirm
# whether external code depends on it.
limit = 200

class Bunch(object):

    """
    A plain attribute holder: every keyword argument given to the
    constructor becomes an attribute of the instance.
    """

    def __init__(self, **attrs):
        for key in attrs:
            setattr(self, key, attrs[key])

    def __repr__(self):
        # Show only public attributes, each value cut to 30 characters
        # of its str() so that the repr stays short.
        shown = [
            '%s=%r' % (key, str(val)[:30])
            for key, val in self.__dict__.items()
            if not key.startswith('_')]
        return '<%s %s>' % (self.__class__.__name__, ' '.join(shown))

class CollectedException(Bunch):
    """
    This is the result of collecting the exception; it contains copies
    of data of interest.

    Instances created without ``frames`` or ``extra_data`` get their
    own empty list/dictionary instead of sharing the class-level ones.
    """
    # A list of frames (ExceptionFrame instances), innermost last:
    frames = []
    # The result of traceback.format_exception_only; this looks
    # like a normal traceback you'd see in the interactive interpreter
    exception_formatted = None
    # The type of the exception.  ExceptionCollector.collectException
    # stores the exception class itself here (e.g. ``ValueError``), not
    # its string representation; we can't keep the actual exception
    # around, but the class should be safe.
    exception_type = None
    # The string representation of the exception, from ``str(e)``.
    exception_value = None
    # An identifier which should more-or-less classify this particular
    # exception, including where in the code it happened.
    identification_code = None
    # The date, as time.localtime() returns:
    date = None
    # A dictionary of supplemental data:
    extra_data = {}

    def __init__(self, **attrs):
        # The class-level ``frames`` and ``extra_data`` are mutable and
        # would otherwise be shared (and mutated) by every instance
        # that does not pass its own.
        attrs.setdefault('frames', [])
        attrs.setdefault('extra_data', {})
        super(CollectedException, self).__init__(**attrs)

class SupplementaryData(Bunch):
    """
    The result of __traceback_supplement__.  We don't keep the
    supplement object around, for fear of GC problems and whatnot.
    (@@: Maybe I'm being too superstitious about copying only specific
    information over)
    """

    # These attributes are copied from the object, or left as None
    # if the object doesn't have these attributes:
    object = None
    source_url = None
    line = None
    column = None
    expression = None
    warnings = None
    # This is the *return value* of supplement.getInfo():
    info = None
    # This is the *return value* of supplement.extraData().  The
    # collector reads ``.extra`` on every supplement, so a default is
    # needed for instances that were created without it:
    extra = None

class ExceptionFrame(Bunch):
    """
    This represents one frame of the exception.  Each frame is a
    context in the call stack, typically represented by a line
    number and module name in the traceback.
    """

    # The name of the module; can be None, especially when the code
    # isn't associated with a module.
    modname = None
    # The filename (@@: when no filename, is it None or '?'?)
    filename = None
    # Line number
    lineno = None
    # The value of __revision__ or __version__ -- but only if
    # show_revisions is true (by default it is false).  (@@: Why not
    # collect this?)
    revision = None
    # The name of the function with the error (@@: None or '?' when
    # unknown?)
    name = None
    # A SupplementaryData object, if __traceback_supplement__ was found
    # (and produced no errors)
    supplement = None
    # If accessing __traceback_supplement__ causes any error, the
    # plain-text traceback is stored here
    supplement_exception = None
    # The str() of any __traceback_info__ value found
    traceback_info = None
    # The value of __traceback_hide__
    traceback_hide = False
    # The value of __traceback_log__
    traceback_log = None
    # The value of __traceback_decorator__
    traceback_decorator = None
    # The id() of the traceback scope, can be used to reference the
    # scope for use elsewhere
    tbid = None

    def get_source_line(self, context=0):
        """
        Return the source of the current line of this frame.  You
        probably want to .strip() it as well, as it is likely to have
        leading whitespace.

        If context is given, then that many lines on either side will
        also be returned.  E.g., context=1 will give 3 lines.

        Returns None when the filename or line number is unknown.
        """
        if not self.filename or not self.lineno:
            return None
        first = self.lineno - context
        last = self.lineno + context
        return ''.join(
            linecache.getline(self.filename, lineno)
            for lineno in range(first, last + 1))

# Never exceed the interpreter-wide traceback limit when one is set; when
# it is not set the limit is compared with itself and stays unchanged.
limit = min(limit, getattr(sys, 'tracebacklimit', limit))

# Module-level collector instance used by collect_exception().
col = ExceptionCollector()

def collect_exception(t, v, tb, limit=None):
    """
    Collect an exception, given in the form ``sys.exc_info()`` returns
    it, using the module-level collector ``col``.

    ``limit`` is passed through unchanged to ``col.collectException``,
    whose result is returned.

    Use like::

      try:
          blah blah
      except:
          exc_data = collect_exception(*sys.exc_info())
    """
    collect = col.collectException
    return collect(t, v, tb, limit=limit)