diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2017-09-04 10:47:56 +0200 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2017-09-04 10:47:56 +0200 |
commit | 2648f3f7cecba21c0b93032ec0c44f94097c2aac (patch) | |
tree | d532154e2deaf8b7c3e9ffecb38add03a39faf69 /Cython/Debugger | |
parent | a7b253ab4efff1ea98cf43341b055b0100a79ed5 (diff) | |
download | cython-2648f3f7cecba21c0b93032ec0c44f94097c2aac.tar.gz |
Update gdb support in libpython.py from CPython 3.7 (git rev 5fe59f8).
Diffstat (limited to 'Cython/Debugger')
-rw-r--r-- | Cython/Debugger/libpython.py | 850 |
1 files changed, 502 insertions, 348 deletions
diff --git a/Cython/Debugger/libpython.py b/Cython/Debugger/libpython.py index 3da3d776c..82a471191 100644 --- a/Cython/Debugger/libpython.py +++ b/Cython/Debugger/libpython.py @@ -25,9 +25,10 @@ giving file/line information and the state of local variables In particular, given a gdb.Value corresponding to a PyObject* in the inferior process, we can generate a "proxy value" within the gdb process. For example, given a PyObject* in the inferior process that is in fact a PyListObject* -holding three PyObject* that turn out to be PyStringObject* instances, we can -generate a proxy value within the gdb process that is a list of strings: - ["foo", "bar", "baz"] +holding three PyObject* that turn out to be PyBytesObject* instances, we can +generate a proxy value within the gdb process that is a list of bytes +instances: + [b"foo", b"bar", b"baz"] Doing so can be expensive for complicated graphs of objects, and could take some time, so we also have a "write_repr" method that writes a representation @@ -46,70 +47,72 @@ the type names are known to the debugger The module also extends gdb with some python-specific commands. ''' -try: - input = raw_input -except NameError: - pass +# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax +# compatible (2.6+ and 3.0+). See #19308. +from __future__ import print_function +import gdb import os -import re -import sys -import struct import locale -import atexit -import warnings -import tempfile -import textwrap -import itertools - -import gdb +import sys -try: - xrange -except NameError: +if sys.version_info[0] >= 3: + unichr = chr xrange = range - -if sys.version_info[0] < 3: - # I think this is the only way to fix this bug :'( - # http://sourceware.org/bugzilla/show_bug.cgi?id=12285 - out, err = sys.stdout, sys.stderr - reload(sys).setdefaultencoding('UTF-8') - sys.stdout = out - sys.stderr = err + long = int # Look up the gdb.Type for some standard types: -_type_char_ptr = gdb.lookup_type('char').pointer() # char* -_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() -_type_void_ptr = gdb.lookup_type('void').pointer() # void* +# Those need to be refreshed as types (pointer sizes) may change when +# gdb loads different executables -SIZEOF_VOID_P = _type_void_ptr.sizeof +def _type_char_ptr(): + return gdb.lookup_type('char').pointer() # char* -Py_TPFLAGS_HEAPTYPE = (1 << 9) -Py_TPFLAGS_INT_SUBCLASS = (1 << 23) +def _type_unsigned_char_ptr(): + return gdb.lookup_type('unsigned char').pointer() # unsigned char* + + +def _type_unsigned_short_ptr(): + return gdb.lookup_type('unsigned short').pointer() + + +def _type_unsigned_int_ptr(): + return gdb.lookup_type('unsigned int').pointer() + + +def _sizeof_void_p(): + return gdb.lookup_type('void').pointer().sizeof + + +# value computed later, see PyUnicodeObjectPtr.proxy() +_is_pep393 = None + +Py_TPFLAGS_HEAPTYPE = (1 << 9) Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) -Py_TPFLAGS_STRING_SUBCLASS = (1 << 27) Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) -MAX_OUTPUT_LEN = 1024 + +MAX_OUTPUT_LEN=1024 hexdigits = "0123456789abcdef" ENCODING = locale.getpreferredencoding() +EVALFRAME = '_PyEval_EvalFrameDefault' class NullPyObjectPtr(RuntimeError): pass def safety_limit(val): - # Given a integer value from the process being debugged, limit it to some + # Given an integer value from the process being debugged, limit it to some # safety threshold so that arbitrary breakage within said process doesn't # break the gdb process too much (e.g. sizes of iterations, sizes of lists) return min(val, 1000) @@ -118,42 +121,45 @@ def safety_limit(val): def safe_range(val): # As per range, but don't trust the value too much: cap it to a safety # threshold in case the data was corrupted - return range(safety_limit(val)) - - -def write_unicode(file, text): - # Write a byte or unicode string to file. Unicode strings are encoded to - # ENCODING encoding with 'backslashreplace' error handler to avoid - # UnicodeEncodeError. - if not isinstance(text, str): - text = text.encode(ENCODING, 'backslashreplace') - file.write(text) - - -def os_fsencode(filename): - if isinstance(filename, str): # only encode in Py2 - return filename - encoding = sys.getfilesystemencoding() - if encoding == 'mbcs': - # mbcs doesn't support surrogateescape - return filename.encode(encoding) - encoded = [] - for char in filename: - # surrogateescape error handler - if 0xDC80 <= ord(char) <= 0xDCFF: - byte = chr(ord(char) - 0xDC00) - else: - byte = char.encode(encoding) - encoded.append(byte) - return ''.join(encoded) + return xrange(safety_limit(int(val))) +if sys.version_info[0] >= 3: + def write_unicode(file, text): + file.write(text) +else: + def write_unicode(file, text): + # Write a byte or unicode string to file. Unicode strings are encoded to + # ENCODING encoding with 'backslashreplace' error handler to avoid + # UnicodeEncodeError. + if isinstance(text, unicode): + text = text.encode(ENCODING, 'backslashreplace') + file.write(text) + +try: + os_fsencode = os.fsencode +except AttributeError: + def os_fsencode(filename): + if not isinstance(filename, unicode): + return filename + encoding = sys.getfilesystemencoding() + if encoding == 'mbcs': + # mbcs doesn't support surrogateescape + return filename.encode(encoding) + encoded = [] + for char in filename: + # surrogateescape error handler + if 0xDC80 <= ord(char) <= 0xDCFF: + byte = chr(ord(char) - 0xDC00) + else: + byte = char.encode(encoding) + encoded.append(byte) + return ''.join(encoded) class StringTruncated(RuntimeError): pass - class TruncatedStringIO(object): - '''Similar to cStringIO, but can truncate the output by raising a + '''Similar to io.StringIO, but can truncate the output by raising a StringTruncated exception''' def __init__(self, maxlen=None): self._val = '' @@ -171,41 +177,10 @@ class TruncatedStringIO(object): def getvalue(self): return self._val - -# pretty printer lookup -all_pretty_typenames = set() - - -class PrettyPrinterTrackerMeta(type): - - def __init__(self, name, bases, dict): - super(PrettyPrinterTrackerMeta, self).__init__(name, bases, dict) - all_pretty_typenames.add(self._typename) - - -# Class decorator that adds a metaclass and recreates the class with it. -# Copied from 'six'. See Cython/Utils.py. -def _add_metaclass(metaclass): - """Class decorator for creating a class with a metaclass.""" - def wrapper(cls): - orig_vars = cls.__dict__.copy() - slots = orig_vars.get('__slots__') - if slots is not None: - if isinstance(slots, str): - slots = [slots] - for slots_var in slots: - orig_vars.pop(slots_var) - orig_vars.pop('__dict__', None) - orig_vars.pop('__weakref__', None) - return metaclass(cls.__name__, cls.__bases__, orig_vars) - return wrapper - - -@_add_metaclass(PrettyPrinterTrackerMeta) class PyObjectPtr(object): """ - Class wrapping a gdb.Value that's a either a (PyObject*) within the - inferior process, or some subclass pointer e.g. (PyStringObject*) + Class wrapping a gdb.Value that's either a (PyObject*) within the + inferior process, or some subclass pointer e.g. (PyBytesObject*) There will be a subclass for every refined PyObject type that we care about. @@ -213,7 +188,6 @@ class PyObjectPtr(object): Note that at every stage the underlying pointer could be NULL, point to corrupt data, etc; this is the debugger, after all. """ - _typename = 'PyObject' def __init__(self, gdbval, cast_to=None): @@ -286,7 +260,7 @@ class PyObjectPtr(object): return PyTypeObjectPtr(self.field('ob_type')) def is_null(self): - return not self._gdbval + return 0 == long(self._gdbval) def is_optimized_out(self): ''' @@ -347,7 +321,7 @@ class PyObjectPtr(object): return '<%s at remote 0x%x>' % (self.tp_name, self.address) return FakeRepr(self.safe_tp_name(), - int(self._gdbval)) + long(self._gdbval)) def write_repr(self, out, visited): ''' @@ -386,44 +360,40 @@ class PyObjectPtr(object): # class return cls - #print 'tp_flags = 0x%08x' % tp_flags - #print 'tp_name = %r' % tp_name + #print('tp_flags = 0x%08x' % tp_flags) + #print('tp_name = %r' % tp_name) name_map = {'bool': PyBoolObjectPtr, 'classobj': PyClassObjectPtr, - 'instance': PyInstanceObjectPtr, 'NoneType': PyNoneStructPtr, 'frame': PyFrameObjectPtr, 'set' : PySetObjectPtr, 'frozenset' : PySetObjectPtr, 'builtin_function_or_method' : PyCFunctionObjectPtr, + 'method-wrapper': wrapperobject, } if tp_name in name_map: return name_map[tp_name] - if tp_flags & (Py_TPFLAGS_HEAPTYPE|Py_TPFLAGS_TYPE_SUBCLASS): - return PyTypeObjectPtr + if tp_flags & Py_TPFLAGS_HEAPTYPE: + return HeapTypeObjectPtr - if tp_flags & Py_TPFLAGS_INT_SUBCLASS: - return PyIntObjectPtr if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: return PyLongObjectPtr if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: return PyListObjectPtr if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: return PyTupleObjectPtr - if tp_flags & Py_TPFLAGS_STRING_SUBCLASS: - try: - gdb.lookup_type('PyBytesObject') - return PyBytesObjectPtr - except RuntimeError: - return PyStringObjectPtr + if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: + return PyBytesObjectPtr if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: return PyUnicodeObjectPtr if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: return PyDictObjectPtr if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: return PyBaseExceptionObjectPtr + #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: + # return PyTypeObjectPtr # Use the base class: return cls @@ -438,7 +408,7 @@ class PyObjectPtr(object): p = PyObjectPtr(gdbval) cls = cls.subclass_from_type(p.type()) return cls(gdbval, cast_to=cls.get_gdb_type()) - except RuntimeError as exc: + except RuntimeError: # Handle any kind of error e.g. NULL ptrs by simply using the base # class pass @@ -449,13 +419,11 @@ class PyObjectPtr(object): return gdb.lookup_type(cls._typename).pointer() def as_address(self): - return int(self._gdbval) - + return long(self._gdbval) class PyVarObjectPtr(PyObjectPtr): _typename = 'PyVarObject' - class ProxyAlreadyVisited(object): ''' Placeholder proxy to use when protecting against infinite recursion due to @@ -471,7 +439,7 @@ class ProxyAlreadyVisited(object): def _write_instance_repr(out, visited, name, pyop_attrdict, address): - '''Shared code for use by old-style and new-style classes: + '''Shared code for use by all classes: write a representation to file-like object "out"''' out.write('<') out.write(name) @@ -480,7 +448,7 @@ def _write_instance_repr(out, visited, name, pyop_attrdict, address): if isinstance(pyop_attrdict, PyDictObjectPtr): out.write('(') first = True - for pyop_arg, pyop_val in pyop_attrdict.items(): + for pyop_arg, pyop_val in pyop_attrdict.iteritems(): if not first: out.write(', ') first = False @@ -500,24 +468,27 @@ class InstanceProxy(object): def __repr__(self): if isinstance(self.attrdict, dict): - kwargs = ', '.join("%s=%r" % (arg, val) for arg, val in self.attrdict.items()) - return '<%s(%s) at remote 0x%x>' % ( - self.cl_name, kwargs, self.address) + kwargs = ', '.join(["%s=%r" % (arg, val) + for arg, val in self.attrdict.iteritems()]) + return '<%s(%s) at remote 0x%x>' % (self.cl_name, + kwargs, self.address) else: - return '<%s at remote 0x%x>' % ( - self.cl_name, self.address) - + return '<%s at remote 0x%x>' % (self.cl_name, + self.address) def _PyObject_VAR_SIZE(typeobj, nitems): + if _PyObject_VAR_SIZE._type_size_t is None: + _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t') + return ( ( typeobj.field('tp_basicsize') + nitems * typeobj.field('tp_itemsize') + - (SIZEOF_VOID_P - 1) - ) & ~(SIZEOF_VOID_P - 1) - ).cast(gdb.lookup_type('size_t')) + (_sizeof_void_p() - 1) + ) & ~(_sizeof_void_p() - 1) + ).cast(_PyObject_VAR_SIZE._type_size_t) +_PyObject_VAR_SIZE._type_size_t = None - -class PyTypeObjectPtr(PyObjectPtr): - _typename = 'PyTypeObject' +class HeapTypeObjectPtr(PyObjectPtr): + _typename = 'PyObject' def get_attr_dict(self): ''' @@ -536,9 +507,9 @@ class PyTypeObjectPtr(PyObjectPtr): size = _PyObject_VAR_SIZE(typeobj, tsize) dictoffset += size assert dictoffset > 0 - assert dictoffset % SIZEOF_VOID_P == 0 + assert dictoffset % _sizeof_void_p() == 0 - dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset + dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() dictptr = dictptr.cast(PyObjectPtrPtr) return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) @@ -551,7 +522,7 @@ class PyTypeObjectPtr(PyObjectPtr): def proxyval(self, visited): ''' - Support for new-style classes. + Support for classes. Currently we just locate the dictionary using a transliteration to python of _PyObject_GetDictPtr, ignoring descriptors @@ -568,8 +539,8 @@ class PyTypeObjectPtr(PyObjectPtr): attr_dict = {} tp_name = self.safe_tp_name() - # New-style class: - return InstanceProxy(tp_name, attr_dict, int(self._gdbval)) + # Class: + return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) def write_repr(self, out, visited): # Guard against infinite loops: @@ -578,16 +549,9 @@ class PyTypeObjectPtr(PyObjectPtr): return visited.add(self.as_address()) - try: - tp_name = self.field('tp_name').string() - except RuntimeError: - tp_name = 'unknown' - - out.write('<type %s at remote 0x%x>' % (tp_name, self.as_address())) - # pyop_attrdict = self.get_attr_dict() - # _write_instance_repr(out, visited, - # self.safe_tp_name(), pyop_attrdict, self.as_address()) - + pyop_attrdict = self.get_attr_dict() + _write_instance_repr(out, visited, + self.safe_tp_name(), pyop_attrdict, self.as_address()) class ProxyException(Exception): def __init__(self, tp_name, args): @@ -597,7 +561,6 @@ class ProxyException(Exception): def __repr__(self): return '%s%r' % (self.tp_name, self.args) - class PyBaseExceptionObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception @@ -624,7 +587,6 @@ class PyBaseExceptionObjectPtr(PyObjectPtr): out.write(self.safe_tp_name()) self.write_field_repr('args', out, visited) - class PyClassObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> @@ -640,17 +602,17 @@ class BuiltInFunctionProxy(object): def __repr__(self): return "<built-in function %s>" % self.ml_name - class BuiltInMethodProxy(object): def __init__(self, ml_name, pyop_m_self): self.ml_name = ml_name self.pyop_m_self = pyop_m_self def __repr__(self): - return '<built-in method %s of %s object at remote 0x%x>' % ( - self.ml_name, self.pyop_m_self.safe_tp_name(), - self.pyop_m_self.as_address()) - + return ('<built-in method %s of %s object at remote 0x%x>' + % (self.ml_name, + self.pyop_m_self.safe_tp_name(), + self.pyop_m_self.as_address()) + ) class PyCFunctionObjectPtr(PyObjectPtr): """ @@ -709,17 +671,21 @@ class PyDictObjectPtr(PyObjectPtr): def iteritems(self): ''' Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, - analagous to dict.items() + analogous to dict.iteritems() ''' - for i in safe_range(self.field('ma_mask') + 1): - ep = self.field('ma_table') + i - pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) + keys = self.field('ma_keys') + values = self.field('ma_values') + entries, nentries = self._get_entries(keys) + for i in safe_range(nentries): + ep = entries[i] + if long(values): + pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) + else: + pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) if not pyop_value.is_null(): pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) yield (pyop_key, pyop_value) - items = iteritems - def proxyval(self, visited): # Guard against infinite loops: if self.as_address() in visited: @@ -727,7 +693,7 @@ class PyDictObjectPtr(PyObjectPtr): visited.add(self.as_address()) result = {} - for pyop_key, pyop_value in self.items(): + for pyop_key, pyop_value in self.iteritems(): proxy_key = pyop_key.proxyval(visited) proxy_value = pyop_value.proxyval(visited) result[proxy_key] = proxy_value @@ -742,7 +708,7 @@ class PyDictObjectPtr(PyObjectPtr): out.write('{') first = True - for pyop_key, pyop_value in self.items(): + for pyop_key, pyop_value in self.iteritems(): if not first: out.write(', ') first = False @@ -751,52 +717,31 @@ class PyDictObjectPtr(PyObjectPtr): pyop_value.write_repr(out, visited) out.write('}') + def _get_entries(self, keys): + dk_nentries = int(keys['dk_nentries']) + dk_size = int(keys['dk_size']) + try: + # <= Python 3.5 + return keys['dk_entries'], dk_size + except RuntimeError: + # >= Python 3.6 + pass -class PyInstanceObjectPtr(PyObjectPtr): - _typename = 'PyInstanceObject' - - def proxyval(self, visited): - # Guard against infinite loops: - if self.as_address() in visited: - return ProxyAlreadyVisited('<...>') - visited.add(self.as_address()) - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - in_dict = self.pyop_field('in_dict').proxyval(visited) - - # Old-style class: - return InstanceProxy(cl_name, in_dict, int(self._gdbval)) - - def write_repr(self, out, visited): - # Guard against infinite loops: - if self.as_address() in visited: - out.write('<...>') - return - visited.add(self.as_address()) - - # Old-style class: - - # Get name of class: - in_class = self.pyop_field('in_class') - cl_name = in_class.pyop_field('cl_name').proxyval(visited) - - # Get dictionary of instance attributes: - pyop_in_dict = self.pyop_field('in_dict') - - _write_instance_repr(out, visited, - cl_name, pyop_in_dict, self.as_address()) - + if dk_size <= 0xFF: + offset = dk_size + elif dk_size <= 0xFFFF: + offset = 2 * dk_size + elif dk_size <= 0xFFFFFFFF: + offset = 4 * dk_size + else: + offset = 8 * dk_size -class PyIntObjectPtr(PyObjectPtr): - _typename = 'PyIntObject' + ent_addr = keys['dk_indices']['as_1'].address + ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset + ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer() + ent_addr = ent_addr.cast(ent_ptr_t) - def proxyval(self, visited): - result = int_from_int(self.field('ob_ival')) - return result + return ent_addr, dk_nentries class PyListObjectPtr(PyObjectPtr): @@ -832,7 +777,6 @@ class PyListObjectPtr(PyObjectPtr): element.write_repr(out, visited) out.write(']') - class PyLongObjectPtr(PyObjectPtr): _typename = 'PyLongObject' @@ -854,9 +798,9 @@ class PyLongObjectPtr(PyObjectPtr): #define PyLong_SHIFT 30 #define PyLong_SHIFT 15 ''' - ob_size = int(self.field('ob_size')) + ob_size = long(self.field('ob_size')) if ob_size == 0: - return int(0) + return 0 ob_digit = self.field('ob_digit') @@ -865,7 +809,7 @@ class PyLongObjectPtr(PyObjectPtr): else: SHIFT = 30 - digits = [ob_digit[i] * (1 << (SHIFT*i)) + digits = [long(ob_digit[i]) * 2**(SHIFT*i) for i in safe_range(abs(ob_size))] result = sum(digits) if ob_size < 0: @@ -883,13 +827,11 @@ class PyBoolObjectPtr(PyLongObjectPtr): Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two <bool> instances (Py_True/Py_False) within the process being debugged. """ - _typename = 'PyBoolObject' - def proxyval(self, visited): - castto = gdb.lookup_type('PyLongObject').pointer() - self._gdbval = self._gdbval.cast(castto) - return bool(PyLongObjectPtr(self._gdbval).proxyval(visited)) - + if PyLongObjectPtr.proxyval(self, visited): + return True + else: + return False class PyNoneStructPtr(PyObjectPtr): """ @@ -939,10 +881,10 @@ class PyFrameObjectPtr(PyObjectPtr): the global variables of this frame ''' if self.is_optimized_out(): - return + return () pyop_globals = self.pyop_field('f_globals') - return iter(pyop_globals.items()) + return pyop_globals.iteritems() def iter_builtins(self): ''' @@ -950,10 +892,10 @@ class PyFrameObjectPtr(PyObjectPtr): the builtin variables ''' if self.is_optimized_out(): - return + return () pyop_builtins = self.pyop_field('f_builtins') - return iter(pyop_builtins.items()) + return pyop_builtins.iteritems() def get_var_by_name(self, name): ''' @@ -989,7 +931,7 @@ class PyFrameObjectPtr(PyObjectPtr): if self.is_optimized_out(): return None f_trace = self.field('f_trace') - if f_trace: + if long(f_trace) != 0: # we have a non-NULL f_trace: return self.f_lineno else: @@ -1004,7 +946,11 @@ class PyFrameObjectPtr(PyObjectPtr): if self.is_optimized_out(): return '(frame information optimized out)' filename = self.filename() - with open(os_fsencode(filename), 'r') as f: + try: + f = open(os_fsencode(filename), 'r') + except IOError: + return None + with f: all_lines = f.readlines() # Convert from 1-based current_line_num to 0-based list offset: return all_lines[self.current_line_num()-1] @@ -1030,25 +976,39 @@ class PyFrameObjectPtr(PyObjectPtr): out.write(')') + def print_traceback(self): + if self.is_optimized_out(): + sys.stdout.write(' (frame information optimized out)\n') + return + visited = set() + sys.stdout.write(' File "%s", line %i, in %s\n' + % (self.co_filename.proxyval(visited), + self.current_line_num(), + self.co_name.proxyval(visited))) class PySetObjectPtr(PyObjectPtr): _typename = 'PySetObject' + @classmethod + def _dummy_key(self): + return gdb.lookup_global_symbol('_PySet_Dummy').value() + + def __iter__(self): + dummy_ptr = self._dummy_key() + table = self.field('table') + for i in safe_range(self.field('mask') + 1): + setentry = table[i] + key = setentry['key'] + if key != 0 and key != dummy_ptr: + yield PyObjectPtr.from_pyobject_ptr(key) + def proxyval(self, visited): # Guard against infinite loops: if self.as_address() in visited: return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) visited.add(self.as_address()) - members = [] - table = self.field('table') - for i in safe_range(self.field('mask')+1): - setentry = table[i] - key = setentry['key'] - if key != 0: - key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited) - if key_proxy != '<dummy key>': - members.append(key_proxy) + members = (key.proxyval(visited) for key in self) if self.safe_tp_name() == 'frozenset': return frozenset(members) else: @@ -1077,18 +1037,11 @@ class PySetObjectPtr(PyObjectPtr): out.write('{') first = True - table = self.field('table') - for i in safe_range(self.field('mask')+1): - setentry = table[i] - key = setentry['key'] - if key != 0: - pyop_key = PyObjectPtr.from_pyobject_ptr(key) - key_proxy = pyop_key.proxyval(visited) # FIXME! - if key_proxy != '<dummy key>': - if not first: - out.write(', ') - first = False - pyop_key.write_repr(out, visited) + for key in self: + if not first: + out.write(', ') + first = False + key.write_repr(out, visited) out.write('}') if tp_name != 'set': @@ -1101,13 +1054,13 @@ class PyBytesObjectPtr(PyObjectPtr): def __str__(self): field_ob_size = self.field('ob_size') field_ob_sval = self.field('ob_sval') - return ''.join(struct.pack('b', field_ob_sval[i]) - for i in safe_range(field_ob_size)) + char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr()) + return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) def proxyval(self, visited): return str(self) - def write_repr(self, out, visited, py3=True): + def write_repr(self, out, visited): # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix # Get a PyStringObject* within the Python 2 gdb process: @@ -1118,10 +1071,7 @@ class PyBytesObjectPtr(PyObjectPtr): quote = "'" if "'" in proxy and not '"' in proxy: quote = '"' - - if py3: - out.write('b') - + out.write('b') out.write(quote) for byte in proxy: if byte == quote or byte == '\\': @@ -1145,9 +1095,6 @@ class PyBytesObjectPtr(PyObjectPtr): class PyStringObjectPtr(PyBytesObjectPtr): _typename = 'PyStringObject' - def write_repr(self, out, visited): - return super(PyStringObjectPtr, self).write_repr(out, visited, py3=False) - class PyTupleObjectPtr(PyObjectPtr): _typename = 'PyTupleObject' @@ -1163,8 +1110,8 @@ class PyTupleObjectPtr(PyObjectPtr): return ProxyAlreadyVisited('(...)') visited.add(self.as_address()) - result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) - for i in safe_range(int_from_int(self.field('ob_size')))]) + result = tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) + for i in safe_range(int_from_int(self.field('ob_size')))) return result def write_repr(self, out, visited): @@ -1185,6 +1132,9 @@ class PyTupleObjectPtr(PyObjectPtr): else: out.write(')') +class PyTypeObjectPtr(PyObjectPtr): + _typename = 'PyTypeObject' + def _unichr_is_printable(char): # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py @@ -1193,12 +1143,8 @@ def _unichr_is_printable(char): import unicodedata return unicodedata.category(char) not in ("C", "Z") - if sys.maxunicode >= 0x10000: - try: - _unichr = unichr - except NameError: - _unichr = chr + _unichr = unichr else: # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb def _unichr(x): @@ -1218,15 +1164,46 @@ class PyUnicodeObjectPtr(PyObjectPtr): return _type_Py_UNICODE.sizeof def proxyval(self, visited): - # From unicodeobject.h: - # Py_ssize_t length; /* Length of raw Unicode data in buffer */ - # Py_UNICODE *str; /* Raw Unicode buffer */ - field_length = int(self.field('length')) - field_str = self.field('str') + global _is_pep393 + if _is_pep393 is None: + fields = gdb.lookup_type('PyUnicodeObject').target().fields() + _is_pep393 = 'data' in [f.name for f in fields] + if _is_pep393: + # Python 3.3 and newer + may_have_surrogates = False + compact = self.field('_base') + ascii = compact['_base'] + state = ascii['state'] + is_compact_ascii = (int(state['ascii']) and int(state['compact'])) + if not int(state['ready']): + # string is not ready + field_length = long(compact['wstr_length']) + may_have_surrogates = True + field_str = ascii['wstr'] + else: + field_length = long(ascii['length']) + if is_compact_ascii: + field_str = ascii.address + 1 + elif int(state['compact']): + field_str = compact.address + 1 + else: + field_str = self.field('data')['any'] + repr_kind = int(state['kind']) + if repr_kind == 1: + field_str = field_str.cast(_type_unsigned_char_ptr()) + elif repr_kind == 2: + field_str = field_str.cast(_type_unsigned_short_ptr()) + elif repr_kind == 4: + field_str = field_str.cast(_type_unsigned_int_ptr()) + else: + # Python 3.2 and earlier + field_length = long(self.field('length')) + field_str = self.field('str') + may_have_surrogates = self.char_width() == 2 # Gather a list of ints from the Py_UNICODE array; these are either - # UCS-2 or UCS-4 code points: - if self.char_width() > 2: + # UCS-1, UCS-2 or UCS-4 code points: + if not may_have_surrogates: Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] else: # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the @@ -1253,24 +1230,19 @@ class PyUnicodeObjectPtr(PyObjectPtr): # Convert the int code points to unicode characters, and generate a # local unicode instance. # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). - result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs]) + result = u''.join([ + (_unichr(ucs) if ucs <= 0x10ffff else '\ufffd') + for ucs in Py_UNICODEs]) return result def write_repr(self, out, visited): + # Write this out as a Python 3 str literal, i.e. without a "u" prefix + # Get a PyUnicodeObject* within the Python 2 gdb process: proxy = self.proxyval(visited) # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr # to Python 2: - try: - gdb.parse_and_eval('PyString_Type') - except RuntimeError: - # Python 3, don't write 'u' as prefix - pass - else: - # Python 2, write the 'u' - out.write('u') - if "'" in proxy and '"' not in proxy: quote = '"' else: @@ -1371,16 +1343,40 @@ class PyUnicodeObjectPtr(PyObjectPtr): out.write(quote) - def __unicode__(self): - return self.proxyval(set()) - def __str__(self): - # In Python 3, everything is unicode (including attributes of e.g. - # code objects, such as function names). The Python 2 debugger code - # uses PyUnicodePtr objects to format strings etc, whereas with a - # Python 2 debuggee we'd get PyStringObjectPtr instances with __str__. - # Be compatible with that. - return unicode(self).encode('UTF-8') +class wrapperobject(PyObjectPtr): + _typename = 'wrapperobject' + + def safe_name(self): + try: + name = self.field('descr')['d_base']['name'].string() + return repr(name) + except (NullPyObjectPtr, RuntimeError): + return '<unknown name>' + + def safe_tp_name(self): + try: + return self.field('self')['ob_type']['tp_name'].string() + except (NullPyObjectPtr, RuntimeError): + return '<unknown tp_name>' + + def safe_self_addresss(self): + try: + address = long(self.field('self')) + return '%#x' % address + except (NullPyObjectPtr, RuntimeError): + return '<failed to get self address>' + + def proxyval(self, visited): + name = self.safe_name() + tp_name = self.safe_tp_name() + self_address = self.safe_self_addresss() + return ("<method-wrapper %s of %s object at %s>" + % (name, tp_name, self_address)) + + def write_repr(self, out, visited): + proxy = self.proxyval(visited) + out.write(proxy) def int_from_int(gdbval): @@ -1413,13 +1409,15 @@ class PyObjectPtrPrinter: proxyval = pyop.proxyval(set()) return stringify(proxyval) - def pretty_printer_lookup(gdbval): type = gdbval.type.unqualified() - if type.code == gdb.TYPE_CODE_PTR: - type = type.target().unqualified() - if str(type) in all_pretty_typenames: - return PyObjectPtrPrinter(gdbval) + if type.code != gdb.TYPE_CODE_PTR: + return None + + type = type.target().unqualified() + t = str(type) + if t in ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject"): + return PyObjectPtrPrinter(gdbval) """ During development, I've been manually invoking the code in this way: @@ -1440,22 +1438,21 @@ that this python file is installed to the same path as the library (or its /usr/lib/libpython2.6.so.1.0-gdb.py /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py """ - - -def register(obj): +def register (obj): if obj is None: obj = gdb # Wire up the pretty-printer obj.pretty_printers.append(pretty_printer_lookup) -register(gdb.current_objfile()) +register (gdb.current_objfile ()) + + # Unfortunately, the exact API exposed by the gdb module varies somewhat # from build to build # See http://bugs.python.org/issue8279?#msg102276 - class Frame(object): ''' Wrapper for gdb.Frame, adding various methods @@ -1500,9 +1497,26 @@ class Frame(object): iter_frame = iter_frame.newer() return index - def is_evalframeex(self): - '''Is this a PyEval_EvalFrameEx frame?''' - if self._gdbframe.name() == 'PyEval_EvalFrameEx': + # We divide frames into: + # - "python frames": + # - "bytecode frames" i.e. PyEval_EvalFrameEx + # - "other python frames": things that are of interest from a python + # POV, but aren't bytecode (e.g. GC, GIL) + # - everything else + + def is_python_frame(self): + '''Is this a _PyEval_EvalFrameDefault frame, or some other important + frame? (see is_other_python_frame for what "important" means in this + context)''' + if self.is_evalframe(): + return True + if self.is_other_python_frame(): + return True + return False + + def is_evalframe(self): + '''Is this a _PyEval_EvalFrameDefault frame?''' + if self._gdbframe.name() == EVALFRAME: ''' I believe we also need to filter on the inline struct frame_id.inline_depth, only regarding frames with @@ -1511,44 +1525,87 @@ class Frame(object): So we reject those with type gdb.INLINE_FRAME ''' if self._gdbframe.type() == gdb.NORMAL_FRAME: - # We have a PyEval_EvalFrameEx frame: + # We have a _PyEval_EvalFrameDefault frame: return True return False - def read_var(self, varname): - """ - read_var with respect to code blocks (gdbframe.read_var works with - respect to the most recent block) + def is_other_python_frame(self): + '''Is this frame worth displaying in python backtraces? + Examples: + - waiting on the GIL + - garbage-collecting + - within a CFunction + If it is, return a descriptive string + For other frames, return False + ''' + if self.is_waiting_for_gil(): + return 'Waiting for the GIL' + + if self.is_gc_collect(): + return 'Garbage-collecting' + + # Detect invocations of PyCFunction instances: + frame = self._gdbframe + caller = frame.name() + if not caller: + return False - Apparently this function doesn't work, though, as it seems to read - variables in other frames also sometimes. - """ - block = self._gdbframe.block() - var = None + if caller in ('_PyCFunction_FastCallDict', + '_PyCFunction_FastCallKeywords'): + arg_name = 'func' + # Within that frame: + # "func" is the local containing the PyObject* of the + # PyCFunctionObject instance + # "f" is the same value, but cast to (PyCFunctionObject*) + # "self" is the (PyObject*) of the 'self' + try: + # Use the prettyprinter for the func: + func = frame.read_var(arg_name) + return str(func) + except RuntimeError: + return 'PyCFunction invocation (unable to read %s)' % arg_name - while block and var is None: + if caller == 'wrapper_call': try: - var = self._gdbframe.read_var(varname, block) - except ValueError: - pass + func = frame.read_var('wp') + return str(func) + except RuntimeError: + return '<wrapper_call invocation>' + + # This frame isn't worth reporting: + return False - block = block.superblock + def is_waiting_for_gil(self): + '''Is this frame waiting on the GIL?''' + # This assumes the _POSIX_THREADS version of Python/ceval_gil.h: + name = self._gdbframe.name() + if name: + return 'pthread_cond_timedwait' in name - return var + def is_gc_collect(self): + '''Is this frame "collect" within the garbage-collector?''' + return self._gdbframe.name() == 'collect' def get_pyop(self): try: - # self.read_var does not always work properly, so select our frame - # and restore the previously selected frame - selected_frame = gdb.selected_frame() - self._gdbframe.select() - f = gdb.parse_and_eval('f') - selected_frame.select() - except RuntimeError: + f = self._gdbframe.read_var('f') + frame = PyFrameObjectPtr.from_pyobject_ptr(f) + if not frame.is_optimized_out(): + return frame + # gdb is unable to get the "f" argument of PyEval_EvalFrameEx() + # because it was "optimized out". Try to get "f" from the frame + # of the caller, PyEval_EvalCodeEx(). + orig_frame = frame + caller = self._gdbframe.older() + if caller: + f = caller.read_var('f') + frame = PyFrameObjectPtr.from_pyobject_ptr(f) + if not frame.is_optimized_out(): + return frame + return orig_frame + except ValueError: return None - else: - return PyFrameObjectPtr.from_pyobject_ptr(f) @classmethod def get_selected_frame(cls): @@ -1559,12 +1616,30 @@ class Frame(object): @classmethod def get_selected_python_frame(cls): - '''Try to obtain the Frame for the python code in the selected frame, - or None''' + '''Try to obtain the Frame for the python-related code in the selected + frame, or None''' + try: + frame = cls.get_selected_frame() + except gdb.error: + # No frame: Python didn't start yet + return None + + while frame: + if frame.is_python_frame(): + return frame + frame = frame.older() + + # Not found: + return None + + @classmethod + def get_selected_bytecode_frame(cls): + '''Try to obtain the Frame for the python bytecode interpreter in the + selected GDB frame, or None''' frame = cls.get_selected_frame() while frame: - if frame.is_evalframeex(): + if frame.is_evalframe(): return frame frame = frame.older() @@ -1572,17 +1647,41 @@ class Frame(object): return None def print_summary(self): - if self.is_evalframeex(): + if self.is_evalframe(): pyop = self.get_pyop() if pyop: line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) - sys.stdout.write(pyop.current_line()) + if not pyop.is_optimized_out(): + line = pyop.current_line() + if line is not None: + sys.stdout.write(' %s\n' % line.strip()) else: sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) else: - sys.stdout.write('#%i\n' % self.get_index()) + info = self.is_other_python_frame() + if info: + sys.stdout.write('#%i %s\n' % (self.get_index(), info)) + else: + sys.stdout.write('#%i\n' % self.get_index()) + def print_traceback(self): + if self.is_evalframe(): + pyop = self.get_pyop() + if pyop: + pyop.print_traceback() + if not pyop.is_optimized_out(): + line = pyop.current_line() + if line is not None: + sys.stdout.write(' %s\n' % line.strip()) + else: + sys.stdout.write(' (unable to read python frame information)\n') + else: + info = self.is_other_python_frame() + if info: + sys.stdout.write(' %s\n' % info) + else: + sys.stdout.write(' (not a python frame)\n') class PyList(gdb.Command): '''List the current Python source code, if any @@ -1602,6 +1701,7 @@ class PyList(gdb.Command): gdb.COMMAND_FILES, gdb.COMPLETE_NONE) + def invoke(self, args, from_tty): import re @@ -1617,13 +1717,14 @@ class PyList(gdb.Command): if m: start, end = map(int, m.groups()) - frame = Frame.get_selected_python_frame() + # py-list requires an actual PyEval_EvalFrameEx frame: + frame = Frame.get_selected_bytecode_frame() if not frame: - print('Unable to locate python frame') + print('Unable to locate gdb frame for python bytecode interpreter') return pyop = frame.get_pyop() - if not pyop: + if not pyop or pyop.is_optimized_out(): print('Unable to read information on python frame') return @@ -1637,7 +1738,13 @@ class PyList(gdb.Command): if start<1: start = 1 - with open(os_fsencode(filename), 'r') as f: + try: + f = open(os_fsencode(filename), 'r') + except IOError as err: + sys.stdout.write('Unable to open %s: %s\n' + % (filename, err)) + return + with f: all_lines = f.readlines() # start and end are 1-based, all_lines is 0-based; # so [start-1:end] as a python slice gives us [start, end] as a @@ -1649,13 +1756,17 @@ class PyList(gdb.Command): linestr = '>' + linestr sys.stdout.write('%4s %s' % (linestr, line)) + # ...and register the command: PyList() - def move_in_stack(move_up): '''Move up or down the stack (for the py-up/py-down command)''' frame = Frame.get_selected_python_frame() + if not frame: + print('Unable to locate python frame') + return + while frame: if move_up: iter_frame = frame.older() @@ -1665,7 +1776,7 @@ def move_in_stack(move_up): if not iter_frame: break - if iter_frame.is_evalframeex(): + if iter_frame.is_python_frame(): # Result: if iter_frame.select(): iter_frame.print_summary() @@ -1678,7 +1789,6 @@ def move_in_stack(move_up): else: print('Unable to find a newer python frame') - class PyUp(gdb.Command): 'Select and print the python stack frame that called this one (if any)' def __init__(self): @@ -1687,10 +1797,10 @@ class PyUp(gdb.Command): gdb.COMMAND_STACK, gdb.COMPLETE_NONE) + def invoke(self, args, from_tty): move_in_stack(move_up=True) - class PyDown(gdb.Command): 'Select and print the python stack frame called by this one (if any)' def __init__(self): @@ -1699,15 +1809,36 @@ class PyDown(gdb.Command): gdb.COMMAND_STACK, gdb.COMPLETE_NONE) + def invoke(self, args, from_tty): move_in_stack(move_up=False) - # Not all builds of gdb have gdb.Frame.select if hasattr(gdb.Frame, 'select'): PyUp() PyDown() +class PyBacktraceFull(gdb.Command): + 'Display the current python frame and all the frames within its call stack (if any)' + def __init__(self): + gdb.Command.__init__ (self, + "py-bt-full", + gdb.COMMAND_STACK, + gdb.COMPLETE_NONE) + + + def invoke(self, args, from_tty): + frame = Frame.get_selected_python_frame() + if not frame: + print('Unable to locate python frame') + return + + while frame: + if frame.is_python_frame(): + frame.print_summary() + frame = frame.older() + +PyBacktraceFull() class PyBacktrace(gdb.Command): 'Display the current python frame and all the frames within its call stack (if any)' @@ -1720,14 +1851,18 @@ class PyBacktrace(gdb.Command): def invoke(self, args, from_tty): frame = Frame.get_selected_python_frame() + if not frame: + print('Unable to locate python frame') + return + + sys.stdout.write('Traceback (most recent call first):\n') while frame: - if frame.is_evalframeex(): - frame.print_summary() + if frame.is_python_frame(): + frame.print_traceback() frame = frame.older() PyBacktrace() - class PyPrint(gdb.Command): 'Look up the given python variable name, and print it' def __init__(self): @@ -1736,6 +1871,7 @@ class PyPrint(gdb.Command): gdb.COMMAND_DATA, gdb.COMPLETE_NONE) + def invoke(self, args, from_tty): name = str(args) @@ -1752,16 +1888,23 @@ class PyPrint(gdb.Command): pyop_var, scope = pyop_frame.get_var_by_name(name) if pyop_var: - print('%s %r = %s' % ( - scope, name, pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) + print('%s %r = %s' + % (scope, + name, + pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) else: print('%r not found' % name) PyPrint() - class PyLocals(gdb.Command): 'Look up the given python variable name, and print it' + def __init__(self, command="py-locals"): + gdb.Command.__init__ (self, + command, + gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + def invoke(self, args, from_tty): name = str(args) @@ -1790,6 +1933,18 @@ class PyLocals(gdb.Command): def get_namespace(self, pyop_frame): return pyop_frame.iter_locals() +PyLocals() + + +################################################################## +## added, not in CPython +################################################################## + +import atexit +import warnings +import tempfile +import textwrap +import itertools class PyGlobals(PyLocals): 'List all the globals in the currently select Python frame' @@ -1798,8 +1953,7 @@ class PyGlobals(PyLocals): return pyop_frame.iter_globals() -PyLocals("py-locals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) -PyGlobals("py-globals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE) +PyGlobals("py-globals") class PyNameEquals(gdb.Function): |