summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNed Batchelder <ned@nedbatchelder.com>2018-06-28 06:58:38 -0400
committerNed Batchelder <ned@nedbatchelder.com>2018-06-28 06:58:38 -0400
commit7b9b9d68a086ed1f8b2f65071a8be038ac6755d1 (patch)
treed3af720d10e0d47002d0c73d4f9d0c81f3f3cf74
parent47350862d303618c4bac3dcbea73239c052acf74 (diff)
parent545fe91e9fb73a262bba260f3ac0dd6340f384aa (diff)
downloadpython-coveragepy-git-7b9b9d68a086ed1f8b2f65071a8be038ac6755d1.tar.gz
Merge remote-tracking branch 'DRMacIver/intern-table' into nedbat/tryout-bb32
-rw-r--r--coverage/ctracer/module.c25
-rw-r--r--coverage/ctracer/tracer.c315
-rw-r--r--coverage/ctracer/tracer.h53
-rw-r--r--coverage/ctracer/util.h2
-rw-r--r--requirements/pytest.pip1
-rw-r--r--tests/test_intern_table.py66
6 files changed, 447 insertions, 15 deletions
diff --git a/coverage/ctracer/module.c b/coverage/ctracer/module.c
index f308902b..99fe3fff 100644
--- a/coverage/ctracer/module.c
+++ b/coverage/ctracer/module.c
@@ -51,11 +51,26 @@ PyInit_tracer(void)
return NULL;
}
+ /* Initialize InternTable */
+ InternTableType.tp_new = PyType_GenericNew;
+ if (PyType_Ready(&InternTableType) < 0) {
+ Py_DECREF(mod);
+ return NULL;
+ }
+
+ Py_INCREF(&InternTableType);
+ if (PyModule_AddObject(mod, "InternTable", (PyObject *)&InternTableType) < 0) {
+ Py_DECREF(mod);
+ Py_DECREF(&InternTableType);
+ return NULL;
+ }
+
/* Initialize CFileDisposition */
CFileDispositionType.tp_new = PyType_GenericNew;
if (PyType_Ready(&CFileDispositionType) < 0) {
Py_DECREF(mod);
Py_DECREF(&CTracerType);
+ Py_DECREF(&InternTableType);
return NULL;
}
@@ -63,6 +78,7 @@ PyInit_tracer(void)
if (PyModule_AddObject(mod, "CFileDisposition", (PyObject *)&CFileDispositionType) < 0) {
Py_DECREF(mod);
Py_DECREF(&CTracerType);
+ Py_DECREF(&InternTableType);
Py_DECREF(&CFileDispositionType);
return NULL;
}
@@ -95,6 +111,15 @@ inittracer(void)
Py_INCREF(&CTracerType);
PyModule_AddObject(mod, "CTracer", (PyObject *)&CTracerType);
+ /* Initialize InternTable */
+ InternTableType.tp_new = PyType_GenericNew;
+ if (PyType_Ready(&InternTableType) < 0) {
+ return;
+ }
+
+ Py_INCREF(&InternTableType);
+ PyModule_AddObject(mod, "InternTable", (PyObject *)&InternTableType);
+
/* Initialize CFileDisposition */
CFileDispositionType.tp_new = PyType_GenericNew;
if (PyType_Ready(&CFileDispositionType) < 0) {
diff --git a/coverage/ctracer/tracer.c b/coverage/ctracer/tracer.c
index 01f8b19b..5a56b35a 100644
--- a/coverage/ctracer/tracer.c
+++ b/coverage/ctracer/tracer.c
@@ -8,6 +8,8 @@
#include "filedisp.h"
#include "tracer.h"
+#include <assert.h>
+
/* Python C API helpers. */
static int
@@ -60,6 +62,25 @@ error:
static void CTracer_disable_plugin(CTracer *self, PyObject * disposition);
+
+static void InternTable_init_1(InternTable *table){
+ table->capacity = 32;
+ table->max_fill = 21;
+ table->current_fill = 0;
+ table->zero_value = NULL;
+ table->entries = calloc(table->capacity, sizeof(InternEntry));
+}
+
+static void InternTable_dealloc_1(InternTable *table){
+ size_t j;
+ Py_XDECREF(table->zero_value);
+ for(j = 0; j < table->capacity; j++){
+ Py_XDECREF(table->entries[j].value);
+ }
+ free(table->entries);
+}
+
+
static int
CTracer_init(CTracer *self, PyObject *args_unused, PyObject *kwds_unused)
{
@@ -74,6 +95,8 @@ CTracer_init(CTracer *self, PyObject *args_unused, PyObject *kwds_unused)
self->context = Py_None;
Py_INCREF(self->context);
+ InternTable_init_1(&self->intern_table);
+
ret = RET_OK;
goto ok;
@@ -115,6 +138,7 @@ CTracer_dealloc(CTracer *self)
Py_XDECREF(self->data_stack_index);
Py_TYPE(self)->tp_free((PyObject*)self);
+ InternTable_dealloc_1(&self->intern_table);
}
#if TRACE_LOG
@@ -167,17 +191,155 @@ showlog(int depth, int lineno, PyObject * filename, const char * msg)
static const char * what_sym[] = {"CALL", "EXC ", "LINE", "RET "};
#endif
+
+// From http://www.cris.com/~Ttwang/tech/inthash.htm via
+// https://chromium.googlesource.com/chromium/blink/+/master/Source/wtf/HashFunctions.h
+static PY_LONG_LONG hash64(PY_LONG_LONG key){
+ key += ~(key << 32);
+ key ^= (key >> 22);
+ key += ~(key << 13);
+ key ^= (key >> 8);
+ key += (key << 3);
+ key ^= (key >> 15);
+ key += ~(key << 27);
+ key ^= (key >> 31);
+ return key;
+}
+
+/*
+ Needs to be predeclared because this function is mutually recursive with
+ InternTable_lookup.
+*/
+static PyObject **
+InternTable_lookup(InternTable *table, PY_LONG_LONG key);
+
+/*
+ Core implementation of the InternTable hash table.
+
+ This attempts to look up a key at a particular point in the table. If the key
+ is present there, it returns the corresponding value pointer. If another key
+ is present there, it returns NULL. If no key is present there, it inserts the
+ key (mapping to NULL) at that point, possible grows the hash table, and
+ returns a value pointer for the key.
+
+*/
+static PyObject ** InternEntry_matches(
+ InternTable *table, size_t location, PY_LONG_LONG key
+){
+ size_t index = location & (table->capacity - 1);
+ InternEntry *entry = table->entries + index;
+ size_t i;
+ if(!entry->key){
+ entry->key = key;
+ table->current_fill += 1;
+ if(table->current_fill > table->max_fill){
+ size_t old_capacity = table->capacity;
+ size_t old_fill = table->current_fill;
+ InternEntry *old_entries = table->entries;
+ table->capacity *= 2;
+ table->max_fill *= 2;
+ table->current_fill = 0;
+ table->entries = calloc(table->capacity, sizeof(InternEntry));
+
+ /* Copy the existing entries to the new table. We start by copying
+ entries that can be successfully looked up without hashing
+ because they're at the right point in the table, then we copy
+ the rest. This ensures that table lookups should get faster as
+ we grow, because the number of keys we can look up without
+ hashing cannot decrease and will often increase. */
+
+ for(i = 0; i < old_capacity; i++){
+ InternEntry *prev = old_entries + i;
+ if(!prev->key) continue;
+ if((prev->key & (old_capacity - 1)) == i){
+ *InternTable_lookup(table, prev->key) = prev->value;
+ }
+ }
+ for(i = 0; i < old_capacity; i++){
+ InternEntry *prev = old_entries + i;
+ if(!prev->key) continue;
+ if((prev->key & (old_capacity - 1)) != i){
+ *InternTable_lookup(table, prev->key) = prev->value;
+ }
+ }
+ free(old_entries);
+ assert(table->current_fill == old_fill); (void)(old_fill);
+
+ /* The shape of the table has changed and the caller doesn't know
+ about that, so we just recurse and look up the key in the new
+ shape. This recursion is always sharply bounded, because we've
+ ensured that the key is in the table already so this call cannot
+ cause the table size to grow further. */
+ return InternTable_lookup(table, key);
+ } else {
+ return &entry->value;
+ }
+ }
+ if(entry->key == key){
+ return &entry->value;
+ }
+ return NULL;
+}
+
+/*
+Look up a key in the table, returning a pointer to a PyObject * where the
+corresponding object will be stored. This will always return non-null, but for
+a freshly created key it will be a pointer to null. The caller should then
+populate the cell with the newly created object.
+*/
+static PyObject **
+InternTable_lookup(InternTable *table, PY_LONG_LONG key){
+ size_t i;
+ PyObject ** initial_attempt;
+ size_t probe;
+
+ if(key == 0){
+ return &(table->zero_value);
+ }
+ initial_attempt = InternEntry_matches(table, key, key);
+ if(initial_attempt != NULL){
+ return initial_attempt;
+ }
+ probe = hash64(key) & (table->capacity - 1);
+ for(i = 0; i < table->capacity; i++){
+ PyObject ** attempt = InternEntry_matches(table, probe + i, key);
+ if(attempt != NULL){
+ return attempt;
+ }
+ }
+ assert(0);
+ return NULL;
+}
+
/* Record a pair of integers in self->pcur_entry->file_data. */
static int
CTracer_record_pair(CTracer *self, int l1, int l2)
{
int ret = RET_ERROR;
- PyObject * t = NULL;
+ /*
+ We combine the two int values into a PY_LONG_LONG in a slightly odd way: Rather
+ than just concatenate them, we xor them together for the low bits. The
+ reason for this is that it allows us to trigger our no-hash lookup in the
+ table more often, because it ensures a reasonable range of diversity in the
+ low bits. We can recreate the original key as u1 = key >> 32,
+ u2 = ((uint32_t)u1) ^ ((uint32_t)key), so this still maps the tuple to a
+ unique key.
+ */
+ PY_LONG_LONG u1 = (PY_UINT32_T)l1;
+ PY_LONG_LONG u2 = (PY_UINT32_T)l2;
+ PY_LONG_LONG key = (u1 << 32) | (u1 ^ u2);
- t = Py_BuildValue("(ii)", l1, l2);
- if (t == NULL) {
- goto error;
+ PyObject ** container = InternTable_lookup(
+ &self->intern_table, key);
+
+ PyObject *t = *container;
+ if(t == NULL){
+ t = Py_BuildValue("(ii)", l1, l2);
+ if (t == NULL) {
+ goto error;
+ }
+ *container = t;
}
if (PyDict_SetItem(self->pcur_entry->file_data, t, Py_None) < 0) {
@@ -185,13 +347,31 @@ CTracer_record_pair(CTracer *self, int l1, int l2)
}
ret = RET_OK;
-
error:
- Py_XDECREF(t);
-
return ret;
}
+
+/* Record a single integer in self->pcur_entry->file_data. */
+static int
+CTracer_record_int(CTracer *self, int lineno_from){
+
+ PyObject ** container = InternTable_lookup(
+ &self->intern_table, (PY_LONG_LONG)lineno_from);
+
+ PyObject * this_line = *container;
+ if (this_line == NULL) {
+ this_line = MyInt_FromInt(lineno_from);
+ *container = this_line;
+ }
+
+ if (this_line == NULL) {
+ return RET_ERROR;
+ }
+
+ return PyDict_SetItem(self->pcur_entry->file_data, this_line, Py_None);
+}
+
/* Set self->pdata_stack to the proper data_stack to use. */
static int
CTracer_set_pdata_stack(CTracer *self)
@@ -708,14 +888,7 @@ CTracer_handle_line(CTracer *self, PyFrameObject *frame)
}
}
else {
- /* Tracing lines: key is simply this_line. */
- PyObject * this_line = MyInt_FromInt(lineno_from);
- if (this_line == NULL) {
- goto error;
- }
-
- ret2 = PyDict_SetItem(self->pcur_entry->file_data, this_line, Py_None);
- Py_DECREF(this_line);
+ ret2 = CTracer_record_int(self, lineno_from);
if (ret2 < 0) {
goto error;
}
@@ -1183,3 +1356,115 @@ CTracerType = {
0, /* tp_alloc */
0, /* tp_new */
};
+
+static Py_ssize_t InternTable_length(InternTableObject *self)
+{
+ Py_ssize_t result = self->table.current_fill;
+ if(self->table.zero_value) result++;
+ return result;
+}
+
+static int
+InternTable_init(InternTableObject *self, PyObject *args_unused, PyObject *kwds_unused)
+{
+ InternTable_init_1(&self->table);
+ return RET_OK;
+}
+
+static int
+InternTable_dealloc(InternTableObject *self)
+{
+ InternTable_dealloc_1(&self->table);
+ return RET_OK;
+}
+
+static PyObject *InternTable_getitem(InternTableObject *self, PyObject *key)
+{
+ PY_LONG_LONG int_key;
+ PyObject **result;
+
+ assert(self->table.entries);
+ PyErr_Clear();
+ int_key = MyInt_AsLongLong(key);
+ if(PyErr_Occurred()){
+ return NULL;
+ }
+
+ result = InternTable_lookup(&self->table, int_key);
+ if(*result == NULL){
+ return Py_None;
+ }
+ Py_INCREF(*result);
+ return *result;
+}
+
+
+static int InternTable_setitem(InternTableObject *self, PyObject *key, PyObject *value)
+{
+ PY_LONG_LONG int_key;
+ PyObject **result;
+
+ assert(self->table.entries);
+
+ PyErr_Clear();
+ int_key = MyInt_AsLongLong(key);
+ if(PyErr_Occurred()) return RET_ERROR;
+
+ result = InternTable_lookup(&self->table, int_key);
+ if(*result != NULL){
+ Py_XDECREF(*result);
+ }
+ if(value != NULL){
+ Py_INCREF(value);
+ }
+ *result = value;
+ return RET_OK;
+}
+
+static PyMappingMethods InternTable_as_mapping = {
+ (lenfunc)InternTable_length, /*mp_length*/
+ (binaryfunc)InternTable_getitem, /*mp_subscript*/
+ (objobjargproc)InternTable_setitem, /*mp_ass_subscript*/
+};
+
+PyTypeObject
+InternTableType = {
+ MyType_HEAD_INIT
+ "coverage.InternTable", /*tp_name*/
+ sizeof(InternTableObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor)InternTable_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ &InternTable_as_mapping, /*tp_as_mapping*/
+ 0, /*tp_hash */
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+ "", /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)InternTable_init,/* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+};
diff --git a/coverage/ctracer/tracer.h b/coverage/ctracer/tracer.h
index 61c01b41..b43ecfad 100644
--- a/coverage/ctracer/tracer.h
+++ b/coverage/ctracer/tracer.h
@@ -11,6 +11,56 @@
#include "datastack.h"
+
+/*
+We use normal Python objects as keys for writing into the file_data
+dictionaries.
+
+This ends up creating the same duplicate objects over and over again inside the
+hot loop of the trace function, which is not ideal!
+
+In order to avoid that, we intern our keys global to the tracer. This means
+that in the common case where the tracer has already seen the key somewhere we
+don't need to allocate a new one. This can significantly speed up tracing.
+*/
+typedef struct InternEntry {
+ PY_LONG_LONG key;
+ PyObject *value;
+} InternEntry;
+
+typedef struct InternTable {
+ /* Store the value keyed off zero separately. This allows us to use a key
+ of zero as a not-set indicator. */
+ PyObject * zero_value;
+
+ /* The number of elements in our entries array (including absent elements).
+ Always a power of two. */
+ size_t capacity;
+
+ /* The number of entries which have a key set. Always strictly less than
+ capacity. Does not count the zero key. */
+ size_t current_fill;
+
+ /* When the fill exceeds this value, increase the capacity. Always roughly
+ the same fraction of capacity. */
+ size_t max_fill;
+
+ /* Essentially (key, value) tuples where keys are PY_LONG_LONG and values are
+ PyObject *. Values are owned by the tracer and will have their refcount
+ decremented appropriately on release.*/
+ InternEntry * entries;
+} InternTable;
+
+
+/* We expose a Python interface to the InternTable, but it's purely intended for
+ purposes of testing the InternTable itself and you shouldn't use it for anything
+ else.
+*/
+typedef struct InternTableObject{
+ PyObject_HEAD
+ InternTable table;
+} InternTableObject;
+
/* The CTracer type. */
typedef struct CTracer {
@@ -64,10 +114,13 @@ typedef struct CTracer {
int last_exc_firstlineno;
Stats stats;
+
+ InternTable intern_table;
} CTracer;
int CTracer_intern_strings(void);
extern PyTypeObject CTracerType;
+extern PyTypeObject InternTableType;
#endif /* _COVERAGE_TRACER_H */
diff --git a/coverage/ctracer/util.h b/coverage/ctracer/util.h
index 96d2e51c..5c348c2d 100644
--- a/coverage/ctracer/util.h
+++ b/coverage/ctracer/util.h
@@ -24,6 +24,7 @@
#define MyText_FromFormat PyUnicode_FromFormat
#define MyInt_FromInt(i) PyLong_FromLong((long)i)
#define MyInt_AsInt(o) (int)PyLong_AsLong(o)
+#define MyInt_AsLongLong(o) PyLong_AsUnsignedLongLong(o)
#define MyText_InternFromString(s) PyUnicode_InternFromString(s)
#define MyType_HEAD_INIT PyVarObject_HEAD_INIT(NULL, 0)
@@ -37,6 +38,7 @@
#define MyText_AsString(o) PyString_AsString(o)
#define MyText_FromFormat PyUnicode_FromFormat
#define MyInt_FromInt(i) PyInt_FromLong((long)i)
+#define MyInt_AsLongLong(o) PyInt_AsUnsignedLongLongMask(o)
#define MyInt_AsInt(o) (int)PyInt_AsLong(o)
#define MyText_InternFromString(s) PyString_InternFromString(s)
diff --git a/requirements/pytest.pip b/requirements/pytest.pip
index 19a5ccd4..d9ce3427 100644
--- a/requirements/pytest.pip
+++ b/requirements/pytest.pip
@@ -6,3 +6,4 @@
pytest==3.6.0
pytest-xdist==1.22.2
flaky==3.4.0
+hypothesis==3.30.3
diff --git a/tests/test_intern_table.py b/tests/test_intern_table.py
new file mode 100644
index 00000000..6e438782
--- /dev/null
+++ b/tests/test_intern_table.py
@@ -0,0 +1,66 @@
+import sys
+
+from tests.coveragetest import CoverageTest
+
+from hypothesis import given, example
+import hypothesis.strategies as st
+import pytest
+
+from coverage import env
+
+if env.C_TRACER:
+ from coverage.tracer import InternTable
+
+
+@pytest.mark.skipif(not env.C_TRACER, reason="Only the C tracer has refcounting issues")
+class TestInternTable(CoverageTest):
+
+ run_in_temp_dir = False
+
+ @example([0])
+ @given(st.lists(st.integers(0, 2 ** 64 - 1), unique=True))
+ def test_interns_as_none_by_default(self, ls):
+ table = InternTable()
+ for i in ls:
+ assert table[i] is None
+
+ @example(list(range(23)))
+ @example([0])
+ @given(st.one_of(
+ st.integers(0, 1000).map(lambda i: list(range(i))),
+ st.lists(st.integers(0, 2 ** 64 - 1), unique=True),
+ ))
+ def test_can_intern_integers_as_themselves(self, ls):
+ table = InternTable()
+ for i in ls:
+ table[i] = i
+ assert table[i] is i
+ for i in ls:
+ assert table[i] == i
+ assert len(table) == len(ls)
+
+ @given(
+ st.lists(st.tuples(st.integers(0, 2**64 - 1)), unique=True).map(
+ lambda ls: list(map(list, ls))
+ ),
+ )
+ def test_maintains_correct_ref_count(self, ls):
+ def refs():
+ return list(map(sys.getrefcount, ls))
+
+ initial_refs = refs()
+ assert refs() == initial_refs
+
+ table = InternTable()
+
+ for i in range(len(ls)):
+ r = initial_refs[i]
+ assert sys.getrefcount(ls[i]) == r + 1
+ x = ls[i][0]
+ table[x] = ls[i]
+ assert sys.getrefcount(ls[i]) == r + 2
+ table[x] = ls[i]
+ assert sys.getrefcount(ls[i]) == r + 2
+
+ del table
+ assert refs() == initial_refs