summary | refs | log | tree | commit | diff
path: root/coverage
diff options
context:
space:
mode:
Diffstat (limited to 'coverage')
-rw-r--r-- coverage/numbits.py 144
-rw-r--r-- coverage/sqldata.py 75
2 files changed, 155 insertions, 64 deletions
diff --git a/coverage/numbits.py b/coverage/numbits.py
index abe40f41..4b340c8e 100644
--- a/coverage/numbits.py
+++ b/coverage/numbits.py
@@ -5,29 +5,62 @@
Functions to manipulate packed binary representations of number sets.
To save space, coverage stores sets of line numbers in SQLite using a packed
-binary representation called a numbits. A numbits is stored as a blob in the
-database. The exact meaning of the bytes in the blobs should be considered an
-implementation detail that might change in the future. Use these functions to
-work with those binary blobs of data.
+binary representation called a numbits. A numbits is a set of positive
+integers.
+
+A numbits is stored as a blob in the database. The exact meaning of the bytes
+in the blobs should be considered an implementation detail that might change in
+the future. Use these functions to work with those binary blobs of data.
"""
+from coverage import env
from coverage.backward import byte_to_int, bytes_to_ints, binary_bytes, zip_longest
-from coverage.misc import contract
+from coverage.misc import contract, new_contract
+
+if env.PY3:
+ def _to_blob(b):
+ """Convert a bytestring into a type SQLite will accept for a blob."""
+ return b
+ new_contract('blob', lambda v: isinstance(v, bytes))
+else:
+ def _to_blob(b):
+ """Convert a bytestring into a type SQLite will accept for a blob."""
+ return buffer(b) # pylint: disable=undefined-variable
-@contract(nums='Iterable', returns='bytes')
+ new_contract('blob', lambda v: isinstance(v, buffer)) # pylint: disable=undefined-variable
+
+@contract(nums='Iterable', returns='blob')
def nums_to_numbits(nums):
- """Convert `nums` (a non-empty iterable of ints) into a numbits."""
- nbytes = max(nums) // 8 + 1
+ """Convert `nums` into a numbits.
+
+ Arguments:
+ nums (a reusable iterable of integers): the line numbers to store.
+
+ Returns:
+ A binary blob.
+ """
+ try:
+ nbytes = max(nums) // 8 + 1
+ except ValueError:
+ # nums was empty.
+ return _to_blob(b'')
b = bytearray(nbytes)
for num in nums:
b[num//8] |= 1 << num % 8
- return bytes(b)
+ return _to_blob(bytes(b))
-@contract(numbits='bytes', returns='list[int]')
+@contract(numbits='blob', returns='list[int]')
def numbits_to_nums(numbits):
- """Convert a numbits into a list of numbers."""
+ """Convert a numbits into a list of numbers.
+
+ Arguments:
+ numbits (a binary blob): the packed number set.
+
+ Returns:
+ A list of integers.
+ """
nums = []
for byte_i, byte in enumerate(bytes_to_ints(numbits)):
for bit_i in range(8):
@@ -35,22 +68,95 @@ def numbits_to_nums(numbits):
nums.append(byte_i * 8 + bit_i)
return nums
-@contract(numbits1='bytes', numbits2='bytes', returns='bytes')
-def merge_numbits(numbits1, numbits2):
- """Merge two numbits"""
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_union(numbits1, numbits2):
+ """Compute the union of two numbits.
+
+ Arguments:
+ numbits1, numbits2: packed number sets.
+
+ Returns:
+ A new numbits, the union of the two number sets.
+ """
+ byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
+ return _to_blob(binary_bytes(b1 | b2 for b1, b2 in byte_pairs))
+
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_intersection(numbits1, numbits2):
+ """Compute the intersection of two numbits.
+
+ Arguments:
+ numbits1, numbits2: packed number sets.
+
+ Returns:
+ A new numbits, the intersection of the two number sets.
+ """
byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
- return binary_bytes(b1 | b2 for b1, b2 in byte_pairs)
+ intersection_bytes = binary_bytes(b1 & b2 for b1, b2 in byte_pairs)
+ return _to_blob(intersection_bytes.rstrip(b'\0'))
-@contract(numbits1='bytes', numbits2='bytes', returns='bool')
+@contract(numbits1='blob', numbits2='blob', returns='bool')
def numbits_any_intersection(numbits1, numbits2):
- """Is there any number that appears in both numbits?"""
+ """Is there any number that appears in both numbits?
+
+ Determine whether two number sets have a non-empty intersection. This is
+ faster than computing the intersection.
+
+ Arguments:
+ numbits1, numbits2: packed number sets.
+
+ Returns:
+ A boolean, true if there is any number in both of the number sets.
+ """
byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
return any(b1 & b2 for b1, b2 in byte_pairs)
-@contract(num='int', numbits='bytes', returns='bool')
+@contract(num='int', numbits='blob', returns='bool')
def num_in_numbits(num, numbits):
- """Does the integer `num` appear in `numbits`?"""
+ """Does the integer `num` appear in `numbits`?
+
+ Arguments:
+ num (integer)
+
+ numbits (binary blob)
+
+ Returns:
+ A boolean, true if `num` is a member of `numbits`.
+ """
nbyte, nbit = divmod(num, 8)
if nbyte >= len(numbits):
return False
return bool(byte_to_int(numbits[nbyte]) & (1 << nbit))
+
+def register_sqlite_functions(connection):
+ """
+ Define numbits functions in a SQLite connection.
+
+ This defines these functions for use in SQLite statements:
+
+ * :func:`numbits_union`
+ * :func:`numbits_intersection`
+ * :func:`numbits_any_intersection`
+ * :func:`num_in_numbits`
+
+ `connection` is a :class:`sqlite3.Connection <python:sqlite3.Connection>`
+ object. After creating the connection, pass it to this function to
+ register the numbits functions. Then you can use numbits functions in your
+ queries::
+
+ import sqlite3
+ from coverage.numbits import register_sqlite_functions
+
+ conn = sqlite3.connect('example.db')
+ register_sqlite_functions(conn)
+ c = conn.cursor()
+ c.execute(
+ "select lb.file_id, lb.context_id from line_bits lb "
+ "where num_in_numbits(?, lb.numbits)",
+ (interesting_line_number,)
+ )
+ """
+ connection.create_function("numbits_union", 2, numbits_union)
+ connection.create_function("numbits_intersection", 2, numbits_intersection)
+ connection.create_function("numbits_any_intersection", 2, numbits_any_intersection)
+ connection.create_function("num_in_numbits", 2, num_in_numbits)
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 3ee34f0f..5e7edd72 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -20,16 +20,18 @@ import zlib
from coverage.backward import get_thread_id, iitems, to_bytes, to_string
from coverage.debug import NoDebugging, SimpleReprMixin
-from coverage import env
from coverage.files import PathAliases
from coverage.misc import CoverageException, file_be_gone, filename_suffix, isolate_module
from coverage.misc import contract
-from coverage.numbits import nums_to_numbits, numbits_to_nums, merge_numbits
+from coverage.numbits import nums_to_numbits, numbits_to_nums, numbits_union
from coverage.version import __version__
os = isolate_module(os)
-SCHEMA_VERSION = 6
+# If you change the schema, increment the SCHEMA_VERSION, and update the
+# docs in docs/dbschema.rst also.
+
+SCHEMA_VERSION = 7
# Schema versions:
# 1: Released in 5.0a2
@@ -38,6 +40,7 @@ SCHEMA_VERSION = 6
# 4: Changed line_map.bitmap to line_map.numbits.
# 5: Added foreign key declarations.
# 6: Key-value in meta.
+# 7: line_map -> line_bits
SCHEMA = """
CREATE TABLE coverage_schema (
@@ -71,8 +74,9 @@ CREATE TABLE context (
unique (context)
);
-CREATE TABLE line_map (
- -- If recording lines, a row per context per line executed.
+CREATE TABLE line_bits (
+ -- If recording lines, a row per context per file executed.
+ -- All of the line numbers for that file/context are in one numbits.
file_id integer, -- foreign key to `file`.
context_id integer, -- foreign key to `context`.
numbits blob, -- see the numbits functions in coverage.numbits
@@ -100,24 +104,6 @@ CREATE TABLE tracer (
);
"""
-if env.PY2:
- def to_blob(b):
- """Convert a bytestring into a type SQLite will accept for a blob."""
- return buffer(b) # pylint: disable=undefined-variable
-
- def from_blob(blob):
- """Convert a blob read from SQLite into a bytestring."""
- return bytes(blob)
-else:
- def to_blob(b):
- """Convert a bytestring into a type SQLite will accept for a blob."""
- return b
-
- def from_blob(blob):
- """Convert a blob read from SQLite into a bytestring."""
- return blob
-
-
class CoverageData(SimpleReprMixin):
"""Manages collected coverage data, including file storage.
@@ -386,15 +372,15 @@ class CoverageData(SimpleReprMixin):
for filename, linenos in iitems(line_data):
linemap = nums_to_numbits(linenos)
file_id = self._file_id(filename, add=True)
- query = "select numbits from line_map where file_id = ? and context_id = ?"
+ query = "select numbits from line_bits where file_id = ? and context_id = ?"
existing = list(con.execute(query, (file_id, self._current_context_id)))
if existing:
- linemap = merge_numbits(linemap, from_blob(existing[0][0]))
+ linemap = numbits_union(linemap, existing[0][0])
con.execute(
- "insert or replace into line_map "
+ "insert or replace into line_bits "
" (file_id, context_id, numbits) values (?, ?, ?)",
- (file_id, self._current_context_id, to_blob(linemap)),
+ (file_id, self._current_context_id, linemap),
)
def add_arcs(self, arc_data):
@@ -530,13 +516,13 @@ class CoverageData(SimpleReprMixin):
# Get line data.
cur = conn.execute(
- 'select file.path, context.context, line_map.numbits '
- 'from line_map '
- 'inner join file on file.id = line_map.file_id '
- 'inner join context on context.id = line_map.context_id'
+ 'select file.path, context.context, line_bits.numbits '
+ 'from line_bits '
+ 'inner join file on file.id = line_bits.file_id '
+ 'inner join context on context.id = line_bits.context_id'
)
lines = {
- (files[path], context): from_blob(numbits)
+ (files[path], context): numbits
for (path, context, numbits) in cur
}
cur.close()
@@ -610,16 +596,15 @@ class CoverageData(SimpleReprMixin):
# Get line data.
cur = conn.execute(
- 'select file.path, context.context, line_map.numbits '
- 'from line_map '
- 'inner join file on file.id = line_map.file_id '
- 'inner join context on context.id = line_map.context_id'
+ 'select file.path, context.context, line_bits.numbits '
+ 'from line_bits '
+ 'inner join file on file.id = line_bits.file_id '
+ 'inner join context on context.id = line_bits.context_id'
)
for path, context, numbits in cur:
key = (aliases.map(path), context)
- numbits = from_blob(numbits)
if key in lines:
- numbits = merge_numbits(lines[key], numbits)
+ numbits = numbits_union(lines[key], numbits)
lines[key] = numbits
cur.close()
@@ -631,12 +616,12 @@ class CoverageData(SimpleReprMixin):
'(file_id, context_id, fromno, tono) values (?, ?, ?, ?)',
arc_rows
)
- conn.execute("delete from line_map")
+ conn.execute("delete from line_bits")
conn.executemany(
- "insert into line_map "
+ "insert into line_bits "
"(file_id, context_id, numbits) values (?, ?, ?)",
[
- (file_ids[file], context_ids[context], to_blob(numbits))
+ (file_ids[file], context_ids[context], numbits)
for (file, context), numbits in lines.items()
]
)
@@ -756,7 +741,7 @@ class CoverageData(SimpleReprMixin):
if file_id is None:
return None
else:
- query = "select numbits from line_map where file_id = ?"
+ query = "select numbits from line_bits where file_id = ?"
data = [file_id]
context_ids = self._get_query_context_ids(contexts)
if context_ids is not None:
@@ -766,7 +751,7 @@ class CoverageData(SimpleReprMixin):
bitmaps = list(con.execute(query, data))
nums = set()
for row in bitmaps:
- nums.update(numbits_to_nums(from_blob(row[0])))
+ nums.update(numbits_to_nums(row[0]))
return sorted(nums)
def arcs(self, filename, contexts=None):
@@ -812,7 +797,7 @@ class CoverageData(SimpleReprMixin):
lineno_contexts_map[tono].append(context)
else:
query = (
- "select l.numbits, c.context from line_map l, context c "
+ "select l.numbits, c.context from line_bits l, context c "
"where l.context_id = c.id "
"and file_id = ?"
)
@@ -823,7 +808,7 @@ class CoverageData(SimpleReprMixin):
query += " and l.context_id in (" + ids_array + ")"
data += context_ids
for numbits, context in con.execute(query, data):
- for lineno in numbits_to_nums(from_blob(numbits)):
+ for lineno in numbits_to_nums(numbits):
lineno_contexts_map[lineno].append(context)
return lineno_contexts_map