2 files changed, 155 insertions, 64 deletions
diff --git a/coverage/numbits.py b/coverage/numbits.py
index abe40f41..4b340c8e 100644
--- a/coverage/numbits.py
+++ b/coverage/numbits.py
@@ -5,29 +5,62 @@
 Functions to manipulate packed binary representations of number sets.
 
 To save space, coverage stores sets of line numbers in SQLite using a packed
-binary representation called a numbits.  A numbits is stored as a blob in the
-database.  The exact meaning of the bytes in the blobs should be considered an
-implementation detail that might change in the future.  Use these functions to
-work with those binary blobs of data.
+binary representation called a numbits.  A numbits is a set of positive
+integers.
+
+A numbits is stored as a blob in the database.  The exact meaning of the bytes
+in the blobs should be considered an implementation detail that might change in
+the future.  Use these functions to work with those binary blobs of data.
 
 """
 
+from coverage import env
 from coverage.backward import byte_to_int, bytes_to_ints, binary_bytes, zip_longest
-from coverage.misc import contract
+from coverage.misc import contract, new_contract
+
+if env.PY3:
+    def _to_blob(b):
+        """Convert a bytestring into a type SQLite will accept for a blob."""
+        return b
 
+    new_contract('blob', lambda v: isinstance(v, bytes))
+else:
+    def _to_blob(b):
+        """Convert a bytestring into a type SQLite will accept for a blob."""
+        return buffer(b)                                    # pylint: disable=undefined-variable
 
-@contract(nums='Iterable', returns='bytes')
+    new_contract('blob', lambda v: isinstance(v, buffer))   # pylint: disable=undefined-variable
+
+@contract(nums='Iterable', returns='blob')
 def nums_to_numbits(nums):
-    """Convert `nums` (a non-empty iterable of ints) into a numbits."""
-    nbytes = max(nums) // 8 + 1
+    """Convert `nums` into a numbits.
+
+    Arguments:
+        nums (an iterable of integers, safe to iterate twice): the line numbers to store.
+
+    Returns:
+        A binary blob.
+    """
+    try:
+        nbytes = max(nums) // 8 + 1
+    except ValueError:
+        # nums was empty.
+        return _to_blob(b'')
     b = bytearray(nbytes)
     for num in nums:
         b[num//8] |= 1 << num % 8
-    return bytes(b)
+    return _to_blob(bytes(b))
 
-@contract(numbits='bytes', returns='list[int]')
+@contract(numbits='blob', returns='list[int]')
 def numbits_to_nums(numbits):
-    """Convert a numbits into a list of numbers."""
+    """Convert a numbits into a list of numbers.
+
+    Arguments:
+        numbits (a binary blob): the packed number set.
+
+    Returns:
+        A list of integers.
+    """
     nums = []
     for byte_i, byte in enumerate(bytes_to_ints(numbits)):
         for bit_i in range(8):
@@ -35,22 +68,95 @@ def numbits_to_nums(numbits):
                 nums.append(byte_i * 8 + bit_i)
     return nums
 
-@contract(numbits1='bytes', numbits2='bytes', returns='bytes')
-def merge_numbits(numbits1, numbits2):
-    """Merge two numbits"""
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_union(numbits1, numbits2):
+    """Compute the union of two numbits.
+
+    Arguments:
+        numbits1, numbits2: packed number sets.
+
+    Returns:
+        A new numbits, the union of the two number sets.
+    """
+    byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
+    return _to_blob(binary_bytes(b1 | b2 for b1, b2 in byte_pairs))
+
+@contract(numbits1='blob', numbits2='blob', returns='blob')
+def numbits_intersection(numbits1, numbits2):
+    """Compute the intersection of two numbits.
+
+    Arguments:
+        numbits1, numbits2: packed number sets.
+
+    Returns:
+        A new numbits, the intersection of the two number sets.
+    """
     byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
-    return binary_bytes(b1 | b2 for b1, b2 in byte_pairs)
+    intersection_bytes = binary_bytes(b1 & b2 for b1, b2 in byte_pairs)
+    return _to_blob(intersection_bytes.rstrip(b'\0'))
 
-@contract(numbits1='bytes', numbits2='bytes', returns='bool')
+@contract(numbits1='blob', numbits2='blob', returns='bool')
 def numbits_any_intersection(numbits1, numbits2):
-    """Is there any number that appears in both numbits?"""
+    """Is there any number that appears in both numbits?
+
+    Determine whether two number sets have a non-empty intersection. This is
+    faster than computing the intersection.
+
+    Arguments:
+        numbits1, numbits2: packed number sets.
+
+    Returns:
+        A boolean, true if there is any number in both of the number sets.
+    """
     byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0)
     return any(b1 & b2 for b1, b2 in byte_pairs)
 
-@contract(num='int', numbits='bytes', returns='bool')
+@contract(num='int', numbits='blob', returns='bool')
 def num_in_numbits(num, numbits):
-    """Does the integer `num` appear in `numbits`?"""
+    """Does the integer `num` appear in `numbits`?
+
+    Arguments:
+        num (integer): the number to look for.
+
+        numbits (binary blob): the packed number set to search.
+
+    Returns:
+        A boolean, true if `num` is a member of `numbits`.
+    """
     nbyte, nbit = divmod(num, 8)
     if nbyte >= len(numbits):
         return False
     return bool(byte_to_int(numbits[nbyte]) & (1 << nbit))
+
+def register_sqlite_functions(connection):
+    """
+    Define numbits functions in a SQLite connection.
+
+    This defines these functions for use in SQLite statements:
+
+    * :func:`numbits_union`
+    * :func:`numbits_intersection`
+    * :func:`numbits_any_intersection`
+    * :func:`num_in_numbits`
+
+    `connection` is a :class:`sqlite3.Connection <python:sqlite3.Connection>`
+    object.  After creating the connection, pass it to this function to
+    register the numbits functions.  Then you can use numbits functions in your
+    queries::
+
+        import sqlite3
+        from coverage.numbits import register_sqlite_functions
+
+        conn = sqlite3.connect('example.db')
+        register_sqlite_functions(conn)
+        c = conn.cursor()
+        c.execute(
+            "select lb.file_id, lb.context_id from line_bits lb "
+            "where num_in_numbits(?, lb.numbits)",
+            (interesting_line_number,)
+        )
+    """
+    connection.create_function("numbits_union", 2, numbits_union)
+    connection.create_function("numbits_intersection", 2, numbits_intersection)
+    connection.create_function("numbits_any_intersection", 2, numbits_any_intersection)
+    connection.create_function("num_in_numbits", 2, num_in_numbits)
diff --git a/coverage/sqldata.py b/coverage/sqldata.py
index 3ee34f0f..5e7edd72 100644
--- a/coverage/sqldata.py
+++ b/coverage/sqldata.py
@@ -20,16 +20,18 @@ import zlib
 
 from coverage.backward import get_thread_id, iitems, to_bytes, to_string
 from coverage.debug import NoDebugging, SimpleReprMixin
-from coverage import env
 from coverage.files import PathAliases
 from coverage.misc import CoverageException, file_be_gone, filename_suffix, isolate_module
 from coverage.misc import contract
-from coverage.numbits import nums_to_numbits, numbits_to_nums, merge_numbits
+from coverage.numbits import nums_to_numbits, numbits_to_nums, numbits_union
 from coverage.version import __version__
 
 os = isolate_module(os)
 
-SCHEMA_VERSION = 6
+# If you change the schema, increment the SCHEMA_VERSION, and update the
+# docs in docs/dbschema.rst also.
+
+SCHEMA_VERSION = 7
 
 # Schema versions:
 # 1: Released in 5.0a2
@@ -38,6 +40,7 @@ SCHEMA_VERSION = 6
 # 4: Changed line_map.bitmap to line_map.numbits.
 # 5: Added foreign key declarations.
 # 6: Key-value in meta.
+# 7: Renamed the line_map table to line_bits.
 
 SCHEMA = """
 CREATE TABLE coverage_schema (
@@ -71,8 +74,9 @@ CREATE TABLE context (
     unique (context)
 );
 
-CREATE TABLE line_map (
-    -- If recording lines, a row per context per line executed.
+CREATE TABLE line_bits (
+    -- If recording lines, a row per context per file executed.
+    -- All of the line numbers for that file/context are in one numbits.
     file_id integer,            -- foreign key to `file`.
     context_id integer,         -- foreign key to `context`.
     numbits blob,               -- see the numbits functions in coverage.numbits
@@ -100,24 +104,6 @@ CREATE TABLE tracer (
 );
 """
 
-if env.PY2:
-    def to_blob(b):
-        """Convert a bytestring into a type SQLite will accept for a blob."""
-        return buffer(b)        # pylint: disable=undefined-variable
-
-    def from_blob(blob):
-        """Convert a blob read from SQLite into a bytestring."""
-        return bytes(blob)
-else:
-    def to_blob(b):
-        """Convert a bytestring into a type SQLite will accept for a blob."""
-        return b
-
-    def from_blob(blob):
-        """Convert a blob read from SQLite into a bytestring."""
-        return blob
-
-
 class CoverageData(SimpleReprMixin):
     """Manages collected coverage data, including file storage.
 
@@ -386,15 +372,15 @@ class CoverageData(SimpleReprMixin):
             for filename, linenos in iitems(line_data):
                 linemap = nums_to_numbits(linenos)
                 file_id = self._file_id(filename, add=True)
-                query = "select numbits from line_map where file_id = ? and context_id = ?"
+                query = "select numbits from line_bits where file_id = ? and context_id = ?"
                 existing = list(con.execute(query, (file_id, self._current_context_id)))
                 if existing:
-                    linemap = merge_numbits(linemap, from_blob(existing[0][0]))
+                    linemap = numbits_union(linemap, existing[0][0])
 
                 con.execute(
-                    "insert or replace into line_map "
+                    "insert or replace into line_bits "
                     " (file_id, context_id, numbits) values (?, ?, ?)",
-                    (file_id, self._current_context_id, to_blob(linemap)),
+                    (file_id, self._current_context_id, linemap),
                 )
 
     def add_arcs(self, arc_data):
@@ -530,13 +516,13 @@ class CoverageData(SimpleReprMixin):
 
             # Get line data.
             cur = conn.execute(
-                'select file.path, context.context, line_map.numbits '
-                'from line_map '
-                'inner join file on file.id = line_map.file_id '
-                'inner join context on context.id = line_map.context_id'
+                'select file.path, context.context, line_bits.numbits '
+                'from line_bits '
+                'inner join file on file.id = line_bits.file_id '
+                'inner join context on context.id = line_bits.context_id'
                 )
             lines = {
-                (files[path], context): from_blob(numbits)
+                (files[path], context): numbits
                 for (path, context, numbits) in cur
                 }
             cur.close()
@@ -610,16 +596,15 @@ class CoverageData(SimpleReprMixin):
 
             # Get line data.
             cur = conn.execute(
-                'select file.path, context.context, line_map.numbits '
-                'from line_map '
-                'inner join file on file.id = line_map.file_id '
-                'inner join context on context.id = line_map.context_id'
+                'select file.path, context.context, line_bits.numbits '
+                'from line_bits '
+                'inner join file on file.id = line_bits.file_id '
+                'inner join context on context.id = line_bits.context_id'
                 )
             for path, context, numbits in cur:
                 key = (aliases.map(path), context)
-                numbits = from_blob(numbits)
                 if key in lines:
-                    numbits = merge_numbits(lines[key], numbits)
+                    numbits = numbits_union(lines[key], numbits)
                 lines[key] = numbits
             cur.close()
 
@@ -631,12 +616,12 @@ class CoverageData(SimpleReprMixin):
                 '(file_id, context_id, fromno, tono) values (?, ?, ?, ?)',
                 arc_rows
             )
-            conn.execute("delete from line_map")
+            conn.execute("delete from line_bits")
             conn.executemany(
-                "insert into line_map "
+                "insert into line_bits "
                 "(file_id, context_id, numbits) values (?, ?, ?)",
                 [
-                    (file_ids[file], context_ids[context], to_blob(numbits))
+                    (file_ids[file], context_ids[context], numbits)
                     for (file, context), numbits in lines.items()
                 ]
             )
@@ -756,7 +741,7 @@ class CoverageData(SimpleReprMixin):
             if file_id is None:
                 return None
             else:
-                query = "select numbits from line_map where file_id = ?"
+                query = "select numbits from line_bits where file_id = ?"
                 data = [file_id]
                 context_ids = self._get_query_context_ids(contexts)
                 if context_ids is not None:
@@ -766,7 +751,7 @@ class CoverageData(SimpleReprMixin):
                 bitmaps = list(con.execute(query, data))
                 nums = set()
                 for row in bitmaps:
-                    nums.update(numbits_to_nums(from_blob(row[0])))
+                    nums.update(numbits_to_nums(row[0]))
                 return sorted(nums)
 
     def arcs(self, filename, contexts=None):
@@ -812,7 +797,7 @@ class CoverageData(SimpleReprMixin):
                         lineno_contexts_map[tono].append(context)
             else:
                 query = (
-                    "select l.numbits, c.context from line_map l, context c "
+                    "select l.numbits, c.context from line_bits l, context c "
                     "where l.context_id = c.id "
                     "and file_id = ?"
                     )
@@ -823,7 +808,7 @@ class CoverageData(SimpleReprMixin):
                     query += " and l.context_id in (" + ids_array + ")"
                     data += context_ids
                 for numbits, context in con.execute(query, data):
-                    for lineno in numbits_to_nums(from_blob(numbits)):
+                    for lineno in numbits_to_nums(numbits):
                         lineno_contexts_map[lineno].append(context)
         return lineno_contexts_map