diff options
Diffstat (limited to 'coverage')
-rw-r--r-- | coverage/numbits.py | 144 | ||||
-rw-r--r-- | coverage/sqldata.py | 75 |
2 files changed, 155 insertions, 64 deletions
diff --git a/coverage/numbits.py b/coverage/numbits.py index abe40f41..4b340c8e 100644 --- a/coverage/numbits.py +++ b/coverage/numbits.py @@ -5,29 +5,62 @@ Functions to manipulate packed binary representations of number sets. To save space, coverage stores sets of line numbers in SQLite using a packed -binary representation called a numbits. A numbits is stored as a blob in the -database. The exact meaning of the bytes in the blobs should be considered an -implementation detail that might change in the future. Use these functions to -work with those binary blobs of data. +binary representation called a numbits. A numbits is a set of positive +integers. + +A numbits is stored as a blob in the database. The exact meaning of the bytes +in the blobs should be considered an implementation detail that might change in +the future. Use these functions to work with those binary blobs of data. """ +from coverage import env from coverage.backward import byte_to_int, bytes_to_ints, binary_bytes, zip_longest -from coverage.misc import contract +from coverage.misc import contract, new_contract + +if env.PY3: + def _to_blob(b): + """Convert a bytestring into a type SQLite will accept for a blob.""" + return b + new_contract('blob', lambda v: isinstance(v, bytes)) +else: + def _to_blob(b): + """Convert a bytestring into a type SQLite will accept for a blob.""" + return buffer(b) # pylint: disable=undefined-variable -@contract(nums='Iterable', returns='bytes') + new_contract('blob', lambda v: isinstance(v, buffer)) # pylint: disable=undefined-variable + +@contract(nums='Iterable', returns='blob') def nums_to_numbits(nums): - """Convert `nums` (a non-empty iterable of ints) into a numbits.""" - nbytes = max(nums) // 8 + 1 + """Convert `nums` into a numbits. + + Arguments: + nums (a reusable iterable of integers): the line numbers to store. + + Returns: + A binary blob. + """ + try: + nbytes = max(nums) // 8 + 1 + except ValueError: + # nums was empty. 
+ return _to_blob(b'') b = bytearray(nbytes) for num in nums: b[num//8] |= 1 << num % 8 - return bytes(b) + return _to_blob(bytes(b)) -@contract(numbits='bytes', returns='list[int]') +@contract(numbits='blob', returns='list[int]') def numbits_to_nums(numbits): - """Convert a numbits into a list of numbers.""" + """Convert a numbits into a list of numbers. + + Arguments: + numbits (a binary blob): the packed number set. + + Returns: + A list of integers. + """ nums = [] for byte_i, byte in enumerate(bytes_to_ints(numbits)): for bit_i in range(8): @@ -35,22 +68,95 @@ def numbits_to_nums(numbits): nums.append(byte_i * 8 + bit_i) return nums -@contract(numbits1='bytes', numbits2='bytes', returns='bytes') -def merge_numbits(numbits1, numbits2): - """Merge two numbits""" +@contract(numbits1='blob', numbits2='blob', returns='blob') +def numbits_union(numbits1, numbits2): + """Compute the union of two numbits. + + Arguments: + numbits1, numbits2: packed number sets. + + Returns: + A new numbits, the union of the two number sets. + """ + byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0) + return _to_blob(binary_bytes(b1 | b2 for b1, b2 in byte_pairs)) + +@contract(numbits1='blob', numbits2='blob', returns='blob') +def numbits_intersection(numbits1, numbits2): + """Compute the intersection of two numbits. + + Arguments: + numbits1, numbits2: packed number sets. + + Returns: + A new numbits, the intersection of the two number sets. 
+ """ byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0) - return binary_bytes(b1 | b2 for b1, b2 in byte_pairs) + intersection_bytes = binary_bytes(b1 & b2 for b1, b2 in byte_pairs) + return _to_blob(intersection_bytes.rstrip(b'\0')) -@contract(numbits1='bytes', numbits2='bytes', returns='bool') +@contract(numbits1='blob', numbits2='blob', returns='bool') def numbits_any_intersection(numbits1, numbits2): - """Is there any number that appears in both numbits?""" + """Is there any number that appears in both numbits? + + Determine whether two number sets have a non-empty intersection. This is + faster than computing the intersection. + + Arguments: + numbits1, numbits2: packed number sets. + + Returns: + A boolean, true if there is any number in both of the number sets. + """ byte_pairs = zip_longest(bytes_to_ints(numbits1), bytes_to_ints(numbits2), fillvalue=0) return any(b1 & b2 for b1, b2 in byte_pairs) -@contract(num='int', numbits='bytes', returns='bool') +@contract(num='int', numbits='blob', returns='bool') def num_in_numbits(num, numbits): - """Does the integer `num` appear in `numbits`?""" + """Does the integer `num` appear in `numbits`? + + Arguments: + num (integer) + + numbits (binary blob) + + Returns: + A boolean, true if `num` is a member of `numbits`. + """ nbyte, nbit = divmod(num, 8) if nbyte >= len(numbits): return False return bool(byte_to_int(numbits[nbyte]) & (1 << nbit)) + +def register_sqlite_functions(connection): + """ + Define numbits functions in a SQLite connection. + + This defines these functions for use in SQLite statements: + + * :func:`numbits_union` + * :func:`numbits_intersection` + * :func:`numbits_any_intersection` + * :func:`num_in_numbits` + + `connection` is a :class:`sqlite3.Connection <python:sqlite3.Connection>` + object. After creating the connection, pass it to this function to + register the numbits functions. 
Then you can use numbits functions in your + queries:: + + import sqlite3 + from coverage.numbits import register_sqlite_functions + + conn = sqlite3.connect('example.db') + register_sqlite_functions(conn) + c = conn.cursor() + c.execute( + "select lb.file_id, lb.context_id from line_bits lb " + "where num_in_numbits(?, lb.numbits)", + (interesting_line_number,) + ) + """ + connection.create_function("numbits_union", 2, numbits_union) + connection.create_function("numbits_intersection", 2, numbits_intersection) + connection.create_function("numbits_any_intersection", 2, numbits_any_intersection) + connection.create_function("num_in_numbits", 2, num_in_numbits) diff --git a/coverage/sqldata.py b/coverage/sqldata.py index 3ee34f0f..5e7edd72 100644 --- a/coverage/sqldata.py +++ b/coverage/sqldata.py @@ -20,16 +20,18 @@ import zlib from coverage.backward import get_thread_id, iitems, to_bytes, to_string from coverage.debug import NoDebugging, SimpleReprMixin -from coverage import env from coverage.files import PathAliases from coverage.misc import CoverageException, file_be_gone, filename_suffix, isolate_module from coverage.misc import contract -from coverage.numbits import nums_to_numbits, numbits_to_nums, merge_numbits +from coverage.numbits import nums_to_numbits, numbits_to_nums, numbits_union from coverage.version import __version__ os = isolate_module(os) -SCHEMA_VERSION = 6 +# If you change the schema, increment the SCHEMA_VERSION, and update the +# docs in docs/dbschema.rst also. + +SCHEMA_VERSION = 7 # Schema versions: # 1: Released in 5.0a2 @@ -38,6 +40,7 @@ SCHEMA_VERSION = 6 # 4: Changed line_map.bitmap to line_map.numbits. # 5: Added foreign key declarations. # 6: Key-value in meta. +# 7: line_map -> line_bits SCHEMA = """ CREATE TABLE coverage_schema ( @@ -71,8 +74,9 @@ CREATE TABLE context ( unique (context) ); -CREATE TABLE line_map ( - -- If recording lines, a row per context per line executed. 
+CREATE TABLE line_bits ( + -- If recording lines, a row per context per file executed. + -- All of the line numbers for that file/context are in one numbits. file_id integer, -- foreign key to `file`. context_id integer, -- foreign key to `context`. numbits blob, -- see the numbits functions in coverage.numbits @@ -100,24 +104,6 @@ CREATE TABLE tracer ( ); """ -if env.PY2: - def to_blob(b): - """Convert a bytestring into a type SQLite will accept for a blob.""" - return buffer(b) # pylint: disable=undefined-variable - - def from_blob(blob): - """Convert a blob read from SQLite into a bytestring.""" - return bytes(blob) -else: - def to_blob(b): - """Convert a bytestring into a type SQLite will accept for a blob.""" - return b - - def from_blob(blob): - """Convert a blob read from SQLite into a bytestring.""" - return blob - - class CoverageData(SimpleReprMixin): """Manages collected coverage data, including file storage. @@ -386,15 +372,15 @@ class CoverageData(SimpleReprMixin): for filename, linenos in iitems(line_data): linemap = nums_to_numbits(linenos) file_id = self._file_id(filename, add=True) - query = "select numbits from line_map where file_id = ? and context_id = ?" + query = "select numbits from line_bits where file_id = ? and context_id = ?" existing = list(con.execute(query, (file_id, self._current_context_id))) if existing: - linemap = merge_numbits(linemap, from_blob(existing[0][0])) + linemap = numbits_union(linemap, existing[0][0]) con.execute( - "insert or replace into line_map " + "insert or replace into line_bits " " (file_id, context_id, numbits) values (?, ?, ?)", - (file_id, self._current_context_id, to_blob(linemap)), + (file_id, self._current_context_id, linemap), ) def add_arcs(self, arc_data): @@ -530,13 +516,13 @@ class CoverageData(SimpleReprMixin): # Get line data. 
cur = conn.execute( - 'select file.path, context.context, line_map.numbits ' - 'from line_map ' - 'inner join file on file.id = line_map.file_id ' - 'inner join context on context.id = line_map.context_id' + 'select file.path, context.context, line_bits.numbits ' + 'from line_bits ' + 'inner join file on file.id = line_bits.file_id ' + 'inner join context on context.id = line_bits.context_id' ) lines = { - (files[path], context): from_blob(numbits) + (files[path], context): numbits for (path, context, numbits) in cur } cur.close() @@ -610,16 +596,15 @@ class CoverageData(SimpleReprMixin): # Get line data. cur = conn.execute( - 'select file.path, context.context, line_map.numbits ' - 'from line_map ' - 'inner join file on file.id = line_map.file_id ' - 'inner join context on context.id = line_map.context_id' + 'select file.path, context.context, line_bits.numbits ' + 'from line_bits ' + 'inner join file on file.id = line_bits.file_id ' + 'inner join context on context.id = line_bits.context_id' ) for path, context, numbits in cur: key = (aliases.map(path), context) - numbits = from_blob(numbits) if key in lines: - numbits = merge_numbits(lines[key], numbits) + numbits = numbits_union(lines[key], numbits) lines[key] = numbits cur.close() @@ -631,12 +616,12 @@ class CoverageData(SimpleReprMixin): '(file_id, context_id, fromno, tono) values (?, ?, ?, ?)', arc_rows ) - conn.execute("delete from line_map") + conn.execute("delete from line_bits") conn.executemany( - "insert into line_map " + "insert into line_bits " "(file_id, context_id, numbits) values (?, ?, ?)", [ - (file_ids[file], context_ids[context], to_blob(numbits)) + (file_ids[file], context_ids[context], numbits) for (file, context), numbits in lines.items() ] ) @@ -756,7 +741,7 @@ class CoverageData(SimpleReprMixin): if file_id is None: return None else: - query = "select numbits from line_map where file_id = ?" + query = "select numbits from line_bits where file_id = ?" 
data = [file_id] context_ids = self._get_query_context_ids(contexts) if context_ids is not None: @@ -766,7 +751,7 @@ class CoverageData(SimpleReprMixin): bitmaps = list(con.execute(query, data)) nums = set() for row in bitmaps: - nums.update(numbits_to_nums(from_blob(row[0]))) + nums.update(numbits_to_nums(row[0])) return sorted(nums) def arcs(self, filename, contexts=None): @@ -812,7 +797,7 @@ class CoverageData(SimpleReprMixin): lineno_contexts_map[tono].append(context) else: query = ( - "select l.numbits, c.context from line_map l, context c " + "select l.numbits, c.context from line_bits l, context c " "where l.context_id = c.id " "and file_id = ?" ) @@ -823,7 +808,7 @@ class CoverageData(SimpleReprMixin): query += " and l.context_id in (" + ids_array + ")" data += context_ids for numbits, context in con.execute(query, data): - for lineno in numbits_to_nums(from_blob(numbits)): + for lineno in numbits_to_nums(numbits): lineno_contexts_map[lineno].append(context) return lineno_contexts_map |