Commit c45ebac

perf: cache alias mapping
1 parent 390cb97

File tree: 4 files changed, +38 -30 lines

CHANGES.rst (+1 -1)

@@ -23,7 +23,7 @@ upgrading your version of coverage.py.
 Unreleased
 ----------

-Nothing yet.
+- Performance improvement for combining data files.


 .. scriv-start-here

coverage/control.py (+1 -1)

@@ -998,7 +998,7 @@ def _prepare_data_for_reporting(self) -> None:
         if self.config.paths:
             mapped_data = CoverageData(warn=self._warn, debug=self._debug, no_disk=True)
             if self._data is not None:
-                mapped_data.update(self._data, aliases=self._make_aliases())
+                mapped_data.update(self._data, map_path=self._make_aliases().map)
             self._data = mapped_data

     def report(
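Passing the bound method `self._make_aliases().map` instead of the whole aliases object means `update()` now only needs a plain `str -> str` callable. A minimal sketch of why that works, using a hypothetical `Aliases` stand-in rather than coverage.py's real `PathAliases`:

from typing import Callable

class Aliases:
    """Hypothetical stand-in for an object exposing a map(path) method."""
    def map(self, path: str) -> str:
        return path.removeprefix("/ci/build/")

def remap_all(paths: list[str], map_path: Callable[[str], str]) -> list[str]:
    # A bound method is an ordinary callable, so it can be handed over directly.
    return [map_path(p) for p in paths]

print(remap_all(["/ci/build/pkg/mod.py"], map_path=Aliases().map))
# ['pkg/mod.py']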

coverage/data.py (+7 -1)

@@ -12,6 +12,7 @@

 from __future__ import annotations

+import functools
 import glob
 import hashlib
 import os.path

@@ -134,6 +135,11 @@ def combine_parallel_data(
     if strict and not files_to_combine:
         raise NoDataError("No data to combine")

+    if aliases is None:
+        map_path = None
+    else:
+        map_path = functools.lru_cache(maxsize=None)(aliases.map)
+
     file_hashes = set()
     combined_any = False

@@ -176,7 +182,7 @@ def combine_parallel_data(
                     message(f"Couldn't combine data file {rel_file_name}: {exc}")
                 delete_this_one = False
             else:
-                data.update(new_data, aliases=aliases)
+                data.update(new_data, map_path=map_path)
                 combined_any = True
                 if message:
                     message(f"Combined data file {rel_file_name}")

coverage/sqldata.py (+29 -27)

@@ -21,13 +21,12 @@
 import zlib

 from typing import (
-    cast, Any, Collection, Mapping,
+    cast, Any, Callable, Collection, Mapping,
     Sequence,
 )

 from coverage.debug import NoDebugging, auto_repr
 from coverage.exceptions import CoverageException, DataError
-from coverage.files import PathAliases
 from coverage.misc import file_be_gone, isolate_module
 from coverage.numbits import numbits_to_nums, numbits_union, nums_to_numbits
 from coverage.sqlitedb import SqliteDb

@@ -647,12 +646,16 @@ def purge_files(self, filenames: Collection[str]) -> None:
                     continue
                 con.execute_void(sql, (file_id,))

-    def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -> None:
-        """Update this data with data from several other :class:`CoverageData` instances.
+    def update(
+        self,
+        other_data: CoverageData,
+        map_path: Callable[[str], str] | None = None,
+    ) -> None:
+        """Update this data with data from another :class:`CoverageData`.

-        If `aliases` is provided, it's a `PathAliases` object that is used to
-        re-map paths to match the local machine's. Note: `aliases` is None
-        only when called directly from the test suite.
+        If `map_path` is provided, it's a function that re-maps paths to match
+        the local machine's. Note: `map_path` is None only when called
+        directly from the test suite.

         """
         if self._debug.should("dataop"):

@@ -664,7 +667,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
         if self._has_arcs and other_data._has_lines:
             raise DataError("Can't combine line data with arc data")

-        aliases = aliases or PathAliases()
+        map_path = map_path or (lambda p: p)

         # Force the database we're writing to to exist before we start nesting contexts.
         self._start_using()

@@ -674,7 +677,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
         with other_data._connect() as con:
             # Get files data.
             with con.execute("select path from file") as cur:
-                files = {path: aliases.map(path) for (path,) in cur}
+                files = {path: map_path(path) for (path,) in cur}

             # Get contexts data.
             with con.execute("select context from context") as cur:

@@ -729,7 +732,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
                 "inner join file on file.id = tracer.file_id",
             ) as cur:
                 this_tracers.update({
-                    aliases.map(path): tracer
+                    map_path(path): tracer
                     for path, tracer in cur
                 })

@@ -768,7 +771,21 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
             # and context strings with integer ids. Then use the efficient
             # `executemany()` to insert all rows at once.

-            # Get line data.
+            if arcs:
+                self._choose_lines_or_arcs(arcs=True)
+
+                arc_rows = (
+                    (file_ids[file], context_ids[context], fromno, tono)
+                    for file, context, fromno, tono in arcs
+                )
+
+                # Write the combined data.
+                con.executemany_void(
+                    "insert or ignore into arc " +
+                    "(file_id, context_id, fromno, tono) values (?, ?, ?, ?)",
+                    arc_rows,
+                )
+
             if lines:
                 self._choose_lines_or_arcs(lines=True)

@@ -779,7 +796,7 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
                     "inner join context on context.id = line_bits.context_id",
                 ) as cur:
                     for path, context, numbits in cur:
-                        key = (aliases.map(path), context)
+                        key = (map_path(path), context)
                         if key in lines:
                             lines[key] = numbits_union(lines[key], numbits)

@@ -792,21 +809,6 @@ def update(self, other_data: CoverageData, aliases: PathAliases | None = None) -
                     ],
                 )

-            if arcs:
-                self._choose_lines_or_arcs(arcs=True)
-
-                arc_rows = (
-                    (file_ids[file], context_ids[context], fromno, tono)
-                    for file, context, fromno, tono in arcs
-                )
-
-                # Write the combined data.
-                con.executemany_void(
-                    "insert or ignore into arc " +
-                    "(file_id, context_id, fromno, tono) values (?, ?, ?, ?)",
-                    arc_rows,
-                )
-
             con.executemany_void(
                 "insert or ignore into tracer (file_id, tracer) values (?, ?)",
                 ((file_ids[filename], tracer) for filename, tracer in tracer_map.items()),
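With the parameter typed as `Callable[[str], str] | None`, `sqldata.py` no longer needs to import `PathAliases` at all, and an identity function stands in when no mapper is supplied. A toy analogue of that contract (the `merge` helper below is hypothetical, not the real `CoverageData.update`):

from __future__ import annotations

from typing import Callable

def merge(
    into: dict[str, set[int]],
    other: dict[str, set[int]],
    map_path: Callable[[str], str] | None = None,
) -> None:
    """Merge sets of line numbers keyed by file path, remapping paths on the way in."""
    map_path = map_path or (lambda p: p)          # identity fallback, as in the diff
    for path, lines in other.items():
        into.setdefault(map_path(path), set()).update(lines)

combined: dict[str, set[int]] = {}
merge(combined, {"/remote/proj/a.py": {1, 2}})    # no mapper: paths kept as-is
merge(combined, {"/remote/proj/a.py": {3}},
      map_path=lambda p: p.replace("/remote/", "/local/"))
print(combined)   # {'/remote/proj/a.py': {1, 2}, '/local/proj/a.py': {3}}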
