Skip to content

Commit 0d5a112

Browse files
committed
perf: bulk narrowing to avoid N**2. #2048
1 parent a868ed9 commit 0d5a112

File tree

5 files changed

+136
-52
lines changed

5 files changed

+136
-52
lines changed

CHANGES.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@ upgrading your version of coverage.py.
2323
Unreleased
2424
----------
2525

26+
- Performance: with branch coverage in large files, generating HTML, JSON, or
27+
LCOV reports could take far too long due to some quadratic behavior. This is
28+
now fixed, closing `issue 2048`_. Thanks to Daniel Diniz for help diagnosing
29+
the problem.
30+
2631
- Most warnings and a few errors now have links to a page in the docs
2732
explaining the specific message. Closes `issue 1921`_.
2833

2934
.. _issue 1921: https://github.com/nedbat/coveragepy/issues/1921
30-
35+
.. _issue 2048: https://github.com/nedbat/coveragepy/issues/2048
3136

3237

3338
.. start-releases

coverage/html.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
stdout_link,
3333
)
3434
from coverage.report_core import get_analysis_to_report
35-
from coverage.results import Analysis, Numbers
35+
from coverage.results import Analysis, AnalysisNarrower, Numbers
3636
from coverage.templite import Templite
3737
from coverage.types import TLineNo, TMorf
3838
from coverage.version import __url__
@@ -582,13 +582,21 @@ def write_region_index_pages(self, files_to_report: Iterable[FileToReport]) -> N
582582

583583
for noun in region_nouns:
584584
page_data = self.index_pages[noun]
585-
outside_lines = set(range(1, num_lines + 1))
586585

586+
outside_lines = set(range(1, num_lines + 1))
587587
for region in regions:
588588
if region.kind != noun:
589589
continue
590590
outside_lines -= region.lines
591-
analysis = ftr.analysis.narrow(region.lines)
591+
592+
narrower = AnalysisNarrower(ftr.analysis)
593+
narrower.add_regions(r.lines for r in regions if r.kind == noun)
594+
narrower.add_regions([outside_lines])
595+
596+
for region in regions:
597+
if region.kind != noun:
598+
continue
599+
analysis = narrower.narrow(region.lines)
592600
if not self.should_report(analysis, page_data):
593601
continue
594602
sorting_name = region.name.rpartition(".")[-1].lstrip("_")
@@ -605,7 +613,7 @@ def write_region_index_pages(self, files_to_report: Iterable[FileToReport]) -> N
605613
)
606614
)
607615

608-
analysis = ftr.analysis.narrow(outside_lines)
616+
analysis = narrower.narrow(outside_lines)
609617
if self.should_report(analysis, page_data):
610618
page_data.summaries.append(
611619
IndexItem(

coverage/jsonreport.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from coverage import __version__
1515
from coverage.report_core import get_analysis_to_report
16-
from coverage.results import Analysis, Numbers
16+
from coverage.results import Analysis, AnalysisNarrower, Numbers
1717
from coverage.types import TLineNo, TMorf
1818

1919
if TYPE_CHECKING:
@@ -128,21 +128,30 @@ def report_one_file(
128128
)
129129

130130
num_lines = len(file_reporter.source().splitlines())
131+
regions = file_reporter.code_regions()
131132
for noun, plural in file_reporter.code_region_kinds():
132-
reported_file[plural] = region_data = {}
133133
outside_lines = set(range(1, num_lines + 1))
134-
for region in file_reporter.code_regions():
134+
for region in regions:
135135
if region.kind != noun:
136136
continue
137137
outside_lines -= region.lines
138+
139+
narrower = AnalysisNarrower(analysis)
140+
narrower.add_regions(r.lines for r in regions if r.kind == noun)
141+
narrower.add_regions([outside_lines])
142+
143+
reported_file[plural] = region_data = {}
144+
for region in regions:
145+
if region.kind != noun:
146+
continue
138147
region_data[region.name] = self.make_region_data(
139148
coverage_data,
140-
analysis.narrow(region.lines),
149+
narrower.narrow(region.lines),
141150
)
142151

143152
region_data[""] = self.make_region_data(
144153
coverage_data,
145-
analysis.narrow(outside_lines),
154+
narrower.narrow(outside_lines),
146155
)
147156
return reported_file
148157

coverage/lcovreport.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from coverage.plugin import FileReporter
1515
from coverage.report_core import get_analysis_to_report
16-
from coverage.results import Analysis, Numbers
16+
from coverage.results import Analysis, AnalysisNarrower, Numbers
1717
from coverage.types import TMorf
1818

1919
if TYPE_CHECKING:
@@ -81,12 +81,15 @@ def lcov_functions(
8181
if not functions:
8282
return
8383

84+
narrower = AnalysisNarrower(file_analysis)
85+
narrower.add_regions(r.lines for _, _, r in functions)
86+
8487
functions.sort()
8588
functions_hit = 0
8689
for first_line, last_line, region in functions:
8790
# A function counts as having been executed if any of it has been
8891
# executed.
89-
analysis = file_analysis.narrow(region.lines)
92+
analysis = narrower.narrow(region.lines)
9093
hit = int(analysis.numbers.n_executed > 0)
9194
functions_hit += hit
9295

coverage/results.py

Lines changed: 99 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import collections
99
import dataclasses
10-
from collections.abc import Container, Iterable
10+
from collections.abc import Iterable
1111
from typing import TYPE_CHECKING
1212

1313
from coverage.exceptions import ConfigError
@@ -113,45 +113,6 @@ def __post_init__(self) -> None:
113113
n_missing_branches=n_missing_branches,
114114
)
115115

116-
def narrow(self, lines: Container[TLineNo]) -> Analysis:
117-
"""Create a narrowed Analysis.
118-
119-
The current analysis is copied to make a new one that only considers
120-
the lines in `lines`.
121-
"""
122-
123-
statements = {lno for lno in self.statements if lno in lines}
124-
excluded = {lno for lno in self.excluded if lno in lines}
125-
executed = {lno for lno in self.executed if lno in lines}
126-
127-
if self.has_arcs:
128-
arc_possibilities_set = {
129-
(a, b) for a, b in self.arc_possibilities_set if a in lines or b in lines
130-
}
131-
arcs_executed_set = {
132-
(a, b) for a, b in self.arcs_executed_set if a in lines or b in lines
133-
}
134-
exit_counts = {lno: num for lno, num in self.exit_counts.items() if lno in lines}
135-
no_branch = {lno for lno in self.no_branch if lno in lines}
136-
else:
137-
arc_possibilities_set = set()
138-
arcs_executed_set = set()
139-
exit_counts = {}
140-
no_branch = set()
141-
142-
return Analysis(
143-
precision=self.precision,
144-
filename=self.filename,
145-
has_arcs=self.has_arcs,
146-
statements=statements,
147-
excluded=excluded,
148-
executed=executed,
149-
arc_possibilities_set=arc_possibilities_set,
150-
arcs_executed_set=arcs_executed_set,
151-
exit_counts=exit_counts,
152-
no_branch=no_branch,
153-
)
154-
155116
def missing_formatted(self, branches: bool = False) -> str:
156117
"""The missing line numbers, formatted nicely.
157118
@@ -236,6 +197,104 @@ def branch_stats(self) -> dict[TLineNo, tuple[int, int]]:
236197
return stats
237198

238199

200+
TRegionLines = frozenset[TLineNo]
201+
202+
203+
class AnalysisNarrower:
204+
"""
205+
For reducing an `Analysis` to a subset of its lines.
206+
207+
Originally this was a simpler method on Analysis, but that led to quadratic
208+
behavior. This class does the bulk of the work up-front to provide the
209+
same results in linear time.
210+
211+
Create an AnalysisNarrower from an Analysis, bulk-add region lines to it
212+
with `add_regions`, then individually request new narrowed Analysis objects
213+
for each region with `narrow`. Doing most of the work in limited calls to
214+
`add_regions` lets us avoid poor performance.
215+
"""
216+
217+
# In this class, regions are represented by a frozenset of their lines.
218+
219+
def __init__(self, analysis: Analysis) -> None:
220+
self.analysis = analysis
221+
self.region2arc_possibilities: dict[TRegionLines, set[TArc]] = collections.defaultdict(set)
222+
self.region2arc_executed: dict[TRegionLines, set[TArc]] = collections.defaultdict(set)
223+
self.region2exit_counts: dict[TRegionLines, dict[TLineNo, int]] = collections.defaultdict(
224+
dict
225+
)
226+
227+
def add_regions(self, liness: Iterable[set[TLineNo]]) -> None:
228+
"""
229+
Pre-process a number of sets of line numbers. Later calls to `narrow`
230+
with one of these sets will provide a narrowed Analysis.
231+
"""
232+
if self.analysis.has_arcs:
233+
line2region: dict[TLineNo, TRegionLines] = {}
234+
235+
for lines in liness:
236+
fzlines = frozenset(lines)
237+
for line in lines:
238+
line2region[line] = fzlines
239+
240+
def collect_arcs(
241+
arc_set: set[TArc],
242+
region2arcs: dict[TRegionLines, set[TArc]],
243+
) -> None:
244+
for a, b in arc_set:
245+
if r := line2region.get(a):
246+
region2arcs[r].add((a, b))
247+
if r := line2region.get(b):
248+
region2arcs[r].add((a, b))
249+
250+
collect_arcs(self.analysis.arc_possibilities_set, self.region2arc_possibilities)
251+
collect_arcs(self.analysis.arcs_executed_set, self.region2arc_executed)
252+
253+
for lno, num in self.analysis.exit_counts.items():
254+
if r := line2region.get(lno):
255+
self.region2exit_counts[r][lno] = num
256+
257+
def narrow(self, lines: set[TLineNo]) -> Analysis:
258+
"""Create a narrowed Analysis.
259+
260+
The current analysis is copied to make a new one that only considers
261+
the lines in `lines`.
262+
"""
263+
264+
# Technically, the set intersections in this method are still O(N**2)
265+
# since this method is called N times, but they're very fast and moving
266+
# them to `add_regions` won't avoid the quadratic time.
267+
268+
statements = self.analysis.statements & lines
269+
excluded = self.analysis.excluded & lines
270+
executed = self.analysis.executed & lines
271+
272+
if self.analysis.has_arcs:
273+
fzlines = frozenset(lines)
274+
arc_possibilities_set = self.region2arc_possibilities[fzlines]
275+
arcs_executed_set = self.region2arc_executed[fzlines]
276+
exit_counts = self.region2exit_counts[fzlines]
277+
no_branch = self.analysis.no_branch & lines
278+
else:
279+
arc_possibilities_set = set()
280+
arcs_executed_set = set()
281+
exit_counts = {}
282+
no_branch = set()
283+
284+
return Analysis(
285+
precision=self.analysis.precision,
286+
filename=self.analysis.filename,
287+
has_arcs=self.analysis.has_arcs,
288+
statements=statements,
289+
excluded=excluded,
290+
executed=executed,
291+
arc_possibilities_set=arc_possibilities_set,
292+
arcs_executed_set=arcs_executed_set,
293+
exit_counts=exit_counts,
294+
no_branch=no_branch,
295+
)
296+
297+
239298
@dataclasses.dataclass
240299
class Numbers:
241300
"""The numerical results of measuring coverage.

0 commit comments

Comments
 (0)