Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix choking license detection post-processing #3245 #3247

Merged
merged 1 commit into from
Feb 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import uuid
from enum import Enum
from hashlib import sha1
from collections import Counter

import attr
from collections import defaultdict
from license_expression import combine_expressions
from license_expression import Licensing

Expand Down Expand Up @@ -595,33 +595,25 @@ def get_unique_detections(cls, license_detections):
Return all unique UniqueDetection from a ``license_detections`` list of
LicenseDetection.
"""
identifiers = get_identifiers(license_detections)
unique_detection_counts = dict(Counter(identifiers))

detections_by_id = get_detections_by_id(license_detections)
unique_license_detections = []
for detection_identifier in unique_detection_counts.keys():
file_regions = (

for all_detections in detections_by_id.values():
file_regions = [
detection.file_region
for detection in license_detections
if detection_identifier == detection.identifier
)
all_detections = (
detection
for detection in license_detections
if detection_identifier == detection.identifier
)
for detection in all_detections
]

detection = next(all_detections)
detection = next(iter(all_detections))
detection_mapping = detection.to_dict()
files = list(file_regions)
unique_license_detections.append(
cls(
identifier=detection.identifier_with_expression,
license_expression=detection_mapping["license_expression"],
detection_log=detection_mapping["detection_log"],
matches=detection_mapping["matches"],
count=len(files),
files=files,
count=len(file_regions),
files=file_regions,
)
)

Expand All @@ -638,6 +630,18 @@ def dict_fields(attr, value):
return attr.asdict(self, filter=dict_fields)


def get_detections_by_id(license_detections):
    """
    Return a mapping of {identifier: [detections]} built from the
    ``license_detections`` iterable, where each list holds every detection
    whose ``identifier`` attribute equals the key, in the order they were
    encountered.
    """
    grouped = defaultdict(list)
    for license_detection in license_detections:
        # Indexing the defaultdict creates the empty bucket on first sight
        # of an identifier.
        bucket = grouped[license_detection.identifier]
        bucket.append(license_detection)

    return grouped

def get_identifiers(license_detections):
"""
Return identifiers for all ``license detections``.
Expand Down