Skip to content

Commit

Permalink
Merge pull request #3247 from nexB/3245-lic-detection-stuck
Browse files Browse the repository at this point in the history
Fix choking license detection post-processing #3245
  • Loading branch information
AyanSinhaMahapatra authored Feb 17, 2023
2 parents 6358a4b + f2a08de commit 44ab276
Showing 1 changed file with 22 additions and 18 deletions.
40 changes: 22 additions & 18 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import uuid
from enum import Enum
from hashlib import sha1
from collections import Counter

import attr
from collections import defaultdict
from license_expression import combine_expressions
from license_expression import Licensing

Expand Down Expand Up @@ -595,33 +595,25 @@ def get_unique_detections(cls, license_detections):
Return all unique UniqueDetection from a ``license_detections`` list of
LicenseDetection.
"""
identifiers = get_identifiers(license_detections)
unique_detection_counts = dict(Counter(identifiers))

detections_by_id = get_detections_by_id(license_detections)
unique_license_detections = []
for detection_identifier in unique_detection_counts.keys():
file_regions = (

for all_detections in detections_by_id.values():
file_regions = [
detection.file_region
for detection in license_detections
if detection_identifier == detection.identifier
)
all_detections = (
detection
for detection in license_detections
if detection_identifier == detection.identifier
)
for detection in all_detections
]

detection = next(all_detections)
detection = next(iter(all_detections))
detection_mapping = detection.to_dict()
files = list(file_regions)
unique_license_detections.append(
cls(
identifier=detection.identifier_with_expression,
license_expression=detection_mapping["license_expression"],
detection_log=detection_mapping["detection_log"],
matches=detection_mapping["matches"],
count=len(files),
files=files,
count=len(file_regions),
files=file_regions,
)
)

Expand All @@ -638,6 +630,18 @@ def dict_fields(attr, value):
return attr.asdict(self, filter=dict_fields)


def get_detections_by_id(license_detections):
    """
    Return a mapping of {detection.identifier: [detections]} that groups every
    detection of the ``license_detections`` iterable under its ``identifier``.

    Detections sharing the same identifier are collected in a list, in the
    order in which they appear in ``license_detections``. Identifiers are
    keyed in first-seen order. An empty ``license_detections`` yields an empty
    mapping.

    The returned mapping is a ``defaultdict(list)``: subscripting it with an
    unknown identifier returns (and stores) an empty list instead of raising a
    KeyError. Use ``.get()`` or ``in`` to probe without inserting a key.
    """
    detections_by_id = defaultdict(list)

    # Group in a single pass so callers can look up all detections for an
    # identifier directly, without rescanning ``license_detections``.
    for detection in license_detections:
        detections_by_id[detection.identifier].append(detection)

    return detections_by_id

def get_identifiers(license_detections):
"""
Return identifiers for all ``license detections``.
Expand Down

0 comments on commit 44ab276

Please sign in to comment.