Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 93 additions & 80 deletions backend/apps/github/management/commands/github_match_users.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
"""A command to match entity leader names with GitHub users (exact and fuzzy matching)."""

from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand
from django.db import transaction
from thefuzz import fuzz

from apps.github.models.user import User
from apps.owasp.models.chapter import Chapter
from apps.owasp.models.committee import Committee
from apps.owasp.models.entity_member import EntityMember
from apps.owasp.models.project import Project
from apps.slack.models import Member

ID_MIN_LENGTH = 2


class Command(BaseCommand):
help = "Match leaders or Slack members with GitHub users using exact and fuzzy matching."
help = "Matches entity leader names with GitHub Users and creates EntityMember records."

def add_arguments(self, parser):
parser.add_argument(
"model_name",
type=str,
choices=("chapter", "committee", "member", "project"),
help="Model name to process: chapter, committee, project, or member",
choices=("chapter", "committee", "project", "all"),
help="Model to process: chapter, committee, project, or all.",
)
parser.add_argument(
"--threshold",
Expand All @@ -29,103 +31,114 @@ def add_arguments(self, parser):
help="Threshold for fuzzy matching (0-100)",
)

@transaction.atomic
def handle(self, *_args, **kwargs):
    """Match leaders of the requested entity model(s) against GitHub users.

    Reads ``model_name`` and ``threshold`` from the parsed options, loads the
    GitHub users that pass ``_is_valid_user`` and hands each selected model to
    ``_process_entities``. The whole run is wrapped in a single transaction.
    """
    requested = kwargs["model_name"].lower()
    # Clamp the fuzzy-matching threshold into the 0-100 range.
    threshold = max(0, min(kwargs["threshold"], 100))

    entity_models = {
        "chapter": Chapter,
        "committee": Committee,
        "project": Project,
    }
    if requested == "all":
        models_to_process = entity_models.values()
    else:
        models_to_process = [entity_models[requested]]

    self.stdout.write("Loading GitHub users into memory...")
    valid_users = [
        user
        for user in User.objects.values("id", "login", "name")
        if self._is_valid_user(user["login"], user["name"])
    ]
    self.stdout.write(f"Found {len(valid_users)} valid users for matching.")

    for model_class in models_to_process:
        self._process_entities(model_class, valid_users, threshold)

    self.stdout.write(self.style.SUCCESS("\nCommand finished successfully."))

def _process_entities(self, model_class, users_list, threshold):
    """Create unreviewed leader ``EntityMember`` records for one entity model.

    Args:
        model_class: Entity model class whose instances expose ``leaders_raw``.
        users_list: GitHub user dicts (``id``, ``login``, ``name``) to match against.
        threshold: Minimum fuzzy score (0-100) for a non-exact match to count.

    """
    # ``model_class`` is already a class, so its own ``__name__`` is the label;
    # ``model_class.__class__`` is the metaclass and would print its name instead.
    model_label = model_class.__name__
    self.stdout.write(f"\n--- Processing {model_label} ---")

    content_type = ContentType.objects.get_for_model(model_class)
    new_members_to_create = []

    for entity in model_class.objects.all():
        if not entity.leaders_raw:
            continue

        matched_users = self._find_user_matches(entity.leaders_raw, users_list, threshold)
        if not matched_users:
            continue

        self.stdout.write(f" - Found {len(matched_users)} leader matches for '{entity}'")
        new_members_to_create.extend(
            EntityMember(
                content_type=content_type,
                object_id=entity.pk,
                member_id=user["id"],
                kind=EntityMember.MemberKind.LEADER,
                is_reviewed=False,
            )
            for user in matched_users
        )

    if not new_members_to_create:
        self.stdout.write(
            self.style.NOTICE(f" -> No new leader records to create for {model_label}.")
        )
        return

    # With ignore_conflicts=True, bulk_create returns every object it was given,
    # including rows the database skipped, so its length is not the number of
    # inserted rows. Compare row counts before and after instead.
    leader_records = EntityMember.objects.filter(
        content_type=content_type,
        kind=EntityMember.MemberKind.LEADER,
    )
    count_before = leader_records.count()
    EntityMember.objects.bulk_create(new_members_to_create, ignore_conflicts=True)
    created_count = leader_records.count() - count_before

    self.stdout.write(
        self.style.SUCCESS(
            f" -> Created {created_count} new leader records for {model_label}."
        )
    )

def _is_valid_user(self, login, name):
    """Check if GitHub user meets minimum requirements.

    Both ``login`` and ``name`` must be at least ``ID_MIN_LENGTH`` characters
    long. A missing (``None``) value is treated as an empty string, so it makes
    the user invalid instead of raising ``TypeError``.
    """
    return len(login or "") >= ID_MIN_LENGTH and len(name or "") >= ID_MIN_LENGTH

def process_leaders(self, leaders_raw, threshold, filtered_users):
"""Process leaders with optimized matching, capturing all exact matches."""
if not leaders_raw:
return [], [], []

exact_matches = []
fuzzy_matches = []
unmatched_leaders = []
processed_leaders = set()
def _find_user_matches(self, leaders_raw, users_list, threshold):
"""Find user matches for a list of raw leader names."""
matched_users = []

user_list = list(filtered_users.values())
for leader in leaders_raw:
if not leader or leader in processed_leaders:
for leader_name in set(leaders_raw):
if not leader_name:
continue

processed_leaders.add(leader)
leader_lower = leader.lower()

# Find all exact matches
exact_matches_for_leader = [
u
for u in user_list
if u["login"].lower() == leader_lower
or (u["name"] and u["name"].lower() == leader_lower)
]

if exact_matches_for_leader:
exact_matches.extend(exact_matches_for_leader)
for match in exact_matches_for_leader:
self.stdout.write(f"Exact match found for {leader}: {match['login']}")
leader_lower = leader_name.lower()
best_fuzzy_match = None
highest_score = 0

exact_match_found = False
for user in users_list:
if user["login"].lower() == leader_lower or (
user["name"] and user["name"].lower() == leader_lower
):
matched_users.append(user)
exact_match_found = True

if exact_match_found:
continue

# Fuzzy matching with token_sort_ratio
matches = [
u
for u in user_list
if (fuzz.token_sort_ratio(leader_lower, u["login"].lower()) >= threshold)
or (
u["name"]
and fuzz.token_sort_ratio(leader_lower, u["name"].lower()) >= threshold
)
]
for user in users_list:
score = fuzz.token_sort_ratio(leader_lower, user["login"].lower())
if user["name"]:
score = max(score, fuzz.token_sort_ratio(leader_lower, user["name"].lower()))

if score > highest_score:
highest_score = score
best_fuzzy_match = user

new_fuzzy_matches = [m for m in matches if m not in exact_matches]
if new_fuzzy_matches:
fuzzy_matches.extend(new_fuzzy_matches)
for match in new_fuzzy_matches:
self.stdout.write(f"Fuzzy match found for {leader}: {match['login']}")
else:
unmatched_leaders.append(leader)
if highest_score >= threshold:
matched_users.append(best_fuzzy_match)

return exact_matches, fuzzy_matches, unmatched_leaders
return list({user["id"]: user for user in matched_users}.values())
1 change: 1 addition & 0 deletions backend/apps/owasp/admin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .badge import BadgeAdmin
from .chapter import ChapterAdmin
from .committee import CommitteeAdmin
from .entity_member import EntityMemberAdmin
from .event import EventAdmin
from .post import PostAdmin
from .project import ProjectAdmin
Expand Down
6 changes: 3 additions & 3 deletions backend/apps/owasp/admin/chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

from apps.owasp.models.chapter import Chapter

from .mixins import GenericEntityAdminMixin, LeaderAdminMixin
from .mixins import EntityMemberInline, GenericEntityAdminMixin


class ChapterAdmin(admin.ModelAdmin, GenericEntityAdminMixin, LeaderAdminMixin):
class ChapterAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
"""Admin for Chapter model."""

autocomplete_fields = ("owasp_repository",)
filter_horizontal = LeaderAdminMixin.filter_horizontal
inlines = [EntityMemberInline]
list_display = (
"name",
"created_at",
Expand Down
11 changes: 4 additions & 7 deletions backend/apps/owasp/admin/committee.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@

from apps.owasp.models.committee import Committee

from .mixins import GenericEntityAdminMixin, LeaderAdminMixin
from .mixins import EntityMemberInline, GenericEntityAdminMixin


class CommitteeAdmin(admin.ModelAdmin, GenericEntityAdminMixin, LeaderAdminMixin):
class CommitteeAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
"""Admin for Committee model."""

autocomplete_fields = (
"leaders",
"owasp_repository",
)
filter_horizontal = LeaderAdminMixin.filter_horizontal
autocomplete_fields = ("owasp_repository",)
inlines = [EntityMemberInline]
search_fields = ("name",)


Expand Down
31 changes: 31 additions & 0 deletions backend/apps/owasp/admin/entity_member.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please restore bulk approval action functionality.

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""EntityMember admin configuration."""

from django.contrib import admin

from apps.owasp.models.entity_member import EntityMember


class EntityMemberAdmin(admin.ModelAdmin):
    """Admin for EntityMember records (generic link to any OWASP entity).

    Provides a bulk action that marks the selected records as reviewed.
    """

    actions = ("approve_members",)
    autocomplete_fields = ("member",)
    fields = (
        "content_type",
        "object_id",
        "member",
        "kind",
        "order",
        "is_reviewed",
        "description",
    )
    list_display = (
        "member",
        "kind",
        "is_reviewed",
        "order",
    )
    list_filter = ("kind", "is_reviewed")
    search_fields = ("member__login", "member__name", "description", "object_id")

    def approve_members(self, request, queryset):
        """Mark the selected entity members as reviewed and report how many changed."""
        # Imported here so this block does not depend on a module-level import.
        from django.contrib import messages

        # Only touch rows that are still unreviewed so the reported count is accurate.
        approved_count = queryset.filter(is_reviewed=False).update(is_reviewed=True)
        self.message_user(
            request,
            f"Approved {approved_count} entity member(s).",
            messages.SUCCESS,
        )

    approve_members.short_description = "Approve selected members"


admin.site.register(EntityMember, EntityMemberAdmin)
48 changes: 13 additions & 35 deletions backend/apps/owasp/admin/mixins.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
"""OWASP admin mixins for common functionality."""

from django.contrib import messages
from django.contrib.contenttypes.admin import GenericTabularInline
from django.utils.html import escape
from django.utils.safestring import mark_safe

from apps.owasp.models.entity_member import EntityMember


class EntityMemberInline(GenericTabularInline):
    """Tabular inline that edits EntityMember rows on an entity's admin page."""

    model = EntityMember

    # One blank row is offered for adding a new member.
    extra = 1
    fields = (
        "member",
        "kind",
        "description",
        "order",
        "is_reviewed",
    )
    ordering = (
        "order",
        "member__login",
    )
    raw_id_fields = ("member",)


class BaseOwaspAdminMixin:
"""Base mixin for OWASP admin classes providing common patterns."""
Expand Down Expand Up @@ -79,40 +91,6 @@ def _format_github_link(self, repository):
custom_field_owasp_url.short_description = "OWASP 🔗"


class LeaderAdminMixin(BaseOwaspAdminMixin):
"""Admin mixin for entities that can have leaders."""

actions = ("approve_suggested_leaders",)
filter_horizontal = (
"leaders",
"suggested_leaders",
)

def approve_suggested_leaders(self, request, queryset):
"""Approve suggested leaders for selected entities."""
total_approved = 0
for entity in queryset:
suggestions = entity.suggested_leaders.all()
if count := suggestions.count():
entity.leaders.add(*suggestions)
total_approved += count
entity_name = entity.name if hasattr(entity, "name") else str(entity)
self.message_user(
request,
f"Approved {count} leader suggestions for {entity_name}",
messages.SUCCESS,
)

if total_approved:
self.message_user(
request,
f"Total approved suggestions: {total_approved}",
messages.INFO,
)

approve_suggested_leaders.short_description = "Approve suggested leaders"


class StandardOwaspAdminMixin(BaseOwaspAdminMixin):
"""Standard mixin for simple OWASP admin classes."""

Expand Down
6 changes: 3 additions & 3 deletions backend/apps/owasp/admin/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

from apps.owasp.models.project import Project

from .mixins import GenericEntityAdminMixin, LeaderAdminMixin
from .mixins import EntityMemberInline, GenericEntityAdminMixin


class ProjectAdmin(admin.ModelAdmin, GenericEntityAdminMixin, LeaderAdminMixin):
class ProjectAdmin(admin.ModelAdmin, GenericEntityAdminMixin):
"""Admin for Project model."""

autocomplete_fields = (
Expand All @@ -16,7 +16,7 @@ class ProjectAdmin(admin.ModelAdmin, GenericEntityAdminMixin, LeaderAdminMixin):
"owners",
"repositories",
)
filter_horizontal = LeaderAdminMixin.filter_horizontal
inlines = [EntityMemberInline]
list_display = (
"custom_field_name",
"created_at",
Expand Down
Loading
Loading