Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
5062c30
replace scraper logic with github .md files parsing
rudransh-shrivastava Sep 14, 2025
ca2e0e1
Update code(add tests and coderabbit suggestions)
rudransh-shrivastava Sep 15, 2025
044c95a
Update code
rudransh-shrivastava Sep 15, 2025
c0804c1
reduce cognitive complexity
rudransh-shrivastava Sep 15, 2025
277c16e
Update text
rudransh-shrivastava Sep 15, 2025
158c76c
Merge branch 'main' into feature/migrate-scraper-completely
rudransh-shrivastava Sep 15, 2025
e5559d1
Merge branch 'main' into feature/migrate-scraper-completely
kasya Sep 16, 2025
0f55b4b
remove *scrape* files and move logic to update-owasp-organization
rudransh-shrivastava Sep 16, 2025
dd3dff2
update code to fix tests
rudransh-shrivastava Sep 16, 2025
6b4e5ed
add tests for github_update_owasp_organization
rudransh-shrivastava Sep 17, 2025
e51acd7
update code
rudransh-shrivastava Sep 17, 2025
de99125
Merge branch 'main' into feature/migrate-scraper-completely
rudransh-shrivastava Sep 17, 2025
2059679
Merge branch 'main' into feature/migrate-scraper-completely
rudransh-shrivastava Sep 19, 2025
77bceb0
Merge branch 'main' into feature/migrate-scraper-completely
rudransh-shrivastava Sep 20, 2025
1596833
refactor markdown sync logic
rudransh-shrivastava Sep 20, 2025
71ee9b5
update tests
rudransh-shrivastava Sep 20, 2025
f4f8677
add tests for _verify_url method
rudransh-shrivastava Sep 20, 2025
183866d
add tests for _verify_url Project method
rudransh-shrivastava Sep 20, 2025
e5d6f54
Merge branch 'main' into feature/migrate-scraper-completely
rudransh-shrivastava Sep 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -76,24 +76,28 @@ def handle(self, *_args, **options) -> None:
print(f"{prefix:<12} {repository_url}")

try:
owasp_organization, repository = sync_repository(
owasp_organization, synced_repository = sync_repository(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to rename this variable; otherwise the check at line 106 (before) / line 117 (now):
if repository is None: # The entire organization is being synced.
would never pass, because repository would never be None.

Copy link
Collaborator Author

@rudransh-shrivastava rudransh-shrivastava Sep 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can revert this now that it no longer affects my changes. However, I think this is still a bug.

gh_repository,
organization=owasp_organization,
user=owasp_user,
)

# OWASP chapters.
if entity_key.startswith("www-chapter-"):
chapters.append(Chapter.update_data(gh_repository, repository, save=False))
chapters.append(
Chapter.update_data(gh_repository, synced_repository, gh, save=False)
)

# OWASP projects.
elif entity_key.startswith("www-project-"):
projects.append(Project.update_data(gh_repository, repository, save=False))
projects.append(
Project.update_data(gh_repository, synced_repository, gh, save=False)
)

# OWASP committees.
elif entity_key.startswith("www-committee-"):
committees.append(
Committee.update_data(gh_repository, repository, save=False)
Committee.update_data(gh_repository, synced_repository, gh, save=False)
)
except Exception:
logger.exception("Error syncing repository %s", repository_url)
Expand Down
85 changes: 0 additions & 85 deletions backend/apps/owasp/management/commands/owasp_scrape_chapters.py

This file was deleted.

This file was deleted.

104 changes: 0 additions & 104 deletions backend/apps/owasp/management/commands/owasp_scrape_projects.py

This file was deleted.

9 changes: 6 additions & 3 deletions backend/apps/owasp/models/chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,12 @@ def active_chapters_count():
"""Return active chapters count."""
return IndexBase.get_total_count("chapters", search_filters="idx_is_active:true")

def from_github(self, repository) -> None:
def from_github(self, repository, gh) -> None:
"""Update instance based on GitHub repository data.

Args:
repository (github.Repository): The GitHub repository instance.
gh (Github): The authenticated Github client instance.

"""
self.owasp_repository = repository
Expand All @@ -108,6 +109,7 @@ def from_github(self, repository) -> None:
"region": "region",
"tags": "tags",
},
gh,
)

self.created_at = repository.created_at
Expand Down Expand Up @@ -192,12 +194,13 @@ def bulk_save( # type: ignore[override]
BulkSaveModel.bulk_save(Chapter, chapters, fields=fields)

@staticmethod
def update_data(gh_repository, repository, *, save: bool = True) -> Chapter:
def update_data(gh_repository, repository, gh, *, save: bool = True) -> Chapter:
"""Update chapter data from GitHub repository.

Args:
gh_repository (github.Repository): The GitHub repository instance.
repository (github.Repository): The repository data to update from.
gh (Github): The authenticated Github client instance.
save (bool, optional): Whether to save the instance.

Returns:
Expand All @@ -210,7 +213,7 @@ def update_data(gh_repository, repository, *, save: bool = True) -> Chapter:
except Chapter.DoesNotExist:
chapter = Chapter(key=key)

chapter.from_github(repository)
chapter.from_github(repository, gh)
if save:
chapter.save()

Expand Down
7 changes: 4 additions & 3 deletions backend/apps/owasp/models/committee.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __str__(self) -> str:
"""Committee human readable representation."""
return f"{self.name}"

def from_github(self, repository) -> None:
def from_github(self, repository, gh) -> None:
"""Update instance based on GitHub repository data."""
self.owasp_repository = repository

Expand All @@ -46,6 +46,7 @@ def from_github(self, repository) -> None:
"name": "title",
"tags": "tags",
},
gh,
)

self.created_at = repository.created_at
Expand Down Expand Up @@ -75,15 +76,15 @@ def bulk_save(committees, fields=None) -> None: # type: ignore[override]
BulkSaveModel.bulk_save(Committee, committees, fields=fields)

@staticmethod
def update_data(gh_repository, repository, *, save: bool = True) -> "Committee":
def update_data(gh_repository, repository, gh, *, save: bool = True) -> "Committee":
"""Update committee data."""
key = gh_repository.name.lower()
try:
committee = Committee.objects.get(key=key)
except Committee.DoesNotExist:
committee = Committee(key=key)

committee.from_github(repository)
committee.from_github(repository, gh)
if save:
committee.save()

Expand Down
Loading