Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ dev = [
"flake8",
"pre-commit",
"pytest",
"pytest-cov"
"pytest-cov",
"pytest-mock"
]

[project.urls]
Expand Down
23 changes: 8 additions & 15 deletions src/pyosmeta/cli/process_reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@

import pickle

from pydantic import ValidationError
from pyosmeta import ProcessIssues, ReviewModel
from pyosmeta import ProcessIssues
from pyosmeta.github_api import GitHubAPI


Expand All @@ -35,27 +34,21 @@ def main():
# Get all issues for approved packages - load as dict
# TODO: this doesn't have to be in process issues at all. it could fully
# Call the github module
issues = process_review.return_response()
accepted_reviews = process_review.parse_issue_header(issues, 45)
issues = process_review.get_issues()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Above - if you rebase from main (I am pretty sure I fixed this):

labels=["6/pyOS-approved 🚀🚀🚀"] -->
labels=["6/pyOS-approved"]

this is why we had a magical deleted file in our last pr 🙈 i removed the emojis.

Copy link
Contributor Author

@sneakers-the-rat sneakers-the-rat Jul 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merged in main, got the new label!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you!! now we are no longer deleting all of the packages ✨ 😆

accepted_reviews, errors = process_review.parse_issues(issues)
for url, error in errors.items():
print(f"Error in review at url: {url}")
print(error)
print("-" * 20)

# Update gh metrics via api for all packages
repo_endpoints = process_review.get_repo_endpoints(accepted_reviews)
all_reviews = process_review.get_gh_metrics(
repo_endpoints, accepted_reviews
)

# Populate model objects with review data + metrics
final_reviews = {}
for key, review in all_reviews.items():
# First add gh meta to each dict
print("Parsing & validating", key)
try:
final_reviews[key] = ReviewModel(**review)
except ValidationError as ve:
print(key, ":", ve)

with open("all_reviews.pickle", "wb") as f:
pickle.dump(final_reviews, f)
pickle.dump(all_reviews, f)


if __name__ == "__main__":
Expand Down
138 changes: 61 additions & 77 deletions src/pyosmeta/cli/update_review_teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,100 +28,84 @@
from pyosmeta.contributors import ProcessContributors
from pyosmeta.file_io import clean_export_yml, load_pickle
from pyosmeta.github_api import GitHubAPI
from pyosmeta.models import PersonModel
from pyosmeta.models import PersonModel, ReviewModel, ReviewUser
from pyosmeta.utils_clean import get_clean_user


def process_user(
    user: ReviewUser,
    role: str,
    pkg_name: str,
    contribs: dict[str, PersonModel],
    processor: ProcessContributors,
) -> tuple[ReviewUser, dict[str, PersonModel]]:
    """Record one review participant in ``contribs`` and fill in their name.

    Steps performed:

    1. Normalise the GitHub username with ``get_clean_user``.
    2. If that user is not yet a key in ``contribs``, fetch their info via
       ``processor.return_user_info``, stamp ``date_added`` with today's
       date and store a new ``PersonModel`` (mutating ``contribs``).
    3. Add ``pkg_name`` (lower-cased) to the contributor field named by
       ``processor.contrib_types[role][0]`` and add the values in
       ``processor.contrib_types[role][1]`` to ``contributor_type``.
    4. If ``user.name`` is empty, copy the name from the contributor record
       (mutating ``user``).

    Parameters
    ----------
    user : ReviewUser
        The participant as parsed from the review issue.
    role : str
        A key of ``processor.contrib_types`` (e.g. ``"editor"``).
    pkg_name : str
        Name of the package under review.
    contribs : dict[str, PersonModel]
        Known contributors keyed by cleaned GitHub username.
    processor : ProcessContributors
        Supplies ``contrib_types`` and ``return_user_info``.

    Returns
    -------
    tuple[ReviewUser, dict[str, PersonModel]]
        The same ``user`` and ``contribs`` objects that were passed in,
        after any in-place updates.
    """
    gh_user = get_clean_user(user.github_username)

    if gh_user not in contribs:
        # If they aren't already in contribs, add them
        print("Found a new contributor!", gh_user)
        new_contrib = processor.return_user_info(gh_user)
        new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
        try:
            contribs[gh_user] = PersonModel(**new_contrib)
        except ValidationError as ve:
            # No PersonModel could be built, so there is no record to update.
            # Falling through would raise KeyError on contribs[gh_user];
            # report the problem and leave both inputs untouched instead.
            print(ve)
            return user, contribs

    contributor = contribs[gh_user]

    # Update user package contributions (if it's unique)
    review_key = processor.contrib_types[role][0]
    contributor.add_unique_value(review_key, pkg_name.lower())

    # Update user contrib list (if it's unique)
    review_roles = processor.contrib_types[role][1]
    contributor.add_unique_value("contributor_type", review_roles)

    # If user's name is missing in issue, populate from contribs
    if not user.name:
        user.name = contributor.name

    return user, contribs


def main():
github_api = GitHubAPI()
process_contribs = ProcessContributors(github_api, [])

# Two pickle files are outputs of the two other scripts
# use that data to limit web calls
contribs = load_pickle("all_contribs.pickle")
packages = load_pickle("all_reviews.pickle")
contribs: dict[str, PersonModel] = load_pickle("all_contribs.pickle")
packages: dict[str, ReviewModel] = load_pickle("all_reviews.pickle")

contrib_types = process_contribs.contrib_types

for pkg_name, issue_meta in packages.items():
for pkg_name, review in packages.items():
print("Processing review team for:", pkg_name)
for issue_role in contrib_types.keys():
if issue_role == "all_current_maintainers":
# Loop through each maintainer in the list
for i, a_maintainer in enumerate(
issue_meta.all_current_maintainers
):
gh_user = get_clean_user(a_maintainer["github_username"])

if gh_user not in contribs.keys():
print("Found a new contributor!", gh_user)
new_contrib = process_contribs.return_user_info(
gh_user
)
new_contrib["date_added"] = datetime.now().strftime(
"%Y-%m-%d"
)
try:
contribs[gh_user] = PersonModel(**new_contrib)
except ValidationError as ve:
print(ve)

# Update user package contributions (if it's unique)
review_key = contrib_types[issue_role][0]
contribs[gh_user].add_unique_value(
review_key, pkg_name.lower()
)

# Update user contrib list (if it's unique)
review_roles = contrib_types[issue_role][1]
contribs[gh_user].add_unique_value(
"contributor_type", review_roles
)

# If name is missing in issue, populate from contribs
if a_maintainer["name"] == "":
name = getattr(contribs[gh_user], "name")
packages[pkg_name].all_current_maintainers[i][
"name"
] = name

else:
# Else we are processing editors, reviewers...
gh_user = get_clean_user(
getattr(packages[pkg_name], issue_role)["github_username"]
)

if gh_user not in contribs.keys():
# If they aren't already in contribs, add them
print("Found a new contributor!", gh_user)
new_contrib = process_contribs.return_user_info(gh_user)
new_contrib["date_added"] = datetime.now().strftime(
"%Y-%m-%d"
for role in contrib_types.keys():
user: list[ReviewUser] | ReviewUser = getattr(review, role)

# handle lists or singleton users separately
if isinstance(user, list):
for i, a_user in enumerate(user):
a_user, contribs = process_user(
a_user, role, pkg_name, contribs, process_contribs
)
try:
contribs[gh_user] = PersonModel(**new_contrib)
except ValidationError as ve:
print(ve)

# Update user package contributions (if it's unique)
review_key = contrib_types[issue_role][0]
contribs[gh_user].add_unique_value(
review_key, pkg_name.lower()
# update individual user in reference to issue list
user[i] = a_user
elif isinstance(user, ReviewUser):
user, contribs = process_user(
user, role, pkg_name, contribs, process_contribs
)

# Update user contrib list (if it's unique)
review_roles = contrib_types[issue_role][1]
contribs[gh_user].add_unique_value(
"contributor_type", review_roles
setattr(review, role, user)
else:
raise TypeError(
"Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
)

# If users's name is missing in issue, populate from contribs
if getattr(issue_meta, issue_role)["name"] == "":
attribute_value = getattr(packages[pkg_name], issue_role)
attribute_value["name"] = getattr(
contribs[gh_user], "name"
)

# Export to yaml
contribs_ls = [model.model_dump() for model in contribs.values()]
pkgs_ls = [model.model_dump() for model in packages.values()]
Expand Down
3 changes: 1 addition & 2 deletions src/pyosmeta/contributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ def __init__(self, github_api: GitHubAPI, json_files: List) -> None:
]

self.contrib_types = {
"reviewer_1": ["packages_reviewed", ["reviewer", "peer-review"]],
"reviewer_2": ["packages_reviewed", ["reviewer", "peer-review"]],
"reviewers": ["packages_reviewed", ["reviewer", "peer-review"]],
"editor": ["packages_editor", ["editor", "peer-review"]],
"submitting_author": [
"packages_submitted",
Expand Down
2 changes: 1 addition & 1 deletion src/pyosmeta/file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def clean_yaml_file(filename):


def clean_export_yml(
a_dict: Dict[str, Union[str, List[str]]], filename: str
a_dict: Dict[str, Union[str, List[str]]] | List[dict], filename: str
) -> None:
"""Inputs a dictionary with keys - contribs or packages.
It then converse to a list for export, and creates a cleaned
Expand Down
15 changes: 15 additions & 0 deletions src/pyosmeta/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from pyosmeta.models.base import (
GhMeta,
PersonModel,
ReviewModel,
ReviewUser,
UrlValidatorMixin,
)

__all__ = [
"UrlValidatorMixin",
"PersonModel",
"GhMeta",
"ReviewModel",
"ReviewUser",
]
86 changes: 38 additions & 48 deletions src/pyosmeta/models.py → src/pyosmeta/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"""

import re
from typing import Optional, Set, Union
from datetime import datetime
from typing import Any, Optional, Set, Union

import requests
from pydantic import (
Expand Down Expand Up @@ -202,6 +203,21 @@ def clean_date(cls, a_date: Optional[str]) -> str:
return clean_date(a_date)


class ReviewUser(BaseModel):
    """Minimal model of a github user, used in several places in review parsing.

    Both fields are cleaned after standard validation: the GitHub profile
    URL prefix is removed from ``github_username`` and square brackets
    (markdown link remnants) are removed from ``name``.
    """

    # Human-readable name as written in the review issue.
    name: str
    # GitHub handle; a leading profile URL is stripped by the validator below.
    github_username: str

    @field_validator("github_username", mode="after")
    @classmethod
    def deurl_github_username(cls, github_username: str) -> str:
        """Remove ``https://github.com/`` from the username."""
        return github_username.replace("https://github.com/", "")

    @field_validator("name", mode="after")
    @classmethod
    def demarkdown_name(cls, name: str) -> str:
        """Remove ``[`` and ``]`` characters from the name."""
        return re.sub(r"\[|\]", "", name)


class ReviewModel(BaseModel):
# Make sure model populates both aliases and original attr name
model_config = ConfigDict(
Expand All @@ -214,23 +230,23 @@ class ReviewModel(BaseModel):
package_description: str = Field(
"", validation_alias=AliasChoices("one-line_description_of_package")
)
submitting_author: dict[str, str | None] = {}
all_current_maintainers: list[dict[str, str | None]] = {}
repository_link: str | None = None
submitting_author: ReviewUser | None = None
all_current_maintainers: list[ReviewUser] = Field(default_factory=list)
repository_link: str
version_submitted: Optional[str] = None
categories: Optional[list[str]] = None
editor: dict[str, str | None] = {}
reviewer_1: dict[str, str | None] = {}
reviewer_2: dict[str, str | None] = {}
editor: ReviewUser | None = None
eic: ReviewUser | None = None
reviewers: list[ReviewUser] = Field(default_factory=list)
archive: str | None = None
version_accepted: str | None = None
date_accepted: str | None = Field(
default=None,
validation_alias=AliasChoices("Date accepted", "date_accepted"),
)
created_at: str = None
updated_at: str = None
closed_at: Optional[str] = None
created_at: datetime = None
updated_at: datetime = None
closed_at: Optional[datetime] = None
issue_link: str = None
joss: Optional[str] = None
partners: Optional[list[str]] = None
Expand All @@ -255,22 +271,6 @@ def clean_date_review(cls, a_date: Optional[str]) -> str:
else:
return f"{new_date[2]}-{new_date[0]}-{new_date[1]}"

@field_validator(
"created_at",
"updated_at",
"closed_at",
mode="before",
)
@classmethod
def clean_date(cls, a_date: Optional[str]) -> str:
"""Cleans up a datetime from github and returns a date string

Runs the general clean_date function in this module as a validator.

"""

return clean_date(a_date)

@field_validator(
"package_name",
mode="before",
Expand Down Expand Up @@ -310,33 +310,12 @@ def clean_markdown_url(cls, repo: str) -> str:
else:
return repo

@field_validator(
"editor",
"reviewer_1",
"reviewer_2",
mode="before",
)
@classmethod
def clean_gh_url(cls, user: dict[str, str]) -> dict[str, str]:
"""Remove markdown link remnants from gh usernames and name.

Sometimes editors and reviewers add names using github links.
Remove the link data.
"""

user["github_username"] = user["github_username"].replace(
"https://github.com/", ""
)
user["name"] = re.sub(r"\[|\]", "", user["name"])

return user

@field_validator(
"categories",
mode="before",
)
@classmethod
def clean_categories(cls, categories: list[str]) -> list[str]:
def clean_categories(cls, categories: list[str]) -> list[str] | None:
"""Make sure each category in the list is a valid value.

Valid pyos software categories are:
Expand All @@ -358,6 +337,8 @@ def clean_categories(cls, categories: list[str]) -> list[str]:
list[str]
List of cleaned categories.
"""
if categories is None:
return None

valid_categories = {
"data-processing": "data-processing-munging",
Expand All @@ -375,3 +356,12 @@ def clean_categories(cls, categories: list[str]) -> list[str]:
# No match found, keep the original category
cleaned_cats.append(category)
return cleaned_cats

@field_validator("all_current_maintainers", mode="before")
@classmethod
def listify(cls, item: Any):
    """Wrap a non-list value in a list before any other validation runs.

    The field is expected to be plural, so a single value is turned into a
    one-element list; a value that is already a list is returned unchanged.
    """
    return item if isinstance(item, list) else [item]
Loading