diff --git a/pyproject.toml b/pyproject.toml
index 1ce0fb6..138cded 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,8 @@ dev = [
"flake8",
"pre-commit",
"pytest",
- "pytest-cov"
+ "pytest-cov",
+ "pytest-mock"
]
[project.urls]
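
`pytest-mock` exposes `unittest.mock` through its `mocker` fixture. A minimal sketch of how it might stub the network layer in this suite (the test name and empty return value are illustrative, not from this PR):

```
# hypothetical test using pytest-mock's `mocker` fixture
from pyosmeta.github_api import GitHubAPI
from pyosmeta.parse_issues import ProcessIssues


def test_get_issues_empty(mocker):
    api = GitHubAPI()
    # stub the real network call made by ProcessIssues.get_issues()
    mocker.patch.object(api, "return_response", return_value=[])
    assert ProcessIssues(api).get_issues() == []
```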
diff --git a/src/pyosmeta/cli/process_reviews.py b/src/pyosmeta/cli/process_reviews.py
index 535ddfa..4de411f 100644
--- a/src/pyosmeta/cli/process_reviews.py
+++ b/src/pyosmeta/cli/process_reviews.py
@@ -18,8 +18,7 @@
import pickle
-from pydantic import ValidationError
-from pyosmeta import ProcessIssues, ReviewModel
+from pyosmeta import ProcessIssues
from pyosmeta.github_api import GitHubAPI
@@ -35,8 +34,12 @@ def main():
# Get all issues for approved packages - load as dict
# TODO: this doesn't have to be in process issues at all. it could fully
# Call the github module
- issues = process_review.return_response()
- accepted_reviews = process_review.parse_issue_header(issues, 45)
+ issues = process_review.get_issues()
+ accepted_reviews, errors = process_review.parse_issues(issues)
+ for url, error in errors.items():
+ print(f"Error in review at url: {url}")
+ print(error)
+ print("-" * 20)
# Update gh metrics via api for all packages
repo_endpoints = process_review.get_repo_endpoints(accepted_reviews)
@@ -44,18 +47,8 @@ def main():
repo_endpoints, accepted_reviews
)
- # Populate model objects with review data + metrics
- final_reviews = {}
- for key, review in all_reviews.items():
- # First add gh meta to each dict
- print("Parsing & validating", key)
- try:
- final_reviews[key] = ReviewModel(**review)
- except ValidationError as ve:
- print(key, ":", ve)
-
with open("all_reviews.pickle", "wb") as f:
- pickle.dump(final_reviews, f)
+ pickle.dump(all_reviews, f)
if __name__ == "__main__":
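
The script now leans on the `(reviews, errors)` contract of `parse_issues`. A sketch of the call shapes, assuming a configured GitHub token:

```
# shapes only; running this makes live GitHub API calls
from pyosmeta.github_api import GitHubAPI
from pyosmeta.parse_issues import ProcessIssues

process_review = ProcessIssues(GitHubAPI())
issues = process_review.get_issues()                   # list[Issue]
reviews, errors = process_review.parse_issues(issues)
# reviews: dict keyed by package name -> ReviewModel
# errors: dict keyed by issue URL -> formatted traceback string
```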
diff --git a/src/pyosmeta/cli/update_review_teams.py b/src/pyosmeta/cli/update_review_teams.py
index 31ea219..8e81445 100644
--- a/src/pyosmeta/cli/update_review_teams.py
+++ b/src/pyosmeta/cli/update_review_teams.py
@@ -28,100 +28,84 @@
from pyosmeta.contributors import ProcessContributors
from pyosmeta.file_io import clean_export_yml, load_pickle
from pyosmeta.github_api import GitHubAPI
-from pyosmeta.models import PersonModel
+from pyosmeta.models import PersonModel, ReviewModel, ReviewUser
from pyosmeta.utils_clean import get_clean_user
+def process_user(
+ user: ReviewUser,
+ role: str,
+ pkg_name: str,
+ contribs: dict[str, PersonModel],
+ processor: ProcessContributors,
+) -> tuple[ReviewUser, dict[str, PersonModel]]:
+ """
+    - Add a new contributor to `contribs` if they aren't there yet (mutating it)
+    - Record this package and review role on the contributor's entry, if not
+      already present (mutating `contribs`)
+    - If the user's name is missing from the issue, populate it from
+      `contribs` (mutating the `user` object)
+ """
+ gh_user = get_clean_user(user.github_username)
+
+ if gh_user not in contribs.keys():
+ # If they aren't already in contribs, add them
+ print("Found a new contributor!", gh_user)
+ new_contrib = processor.return_user_info(gh_user)
+ new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
+ try:
+ contribs[gh_user] = PersonModel(**new_contrib)
+ except ValidationError as ve:
+ print(ve)
+
+ # Update user package contributions (if it's unique)
+ review_key = processor.contrib_types[role][0]
+ contribs[gh_user].add_unique_value(review_key, pkg_name.lower())
+
+ # Update user contrib list (if it's unique)
+ review_roles = processor.contrib_types[role][1]
+ contribs[gh_user].add_unique_value("contributor_type", review_roles)
+
+    # If user's name is missing in issue, populate from contribs
+ if not user.name:
+        user.name = contribs[gh_user].name
+
+ return user, contribs
+
+
def main():
github_api = GitHubAPI()
process_contribs = ProcessContributors(github_api, [])
# Two pickle files are outputs of the two other scripts
# use that data to limit web calls
- contribs = load_pickle("all_contribs.pickle")
- packages = load_pickle("all_reviews.pickle")
+ contribs: dict[str, PersonModel] = load_pickle("all_contribs.pickle")
+ packages: dict[str, ReviewModel] = load_pickle("all_reviews.pickle")
contrib_types = process_contribs.contrib_types
- for pkg_name, issue_meta in packages.items():
+ for pkg_name, review in packages.items():
print("Processing review team for:", pkg_name)
- for issue_role in contrib_types.keys():
- if issue_role == "all_current_maintainers":
- # Loop through each maintainer in the list
- for i, a_maintainer in enumerate(
- issue_meta.all_current_maintainers
- ):
- gh_user = get_clean_user(a_maintainer["github_username"])
-
- if gh_user not in contribs.keys():
- print("Found a new contributor!", gh_user)
- new_contrib = process_contribs.return_user_info(
- gh_user
- )
- new_contrib["date_added"] = datetime.now().strftime(
- "%Y-%m-%d"
- )
- try:
- contribs[gh_user] = PersonModel(**new_contrib)
- except ValidationError as ve:
- print(ve)
-
- # Update user package contributions (if it's unique)
- review_key = contrib_types[issue_role][0]
- contribs[gh_user].add_unique_value(
- review_key, pkg_name.lower()
- )
-
- # Update user contrib list (if it's unique)
- review_roles = contrib_types[issue_role][1]
- contribs[gh_user].add_unique_value(
- "contributor_type", review_roles
- )
-
- # If name is missing in issue, populate from contribs
- if a_maintainer["name"] == "":
- name = getattr(contribs[gh_user], "name")
- packages[pkg_name].all_current_maintainers[i][
- "name"
- ] = name
-
- else:
- # Else we are processing editors, reviewers...
- gh_user = get_clean_user(
- getattr(packages[pkg_name], issue_role)["github_username"]
- )
-
- if gh_user not in contribs.keys():
- # If they aren't already in contribs, add them
- print("Found a new contributor!", gh_user)
- new_contrib = process_contribs.return_user_info(gh_user)
- new_contrib["date_added"] = datetime.now().strftime(
- "%Y-%m-%d"
+ for role in contrib_types.keys():
+ user: list[ReviewUser] | ReviewUser = getattr(review, role)
+
+ # handle lists or singleton users separately
+ if isinstance(user, list):
+ for i, a_user in enumerate(user):
+ a_user, contribs = process_user(
+ a_user, role, pkg_name, contribs, process_contribs
)
- try:
- contribs[gh_user] = PersonModel(**new_contrib)
- except ValidationError as ve:
- print(ve)
-
- # Update user package contributions (if it's unique)
- review_key = contrib_types[issue_role][0]
- contribs[gh_user].add_unique_value(
- review_key, pkg_name.lower()
+ # update individual user in reference to issue list
+ user[i] = a_user
+ elif isinstance(user, ReviewUser):
+ user, contribs = process_user(
+ user, role, pkg_name, contribs, process_contribs
)
-
- # Update user contrib list (if it's unique)
- review_roles = contrib_types[issue_role][1]
- contribs[gh_user].add_unique_value(
- "contributor_type", review_roles
+ setattr(review, role, user)
+ else:
+ raise TypeError(
+ "Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
)
- # If users's name is missing in issue, populate from contribs
- if getattr(issue_meta, issue_role)["name"] == "":
- attribute_value = getattr(packages[pkg_name], issue_role)
- attribute_value["name"] = getattr(
- contribs[gh_user], "name"
- )
-
# Export to yaml
contribs_ls = [model.model_dump() for model in contribs.values()]
pkgs_ls = [model.model_dump() for model in packages.values()]
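
The list-or-singleton dispatch in `main()` is the part most worth internalizing. Here is a self-contained sketch of the same pattern with a stubbed `process_user` (the stub is hypothetical and skips the real contributor lookups):

```
from dataclasses import dataclass


@dataclass
class User:
    github_username: str
    name: str = ""


def process_user(user: User) -> User:
    # stand-in: the real function also updates the contribs mapping
    user.name = user.name or user.github_username.title()
    return user


review = {"editor": User("octocat"), "reviewers": [User("rev1"), User("rev2")]}
for role, user in review.items():
    if isinstance(user, list):
        for i, a_user in enumerate(user):
            user[i] = process_user(a_user)  # write back into the list
    else:
        user = process_user(user)
    review[role] = user
```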
diff --git a/src/pyosmeta/contributors.py b/src/pyosmeta/contributors.py
index 0925ac6..6ffd82b 100644
--- a/src/pyosmeta/contributors.py
+++ b/src/pyosmeta/contributors.py
@@ -39,8 +39,7 @@ def __init__(self, github_api: GitHubAPI, json_files: List) -> None:
]
self.contrib_types = {
- "reviewer_1": ["packages_reviewed", ["reviewer", "peer-review"]],
- "reviewer_2": ["packages_reviewed", ["reviewer", "peer-review"]],
+ "reviewers": ["packages_reviewed", ["reviewer", "peer-review"]],
"editor": ["packages_editor", ["editor", "peer-review"]],
"submitting_author": [
"packages_submitted",
diff --git a/src/pyosmeta/file_io.py b/src/pyosmeta/file_io.py
index 570cff4..ad82125 100644
--- a/src/pyosmeta/file_io.py
+++ b/src/pyosmeta/file_io.py
@@ -168,7 +168,7 @@ def clean_yaml_file(filename):
def clean_export_yml(
- a_dict: Dict[str, Union[str, List[str]]], filename: str
+ a_dict: Dict[str, Union[str, List[str]]] | List[dict], filename: str
) -> None:
"""Inputs a dictionary with keys - contribs or packages.
    It then converts to a list for export, and creates a cleaned
diff --git a/src/pyosmeta/models/__init__.py b/src/pyosmeta/models/__init__.py
new file mode 100644
index 0000000..6362b93
--- /dev/null
+++ b/src/pyosmeta/models/__init__.py
@@ -0,0 +1,15 @@
+from pyosmeta.models.base import (
+ GhMeta,
+ PersonModel,
+ ReviewModel,
+ ReviewUser,
+ UrlValidatorMixin,
+)
+
+__all__ = [
+ "UrlValidatorMixin",
+ "PersonModel",
+ "GhMeta",
+ "ReviewModel",
+ "ReviewUser",
+]
diff --git a/src/pyosmeta/models.py b/src/pyosmeta/models/base.py
similarity index 88%
rename from src/pyosmeta/models.py
rename to src/pyosmeta/models/base.py
index 43b6f74..7c61691 100644
--- a/src/pyosmeta/models.py
+++ b/src/pyosmeta/models/base.py
@@ -4,7 +4,8 @@
"""
import re
-from typing import Optional, Set, Union
+from datetime import datetime
+from typing import Any, Optional, Set, Union
import requests
from pydantic import (
@@ -202,6 +203,21 @@ def clean_date(cls, a_date: Optional[str]) -> str:
return clean_date(a_date)
+class ReviewUser(BaseModel):
+ """Minimal model of a github user, used in several places in review parsing"""
+
+ name: str
+ github_username: str
+
+ @field_validator("github_username", mode="after")
+ def deurl_github_username(cls, github_username: str) -> str:
+ return github_username.replace("https://github.com/", "")
+
+ @field_validator("name", mode="after")
+ def demarkdown_name(cls, name: str) -> str:
+ return re.sub(r"\[|\]", "", name)
+
+
class ReviewModel(BaseModel):
# Make sure model populates both aliases and original attr name
model_config = ConfigDict(
@@ -214,23 +230,23 @@ class ReviewModel(BaseModel):
package_description: str = Field(
"", validation_alias=AliasChoices("one-line_description_of_package")
)
- submitting_author: dict[str, str | None] = {}
- all_current_maintainers: list[dict[str, str | None]] = {}
- repository_link: str | None = None
+ submitting_author: ReviewUser | None = None
+ all_current_maintainers: list[ReviewUser] = Field(default_factory=list)
+ repository_link: str
version_submitted: Optional[str] = None
categories: Optional[list[str]] = None
- editor: dict[str, str | None] = {}
- reviewer_1: dict[str, str | None] = {}
- reviewer_2: dict[str, str | None] = {}
+ editor: ReviewUser | None = None
+ eic: ReviewUser | None = None
+ reviewers: list[ReviewUser] = Field(default_factory=list)
archive: str | None = None
version_accepted: str | None = None
date_accepted: str | None = Field(
default=None,
validation_alias=AliasChoices("Date accepted", "date_accepted"),
)
- created_at: str = None
- updated_at: str = None
- closed_at: Optional[str] = None
+ created_at: datetime = None
+ updated_at: datetime = None
+ closed_at: Optional[datetime] = None
issue_link: str = None
joss: Optional[str] = None
partners: Optional[list[str]] = None
@@ -255,22 +271,6 @@ def clean_date_review(cls, a_date: Optional[str]) -> str:
else:
return f"{new_date[2]}-{new_date[0]}-{new_date[1]}"
- @field_validator(
- "created_at",
- "updated_at",
- "closed_at",
- mode="before",
- )
- @classmethod
- def clean_date(cls, a_date: Optional[str]) -> str:
- """Cleans up a datetime from github and returns a date string
-
- Runs the general clean_date function in this module as a validator.
-
- """
-
- return clean_date(a_date)
-
@field_validator(
"package_name",
mode="before",
@@ -310,33 +310,12 @@ def clean_markdown_url(cls, repo: str) -> str:
else:
return repo
- @field_validator(
- "editor",
- "reviewer_1",
- "reviewer_2",
- mode="before",
- )
- @classmethod
- def clean_gh_url(cls, user: dict[str, str]) -> dict[str, str]:
- """Remove markdown link remnants from gh usernames and name.
-
- Sometimes editors and reviewers add names using github links.
- Remove the link data.
- """
-
- user["github_username"] = user["github_username"].replace(
- "https://github.com/", ""
- )
- user["name"] = re.sub(r"\[|\]", "", user["name"])
-
- return user
-
@field_validator(
"categories",
mode="before",
)
@classmethod
- def clean_categories(cls, categories: list[str]) -> list[str]:
+ def clean_categories(cls, categories: list[str]) -> list[str] | None:
"""Make sure each category in the list is a valid value.
Valid pyos software categories are:
@@ -358,6 +337,8 @@ def clean_categories(cls, categories: list[str]) -> list[str]:
list[str]
List of cleaned categories.
"""
+ if categories is None:
+ return None
valid_categories = {
"data-processing": "data-processing-munging",
@@ -375,3 +356,12 @@ def clean_categories(cls, categories: list[str]) -> list[str]:
# No match found, keep the original category
cleaned_cats.append(category)
return cleaned_cats
+
+ @field_validator("all_current_maintainers", mode="before")
+ @classmethod
+ def listify(cls, item: Any):
+ """Make a field that's expected to be plural so before any validation"""
+ if not isinstance(item, list):
+ return [item]
+ else:
+ return item
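
The dict-cleaning logic removed above lives on as per-field validators on `ReviewUser`; a small check of the behavior, assuming `pyosmeta` is importable:

```
from pyosmeta.models import ReviewUser

# markdown links and GitHub URLs are stripped by the field validators
user = ReviewUser(
    name="[Octo Cat]",
    github_username="https://github.com/octocat",
)
assert user.github_username == "octocat"
assert user.name == "Octo Cat"
```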
diff --git a/src/pyosmeta/models/github.py b/src/pyosmeta/models/github.py
new file mode 100644
index 0000000..4a35c86
--- /dev/null
+++ b/src/pyosmeta/models/github.py
@@ -0,0 +1,129 @@
+"""
+Models for GitHub API responses, autogenerated from JSON schema
+and then deduplicated and trimmed to just the fields we use here.
+
+e.g. given the Issue schema at https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#get-an-issue
+Using `datamodel-code-generator` ( https://docs.pydantic.dev/latest/integrations/datamodel_code_generator/ )
+
+```
+datamodel-codegen --input issue_schema.json --input-file-type jsonschema --output issue_models.py
+```
+"""
+
+# generated by datamodel-codegen:
+# filename: issue_schema.json
+# timestamp: 2024-07-02T23:19:14+00:00
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, List, Literal, Optional, Union
+
+from pydantic import AnyUrl, BaseModel, ConfigDict, Field
+
+
+class User(BaseModel):
+ name: Optional[str] = None
+ email: Optional[str] = None
+ login: str = Field(..., examples=["octocat"])
+ id: int = Field(..., examples=[1])
+ node_id: Optional[str] = Field(None, examples=["MDQ6VXNlcjE="])
+ avatar_url: Optional[AnyUrl] = Field(
+ None, examples=["https://github.com/images/error/octocat_happy.gif"]
+ )
+ gravatar_id: Optional[str] = Field(
+ None, examples=["41d064eb2195891e12d0413f63227ea7"]
+ )
+ url: Optional[AnyUrl] = Field(
+ None, examples=["https://api.github.com/users/octocat"]
+ )
+ html_url: Optional[AnyUrl] = Field(
+ None, examples=["https://github.com/octocat"]
+ )
+ type: str = Field(..., examples=["User"])
+ site_admin: bool = False
+ starred_at: Optional[str] = Field(
+ None, examples=['"2020-07-09T00:17:55Z"']
+ )
+
+ model_config = ConfigDict(extra="allow")
+
+
+class Assignee(User): ...
+
+
+class Creator(User): ...
+
+
+class ClosedBy(User): ...
+
+
+class Owner(User): ...
+
+
+class Labels(BaseModel):
+ id: Optional[int] = None
+ node_id: Optional[str] = None
+ url: Optional[AnyUrl] = None
+ name: Optional[str] = None
+ description: Optional[str] = None
+ color: Optional[str] = None
+ default: Optional[bool] = None
+
+
+class Issue(BaseModel):
+ id: Optional[int] = None
+ node_id: Optional[str] = None
+ url: AnyUrl = Field(
+ ...,
+ description="URL for the issue",
+ examples=["https://api.github.com/repositories/42/issues/1"],
+ )
+ repository_url: AnyUrl
+ labels_url: Optional[AnyUrl] = None
+ comments_url: Optional[AnyUrl] = None
+ events_url: Optional[AnyUrl] = None
+ html_url: Optional[AnyUrl] = None
+ number: int = Field(
+ ...,
+ description="Number uniquely identifying the issue within its repository",
+ examples=[42],
+ )
+ state: Optional[Literal["open", "closed"]] = Field(
+ None,
+ description="State of the issue; either 'open' or 'closed'",
+ examples=["open"],
+ )
+ title: str = Field(
+ ...,
+ description="Title of the issue",
+ examples=["Widget creation fails in Safari on OS X 10.8"],
+ )
+ body: Optional[str] = Field(
+ None,
+ description="Contents of the issue",
+ examples=[
+ "It looks like the new widget form is broken on Safari. When I try and create the widget, Safari crashes. This is reproducible on 10.8, but not 10.9. Maybe a browser bug?"
+ ],
+ )
+ user: Optional[User] = None
+ labels: List[Union[str, Labels]] = Field(
+ default_factory=list,
+ description="Labels to associate with this issue; pass one or more label names to replace the set of labels on this issue; send an empty array to clear all labels from the issue; note that the labels are silently dropped for users without push access to the repository",
+ examples=["bug", "registration"],
+ )
+ assignee: Optional[Assignee] = None
+ assignees: Optional[List[Assignee]] = None
+ locked: bool = False
+ comments: int
+ closed_at: Optional[datetime] = None
+ created_at: datetime
+ updated_at: datetime
+ draft: Optional[bool] = None
+ closed_by: Optional[ClosedBy] = None
+ body_html: Optional[str] = None
+ body_text: Optional[str] = None
+ timeline_url: Optional[AnyUrl] = None
+ reactions: Optional[Any] = Field(None, title="Reaction Rollup")
+
+ model_config = ConfigDict(extra="allow")
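
Only a handful of `Issue` fields are required (`url`, `repository_url`, `number`, `title`, `comments`, `created_at`, `updated_at`); a minimal construction with placeholder values:

```
from pyosmeta.models.github import Issue

issue = Issue(
    url="https://api.github.com/repositories/42/issues/1",
    repository_url="https://api.github.com/repos/pyOpenSci/software-submission",
    number=1,
    title="`example` Review",
    comments=0,
    created_at="2024-01-01T00:00:00Z",  # pydantic coerces ISO strings
    updated_at="2024-01-01T00:00:00Z",
)
assert issue.state is None  # optional fields default to None
```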
diff --git a/src/pyosmeta/parse_issues.py b/src/pyosmeta/parse_issues.py
index f2ed812..65cef1c 100644
--- a/src/pyosmeta/parse_issues.py
+++ b/src/pyosmeta/parse_issues.py
@@ -1,10 +1,16 @@
import re
+import traceback
import warnings
from dataclasses import dataclass
-from typing import Any
+from typing import Any, List, Union
+
+from pydantic import ValidationError
+
+from pyosmeta.models import ReviewModel, ReviewUser
+from pyosmeta.models.github import Issue
from .github_api import GitHubAPI
-from .utils_clean import clean_date_accepted_key, clean_markdown
+from .utils_clean import clean_date_accepted_key
from .utils_parse import parse_user_names
@@ -40,7 +46,7 @@ def __init__(self, github_api: GitHubAPI):
"forks_count",
]
- def return_response(self) -> list[dict[str, object]]:
+ def get_issues(self) -> list[Issue]:
"""
         Call return_response on the GitHub API object.
@@ -52,14 +58,20 @@ def return_response(self) -> list[dict[str, object]]:
-            List of dict items each containing a review issue
+            List of :class:`.Issue` models, one per review issue
"""
- return self.github_api.return_response()
+ issues = self.github_api.return_response()
+ return [Issue(**i) for i in issues]
def _is_review_role(self, string: str) -> bool:
"""
-        Returns true if starts with any of the 3 items below.
+        Return True if the string contains any review role keyword.
"""
- return string.startswith(
- ("Submitting", "Editor", "Reviewer", "All current maintainers")
+        return any(
+            substr in string.lower()
+            for substr in ("submitting", "editor", "eic", "reviewer", "maintainers")
)
def _remove_extra_chars(self, a_str: str) -> str:
@@ -71,20 +83,187 @@ def _remove_extra_chars(self, a_str: str) -> str:
return a_str.strip()
- def parse_issue_header(
- self, issues: list[str], total_lines: int = 20
- ) -> dict[str, str]:
+ def _split_header(self, body: str) -> tuple[str, str]:
+ """
+        Split an issue body into the header and the body at the first ---,
+        rejoining the remainder if the body contains extra unexpected ---'s
+ """
+ parts = body.split("---")
+ if len(parts) > 2:
+ body_remainder = "---".join(parts[1:])
+ elif len(parts) == 1:
+            # e.g. if we just have a header
+ return parts[0], ""
+ else:
+ body_remainder = parts[1]
+ return parts[0], body_remainder
+
+ def _header_as_dict(self, header: str) -> dict[str, str]:
+ """
+ Preprocess each of the lines in the header, splitting on the
+ colon, returning key-value pairs of unprocessed header fields.
+
+ Since values are heterogeneous, don't do any processing here,
+ but keys can be preprocessed into a canonical form
+ """
+ lines = header.split("\n")
+ meta = {}
+ for line in lines:
+ # remove asterisks around keys
+ line = line.strip()
+ line = re.sub(r"^\*\*(.*?:)\*\*", r"\1", line)
+
+ # split on first occurrence of non-url colon
+ line_split = re.split(r"(? dict:
+ """
+ Combine reviewer_1, reviewer_2, ... to a single reviewers field
+ """
+ if "reviewers" not in meta:
+ meta["reviewers"] = []
+
+ # first gather reviewers (don't mutate object we're iterating over)
+ delete_keys = []
+ for key, val in meta.items():
+ if key.startswith("reviewer") and key != "reviewers":
+ meta["reviewers"].append(val)
+ delete_keys.append(key)
+
+ meta = {k: v for k, v in meta.items() if k not in delete_keys}
+
+ # later processing steps expect this to be a comma separated list
+ # this is a string if we only specified the `reviewers` key in the review issue,
+ # but a list if we specified `reviewer_1:` etc.
+ if isinstance(meta["reviewers"], list):
+ meta["reviewers"] = ", ".join(meta["reviewers"])
+
+ return meta
+
+ def _add_issue_metadata(
+ self, meta: dict, issue: Issue, keys: list[str]
+ ) -> dict:
+ """
+ Add keys from the review issue to the review model
+ """
+ for key in keys:
+ meta[key] = getattr(issue, key)
+ return meta
+
+ def _preprocess_meta(self, meta: dict) -> dict:
+ """
+ Apply preprocessing steps before parsing specific fields in issue meta
+ """
+ meta = self._combine_reviewers(meta)
+ # add other preprocessing steps here...
+ return meta
+
+ def _postprocess_meta(self, meta: dict, body: List[str]) -> dict:
+ """
+ Apply postprocessing steps after parsing individual fields.
+
+        Putting all postprocessing in one method for now; individual steps
+        should be split out for testing and maintainability if this grows.
+ """
+ meta = clean_date_accepted_key(meta)
+ meta["issue_link"] = str(meta.get("url", "")).replace(
+ "https://api.github.com/repos/", "https://github.com/"
+ )
+ # Get categories and issue review link
+ meta["categories"] = self.get_categories(body, "## Scope", 10)
+ # NOTE: right now the numeric value is hard coded based on the
+ # number of categories listed in the issue template.
+ # this could be made more flexible if it just runs until it runs
+ # out of categories to parse
+ meta["partners"] = self.get_categories(
+ body, "## Community Partnerships", 3
+ )
+
+ return meta
+
+ def _parse_field(self, key: str, val: str) -> Any:
+ """
+ Method dispatcher for parsing specific header fields.
+ If none found, return value unchanged.
+ """
+ if self._is_review_role(key):
+ return self.get_contributor_data(val)
+ # elif False:
+        #     add other conditions here for special processing of fields...
+ # pass
+ else:
+ return val
+
+ def parse_issue(self, issue: Issue | str) -> ReviewModel:
+ """
+        Parse a single review issue for its metadata.
+
+        Parameters
+        ----------
+        issue : :class:`.Issue` | str
+            The issue to parse. If a string, it is treated as the issue body,
+            which skips the postprocessing steps that need issue metadata.
+ """
+ if isinstance(issue, Issue):
+ issue_body = issue.body
+ else:
+ # issue body is passed in as string
+ issue_body = issue
+
+ # Separate the issue's header from its body
+ header, body = self._split_header(issue_body)
+ body = [line.strip() for line in body.split("\n")]
+
+ # Process the header...
+ meta = self._header_as_dict(header)
+ meta = self._preprocess_meta(meta)
+
+ model = {}
+ for key, val in meta.items():
+ model[key] = self._parse_field(key, val)
+
+ # Add any requested metadata from the Issue object to the review object
+ if isinstance(issue, Issue):
+ model = self._add_issue_metadata(
+ model,
+ issue,
+ [
+ "url",
+ "created_at",
+ "updated_at",
+ "closed_at",
+ "repository_url",
+ ],
+ )
+
+ # Finalize review model before casting
+ model = self._postprocess_meta(model, body)
+
+ return ReviewModel(**model)
+
+ def parse_issues(
+ self, issues: list[Issue]
+ ) -> tuple[dict[str, ReviewModel], dict[str, str]]:
"""Parses through each header comment for selected reviews and returns
review metadata.
Parameters
----------
- issues : list
- List returned from the return_response method that contains the
+ issues : list[:class:`.Issue`]
+ List returned from the get_issues method that contains the
metadata at the top of each issue
- total_lines : int
- an integer representing the total number of lines to parse in the
- issue header. Default = 20
Returns
-------
@@ -94,82 +273,22 @@ def parse_issue_header(
-        See meta_dates below for the full list of keys.
+        A dict of parsed reviews keyed by package name, and a dict of
+        parsing error tracebacks keyed by issue URL.
"""
- meta_dates = ["created_at", "updated_at", "closed_at"]
-
- review = {}
- review_final = {}
+ reviews = {}
+ errors = {}
for issue in issues:
- # Return issue comment as cleaned list + package name
- pkg_name, body_data = self.comment_to_list(issue)
- if not pkg_name:
- continue
-
- review[pkg_name] = self.get_issue_meta(body_data, total_lines)
-
- # Normalize date accepted key to be the same in each review
- review[pkg_name] = clean_date_accepted_key(review[pkg_name])
-
- # Add issue open and close date to package meta from GH response
- # Date cleaning happens via pydantic validator not here
- for a_date in meta_dates:
- review[pkg_name][a_date] = issue[a_date]
-
- review[pkg_name]["issue_link"] = issue["url"].replace(
- "https://api.github.com/repos/", "https://github.com/"
- )
-
- # Get categories and issue review link
- review[pkg_name]["categories"] = self.get_categories(
- body_data, "## Scope", 10
- )
- # NOTE: right now the numeric value is hard coded based on the
- # number of categories listed in the issue template.
- # this could be made more flexible if it just runs until it runs
- # out of categories to parse
- review[pkg_name]["partners"] = self.get_categories(
- body_data, "## Community Partnerships", 3
- )
- # TODO: the current workflow will not parse domains
- # add a separate workflow to parse domains and add them to the
- # categories list
- # review[pkg_name]["domains"] = self.get_categories(body_data,
- # '## Domains',
- # 3)
-
- # Only return keys for metadata that we need
- final_keys = [
- "submitting_author",
- "all_current_maintainers",
- "package_name",
- "one-line_description_of_package",
- "repository_link",
- "version_submitted",
- "editor",
- "reviewer_1",
- "reviewer_2",
- "archive",
- "version_accepted",
- "joss_doi",
- "date_accepted",
- "categories",
- "partners",
- "domain",
- "created_at",
- "updated_at",
- "closed_at",
- "issue_link",
- "categories",
- ]
-
- review_final[pkg_name] = {
- key: review[pkg_name][key]
- for key in final_keys
- if key in review[pkg_name].keys()
- }
-
- return review_final
-
- def get_contributor_data(self, line: list[str]) -> dict[str, str | int]:
+ try:
+ review = self.parse_issue(issue)
+ reviews[review.package_name] = review
+ except ValidationError as e:
+ errors[str(issue.url)] = "\n".join(
+ traceback.format_exception(e)
+ )
+
+ return reviews, errors
+
+ def get_contributor_data(
+ self, line: str
+ ) -> Union[ReviewUser, List[ReviewUser]]:
"""Parse names for various review roles from issue metadata.
Parameters
@@ -184,67 +303,16 @@ def get_contributor_data(self, line: list[str]) -> dict[str, str | int]:
Containing the metadata for a submitting author, reviewer, or
maintainer(s).
"""
-
- meta = {}
- a_key = line[0].lower().replace(" ", "_")
-
- if line[0].startswith("All current maintainers"):
- names = line[1].split(",")
- meta[a_key] = []
- for name in names:
- # Add each maintainer to the dict
- a_maint = parse_user_names(username=name)
- meta[a_key].append(a_maint)
- else:
- names = parse_user_names(line[1])
- meta[a_key] = names
-
- return meta
-
- def get_issue_meta(
- self,
- body_data: list[str],
- end_range: int,
- ) -> dict[str, str]:
- """Process a single review returning metadata for that review.
-
- Parse through a list of strings, each of which represents a line in the
- first comment of a review. Return the cleaned review metadata.
-
- Parameters
- ----------
- body_data : list
- A list containing all body data for the top comment in an issue.
- end_range : int
- The number of lines to parse at the top of the issue (this may
- change over time so this variable allows us to have different
- processing based upon the date of the issue being opened)
-
- Returns
- -------
- dict
- """
- issue_meta = {}
- # TODO: change to for line in review_comment
- for single_line in body_data[0:end_range]:
- meta = {}
- a_key = single_line[0].lower().replace(" ", "_")
- # If the line is for a review role - editor, maintainer, reviewer
- if self._is_review_role(single_line[0]):
- # Collect metadata for each review role
- meta = self.get_contributor_data(single_line)
- elif len(single_line) > 1:
- meta[a_key] = single_line[1].strip()
- else:
- meta[a_key] = self._remove_extra_chars(single_line[0])
-
- issue_meta.update(meta)
-
- return issue_meta
+ users = line.split(",")
+ models = [parse_user_names(username=user) for user in users]
+ models = [model for model in models if model is not None]
+ if len(models) == 1:
+ models = models[0]
+ return models
# TODO: decide if this belongs here or in the github obj?
def get_repo_endpoints(
- self, review_issues: dict[str, str]
+ self, review_issues: dict[str, ReviewModel]
) -> dict[str, str]:
"""
Returns a list of repository endpoints
@@ -263,7 +331,7 @@ def get_repo_endpoints(
all_repos = {}
for a_package in review_issues.keys():
- repo = review_issues[a_package]["repository_link"].strip("/")
+ repo = review_issues[a_package].repository_link.strip("/")
owner, repo = repo.split("/")[-2:]
# TODO: could be simpler code - Remove any link remnants
pattern = r"[\(\)\[\]?]"
@@ -274,65 +342,12 @@ def get_repo_endpoints(
)
return all_repos
- def comment_to_list(self, issue: dict[str, str]) -> tuple[str, list[str]]:
- """Parses the first comment in a pyOpenSci review issue.
-
- Returns the package name
- and the body of the comment parsed into a list of elements.
-
- Parameters
- ----------
- issue : dict
- A dictionary containing the json response for an issue comment.
-
- Returns
- -------
- pkg_name : str
- The name of the package
- comment : list
- A list containing the comment elements in order
- """
-
- body = issue["body"]
- # Clean line breaks (could be done with a regex too)
- lines = body.split("\n")
- lines = [a_line.strip("\r").strip() for a_line in lines]
- # Some users decide to hold the issue titles.
- # For those, clean the markdown bold ** element
- lines = [
- line.replace("**", "").strip()
- for line in lines
- if line.strip() != ""
- ]
- # You need a space after : or else it will break https:// in two
- body_data = [line.split(": ") for line in lines if line.strip() != ""]
-
- # Loop through issue header and grab relevant review metadata
- name_index = next(
- (
- i
- for i, sublist in enumerate(body_data)
- if sublist[0] == "Package Name"
- ),
- None,
- )
-
- if name_index is None:
- warnings.warn(
- "Package Name not found in the issue comment.", UserWarning
- )
- pkg_name = "missing_name"
- else:
- pkg_name = body_data[name_index][1]
-
- return clean_markdown(pkg_name), body_data
-
# Rename to process_gh_metrics?
def get_gh_metrics(
self,
endpoints: dict[str, str],
- reviews: dict[str, dict[str, Any]],
- ) -> dict[str, dict[str, Any]]:
+ reviews: dict[str, ReviewModel],
+ ) -> dict[str, ReviewModel]:
"""
Get GitHub metrics for each review based on provided endpoints.
@@ -361,7 +376,7 @@ def get_gh_metrics(
self.github_api.get_last_commit(url)
)
# Add github meta to review metadata
- reviews[pkg_name]["gh_meta"] = pkg_meta[pkg_name]
+ reviews[pkg_name].gh_meta = pkg_meta[pkg_name]
return reviews
@@ -407,16 +422,15 @@ def process_repo_meta(self, url: str) -> dict[str, Any]:
    # This works - I could just make it more generic and remove fmt since it's
# not used and replace it with a number of values and a test string
def get_categories(
- self, issue_list: list[list[str]], section_str: str, num_vals: int
- ) -> list[str]:
+ self, issue_list: list[str], section_str: str, num_vals: int
+ ) -> list[str] | None:
"""Parse through a pyOS review issue and grab categories associated
with a package
Parameters
----------
-        issue_list : list[list[str]]
+        issue_list : list[str]
- The first comment from the issue split into lines and then the
- lines split as by self.comment_to_list()
+ The body of the comment from the issue split into lines
section_str : str
The section string to find where the categories live in the review
@@ -428,22 +442,22 @@ def get_categories(
3 partner options.
"""
# Find the starting index of the category section
- # This will be more robust if we use starts_with rather than in i think
- try:
- index = next(
- i
- for i, sublist in enumerate(issue_list)
- if section_str in sublist
- )
- # Iterate starting at the specified section location index
- # find the first line starting with " - ["
- # This represents the first category in a list of categories
- for i in range(index + 1, len(issue_list)):
- if issue_list[i] and issue_list[i][0].startswith("- ["):
- cat_index = i
- break
- except StopIteration:
- print(section_str, "not found in the list.")
+        index = [
+            i for i, line in enumerate(issue_list) if section_str in line
+        ]
+ if len(index) == 0:
+ warnings.warn(f"{section_str} not found in the list")
+ return None
+ index = index[0]
+
+        # find the first checkbox line within the section
+ cat_index = None
+ for i in range(index + 1, len(issue_list)):
+ if issue_list[i] and "- [" in issue_list[i]:
+ cat_index = i
+ break
+ if cat_index is None:
+ warnings.warn(f"List not found for section {section_str}")
return None
# Get checked categories for package
@@ -451,7 +465,7 @@ def get_categories(
# rather than hard coding it
cat_list = issue_list[cat_index : cat_index + num_vals]
selected = [
- item[0] for item in cat_list if re.search(r"- \[[xX]\] ", item[0])
+ item for item in cat_list if re.search(r"- \[[xX]\] ", item)
]
# Above returns a list of list elements that are checked
# Now, clean the extra markdown and only return the category text
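
Putting the new header pipeline together, a sketch of the intermediate shapes on a miniature issue body (values are illustrative; `pi` is a `ProcessIssues` instance built as in the scripts above):

```
body = (
    "Submitting Author: Jane Doe (@janedoe)\n"
    "Package Name: demo\n"
    "Repository Link: https://github.com/org/demo\n"
    "Reviewer 1: @rev1\n"
    "Reviewer 2: @rev2\n"
    "---\n"
    "## Scope\n"
    "- [x] Data retrieval\n"
)
header, rest = pi._split_header(body)  # split at the first ---
meta = pi._combine_reviewers(pi._header_as_dict(header))
# meta["reviewers"] == "@rev1, @rev2"
review = pi.parse_issue(body)  # ReviewModel, minus the Issue-only metadata
```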
diff --git a/src/pyosmeta/utils_parse.py b/src/pyosmeta/utils_parse.py
index 899bfa3..b3b0eb5 100644
--- a/src/pyosmeta/utils_parse.py
+++ b/src/pyosmeta/utils_parse.py
@@ -3,10 +3,11 @@
pyOpenSci review and contributor metadata.
"""
+from pyosmeta.models import ReviewUser
from pyosmeta.utils_clean import clean_name
-def parse_user_names(username: str) -> dict[str, str]:
+def parse_user_names(username: str) -> ReviewUser | None:
"""Parses authors, contributors, editors and usernames from
the requested issues.
@@ -37,11 +38,12 @@ def parse_user_names(username: str) -> dict[str, str]:
"github_username": clean_name(names[1]),
"name": clean_name(names[0]),
}
-
+ elif len(names) == 0:
+ return None
else:
parsed = {
"github_username": clean_name(names[0]),
"name": "",
}
- return parsed
+ return ReviewUser(**parsed)
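
`parse_user_names` now returns a model (or `None` for empty input) instead of a dict. A sketch of the expected round-trip, assuming `clean_name` strips the `@` and parentheses:

```
from pyosmeta.utils_parse import parse_user_names

user = parse_user_names("Jane Doe (@janedoe)")  # "name (@username)" form
assert user.name == "Jane Doe"
assert user.github_username == "janedoe"

user = parse_user_names("@janedoe")  # username-only form
assert user.name == ""
assert user.github_username == "janedoe"
```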
diff --git a/tests/conftest.py b/tests/conftest.py
index ae92e26..f3c8186 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,14 @@
+from pathlib import Path
+from typing import Callable, Literal, Optional, Union, overload
+
import pytest
from pyosmeta.contributors import ProcessContributors
from pyosmeta.github_api import GitHubAPI
+from pyosmeta.models.github import Issue
from pyosmeta.parse_issues import ProcessIssues
+DATA_DIR = Path(__file__).parent / "data"
+
@pytest.fixture
def ghuser_response():
@@ -54,75 +60,117 @@ def issue_list():
"""A fixture representing an API return from GitHub for a two issues
to be used in our test suite.
- We only use the body for parse_issue_header but for now i'll leave it all
+    We only use the body for parse_issues, but for now I'll leave it all
as it may be useful for other tests.
"""
issue = [
- {
- "url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/147",
- "repository_url": "https://api.github.com/repos/pyOpenSci/software-submission",
- "labels_url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/147/labels{/name}",
- "number": 147,
- "title": "`sunpy` Review",
- "assignee": {
- "login": "cmarmo",
- "id": 1662261,
- "node_id": "MDQ6VXNlcjE2NjIyNjE=",
- "avatar_url": "https://avatars.githubusercontent.com/u/1662261?v=4",
- "type": "User",
- "site_admin": False,
- },
- "assignees": [{...}],
- "milestone": None,
- "comments": 35,
- "created_at": "2023-10-30T18:45:06Z",
- "updated_at": "2024-02-22T01:24:31Z",
- "closed_at": "2024-01-27T23:05:39Z",
- "author_association": "NONE",
- "active_lock_reason": None,
- "body": "Submitting Author: Nabil Freij (@nabobalis)\r\nAll current maintainers: @ehsteve,@dpshelio,@wafels,@ayshih,@Cadair,@nabobalis,@dstansby,@DanRyanIrish,@wtbarnes,@ConorMacBride,@alasdairwilson,@hayesla,@vn-ki\r\nPackage Name: sunpy\r\nOne-Line Description of Package: Python for Solar Physics \r\nRepository Link: https://github.com/sunpy/sunpy\r\nVersion submitted: 5.0.1\r\nEditor: @cmarmo \r\nReviewer 1: @Septaris\r\nReviewer 2: @nutjob4life\r\nArchive: [](https://doi.org/10.5281/zenodo.8384174)\r\nVersion accepted: 5.1.1\r\nJOSS DOI: [](https://joss.theoj.org/papers/10.21105/joss.01832)\r\nDate accepted (month/day/year): 01/18/2024\r\n\r\n---\r\n\r\n## Code of Conduct & Commitment to Maintain Package\r\n\r\n- [X] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.\r\n- [X] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].\r\n\r\n## Description\r\n\r\n- sunpy is a community-developed, free and open-source solar data analysis environment for Python. It includes an interface for searching and downloading data from multiple data providers, data containers for image and time series data, commonly used solar coordinate frames and associated transformations, as well as other functionality needed for solar data analysis.\r\n\r\n## Scope\r\n\r\n- Please indicate which category or categories. \r\nCheck out our [package scope page][PackageCategories] to learn more about our \r\nscope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):\r\n\r\n\t- [X] Data retrieval\r\n\t- [X] Data extraction\r\n\t- [X] Data processing/munging\r\n\t- [ ] Data deposition\r\n\t- [ ] Data validation and testing\r\n\t- [X] Data visualization[^1]\t \r\n\t- [ ] Workflow automation\r\n\t- [ ] Citation management and bibliometrics\r\n\t- [ ] Scientific software wrappers\r\n\t- [ ] Database interoperability\r\n\r\n## Domain Specific\r\n\r\n- [ ] Geospatial\r\n- [ ] Education\r\n\t\r\n## Community Partnerships\r\nIf your package is associated with a pyOpenSci partner community, please check below:\r\n\r\n- [ ] astropy\r\n- [x] sunpy\r\n- [ ] [Pangeo][pangeoWebsite]\r\n\t- [ ] My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]\r\n\r\n## Technical checks\r\n\r\nFor details about the pyOpenSci packaging requirements, see our [packaging guide][PackagingGuide]. Confirm each of the following by checking the box. This package:\r\n\r\n- [X] does not violate the Terms of Service of any service it interacts with. \r\n- [X] uses an [OSI approved license][OsiApprovedLicense].\r\n- [X] contains a README with instructions for installing the development version. \r\n- [ ] includes documentation with examples for all functions.\r\n **I will need to double check the examples, we have documentation for all public API**\r\n- [X] contains a tutorial with examples of its essential functions and uses.\r\n- [X] has a test suite.\r\n- [X] has continuous integration setup, such as GitHub Actions CircleCI, and/or others.\r\n\r\n## Are you OK with Reviewers Submitting Issues and/or pull requests to your Repo Directly?\r\nThis option will allow reviewers to open smaller issues that can then be linked to PR's rather than submitting a more dense text based review. 
It will also allow you to demonstrate addressing the issue via PR links.\r\n\r\n- [x] Yes I am OK with reviewers submitting requested changes as issues to my repo. Reviewers will then link to the issues in their submitted review.\r\n\r\nConfirm each of the following by checking the box.\r\n\r\n- [X] I have read the [author guide](https://www.pyopensci.org/software-peer-review/how-to/author-guide.html). \r\n- [X] I expect to maintain this package for at least 2 years and can help find a replacement for the maintainer (team) if needed.\r\n\r\n## Please fill out our survey\r\n\r\n- [X] [Last but not least please fill out our pre-review survey](https://forms.gle/F9mou7S3jhe8DMJ16). This helps us track\r\nsubmission and improve our peer review process. We will also ask our reviewers \r\nand editors to fill this out.\r\n\r\n**P.S.** Have feedback/comments about our review process? Leave a comment [here][Comments]\r\n\r\n## Editor and Review Templates\r\n\r\nThe [editor template can be found here][Editor Template].\r\n\r\nThe [review template can be found here][Review Template].\r\n\r\n[PackagingGuide]: https://www.pyopensci.org/python-package-guide/\r\n\r\n[PackageCategories]: https://www.pyopensci.org/software-peer-review/about/package-scope.html\r\n\r\n[JournalOfOpenSourceSoftware]: http://joss.theoj.org/\r\n\r\n[JossSubmissionRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#submission-requirements\r\n\r\n[JossPaperRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#what-should-my-paper-contain\r\n\r\n[PyOpenSciCodeOfConduct]: https://www.pyopensci.org/governance/CODE_OF_CONDUCT\r\n\r\n[OsiApprovedLicense]: https://opensource.org/licenses\r\n\r\n[Editor Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#editor-s-template\r\n\r\n[Review Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#peer-review-template\r\n\r\n[Comments]: https://pyopensci.discourse.group/\r\n\r\n[PangeoCollaboration]: https://www.pyopensci.org/software-peer-review/partners/pangeo\r\n\r\n[pangeoWebsite]: https://www.pangeo.io\r\n[Commitment]: https://www.pyopensci.org/software-peer-review/our-process/policies.html#after-acceptance-package-ownership-and-maintenance\r\n\r\n",
- "state_reason": "completed",
- },
- {
- "url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/146",
- "repository_url": "https://api.github.com/repos/pyOpenSci/software-submission",
- "title": "ncompare",
- "user": {
- "login": "danielfromearth",
- "id": 114174502,
- "node_id": "U_kgDOBs4qJg",
- "avatar_url": "https://avatars.githubusercontent.com/u/114174502?v=4",
- "gravatar_id": "",
- "url": "https://api.github.com/users/danielfromearth",
- "received_events_url": "https://api.github.com/users/danielfromearth/received_events",
- "type": "User",
- "site_admin": False,
- },
- "labels": [{...}],
- "state": "open",
- "locked": False,
- "assignee": {
- "login": "tomalrussell",
- "id": 2762769,
- "type": "User",
- "site_admin": False,
- },
- "assignees": [{...}],
- "milestone": None,
- "comments": 21,
- "created_at": "2023-10-25T13:12:48Z",
- "updated_at": "2024-02-06T17:59:37Z",
- "closed_at": None,
- "author_association": "NONE",
- "active_lock_reason": None,
- "body": 'Submitting Author: Daniel Kaufman (@danielfromearth)\r\nAll current maintainers: (@danielfromearth)\r\nPackage Name: `ncompare`\r\nOne-Line Description of Package: `ncompare` compares two netCDF files at the command line, by generating a report of the matching and non-matching groups, variables, and attributes.\r\nRepository Link: https://github.com/nasa/ncompare\r\nVersion submitted: 1.4.0\r\nEditor: @tomalrussell \r\nReviewer 1: @cmarmo \r\nReviewer 2: @cmtso \r\nArchive: [10.5281/zenodo.10625407](https://zenodo.org/doi/10.5281/zenodo.10625407)\r\nJOSS DOI: TBD\r\nVersion accepted: 1.7.2\r\nDate accepted (month/day/year): 02/06/2024\r\n\r\n---\r\n\r\n## Code of Conduct & Commitment to Maintain Package\r\n\r\n- [x] I agree to abide by [pyOpenSci\'s Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.\r\n- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].\r\n\r\n## Description\r\n\r\nThis tool ("ncompare") compares the structure of two Network Common Data Form (NetCDF) files at the command line. \r\n\r\n\r\n## Scope\r\n\r\n- Please indicate which category or categories. \r\nCheck out our [package scope page][PackageCategories] to learn more about our \r\nscope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):\r\n\r\n\t- [ ] Data retrieval\r\n\t- [ ] Data extraction\r\n\t- [ ] Data processing/munging\r\n\t- [ ] Data deposition\r\n\t- [x] Data validation and testing\r\n\t- [ ] Data visualization[^1]\r\n\t- [ ] Workflow automation\r\n\t- [ ] Citation management and bibliometrics\r\n\t- [ ] Scientific software wrappers\r\n\t- [ ] Database interoperability\r\n\r\nDomain Specific & Community Partnerships \r\n\r\n\t- [ ] Geospatial\r\n\t- [ ] Education\r\n\t- [ ] Pangeo\r\n\t\r\n\r\n## Community Partnerships\r\nIf your package is associated with an \r\nexisting community please check below:\r\n\r\n- [ ] [Pangeo][pangeoWebsite]\r\n\t- [ ] My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]\r\n\r\n> [^1]: Please fill out a pre-submission inquiry before submitting a data visualization package.\r\n\r\n- **For all submissions**, explain how the and why the package falls under the categories you indicated above. In your explanation, please address the following points (briefly, 1-2 sentences for each): \r\n\r\n - Who is the target audience and what are scientific applications of this package?\r\n \r\nThe target audience is anyone who manages the generation, manipulation, or validation of netCDF files. This package can be applied to to these netCDF file tasks in any scientific discipline;\xa0although it would be most relevant to applications with large multidimensional datasets, e.g., for comparing climate models, for Earth science data reanalyses, and for remote sensing data.\r\n\r\n - Are there other Python packages that accomplish the same thing? If so, how does yours differ?\r\n\r\nThe `ncdiff` function in the `nco` (netCDF Operators) library, as well as `ncmpidiff` and `nccmp`, compute value \r\ndifferences, but --- as far as we are aware --- do not have a dedicated function to show structural differences between netCDF4 datasets. Our package, `ncompare` provides a light-weight Python-based tool for rapid **visual** comparisons of group & variable _structures_, _attributes_, and _chunking_. 
\r\n\r\n - If you made a pre-submission enquiry, please paste the link to the corresponding issue, forum post, or other discussion, or `@tag` the editor you contacted:\r\n\r\nPre-submission inquiry #142 \r\n\r\n## Technical checks\r\n\r\nFor details about the pyOpenSci packaging requirements, see our [packaging guide][PackagingGuide]. Confirm each of the following by checking the box. This package:\r\n\r\n- [x] does not violate the Terms of Service of any service it interacts with. \r\n- [x] uses an [OSI approved license][OsiApprovedLicense].\r\n- [x] contains a README with instructions for installing the development version. \r\n- [x] includes documentation with examples for all functions.\r\n- [x] contains a tutorial with examples of its essential functions and uses.\r\n- [x] has a test suite.\r\n- [x] has continuous integration setup, such as GitHub Actions CircleCI, and/or others.\r\n\r\n## Publication Options\r\n\r\n- [x] Do you wish to automatically submit to the [Journal of Open Source Software][JournalOfOpenSourceSoftware]? If so:\r\n\r\n\r\n JOSS Checks
\r\n\r\n- [x] The package has an **obvious research application** according to JOSS\'s definition in their [submission requirements][JossSubmissionRequirements]. Be aware that completing the pyOpenSci review process **does not** guarantee acceptance to JOSS. Be sure to read their submission requirements (linked above) if you are interested in submitting to JOSS.\r\n- [x] The package is not a "minor utility" as defined by JOSS\'s [submission requirements][JossSubmissionRequirements]: "Minor ‘utility’ packages, including ‘thin’ API clients, are not acceptable." pyOpenSci welcomes these packages under "Data Retrieval", but JOSS has slightly different criteria.\r\n- [(NOT YET)] The package contains a `paper.md` matching [JOSS\'s requirements][JossPaperRequirements] with a high-level description in the package root or in `inst/`.\r\n- [(NOT YET)] The package is deposited in a long-term repository with the DOI: \r\n\r\n*Note: JOSS accepts our review as theirs. You will NOT need to go through another full review. JOSS will only review your paper.md file. Be sure to link to this pyOpenSci issue when a JOSS issue is opened for your package. Also be sure to tell the JOSS editor that this is a pyOpenSci reviewed package once you reach this step.*\r\n \r\n \r\n\r\n## Are you OK with Reviewers Submitting Issues and/or pull requests to your Repo Directly?\r\nThis option will allow reviewers to open smaller issues that can then be linked to PR\'s rather than submitting a more dense text based review. It will also allow you to demonstrate addressing the issue via PR links.\r\n\r\n- [x] Yes I am OK with reviewers submitting requested changes as issues to my repo. Reviewers will then link to the issues in their submitted review.\r\n\r\nConfirm each of the following by checking the box.\r\n\r\n- [x] I have read the [author guide](https://www.pyopensci.org/software-peer-review/how-to/author-guide.html). \r\n- [x] I expect to maintain this package for at least 2 years and can help find a replacement for the maintainer (team) if needed.\r\n\r\n## Please fill out our survey\r\n\r\n- [x] [Last but not least please fill out our pre-review survey](https://forms.gle/F9mou7S3jhe8DMJ16). This helps us track\r\nsubmission and improve our peer review process. We will also ask our reviewers \r\nand editors to fill this out.\r\n\r\n**P.S.** Have feedback/comments about our review process? 
Leave a comment [here][Comments]\r\n\r\n## Editor and Review Templates\r\n\r\nThe [editor template can be found here][Editor Template].\r\n\r\nThe [review template can be found here][Review Template].\r\n\r\n[PackagingGuide]: https://www.pyopensci.org/python-package-guide/\r\n\r\n[PackageCategories]: https://www.pyopensci.org/software-peer-review/about/package-scope.html\r\n\r\n[JournalOfOpenSourceSoftware]: http://joss.theoj.org/\r\n\r\n[JossSubmissionRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#submission-requirements\r\n\r\n[JossPaperRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#what-should-my-paper-contain\r\n\r\n[PyOpenSciCodeOfConduct]: https://www.pyopensci.org/governance/CODE_OF_CONDUCT\r\n\r\n[OsiApprovedLicense]: https://opensource.org/licenses\r\n\r\n[Editor Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#editor-s-template\r\n\r\n[Review Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#peer-review-template\r\n\r\n[Comments]: https://pyopensci.discourse.group/\r\n\r\n[PangeoCollaboration]: https://www.pyopensci.org/software-peer-review/partners/pangeo\r\n\r\n[pangeoWebsite]: https://www.pangeo.io\r\n[Commitment]: https://www.pyopensci.org/software-peer-review/our-process/policies.html#after-acceptance-package-ownership-and-maintenance\r\n',
- "reactions": {
- "url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/146/reactions",
- "total_count": 0,
- },
- "timeline_url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/146/timeline",
- "performed_via_github_app": None,
- "state_reason": None,
- },
+ Issue(
+ **{
+ "url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/147",
+ "repository_url": "https://api.github.com/repos/pyOpenSci/software-submission",
+ "labels_url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/147/labels{/name}",
+ "number": 147,
+ "title": "`sunpy` Review",
+ "assignee": {
+ "login": "cmarmo",
+ "id": 1662261,
+ "node_id": "MDQ6VXNlcjE2NjIyNjE=",
+ "avatar_url": "https://avatars.githubusercontent.com/u/1662261?v=4",
+ "type": "User",
+ "site_admin": False,
+ },
+ "assignees": [],
+ "milestone": None,
+ "comments": 35,
+ "created_at": "2023-10-30T18:45:06Z",
+ "updated_at": "2024-02-22T01:24:31Z",
+ "closed_at": "2024-01-27T23:05:39Z",
+ "author_association": "NONE",
+ "active_lock_reason": None,
+ "body": "Submitting Author: Nabil Freij (@nabobalis)\r\nAll current maintainers: @ehsteve,@dpshelio,@wafels,@ayshih,@Cadair,@nabobalis,@dstansby,@DanRyanIrish,@wtbarnes,@ConorMacBride,@alasdairwilson,@hayesla,@vn-ki\r\nPackage Name: sunpy\r\nOne-Line Description of Package: Python for Solar Physics \r\nRepository Link: https://github.com/sunpy/sunpy\r\nVersion submitted: 5.0.1\r\nEditor: @cmarmo \r\nReviewer 1: @Septaris\r\nReviewer 2: @nutjob4life\r\nArchive: [](https://doi.org/10.5281/zenodo.8384174)\r\nVersion accepted: 5.1.1\r\nJOSS DOI: [](https://joss.theoj.org/papers/10.21105/joss.01832)\r\nDate accepted (month/day/year): 01/18/2024\r\n\r\n---\r\n\r\n## Code of Conduct & Commitment to Maintain Package\r\n\r\n- [X] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.\r\n- [X] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].\r\n\r\n## Description\r\n\r\n- sunpy is a community-developed, free and open-source solar data analysis environment for Python. It includes an interface for searching and downloading data from multiple data providers, data containers for image and time series data, commonly used solar coordinate frames and associated transformations, as well as other functionality needed for solar data analysis.\r\n\r\n## Scope\r\n\r\n- Please indicate which category or categories. \r\nCheck out our [package scope page][PackageCategories] to learn more about our \r\nscope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):\r\n\r\n\t- [X] Data retrieval\r\n\t- [X] Data extraction\r\n\t- [X] Data processing/munging\r\n\t- [ ] Data deposition\r\n\t- [ ] Data validation and testing\r\n\t- [X] Data visualization[^1]\t \r\n\t- [ ] Workflow automation\r\n\t- [ ] Citation management and bibliometrics\r\n\t- [ ] Scientific software wrappers\r\n\t- [ ] Database interoperability\r\n\r\n## Domain Specific\r\n\r\n- [ ] Geospatial\r\n- [ ] Education\r\n\t\r\n## Community Partnerships\r\nIf your package is associated with a pyOpenSci partner community, please check below:\r\n\r\n- [ ] astropy\r\n- [x] sunpy\r\n- [ ] [Pangeo][pangeoWebsite]\r\n\t- [ ] My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]\r\n\r\n## Technical checks\r\n\r\nFor details about the pyOpenSci packaging requirements, see our [packaging guide][PackagingGuide]. Confirm each of the following by checking the box. This package:\r\n\r\n- [X] does not violate the Terms of Service of any service it interacts with. \r\n- [X] uses an [OSI approved license][OsiApprovedLicense].\r\n- [X] contains a README with instructions for installing the development version. \r\n- [ ] includes documentation with examples for all functions.\r\n **I will need to double check the examples, we have documentation for all public API**\r\n- [X] contains a tutorial with examples of its essential functions and uses.\r\n- [X] has a test suite.\r\n- [X] has continuous integration setup, such as GitHub Actions CircleCI, and/or others.\r\n\r\n## Are you OK with Reviewers Submitting Issues and/or pull requests to your Repo Directly?\r\nThis option will allow reviewers to open smaller issues that can then be linked to PR's rather than submitting a more dense text based review. 
It will also allow you to demonstrate addressing the issue via PR links.\r\n\r\n- [x] Yes I am OK with reviewers submitting requested changes as issues to my repo. Reviewers will then link to the issues in their submitted review.\r\n\r\nConfirm each of the following by checking the box.\r\n\r\n- [X] I have read the [author guide](https://www.pyopensci.org/software-peer-review/how-to/author-guide.html). \r\n- [X] I expect to maintain this package for at least 2 years and can help find a replacement for the maintainer (team) if needed.\r\n\r\n## Please fill out our survey\r\n\r\n- [X] [Last but not least please fill out our pre-review survey](https://forms.gle/F9mou7S3jhe8DMJ16). This helps us track\r\nsubmission and improve our peer review process. We will also ask our reviewers \r\nand editors to fill this out.\r\n\r\n**P.S.** Have feedback/comments about our review process? Leave a comment [here][Comments]\r\n\r\n## Editor and Review Templates\r\n\r\nThe [editor template can be found here][Editor Template].\r\n\r\nThe [review template can be found here][Review Template].\r\n\r\n[PackagingGuide]: https://www.pyopensci.org/python-package-guide/\r\n\r\n[PackageCategories]: https://www.pyopensci.org/software-peer-review/about/package-scope.html\r\n\r\n[JournalOfOpenSourceSoftware]: http://joss.theoj.org/\r\n\r\n[JossSubmissionRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#submission-requirements\r\n\r\n[JossPaperRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#what-should-my-paper-contain\r\n\r\n[PyOpenSciCodeOfConduct]: https://www.pyopensci.org/governance/CODE_OF_CONDUCT\r\n\r\n[OsiApprovedLicense]: https://opensource.org/licenses\r\n\r\n[Editor Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#editor-s-template\r\n\r\n[Review Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#peer-review-template\r\n\r\n[Comments]: https://pyopensci.discourse.group/\r\n\r\n[PangeoCollaboration]: https://www.pyopensci.org/software-peer-review/partners/pangeo\r\n\r\n[pangeoWebsite]: https://www.pangeo.io\r\n[Commitment]: https://www.pyopensci.org/software-peer-review/our-process/policies.html#after-acceptance-package-ownership-and-maintenance\r\n\r\n",
+ "state_reason": "completed",
+ }
+ ),
+ Issue(
+ **{
+ "url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/146",
+ "repository_url": "https://api.github.com/repos/pyOpenSci/software-submission",
+ "title": "ncompare",
+ "number": 147,
+ "user": {
+ "login": "danielfromearth",
+ "id": 114174502,
+ "node_id": "U_kgDOBs4qJg",
+ "avatar_url": "https://avatars.githubusercontent.com/u/114174502?v=4",
+ "gravatar_id": "",
+ "url": "https://api.github.com/users/danielfromearth",
+ "received_events_url": "https://api.github.com/users/danielfromearth/received_events",
+ "type": "User",
+ "site_admin": False,
+ },
+ "labels": [],
+ "state": "open",
+ "locked": False,
+ "assignee": {
+ "login": "tomalrussell",
+ "id": 2762769,
+ "type": "User",
+ "site_admin": False,
+ },
+ "assignees": [],
+ "milestone": None,
+ "comments": 21,
+ "created_at": "2023-10-25T13:12:48Z",
+ "updated_at": "2024-02-06T17:59:37Z",
+ "closed_at": None,
+ "author_association": "NONE",
+ "active_lock_reason": None,
+ "body": 'Submitting Author: Daniel Kaufman (@danielfromearth)\r\nAll current maintainers: (@danielfromearth)\r\nPackage Name: `ncompare`\r\nOne-Line Description of Package: `ncompare` compares two netCDF files at the command line, by generating a report of the matching and non-matching groups, variables, and attributes.\r\nRepository Link: https://github.com/nasa/ncompare\r\nVersion submitted: 1.4.0\r\nEditor: @tomalrussell \r\nReviewer 1: @cmarmo \r\nReviewer 2: @cmtso \r\nArchive: [10.5281/zenodo.10625407](https://zenodo.org/doi/10.5281/zenodo.10625407)\r\nJOSS DOI: TBD\r\nVersion accepted: 1.7.2\r\nDate accepted (month/day/year): 02/06/2024\r\n\r\n---\r\n\r\n## Code of Conduct & Commitment to Maintain Package\r\n\r\n- [x] I agree to abide by [pyOpenSci\'s Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.\r\n- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].\r\n\r\n## Description\r\n\r\nThis tool ("ncompare") compares the structure of two Network Common Data Form (NetCDF) files at the command line. \r\n\r\n\r\n## Scope\r\n\r\n- Please indicate which category or categories. \r\nCheck out our [package scope page][PackageCategories] to learn more about our \r\nscope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):\r\n\r\n\t- [ ] Data retrieval\r\n\t- [ ] Data extraction\r\n\t- [ ] Data processing/munging\r\n\t- [ ] Data deposition\r\n\t- [x] Data validation and testing\r\n\t- [ ] Data visualization[^1]\r\n\t- [ ] Workflow automation\r\n\t- [ ] Citation management and bibliometrics\r\n\t- [ ] Scientific software wrappers\r\n\t- [ ] Database interoperability\r\n\r\nDomain Specific & Community Partnerships \r\n\r\n\t- [ ] Geospatial\r\n\t- [ ] Education\r\n\t- [ ] Pangeo\r\n\t\r\n\r\n## Community Partnerships\r\nIf your package is associated with an \r\nexisting community please check below:\r\n\r\n- [ ] [Pangeo][pangeoWebsite]\r\n\t- [ ] My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]\r\n\r\n> [^1]: Please fill out a pre-submission inquiry before submitting a data visualization package.\r\n\r\n- **For all submissions**, explain how the and why the package falls under the categories you indicated above. In your explanation, please address the following points (briefly, 1-2 sentences for each): \r\n\r\n - Who is the target audience and what are scientific applications of this package?\r\n \r\nThe target audience is anyone who manages the generation, manipulation, or validation of netCDF files. This package can be applied to to these netCDF file tasks in any scientific discipline;\xa0although it would be most relevant to applications with large multidimensional datasets, e.g., for comparing climate models, for Earth science data reanalyses, and for remote sensing data.\r\n\r\n - Are there other Python packages that accomplish the same thing? If so, how does yours differ?\r\n\r\nThe `ncdiff` function in the `nco` (netCDF Operators) library, as well as `ncmpidiff` and `nccmp`, compute value \r\ndifferences, but --- as far as we are aware --- do not have a dedicated function to show structural differences between netCDF4 datasets. Our package, `ncompare` provides a light-weight Python-based tool for rapid **visual** comparisons of group & variable _structures_, _attributes_, and _chunking_. 
\r\n\r\n - If you made a pre-submission enquiry, please paste the link to the corresponding issue, forum post, or other discussion, or `@tag` the editor you contacted:\r\n\r\nPre-submission inquiry #142 \r\n\r\n## Technical checks\r\n\r\nFor details about the pyOpenSci packaging requirements, see our [packaging guide][PackagingGuide]. Confirm each of the following by checking the box. This package:\r\n\r\n- [x] does not violate the Terms of Service of any service it interacts with. \r\n- [x] uses an [OSI approved license][OsiApprovedLicense].\r\n- [x] contains a README with instructions for installing the development version. \r\n- [x] includes documentation with examples for all functions.\r\n- [x] contains a tutorial with examples of its essential functions and uses.\r\n- [x] has a test suite.\r\n- [x] has continuous integration setup, such as GitHub Actions CircleCI, and/or others.\r\n\r\n## Publication Options\r\n\r\n- [x] Do you wish to automatically submit to the [Journal of Open Source Software][JournalOfOpenSourceSoftware]? If so:\r\n\r\n\r\n JOSS Checks
\r\n\r\n- [x] The package has an **obvious research application** according to JOSS\'s definition in their [submission requirements][JossSubmissionRequirements]. Be aware that completing the pyOpenSci review process **does not** guarantee acceptance to JOSS. Be sure to read their submission requirements (linked above) if you are interested in submitting to JOSS.\r\n- [x] The package is not a "minor utility" as defined by JOSS\'s [submission requirements][JossSubmissionRequirements]: "Minor ‘utility’ packages, including ‘thin’ API clients, are not acceptable." pyOpenSci welcomes these packages under "Data Retrieval", but JOSS has slightly different criteria.\r\n- [(NOT YET)] The package contains a `paper.md` matching [JOSS\'s requirements][JossPaperRequirements] with a high-level description in the package root or in `inst/`.\r\n- [(NOT YET)] The package is deposited in a long-term repository with the DOI: \r\n\r\n*Note: JOSS accepts our review as theirs. You will NOT need to go through another full review. JOSS will only review your paper.md file. Be sure to link to this pyOpenSci issue when a JOSS issue is opened for your package. Also be sure to tell the JOSS editor that this is a pyOpenSci reviewed package once you reach this step.*\r\n \r\n \r\n\r\n## Are you OK with Reviewers Submitting Issues and/or pull requests to your Repo Directly?\r\nThis option will allow reviewers to open smaller issues that can then be linked to PR\'s rather than submitting a more dense text based review. It will also allow you to demonstrate addressing the issue via PR links.\r\n\r\n- [x] Yes I am OK with reviewers submitting requested changes as issues to my repo. Reviewers will then link to the issues in their submitted review.\r\n\r\nConfirm each of the following by checking the box.\r\n\r\n- [x] I have read the [author guide](https://www.pyopensci.org/software-peer-review/how-to/author-guide.html). \r\n- [x] I expect to maintain this package for at least 2 years and can help find a replacement for the maintainer (team) if needed.\r\n\r\n## Please fill out our survey\r\n\r\n- [x] [Last but not least please fill out our pre-review survey](https://forms.gle/F9mou7S3jhe8DMJ16). This helps us track\r\nsubmission and improve our peer review process. We will also ask our reviewers \r\nand editors to fill this out.\r\n\r\n**P.S.** Have feedback/comments about our review process? 
Leave a comment [here][Comments]\r\n\r\n## Editor and Review Templates\r\n\r\nThe [editor template can be found here][Editor Template].\r\n\r\nThe [review template can be found here][Review Template].\r\n\r\n[PackagingGuide]: https://www.pyopensci.org/python-package-guide/\r\n\r\n[PackageCategories]: https://www.pyopensci.org/software-peer-review/about/package-scope.html\r\n\r\n[JournalOfOpenSourceSoftware]: http://joss.theoj.org/\r\n\r\n[JossSubmissionRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#submission-requirements\r\n\r\n[JossPaperRequirements]: https://joss.readthedocs.io/en/latest/submitting.html#what-should-my-paper-contain\r\n\r\n[PyOpenSciCodeOfConduct]: https://www.pyopensci.org/governance/CODE_OF_CONDUCT\r\n\r\n[OsiApprovedLicense]: https://opensource.org/licenses\r\n\r\n[Editor Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#editor-s-template\r\n\r\n[Review Template]: https://www.pyopensci.org/software-peer-review/appendices/templates.html#peer-review-template\r\n\r\n[Comments]: https://pyopensci.discourse.group/\r\n\r\n[PangeoCollaboration]: https://www.pyopensci.org/software-peer-review/partners/pangeo\r\n\r\n[pangeoWebsite]: https://www.pangeo.io\r\n[Commitment]: https://www.pyopensci.org/software-peer-review/our-process/policies.html#after-acceptance-package-ownership-and-maintenance\r\n',
+ "reactions": {
+ "url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/146/reactions",
+ "total_count": 0,
+ },
+ "timeline_url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/146/timeline",
+ "performed_via_github_app": None,
+ "state_reason": None,
+ }
+ ),
]
return issue
+
+
+@pytest.fixture
+def data_file() -> Callable[[Optional[str], bool], Union[str, Path]]:
+ """
+ Fixture that returns a getter for files within the test data directory.
+
+ Examples:
+
+ >>> data_file()
+ DATA_DIR
+ >>> data_file('myfile.txt')
+ DATA_DIR / 'myfile.txt'
+ >>> data_file('myfile.txt', load=True)
+ {contents of myfile.txt}
+ """
+
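+ # Overload stubs for static type checkers only: load=True narrows the
+ # return type to str (the file's contents), load=False narrows it to Path.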
+ @overload
+ def _data_file(file: str, load: Literal[True]) -> str: ...
+ @overload
+ def _data_file(file: Optional[str] = None, load: Literal[False] = False) -> Path: ...
+
+ def _data_file(
+ file: Optional[str] = None, load: bool = False
+ ) -> Union[str, Path]:
+ if file is None:
+ return DATA_DIR
+
+ path = DATA_DIR / file
+ if load:
+ with open(path, "r") as a_file:
+ data = a_file.read()
+ return data
+ else:
+ return path
+
+ return _data_file
diff --git a/tests/data/reviews/bolded_keys.txt b/tests/data/reviews/bolded_keys.txt
new file mode 100644
index 0000000..7275920
--- /dev/null
+++ b/tests/data/reviews/bolded_keys.txt
@@ -0,0 +1,41 @@
+**Submitting Author:** Fakename (@fakeauthor)
+**All current maintainers:** (@fakeauthor1, @fakeauthor2)
+**Package Name:** fake_package
+**One-Line Description of Package:** A fake python package
+**Repository Link:** https://example.com/fakeauthor1/fake_package
+**Version submitted:** v1.0.0
+**EiC:** @fakeeic
+**Editor:** @fakeeditor
+**Reviewer 1:** @fakereviewer1
+**Reviewer 2:** @fakereviewer2
+**Reviews Expected By:** fake date
+**Archive:** [](https://example.com/fakearchive)
+**Version accepted:** 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive))
+**Date accepted (month/day/year):** 06/29/2024
+
+---
+
+## Scope
+
+- Please indicate which category or categories.
+Check out our [package scope page][PackageCategories] to learn more about our
+scope. (If you are unsure of which category you fit, we suggest you make a pre-submission inquiry):
+
+ - [ ] Data retrieval
+ - [ ] Data extraction
+ - [x] Data processing/munging
+ - [ ] Data deposition
+ - [ ] Data validation and testing
+ - [ ] Data visualization[^1]
+ - [ ] Workflow automation
+ - [ ] Citation management and bibliometrics
+ - [ ] Scientific software wrappers
+ - [ ] Database interoperability
+
+
+## Community Partnerships
+If your package is associated with an
+existing community please check below:
+
+- [ ] [Pangeo][pangeoWebsite]
+ - [ ] My package adheres to the [Pangeo standards listed in the pyOpenSci peer review guidebook][PangeoCollaboration]
diff --git a/tests/data/reviews/reviewer_keyed.txt b/tests/data/reviews/reviewer_keyed.txt
new file mode 100644
index 0000000..3a3560a
--- /dev/null
+++ b/tests/data/reviews/reviewer_keyed.txt
@@ -0,0 +1,27 @@
+Submitting Author: Fakename (@fakeauthor)
+All current maintainers: (@fakeauthor1, @fakeauthor2)
+Package Name: fake_package
+One-Line Description of Package: A fake python package
+Repository Link: https://example.com/fakeauthor1/fake_package
+Version submitted: v1.0.0
+EiC: @fakeeic
+Editor: @fakeeditor
+Reviewer 1: @fakereviewer1
+Reviewer 2: @fakereviewer2
+Reviews Expected By: fake date
+Archive: [](https://example.com/fakearchive)
+Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive))
+Date accepted (month/day/year): 06/29/2024
+
+---
+
+## Scope
+
+- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
+- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].
+(etc)
+
+## Community Partnerships
+
+- [ ] etc
+- [ ] aaaaaa
diff --git a/tests/data/reviews/reviewer_list.txt b/tests/data/reviews/reviewer_list.txt
new file mode 100644
index 0000000..3e9f9e4
--- /dev/null
+++ b/tests/data/reviews/reviewer_list.txt
@@ -0,0 +1,26 @@
+Submitting Author: Fakename (@fakeauthor)
+All current maintainers: (@fakeauthor1, @fakeauthor2)
+Package Name: fake_package
+One-Line Description of Package: A fake python package
+Repository Link: https://example.com/fakeauthor1/fake_package
+Version submitted: v1.0.0
+EiC: @fakeeic
+Editor: @fakeeditor
+Reviewers: @fakereviewer1 , @fakereviewer2, @fakereviewer3
+Reviews Expected By: fake date
+Archive: [](https://example.com/fakearchive)
+Version accepted: 2.0.0 ([repo](https://example.com/fakeauthor1/fake_package/releases/tag/v2.0.0), [pypi](https://pypi.org/project/fake_project/2.0.0), [archive](https://example.com/fakearchive))
+Date accepted (month/day/year): 06/29/2024
+
+---
+
+## Scope
+
+- [x] I agree to abide by [pyOpenSci's Code of Conduct][PyOpenSciCodeOfConduct] during the review process and in maintaining my package after should it be accepted.
+- [x] I have read and will commit to package maintenance after the review as per the [pyOpenSci Policies Guidelines][Commitment].
+(etc)
+
+## Community Partnerships
+
+- [ ] etc
+- [ ] aaaaaa
diff --git a/tests/integration/test_parse_issues.py b/tests/integration/test_parse_issues.py
index 47e9423..5dc8bb8 100644
--- a/tests/integration/test_parse_issues.py
+++ b/tests/integration/test_parse_issues.py
@@ -1,10 +1,54 @@
"""Test parse issues workflow"""
+import pytest
+from pyosmeta.models import ReviewUser
+
-def test_parse_issue_header(process_issues, issue_list):
- """Should return a dict, should return 2 keys in the dict"""
+def test_parse_issues(process_issues, issue_list):
+ """Should return a dict of reviews with 2 keys, plus a dict of parse errors"""
- reviews = process_issues.parse_issue_header(issue_list, 20)
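+ # parse_issues returns (reviews, errors); errors maps an issue's URL to
+ # the validation error raised while parsing it.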
+ reviews, errors = process_issues.parse_issues(issue_list)
print(reviews)
assert len(reviews.keys()) == 2
assert list(reviews.keys())[0] == "sunpy"
+
+
+@pytest.mark.parametrize(
+ "file,expected",
+ [
+ (
+ "reviews/reviewer_keyed.txt",
+ [
+ ReviewUser(name="", github_username="fakereviewer1"),
+ ReviewUser(name="", github_username="fakereviewer2"),
+ ],
+ ),
+ (
+ "reviews/reviewer_list.txt",
+ [
+ ReviewUser(name="", github_username="fakereviewer1"),
+ ReviewUser(name="", github_username="fakereviewer2"),
+ ReviewUser(name="", github_username="fakereviewer3"),
+ ],
+ ),
+ ],
+)
+def test_parse_reviewers(file, expected, process_issues, data_file):
+ """Handle the multiple forms of reviewers"""
+ review = data_file(file, True)
+ review = process_issues.parse_issue(review)
+ assert review.reviewers == expected
+
+
+def test_parse_bolded_keys(process_issues, data_file):
+ """
+ Bolding the keys in the review doesn't break the parser
+ """
+ review = data_file("reviews/bolded_keys.txt", True)
+ review = process_issues.parse_issue(review)
+ assert review.package_name == "fake_package"
diff --git a/tests/unit/test_get_contrib_data.py b/tests/unit/test_get_contrib_data.py
index cc28d53..1b13baf 100644
--- a/tests/unit/test_get_contrib_data.py
+++ b/tests/unit/test_get_contrib_data.py
@@ -1,12 +1,13 @@
import pytest
+from pydantic import ValidationError
+from pyosmeta.models.github import Issue
sample_response = {
"url": "https://api.github.com/repos/pyOpenSci/software-submission/issues/147",
"repository_url": "https://api.github.com/repos/pyOpenSci/software-submission",
"title": "`sunpy` Review",
- "assignee": {
- "login": "cmarmo",
- },
+ "number": 147,
+ "assignee": {"login": "cmarmo", "id": 12345, "type": "User"},
"comments": 35,
"created_at": "2023-10-30T18:45:06Z",
"updated_at": "2024-02-22T01:24:31Z",
@@ -27,12 +28,13 @@
}
-def test_comment_to_list_returns_list(process_issues):
- """Test that comment_to_list returns a list"""
+def test_parse_header_as_dict(process_issues):
+ """Test that we can parse a header as a dict"""
- name, body = process_issues.comment_to_list(sample_response)
- assert isinstance(body, list)
+ header, body = process_issues._split_header(sample_response["body"])
+ meta = process_issues._header_as_dict(header)
+ assert isinstance(meta, dict)
def test_comment_no_name(process_issues):
@@ -43,16 +45,16 @@ def test_comment_no_name(process_issues):
This is a template issue not a code issue.
"""
- with pytest.warns(
- UserWarning, match="Package Name not found in the issue comment."
- ):
- name, body = process_issues.comment_to_list(sample_response_no_name)
- assert name == "missing_name"
+ with pytest.raises(ValidationError):
+ _ = process_issues.parse_issue(Issue(**sample_response_no_name))
-def test_comment_to_list_package_name(process_issues):
- """Test that comment_to_list returns a proper package name"""
+def test_parse_header_package_name(process_issues):
+ """Test that parsing the issue header returns the proper package name"""
- name, body = process_issues.comment_to_list(sample_response)
- assert name == "sunpy"
+ header, body = process_issues._split_header(sample_response["body"])
+ meta = process_issues._header_as_dict(header)
+ assert meta["package_name"] == "sunpy"
+ review = process_issues.parse_issue(Issue(**sample_response))
+ assert review.package_name == "sunpy"
diff --git a/tests/unit/test_github_api.py b/tests/unit/test_github_api.py
index 3113faf..03be907 100644
--- a/tests/unit/test_github_api.py
+++ b/tests/unit/test_github_api.py
@@ -2,6 +2,7 @@
import secrets
import pytest
+from pyosmeta import github_api
from pyosmeta.github_api import GitHubAPI
@@ -22,6 +23,13 @@
# Remove the GitHub token from the environment variable
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
+ def do_nothing():
+ pass
+
+ monkeypatch.setattr(github_api, "load_dotenv", do_nothing)
+
def test_get_token(mock_github_token):
"""Test that get_token accesses the token correctly when it is
@@ -33,10 +39,8 @@ def test_get_token(mock_github_token):
-def test_missing_token(mock_missing_github_token, tmpdir):
+def test_missing_token(mock_missing_github_token):
- """Test that a keyerror is raised when the token is missing.
- If you have a token in your temporary environment, this will
- fail and not return a keyerror."""
- os.chdir(tmpdir)
+ """Test that a keyerror is raised when the token is missing.."""
+
github_api = GitHubAPI()
with pytest.raises(KeyError, match="Oops! A GITHUB_TOKEN environment"):
diff --git a/tests/unit/test_parse_categories.py b/tests/unit/test_parse_categories.py
index e9419cb..747e041 100644
--- a/tests/unit/test_parse_categories.py
+++ b/tests/unit/test_parse_categories.py
@@ -2,33 +2,36 @@
from pyosmeta.models import ReviewModel
checked = [
- ["Submitting Author", "Nabil Freij (@nabobalis)"],
- ["- sunpy is a community-developed, free and open-source."],
- ["## Scope"],
- ["- Please indicate which category or categories."],
- ["- [X] Data retrieval"],
- ["- [ ] Data extraction"],
- ["- [x] Data Viz"],
- ["## Domain Specific"],
- ["something else"],
+ "Submitting Author",
+ "Nabil Freij (@nabobalis)",
+ "- sunpy is a community-developed, free and open-source.",
+ "## Scope",
+ "- Please indicate which category or categories.",
+ "- [X] Data retrieval",
+ "- [ ] Data extraction",
+ "- [x] Data Viz",
+ "## Domain Specific",
+ "something else",
]
not_checked = [
- ["Submitting Author", "Nabil Freij (@nabobalis)"],
- ["- sunpy is a community-developed, free and open-source."],
- ["## Scope"],
- ["- Please indicate which category or categories."],
- ["- [ ] Data retrieval"],
- ["- [ ] Data extraction"],
- ["- [ ] Data Viz"],
- ["## Domain Specific"],
- ["something else"],
+ "Submitting Author",
+ "Nabil Freij (@nabobalis)",
+ "- sunpy is a community-developed, free and open-source.",
+ "## Scope",
+ "- Please indicate which category or categories.",
+ "- [ ] Data retrieval",
+ "- [ ] Data extraction",
+ "- [ ] Data Viz",
+ "## Domain Specific",
+ "something else",
]
no_categories = [
- ["Submitting Author", "Nabil Freij (@nabobalis)"],
- ["- sunpy is a community-developed, free and open-source."],
- ["something else"],
+ "Submitting Author",
+ "Nabil Freij (@nabobalis)",
+ "- sunpy is a community-developed, free and open-source.",
+ "something else",
]
@@ -91,5 +94,5 @@ def test_clean_categories(
):
"""Test that ensures our pydantic model cleans categories as expected"""
- review = ReviewModel(categories=input_categories)
- assert review.categories == expected_return
+ cleaned = ReviewModel.clean_categories(categories=input_categories)
+ assert cleaned == expected_return
diff --git a/tests/unit/test_parse_issue_header_methods.py b/tests/unit/test_parse_issue_header_methods.py
index 4a837a3..79b4d30 100644
--- a/tests/unit/test_parse_issue_header_methods.py
+++ b/tests/unit/test_parse_issue_header_methods.py
@@ -4,13 +4,13 @@
"""
-def test_comment_to_list(process_issues, issue_list):
+def test_issue_as_dict(process_issues, issue_list):
"""A method within the parse issue header that turns the
- dictionary response from github into a parsable list.
+ issue body of the github response into a header metadata dict.
Test that it captures the package name properly and the
appropriate number of lines of information contained in the comment"""
-
- pkg_name, body_data = process_issues.comment_to_list(issue_list[0])
- assert pkg_name == "sunpy"
- assert len(body_data) == 79
+ header, body = process_issues._split_header(issue_list[0].body)
+ meta = process_issues._header_as_dict(header)
+ assert meta["package_name"] == "sunpy"
+ assert len(meta) == 13
diff --git a/tests/unit/test_utils_parse.py b/tests/unit/test_utils_parse.py
index 271f7e8..7f778f6 100644
--- a/tests/unit/test_utils_parse.py
+++ b/tests/unit/test_utils_parse.py
@@ -1,6 +1,7 @@
"""Tests for parse helper functions located in utils_parse module."""
import pytest
+from pyosmeta.models import ReviewUser
from pyosmeta.utils_parse import parse_user_names
@@ -9,14 +10,14 @@
[
(
"Test User (@test1user)",
- {"name": "Test User", "github_username": "test1user"},
+ ReviewUser(name="Test User", github_username="test1user"),
),
- ("(@test2user)", {"name": "", "github_username": "test2user"}),
+ ("(@test2user)", ReviewUser(name="", github_username="test2user")),
(
"Test (user) 3 (@test3user)",
- {"name": "Test user 3", "github_username": "test3user"},
+ ReviewUser(name="Test user 3", github_username="test3user"),
),
- ("@test4user", {"name": "", "github_username": "test4user"}),
+ ("@test4user", ReviewUser(name="", github_username="test4user")),
],
)
def test_parse_user_names(name, expected_result):