From 0e1ec40b44dbea6512de6d893c76fc3ef84af2c6 Mon Sep 17 00:00:00 2001 From: driazati Date: Wed, 19 Jan 2022 12:10:47 -0800 Subject: [PATCH 1/2] Add bot to ping reviewers after no activity --- .github/workflows/ping_reviewers.yml | 21 +++ tests/python/unittest/test_ci.py | 66 ++++++++ tests/scripts/ping_reviewers.py | 223 +++++++++++++++++++++++++++ 3 files changed, 310 insertions(+) create mode 100644 .github/workflows/ping_reviewers.yml create mode 100755 tests/scripts/ping_reviewers.py diff --git a/.github/workflows/ping_reviewers.yml b/.github/workflows/ping_reviewers.yml new file mode 100644 index 000000000000..5585110b0481 --- /dev/null +++ b/.github/workflows/ping_reviewers.yml @@ -0,0 +1,21 @@ +name: Ping Reviewers +on: + schedule: + - cron: "0/15 * * * *" + workflow_dispatch: + +concurrency: + group: ping + cancel-in-progress: true + +jobs: + ping: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Ping reviewers + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + python tests/scripts/ping_reviewers.py --wait-time-minutes 1440 diff --git a/tests/python/unittest/test_ci.py b/tests/python/unittest/test_ci.py index dfd1fd5cde17..e270a877a9ee 100644 --- a/tests/python/unittest/test_ci.py +++ b/tests/python/unittest/test_ci.py @@ -161,5 +161,71 @@ def test(commands, should_skip, pr_title, why): ) +def test_ping_reviewers(): + reviewers_script = REPO_ROOT / "tests" / "scripts" / "ping_reviewers.py" + + def run(pr, check): + data = { + "data": { + "repository": { + "pullRequests": { + "nodes": [pr], + "edges": [], + } + } + } + } + proc = subprocess.run( + [ + str(reviewers_script), + "--dry-run", + "--wait-time-minutes", + "1", + "--cutoff-pr-number", + "5", + "--allowlist", + "user", + "--pr-json", + json.dumps(data), + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + ) + if proc.returncode != 0: + raise RuntimeError(f"Process failed:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}") + + assert check in proc.stdout + + run( + { + "isDraft": True, + }, + "Checking 0 PRs", + ) + + run( + { + "isDraft": False, + "number": 2, + }, + "Checking 0 PRs", + ) + + run( + { + "number": 123, + "url": "https://github.com/apache/tvm/pull/123", + "body": "cc @someone", + "isDraft": False, + "author": {"login": "user"}, + "reviews": {"nodes": []}, + "publishedAt": "2022-01-18T17:54:19Z", + "comments": {"nodes": []}, + }, + "Pinging reviewers ['someone'] on https://github.com/apache/tvm/pull/123", + ) + + if __name__ == "__main__": sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/scripts/ping_reviewers.py b/tests/scripts/ping_reviewers.py new file mode 100755 index 000000000000..2d93397636fe --- /dev/null +++ b/tests/scripts/ping_reviewers.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import argparse +import re +import datetime +import json +from typing import Dict, Any, List + +from git_utils import git, GitHubRepo, parse_remote + + +def prs_query(user: str, repo: str, cursor: str = None): + after = "" + if cursor is not None: + after = f', before:"{cursor}"' + return f""" + {{ + repository(name: "{repo}", owner: "{user}") {{ + pullRequests(states: [OPEN], last: 10{after}) {{ + edges {{ + cursor + }} + nodes {{ + number + url + body + isDraft + author {{ + login + }} + reviews(last:100) {{ + nodes {{ + author {{ login }} + comments(last:100) {{ + nodes {{ + updatedAt + bodyText + }} + }} + }} + }} + publishedAt + comments(last:100) {{ + nodes {{ + authorAssociation + bodyText + updatedAt + author {{ + login + }} + }} + }} + }} + }} + }} + }} + """ + + +def find_reviewers(body: str) -> List[str]: + matches = re.findall(r"(cc( @[-A-Za-z0-9]+)+)", body, flags=re.MULTILINE) + matches = [full for full, last in matches] + + reviewers = [] + for match in matches: + if match.startswith("cc "): + match = match.replace("cc ", "") + users = [x.strip() for x in match.split("@")] + reviewers += users + + reviewers = set(x for x in reviewers if x != "") + return list(reviewers) + + +def check_pr(pr, wait_time): + published_at = datetime.datetime.strptime(pr["publishedAt"], "%Y-%m-%dT%H:%M:%SZ") + last_action = published_at + + # GitHub counts comments left as part of a review separately than standalone + # comments + reviews = pr["reviews"]["nodes"] + review_comments = [] + for review in reviews: + review_comments += review["comments"]["nodes"] + + # Collate all comments + comments = pr["comments"]["nodes"] + review_comments + + # Find the last date of any comment + for comment in comments: + commented_at = datetime.datetime.strptime(comment["updatedAt"], "%Y-%m-%dT%H:%M:%SZ") + if commented_at > last_action: + last_action = commented_at + + time_since_last_action = datetime.datetime.utcnow() - last_action + + # Find reviewers in the PR's body + pr_body_reviewers = find_reviewers(pr["body"]) + + # Pull out reviewers from any cc @... text in a comment + cc_reviewers = [find_reviewers(c["bodyText"]) for c in comments] + cc_reviewers = [r for revs in cc_reviewers for r in revs] + + # Anyone that has left a review as a reviewer (this may include the PR + # author since their responses count as reviews) + review_reviewers = list(set(r["author"]["login"] for r in reviews)) + + reviewers = cc_reviewers + review_reviewers + pr_body_reviewers + + if time_since_last_action > wait_time: + print( + " Pinging reviewers", + reviewers, + "on", + pr["url"], + "since it has been", + time_since_last_action, + "since anything happened on that PR", + ) + return reviewers + + return None + + +def ping_reviewers(pr, reviewers): + reviewers = [f"@{r}" for r in reviewers] + text = ( + "It has been a while since this PR was updated, " + + " ".join(reviewers) + + " please leave a review or address the outstanding comments" + ) + r = github.post(f"issues/{pr['number']}/comments", {"body": text}) + print(r) + + +if __name__ == "__main__": + help = "Comment on languishing issues and PRs" + parser = argparse.ArgumentParser(description=help) + parser.add_argument("--remote", default="origin", help="ssh remote to parse") + parser.add_argument("--wait-time-minutes", required=True, type=int, help="ssh remote to parse") + parser.add_argument("--cutoff-pr-number", default=0, type=int, help="ssh remote to parse") + parser.add_argument("--dry-run", action="store_true", help="don't update GitHub") + parser.add_argument("--allowlist", help="filter by these PR authors") + parser.add_argument("--pr-json", help="(testing) data for testing to use instead of GitHub") + args = parser.parse_args() + + remote = git(["config", "--get", f"remote.{args.remote}.url"]) + user, repo = parse_remote(remote) + + wait_time = datetime.timedelta(minutes=int(args.wait_time_minutes)) + cutoff_pr_number = int(args.cutoff_pr_number) + print( + "Running with:\n" + f" time cutoff: {wait_time}\n" + f" number cutoff: {cutoff_pr_number}\n" + f" dry run: {args.dry_run}\n" + f" user/repo: {user}/{repo}\n", + end="", + ) + + # [slow rollout] + # This code is here to gate this feature to a limited set of people before + # deploying it for everyone to avoid spamming in the case of bugs or + # ongoing development. + if args.allowlist: + author_allowlist = args.allowlist.split(",") + else: + github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo) + allowlist_issue = github.get("issues/9983") + author_allowlist = set(find_reviewers(allowlist_issue["body"])) + + if args.pr_json: + r = json.loads(args.pr_json) + else: + q = prs_query(user, repo) + r = github.graphql(q) + + # Loop until all PRs have been checked + while True: + prs = r["data"]["repository"]["pullRequests"]["nodes"] + + # Don't look at draft PRs at all + prs = [pr for pr in prs if not pr["isDraft"]] + + # Don't look at super old PRs + prs = [pr for pr in prs if pr["number"] > cutoff_pr_number] + + # [slow rollout] + prs = [pr for pr in prs if pr["author"]["login"] in author_allowlist] + + print(f"Checking {len(prs)} PRs: {[pr['number'] for pr in prs]}") + + # Ping reviewers on each PR in the response if necessary + for pr in prs: + print("Checking", pr["url"]) + reviewers = check_pr(pr, wait_time) + if reviewers is not None and not args.dry_run: + ping_reviewers(pr, reviewers) + + edges = r["data"]["repository"]["pullRequests"]["edges"] + if len(edges) == 0: + # No more results to check + break + + cursor = edges[0]["cursor"] + r = github.graphql(prs_query(user, repo, cursor)) From 1d92aef02db66513e81d91e049a5f2a03cbf0205 Mon Sep 17 00:00:00 2001 From: driazati Date: Thu, 27 Jan 2022 11:18:09 -0800 Subject: [PATCH 2/2] Address comments --- .github/workflows/ping_reviewers.yml | 2 +- docs/contribute/code_review.rst | 8 ++++ tests/python/unittest/test_ci.py | 70 +++++++++++++++++++++++++++- tests/scripts/ping_reviewers.py | 19 ++++++-- 4 files changed, 91 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ping_reviewers.yml b/.github/workflows/ping_reviewers.yml index 5585110b0481..a2e4c0103f4f 100644 --- a/.github/workflows/ping_reviewers.yml +++ b/.github/workflows/ping_reviewers.yml @@ -10,7 +10,7 @@ concurrency: jobs: ping: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Ping reviewers diff --git a/docs/contribute/code_review.rst b/docs/contribute/code_review.rst index 778574458427..173f8577ab37 100644 --- a/docs/contribute/code_review.rst +++ b/docs/contribute/code_review.rst @@ -220,3 +220,11 @@ To do so -- please click on changes tab in the pull request, then select approve or comment on the code and click request changes. Code owner can decide if the code can be merged in case by case if some of the reviewers did not respond in time(e.g. a week) and existing reviews are sufficient. + +Reviewers +~~~~~~~~~ +Reviewers should strive to leave timely feedback on pull requests for which their +review was requested. Reviewing code is an important part of the project's health +and should be considered a regular responsibility for contributors. Automated +tooling helps out in this regard, as PRs with no activity for a set amount of +time will get a bot comment pinging the relevant parties. diff --git a/tests/python/unittest/test_ci.py b/tests/python/unittest/test_ci.py index e270a877a9ee..f5183d79b768 100644 --- a/tests/python/unittest/test_ci.py +++ b/tests/python/unittest/test_ci.py @@ -84,7 +84,7 @@ def test(commands, should_skip, pr_title, why): git.run(*command) pr_number = "1234" proc = subprocess.run( - [str(skip_ci_script), "--pr", pr_number, "--pr-title", pr_title], cwd=git.cwd # dir + [str(skip_ci_script), "--pr", pr_number, "--pr-title", pr_title], cwd=git.cwd ) expected = 0 if should_skip else 1 assert proc.returncode == expected, why @@ -161,10 +161,16 @@ def test(commands, should_skip, pr_title, why): ) -def test_ping_reviewers(): +def test_ping_reviewers(tmpdir_factory): reviewers_script = REPO_ROOT / "tests" / "scripts" / "ping_reviewers.py" def run(pr, check): + git = TempGit(tmpdir_factory.mktemp("tmp_git_dir")) + # Jenkins git is too old and doesn't have 'git init --initial-branch' + git.run("init") + git.run("checkout", "-b", "main") + git.run("remote", "add", "origin", "https://github.com/apache/tvm.git") + data = { "data": { "repository": { @@ -187,10 +193,13 @@ def run(pr, check): "user", "--pr-json", json.dumps(data), + "--now", + "2022-01-26T17:54:19Z", ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8", + cwd=git.cwd, ) if proc.returncode != 0: raise RuntimeError(f"Process failed:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}") @@ -226,6 +235,63 @@ def run(pr, check): "Pinging reviewers ['someone'] on https://github.com/apache/tvm/pull/123", ) + # Check allowlist functionality + run( + { + "number": 123, + "url": "https://github.com/apache/tvm/pull/123", + "body": "cc @someone", + "isDraft": False, + "author": {"login": "user2"}, + "reviews": {"nodes": []}, + "publishedAt": "2022-01-18T17:54:19Z", + "comments": { + "nodes": [ + {"updatedAt": "2022-01-19T17:54:19Z", "bodyText": "abc"}, + ] + }, + }, + "Checking 0 PRs", + ) + + # Old comment, ping + run( + { + "number": 123, + "url": "https://github.com/apache/tvm/pull/123", + "body": "cc @someone", + "isDraft": False, + "author": {"login": "user"}, + "reviews": {"nodes": []}, + "publishedAt": "2022-01-18T17:54:19Z", + "comments": { + "nodes": [ + {"updatedAt": "2022-01-19T17:54:19Z", "bodyText": "abc"}, + ] + }, + }, + "Pinging reviewers ['someone'] on https://github.com/apache/tvm/pull/123", + ) + + # New comment, don't ping + run( + { + "number": 123, + "url": "https://github.com/apache/tvm/pull/123", + "body": "cc @someone", + "isDraft": False, + "author": {"login": "user"}, + "reviews": {"nodes": []}, + "publishedAt": "2022-01-18T17:54:19Z", + "comments": { + "nodes": [ + {"updatedAt": "2022-01-27T17:54:19Z", "bodyText": "abc"}, + ] + }, + }, + "Not pinging PR 123", + ) + if __name__ == "__main__": sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/scripts/ping_reviewers.py b/tests/scripts/ping_reviewers.py index 2d93397636fe..50e67824888c 100755 --- a/tests/scripts/ping_reviewers.py +++ b/tests/scripts/ping_reviewers.py @@ -25,6 +25,8 @@ from git_utils import git, GitHubRepo, parse_remote +GIT_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ" + def prs_query(user: str, repo: str, cursor: str = None): after = "" @@ -89,8 +91,8 @@ def find_reviewers(body: str) -> List[str]: return list(reviewers) -def check_pr(pr, wait_time): - published_at = datetime.datetime.strptime(pr["publishedAt"], "%Y-%m-%dT%H:%M:%SZ") +def check_pr(pr, wait_time, now): + published_at = datetime.datetime.strptime(pr["publishedAt"], GIT_DATE_FORMAT) last_action = published_at # GitHub counts comments left as part of a review separately than standalone @@ -105,11 +107,11 @@ def check_pr(pr, wait_time): # Find the last date of any comment for comment in comments: - commented_at = datetime.datetime.strptime(comment["updatedAt"], "%Y-%m-%dT%H:%M:%SZ") + commented_at = datetime.datetime.strptime(comment["updatedAt"], GIT_DATE_FORMAT) if commented_at > last_action: last_action = commented_at - time_since_last_action = datetime.datetime.utcnow() - last_action + time_since_last_action = now - last_action # Find reviewers in the PR's body pr_body_reviewers = find_reviewers(pr["body"]) @@ -135,6 +137,8 @@ def check_pr(pr, wait_time): "since anything happened on that PR", ) return reviewers + else: + print(f"Not pinging PR {pr['number']}") return None @@ -159,6 +163,7 @@ def ping_reviewers(pr, reviewers): parser.add_argument("--dry-run", action="store_true", help="don't update GitHub") parser.add_argument("--allowlist", help="filter by these PR authors") parser.add_argument("--pr-json", help="(testing) data for testing to use instead of GitHub") + parser.add_argument("--now", help="(testing) custom string for current time") args = parser.parse_args() remote = git(["config", "--get", f"remote.{args.remote}.url"]) @@ -192,6 +197,10 @@ def ping_reviewers(pr, reviewers): q = prs_query(user, repo) r = github.graphql(q) + now = datetime.datetime.utcnow() + if args.now: + now = datetime.datetime.strptime(args.now, GIT_DATE_FORMAT) + # Loop until all PRs have been checked while True: prs = r["data"]["repository"]["pullRequests"]["nodes"] @@ -210,7 +219,7 @@ def ping_reviewers(pr, reviewers): # Ping reviewers on each PR in the response if necessary for pr in prs: print("Checking", pr["url"]) - reviewers = check_pr(pr, wait_time) + reviewers = check_pr(pr, wait_time, now) if reviewers is not None and not args.dry_run: ping_reviewers(pr, reviewers)