Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/slash-command-handler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ jobs:
if: >
github.event.issue.pull_request &&
(startsWith(github.event.comment.body, '/tag-run-ci-label') ||
startsWith(github.event.comment.body, '/rerun-failed-ci'))
startsWith(github.event.comment.body, '/rerun-failed-ci') ||
startsWith(github.event.comment.body, '/tag-and-rerun-ci'))
runs-on: ubuntu-latest

steps:
Expand Down
5 changes: 3 additions & 2 deletions docs/developer_guide/contribution_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ https://github.com/sgl-project/sglang/blob/main/.github/CI_PERMISSIONS.json

For CI to run on a pull request, it must have the **run-ci** label. Authorized users can add the label or rerun failed tests by commenting on the PR with one of these commands:

- `/tag-run-ci-label`: Tag the "run-ci" label. Every future commits will trigger CI.
- `/rerun-failed-ci`: Rerun the failed/flaky tests of the last commit.
- `/tag-run-ci-label`: Adds the "run-ci" label. Every future commit will trigger CI.
- `/rerun-failed-ci`: Reruns the failed or flaky tests from the most recent commit.
- `/tag-and-rerun-ci`: A single command that performs both `/tag-run-ci-label` and `/rerun-failed-ci`.

If you have permission, the [Slash Command Handler](https://github.com/sgl-project/sglang/actions/workflows/slash-command-handler.yml) will run your command and react with a 👍 to your comment. It may take up to a few minutes for the reaction to appear. Here’s a usage [example](https://github.com/sgl-project/sglang/pull/13498#issuecomment-3547552157).

Expand Down
71 changes: 54 additions & 17 deletions scripts/ci/slash_command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,38 @@ def load_permissions(user_login):
sys.exit(1)


def handle_tag_run_ci(gh_repo, pr, comment, user_perms):
def handle_tag_run_ci(gh_repo, pr, comment, user_perms, react_on_success=True):
"""
Handles the /tag-run-ci-label command.
Returns True if action was taken, False otherwise.
"""
if not user_perms.get("can_tag_run_ci_label", False):
print("Permission denied: can_tag_run_ci_label is false.")
return
return False

print("Permission granted. Adding 'run-ci' label.")
pr.add_to_labels("run-ci")

# React to the comment with +1
comment.create_reaction("+1")
print("Label added and comment reacted.")
if react_on_success:
comment.create_reaction("+1")
print("Label added and comment reacted.")
else:
print("Label added (reaction suppressed).")

return True


def handle_rerun_failed_ci(gh_repo, pr, comment, user_perms):
def handle_rerun_failed_ci(gh_repo, pr, comment, user_perms, react_on_success=True):
"""
Handles the /rerun-failed-ci command.
Reruns workflows with 'failure' or 'skipped' conclusions.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should also consider the cancelled conclusion.

Returns True if action was taken, False otherwise.
"""
if not user_perms.get("can_rerun_failed_ci", False):
print("Permission denied: can_rerun_failed_ci is false.")
return
return False

print("Permission granted. Triggering rerun of failed workflows.")
print("Permission granted. Triggering rerun of failed or skipped workflows.")

# Get the SHA of the latest commit in the PR
head_sha = pr.head.sha
Expand All @@ -78,22 +85,35 @@ def handle_rerun_failed_ci(gh_repo, pr, comment, user_perms):

rerun_count = 0
for run in runs:
# We only care about completed runs that failed
if run.status == "completed" and run.conclusion == "failure":
print(f"Rerunning workflow: {run.name} (ID: {run.id})")
if run.status != "completed":
continue

if run.conclusion == "failure":
print(f"Rerunning failed workflow: {run.name} (ID: {run.id})")
try:
# PyGithub uses rerun_failed_jobs() or rerun() depending on version/intent
# The traceback suggested rerun_failed_jobs
# Use rerun_failed_jobs for efficiency on failures
run.rerun_failed_jobs()
rerun_count += 1
except Exception as e:
print(f"Failed to rerun workflow {run.id}: {e}")

elif run.conclusion == "skipped":
print(f"Rerunning skipped workflow: {run.name} (ID: {run.id})")
try:
# Skipped workflows don't have 'failed jobs', so we use full rerun()
run.rerun()
rerun_count += 1
except Exception as e:
print(f"Failed to rerun workflow {run.id}: {e}")
Comment on lines +91 to +107

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To improve maintainability and reduce code duplication, you can refactor the logic for handling 'failure' and 'skipped' conclusions. The current implementation has two very similar try-except blocks. By determining the appropriate rerun action first and then executing it in a single try-except block, the code becomes cleaner and easier to manage.

Suggested change
if run.conclusion == "failure":
print(f"Rerunning failed workflow: {run.name} (ID: {run.id})")
try:
# PyGithub uses rerun_failed_jobs() or rerun() depending on version/intent
# The traceback suggested rerun_failed_jobs
# Use rerun_failed_jobs for efficiency on failures
run.rerun_failed_jobs()
rerun_count += 1
except Exception as e:
print(f"Failed to rerun workflow {run.id}: {e}")
elif run.conclusion == "skipped":
print(f"Rerunning skipped workflow: {run.name} (ID: {run.id})")
try:
# Skipped workflows don't have 'failed jobs', so we use full rerun()
run.rerun()
rerun_count += 1
except Exception as e:
print(f"Failed to rerun workflow {run.id}: {e}")
action = None
if run.conclusion == "failure":
print(f"Rerunning failed workflow: {run.name} (ID: {run.id})")
# Use rerun_failed_jobs for efficiency on failures
action = run.rerun_failed_jobs
elif run.conclusion == "skipped":
print(f"Rerunning skipped workflow: {run.name} (ID: {run.id})")
# Skipped workflows don't have 'failed jobs', so we use full rerun()
action = run.rerun
if action:
try:
action()
rerun_count += 1
except Exception as e:
print(f"Failed to rerun workflow {run.id}: {e}")


if rerun_count > 0:
comment.create_reaction("+1")
print(f"Triggered rerun for {rerun_count} failed workflows.")
print(f"Triggered rerun for {rerun_count} workflows.")
if react_on_success:
comment.create_reaction("+1")
return True
else:
print("No failed workflows found to rerun.")
print("No failed or skipped workflows found to rerun.")
return False


def main():
Expand Down Expand Up @@ -121,7 +141,6 @@ def main():
comment = repo.get_issue(pr_number).get_comment(comment_id)

# 4. Parse Command and Execute
# split lines to handle cases where there might be text after the command
first_line = comment_body.split("\n")[0].strip()

if first_line.startswith("/tag-run-ci-label"):
Expand All @@ -130,6 +149,24 @@ def main():
elif first_line.startswith("/rerun-failed-ci"):
handle_rerun_failed_ci(repo, pr, comment, user_perms)

elif first_line.startswith("/tag-and-rerun-ci"):
# Perform both actions, but suppress individual reactions
print("Processing combined command: /tag-and-rerun-ci")

tagged = handle_tag_run_ci(
repo, pr, comment, user_perms, react_on_success=False
)
rerun = handle_rerun_failed_ci(
repo, pr, comment, user_perms, react_on_success=False
)

# If at least one action was successful, add the reaction here
if tagged or rerun:
comment.create_reaction("+1")
print("Combined command processed successfully; reaction added.")
else:
print("Combined command finished, but no actions were taken.")

else:
print(f"Unknown or ignored command: {first_line}")

Expand Down
Loading