diff --git a/.github/workflows/build-predictor.yml b/.github/workflows/build-predictor.yml
deleted file mode 100644
index 2073843..0000000
--- a/.github/workflows/build-predictor.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "Build Predictor App"
-
-on:
- workflow_call:
- inputs:
- rebuild:
- description: "Force a rebuild of the app"
- type: boolean
-
-env:
- BUILD_CACHE_KEY: "issue-labeler/predictor-app"
- GH_TOKEN: ${{ github.token }}
-
-jobs:
- check-cache:
- runs-on: ubuntu-24.04
- permissions:
- actions: write
- steps:
- - name: "Check the cache for an existing build of the Predictor"
- id: restore-predictor-app
- uses: actions/cache/restore@v4
- with:
- path: labeler-build/Predictor
- key: ${{ env.BUILD_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Show instructions for rebuilding"
- if: ${{ steps.restore-predictor-app.outputs.cache-hit == 'true' && !inputs.rebuild }}
- run: echo "To rebuild the predictor app, delete the '${{ env.BUILD_CACHE_KEY }}' action cache entry or rerun the 'build-predictor' workflow with 'rebuild' set to true."
-
- - name: "Delete existing cache entry"
- if: ${{ steps.restore-predictor-app.outputs.cache-hit == 'true' && inputs.rebuild }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.BUILD_CACHE_KEY }}
-
- outputs:
- needs-build: ${{ steps.restore-predictor-app.outputs.cache-hit != 'true' || inputs.rebuild }}
-
- build-predictor:
- runs-on: ubuntu-24.04
- needs: check-cache
- if: ${{ needs.check-cache.outputs.needs-build == 'true' }}
- steps:
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- - name: "Build Predictor"
- run: dotnet publish --self-contained -r linux-x64 -c Release -o ./labeler-build/Predictor ./src/Predictor
-
- - name: "Save Predictor app to cache"
- uses: actions/cache/save@v4
- with:
- path: labeler-build/Predictor
- key: ${{ env.BUILD_CACHE_KEY }}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index 72d04bc..0000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Build and Test
-
-on:
- push:
- branches:
- - main
- pull_request:
- branches:
- - main
-
-jobs:
- build:
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout repository
- uses: actions/checkout@v2
-
- - name: Setup .NET
- uses: actions/setup-dotnet@v2
- with:
- dotnet-version: '9.x'
-
- - name: Build solution
- run: dotnet build --configuration Release
-
- - name: Run tests
- run: dotnet test --configuration Release --no-build --verbosity minimal
diff --git a/.github/workflows/cache-retention.yml b/.github/workflows/cache-retention.yml
deleted file mode 100644
index 1777ece..0000000
--- a/.github/workflows/cache-retention.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-name: "Cache Retention"
-
-on:
- workflow_call:
- inputs:
- skip_issue_model:
- description: "Skip cache retention of the issue model"
- type: boolean
- skip_pull_model:
- description: "Skip cache retention of the pull model"
- type: boolean
-
-jobs:
- restore-predictor:
- runs-on: ubuntu-24.04
- steps:
- - name: "Check the cache for an existing build of the Predictor"
- uses: actions/cache/restore@v4
- with:
- path: labeler-build/Predictor
- key: issue-labeler/predictor-app
- fail-on-cache-miss: true
-
- restore-issue-model:
- if: ${{ !inputs.skip_issue_model }}
- runs-on: ubuntu-24.04
- steps:
- - name: "Restore issue model from cache"
- uses: actions/cache/restore@v4
- with:
- path: labeler-cache/issue-model.zip
- key: issue-labeler/issues/model/LIVE
- fail-on-cache-miss: true
-
- restore-pull-model:
- if: ${{ !inputs.skip_pull_model }}
- runs-on: ubuntu-24.04
- steps:
- - name: "Restore pull model from cache"
- uses: actions/cache/restore@v4
- with:
- path: labeler-cache/pull-model.zip
- key: issue-labeler/pulls/model/LIVE
- fail-on-cache-miss: true
diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml
new file mode 100644
index 0000000..bc7d5ec
--- /dev/null
+++ b/.github/workflows/ci-build.yml
@@ -0,0 +1,54 @@
+# CI Build and Test of the IssueLabeler solution
+name: "CI Build"
+
+on:
+  # Pushes to main only trigger CI when the CI workflows or the solution change
+  push:
+    branches:
+      - main
+    paths:
+      - ".github/workflows/ci-*.yml"
+      - "IssueLabeler/**"
+
+  # Every pull request targeting main is built and tested (no path filter)
+  pull_request:
+    branches:
+      - main
+
+  # Allow running the workflow manually from the Actions UI
+  workflow_dispatch:
+
+# Least privilege: both jobs only need to read the repository contents
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: "Set up the .NET SDK"
+        uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+        with:
+          dotnet-version: 9.0.x
+
+      - name: "Build the IssueLabeler solution"
+        run: dotnet build IssueLabeler/ --configuration Release
+
+  # Separate job gated on `build`; it repeats the checkout and SDK setup before running the tests
+  test:
+    needs: build
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: "Set up the .NET SDK"
+        uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+        with:
+          dotnet-version: 9.0.x
+
+      - name: "Run tests from the IssueLabeler solution"
+        run: dotnet test IssueLabeler/
diff --git a/.github/workflows/download-issues.yml b/.github/workflows/download-issues.yml
deleted file mode 100644
index 2e4e7ad..0000000
--- a/.github/workflows/download-issues.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-name: "Download Issues"
-
-on:
- workflow_call:
- inputs:
- github_token:
- description: "The GitHub token (defaults to action token)"
- type: string
- repository:
- description: "The org/repo to download data from (defaults to current repository)"
- type: string
-
- label_prefix:
- description: "Label prefix"
- type: string
- required: true
-
- issue_limit:
- description: "Max number of items to include in the model"
- type: number
- page_size:
- description: "The number of items to include on each request (max 100)"
- type: number
- page_limit:
- description: "Max pages of items to download"
- type: number
- retries:
- description: "Comma-separated list of retry delays in seconds"
- type: string
- data_cache_key:
- description: "The optional cache key suffix to use for saving the data"
- type: string
- backup_cache_key:
- description: "The cache key suffix to use for backing up the last downloaded data"
- type: string
- default: "backup"
-
-permissions:
- issues: read
- actions: write
-
-env:
- DATA_PATH: labeler-cache/issue-data.tsv
- DATA_CACHE_KEY: issue-labeler/issues/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}
- BACKUP_CACHE_KEY: issue-labeler/issues/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}/${{ inputs.backup_cache_key }}
- GH_TOKEN: ${{ github.token }}
-
-jobs:
- download-issues:
- runs-on: ubuntu-24.04
- steps:
- - name: "Restore existing data from cache"
- id: check-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.DATA_CACHE_KEY }}
- fail-on-cache-miss: false
-
- - name: "Check for existing backup cache entry"
- id: check-backup
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Abort if backup cache entry already exists"
- if: ${{ steps.check-backup.outputs.cache-hit == 'true' }}
- run: |
- echo "Cannot save backup of existing data. Backup cache key already exists."
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}"
-
- exit 1
-
- - name: "Cache backup of existing data"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- id: backup-data
- uses: actions/cache/save@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
-
- - name: "Delete existing cache entry"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.DATA_CACHE_KEY }}
-
- rm ${{ env.DATA_PATH }}
-
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- - name: "Run Downloader"
- run: |
- dotnet run -c Release --project ./src/Downloader -- \
- ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \
- ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \
- ${{ format('--issue-data "{0}"', env.DATA_PATH) }} \
- ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
- ${{ inputs.retries && format('--retries "{0}"', inputs.retries) }} \
- ${{ inputs.issue_limit && format('--issue-limit {0}', inputs.issue_limit) || '' }} \
- ${{ inputs.page_size && format('--page-size {0}', inputs.page_size) || '' }} \
- ${{ inputs.page_limit && format('--page-limit {0}', inputs.page_limit) || '' }}
-
- - name: "Save data to cache"
- uses: actions/cache/save@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.DATA_CACHE_KEY }}
diff --git a/.github/workflows/download-pulls.yml b/.github/workflows/download-pulls.yml
deleted file mode 100644
index ef8173a..0000000
--- a/.github/workflows/download-pulls.yml
+++ /dev/null
@@ -1,122 +0,0 @@
-name: "Download Pulls"
-
-on:
- workflow_call:
- inputs:
- github_token:
- description: "The GitHub token (defaults to action token)"
- type: string
- repository:
- description: "The org/repo to download data from (defaults to current repository)"
- type: string
-
- label_prefix:
- description: "Label prefix"
- type: string
- required: true
-
- pull_limit:
- description: "Max number of items to include in the model"
- type: number
- page_size:
- description: "The number of items to include on each request (max 100)"
- type: number
- page_limit:
- description: "Max pages of items to download"
- type: number
- retries:
- description: "Comma-separated list of retry delays in seconds"
- type: string
- data_cache_key:
- description: "The optional cache key suffix to use for saving the data"
- type: string
- backup_cache_key:
- description: "The cache key suffix to use for backing up the last downloaded data"
- type: string
- default: "backup"
-
-permissions:
- pull-requests: read
- actions: write
-
-env:
- DATA_PATH: labeler-cache/pull-data.tsv
- DATA_CACHE_KEY: issue-labeler/pulls/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}
- BACKUP_CACHE_KEY: issue-labeler/pulls/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}/${{ inputs.backup_cache_key }}
- GH_TOKEN: ${{ github.token }}
-
-jobs:
- download-pulls:
- runs-on: ubuntu-24.04
- steps:
- - name: "Restore existing data from cache"
- id: check-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.DATA_CACHE_KEY }}
- fail-on-cache-miss: false
-
- - name: "Check for existing backup cache entry"
- id: check-backup
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Abort if backup cache entry already exists"
- if: ${{ steps.check-backup.outputs.cache-hit == 'true' }}
- run: |
- echo "Cannot save backup of existing data. Backup cache key already exists."
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}"
-
- exit 1
-
- - name: "Cache backup of existing data"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- id: backup-data
- uses: actions/cache/save@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
-
- - name: "Delete existing cache entry"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.DATA_CACHE_KEY }}
-
- rm ${{ env.DATA_PATH }}
-
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- - name: "Run Downloader"
- run: |
- dotnet run -c Release --project ./src/Downloader -- \
- ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \
- ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \
- ${{ format('--pull-data "{0}"', env.DATA_PATH) }} \
- ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
- ${{ inputs.retries && format('--retries "{0}"', inputs.retries) }} \
- ${{ inputs.pull_limit && format('--pull-limit {0}', inputs.pull_limit) || '' }} \
- ${{ inputs.page_size && format('--page-size {0}', inputs.page_size) || '' }} \
- ${{ inputs.page_limit && format('--page-limit {0}', inputs.page_limit) || '' }}
-
- - name: "Save data to cache"
- uses: actions/cache/save@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.DATA_CACHE_KEY }}
diff --git a/.github/workflows/labeler-build-predictor.yml b/.github/workflows/labeler-build-predictor.yml
deleted file mode 100644
index 8a12b31..0000000
--- a/.github/workflows/labeler-build-predictor.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: "Labeler: Build Predictor App"
-
-on:
- # Allow dispatching the workflow via the Actions UI
- workflow_dispatch:
- inputs:
- rebuild:
- description: "Force a rebuild of the app"
- type: boolean
-
-jobs:
- build-predictor:
- permissions:
- actions: write
- uses: dotnet/issue-labeler/.github/workflows/build-predictor.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
- with:
- rebuild: ${{ inputs.rebuild }}
diff --git a/.github/workflows/labeler-cache-retention.yml b/.github/workflows/labeler-cache-retention.yml
index ea12d2b..ab3e0de 100644
--- a/.github/workflows/labeler-cache-retention.yml
+++ b/.github/workflows/labeler-cache-retention.yml
@@ -1,13 +1,35 @@
+# Regularly restore the prediction models from cache to prevent cache eviction
name: "Labeler: Cache Retention"
+# For more information about GitHub's action cache limits and eviction policy, see:
+# https://docs.github.com/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy
+
on:
schedule:
- cron: "36 4 * * *" # 4:36 every day (arbitrary time daily)
workflow_dispatch:
+ inputs:
+ cache_key:
+ description: "The cache key suffix to use for restoring the model from cache. Defaults to 'ACTIVE'."
+ required: true
+ default: "ACTIVE"
+
+env:
+ CACHE_KEY: ${{ inputs.cache_key || 'ACTIVE' }}  # scheduled runs carry no inputs, so fall back to 'ACTIVE'
jobs:
- cache-retention:
- # Do not run the workflow on forks outside the 'dotnet' org
- if: ${{ github.repository_owner == 'dotnet' }}
- uses: dotnet/issue-labeler/.github/workflows/cache-retention.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
+ restore-cache:
+ # Do not automatically run the workflow on forks outside the 'dotnet' org
+ if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }}
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false  # keep the other matrix entry running when one restore fails
+ matrix:
+ type: ["issues", "pulls"]  # one job per model type
+ steps:
+ - uses: dotnet/issue-labeler/restore@main  # NOTE(review): '@main' is a branch ref; the replaced reusable workflow was pinned to a commit SHA
+ with:
+ type: ${{ matrix.type }}
+ cache_key: ${{ env.CACHE_KEY }}
+ fail-on-cache-miss: true
diff --git a/.github/workflows/labeler-predict-issues.yml b/.github/workflows/labeler-predict-issues.yml
index f1783e7..7e10ba7 100644
--- a/.github/workflows/labeler-predict-issues.yml
+++ b/.github/workflows/labeler-predict-issues.yml
@@ -1,32 +1,58 @@
-name: "Labeler: Predict Issue Labels"
+# Predict labels for Issues using a trained model
+name: "Labeler: Predict (Issues)"
 on:
-  # Only automatically predict area labels when issues are originally opened
+  # Only automatically predict area labels when issues are first opened
   issues:
     types: opened
   # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers
   workflow_dispatch:
     inputs:
-      issue_numbers:
-        description: "Issue Numbers (comma-separated list of ranges)"
-        type: string
-      model_cache_key:
-        description: "The cache key suffix to use for loading the model"
-        type: string
+      issues:
+        description: "Issue Numbers (comma-separated list of ranges)."
         required: true
-        default: "LIVE"
+      cache_key:
+        description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'."
+        required: true
+        default: "ACTIVE"
+
+env:
+  # Do not allow failure for jobs triggered automatically (as this causes red noise on the workflows list)
+  ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }}
+
+  LABEL_PREFIX: "area-"
+  THRESHOLD: "0.40"
+  DEFAULT_LABEL: "needs-area-label"
 jobs:
-  predict-issues:
-    # Do not run the workflow on forks outside the 'dotnet' org
-    if: ${{ github.repository_owner == 'dotnet' && (inputs.issue_numbers || github.event.issue.number) }}
+  predict-issue-label:
+    # Do not automatically run the workflow on forks outside the 'dotnet' org
+    if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }}
+    runs-on: ubuntu-latest
     permissions:
       issues: write
-    uses: dotnet/issue-labeler/.github/workflows/predict-issues.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
-    with:
-      model_cache_key: ${{ inputs.model_cache_key }}
-      issue_numbers: ${{ inputs.issue_numbers || github.event.issue.number }}
-      label_prefix: "area-"
-      threshold: 0.40
-      default_label: "needs-area-label"
+    steps:
+      - name: "Restore issues model from cache"
+        id: restore-model
+        uses: dotnet/issue-labeler/restore@main
+        with:
+          type: issues
+          # Honor the dispatch input; 'issues' events carry no inputs, so fall back to 'ACTIVE'
+          cache_key: ${{ inputs.cache_key || 'ACTIVE' }}
+          fail-on-cache-miss: ${{ env.ALLOW_FAILURE }}
+          quiet: true
+
+      - name: "Predict issue labels"
+        id: prediction
+        if: ${{ steps.restore-model.outputs.cache-hit == 'true' }}
+        uses: dotnet/issue-labeler/predict@main
+        with:
+          issues: ${{ inputs.issues || github.event.issue.number }}
+          label_prefix: ${{ env.LABEL_PREFIX }}
+          threshold: ${{ env.THRESHOLD }}
+          default_label: ${{ env.DEFAULT_LABEL }}
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        # env values are strings and '!' on any non-empty string (even 'false') is false, so compare explicitly
+        continue-on-error: ${{ env.ALLOW_FAILURE != 'true' }}
diff --git a/.github/workflows/labeler-predict-pulls.yml b/.github/workflows/labeler-predict-pulls.yml
index 5b286a4..af2f68c 100644
--- a/.github/workflows/labeler-predict-pulls.yml
+++ b/.github/workflows/labeler-predict-pulls.yml
@@ -1,4 +1,5 @@
-name: "Labeler: Predict Pull Labels"
+# Predict labels for Pull Requests using a trained model
+name: "Labeler: Predict (Pulls)"
 on:
   # Per to the following documentation:
@@ -17,25 +18,50 @@ on:
   # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers
   workflow_dispatch:
     inputs:
-      pull_numbers:
-        description: "Pull Numbers (comma-separated list of ranges)"
-        type: string
-      model_cache_key:
-        description: "The cache key suffix to use for loading the model"
-        type: string
+      pulls:
+        description: "Pull Request Numbers (comma-separated list of ranges)."
         required: true
-        default: "LIVE"
+      cache_key:
+        description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'."
+        required: true
+        default: "ACTIVE"
+
+env:
+  # Do not allow failure for jobs triggered automatically (this can block PR merge)
+  ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }}
+
+  LABEL_PREFIX: "area-"
+  THRESHOLD: "0.40"
+  DEFAULT_LABEL: "needs-area-label"
 jobs:
-  predict-pulls:
-    # Do not run the workflow on forks outside the 'dotnet' org
-    if: ${{ github.repository_owner == 'dotnet' && (inputs.pull_numbers || github.event.number) }}
+  predict-pull-label:
+    # Do not automatically run the workflow on forks outside the 'dotnet' org
+    if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }}
+    runs-on: ubuntu-latest
     permissions:
       pull-requests: write
-    uses: dotnet/issue-labeler/.github/workflows/predict-pulls.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
-    with:
-      model_cache_key: ${{ inputs.model_cache_key }}
-      pull_numbers: ${{ inputs.pull_numbers || github.event.number }}
-      label_prefix: "area-"
-      threshold: 0.40
-      default_label: "needs-area-label"
+    steps:
+      - name: "Restore pulls model from cache"
+        id: restore-model
+        uses: dotnet/issue-labeler/restore@main
+        with:
+          type: pulls
+          # Honor the dispatch input; automatically triggered runs carry no inputs, so fall back to 'ACTIVE'
+          cache_key: ${{ inputs.cache_key || 'ACTIVE' }}
+          fail-on-cache-miss: ${{ env.ALLOW_FAILURE }}
+          quiet: true
+
+      - name: "Predict pull labels"
+        id: prediction
+        if: ${{ steps.restore-model.outputs.cache-hit == 'true' }}
+        uses: dotnet/issue-labeler/predict@main
+        with:
+          pulls: ${{ inputs.pulls || github.event.number }}
+          label_prefix: ${{ env.LABEL_PREFIX }}
+          threshold: ${{ env.THRESHOLD }}
+          default_label: ${{ env.DEFAULT_LABEL }}
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        # env values are strings and '!' on any non-empty string (even 'false') is false, so compare explicitly
+        continue-on-error: ${{ env.ALLOW_FAILURE != 'true' }}
diff --git a/.github/workflows/labeler-promote.yml b/.github/workflows/labeler-promote.yml
index 97f40af..965d502 100644
--- a/.github/workflows/labeler-promote.yml
+++ b/.github/workflows/labeler-promote.yml
@@ -1,42 +1,49 @@
-name: "Labeler: Promote Models"
+# Promote a model from staging to 'ACTIVE', backing up the currently 'ACTIVE' model
+name: "Labeler: Promotion"
on:
# Dispatched via the Actions UI, promotes the staged models from
- # a staging slot into the prediction environment
+ # a staged slot into the prediction environment
workflow_dispatch:
inputs:
- promote_issues:
+ issues:
description: "Issues: Promote Model"
type: boolean
required: true
- promote_pulls:
+ pulls:
description: "Pulls: Promote Model"
type: boolean
required: true
- model_cache_key:
- description: "The cache key suffix to promote into the 'LIVE' cache"
- type: string
+ staged_key:
+ description: "The cache key suffix to use for promoting a staged model to 'ACTIVE'. Defaults to 'staged'."
required: true
- default: "staging"
- backup_cache_key:
- description: "The cache key suffix to use for backing up the currently promoted model"
- type: string
+ default: "staged"
+ backup_key:
+ description: "The cache key suffix to use for backing up the currently active model. Defaults to 'backup'."
default: "backup"
permissions:
actions: write
jobs:
- labeler-promote-issues:
- if: ${{ inputs.promote_issues }}
- uses: dotnet/issue-labeler/.github/workflows/promote-issues.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
- with:
- model_cache_key: ${{ inputs.model_cache_key }}
- backup_cache_key: ${{ inputs.backup_cache_key }}
+ promote-issues:
+ if: ${{ inputs.issues }}  # only runs when the "Issues: Promote Model" box is checked
+ runs-on: ubuntu-latest
+ steps:
+ - name: "Promote Model for Issues"
+ uses: dotnet/issue-labeler/promote@main  # NOTE(review): '@main' is a branch ref; the replaced reusable workflow was pinned to a commit SHA
+ with:
+ type: "issues"
+ staged_key: ${{ inputs.staged_key }}
+ backup_key: ${{ inputs.backup_key }}
- labeler-promote-pulls:
- if: ${{ inputs.promote_pulls }}
- uses: dotnet/issue-labeler/.github/workflows/promote-pulls.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
- with:
- model_cache_key: ${{ inputs.model_cache_key }}
- backup_cache_key: ${{ inputs.backup_cache_key }}
+ promote-pulls:
+ if: ${{ inputs.pulls }}  # only runs when the "Pulls: Promote Model" box is checked
+ runs-on: ubuntu-latest
+ steps:
+ - name: "Promote Model for Pull Requests"
+ uses: dotnet/issue-labeler/promote@main  # NOTE(review): '@main' is a branch ref; the replaced reusable workflow was pinned to a commit SHA
+ with:
+ type: "pulls"
+ staged_key: ${{ inputs.staged_key }}
+ backup_key: ${{ inputs.backup_key }}
diff --git a/.github/workflows/labeler-train.yml b/.github/workflows/labeler-train.yml
index bb56563..3d7f542 100644
--- a/.github/workflows/labeler-train.yml
+++ b/.github/workflows/labeler-train.yml
@@ -1,68 +1,158 @@
-name: "Labeler: Train Models"
+# Train the Issues and Pull Requests models for label prediction
+name: "Labeler: Training"
on:
- # Dispatched via the Actions UI, stages new models for promotion consideration
- # Each step of the workflow can be run independently: Download, Train, and Test
workflow_dispatch:
inputs:
- download_issues:
- description: "Issues: Download Data"
- type: boolean
- default: true
- train_issues:
- description: "Issues: Train Model"
- type: boolean
- default: true
- test_issues:
- description: "Issues: Test Model"
- type: boolean
- default: true
- download_pulls:
- description: "Pulls: Download Data"
- type: boolean
- default: true
- train_pulls:
- description: "Pulls: Train Model"
- type: boolean
- default: true
- test_pulls:
- description: "Pulls: Test Model"
- type: boolean
- default: true
+ type:  # selects which model(s) the jobs below operate on
+ description: "Issues or Pull Requests"
+ type: choice
+ required: true
+ default: "Both"
+ options:
+ - "Both"
+ - "Issues"
+ - "Pull Requests"
- data_limit:
- description: "Max number of items to include in the model"
- type: number
+ steps:  # selects which stage(s) run; the job-level 'if' conditions test this value
+ description: "Training Steps"
+ type: choice
+ required: true
+ default: "All"
+ options:
+ - "All"
+ - "Download Data"
+ - "Train Model"
+ - "Test Model"
- github_token:
- description: "The GitHub token (defaults to action token)"
- type: string
repository:
- description: "The org/repo to download data from (defaults to current repository)"
- type: string
+ description: "The org/repo to download data from. Defaults to the current repository."
+ limit:
+ description: "Max number of items to download for training/testing the model (newest items are used). Defaults to the max number of pages times the page size."
+ type: number
+ page_size:
+ description: "Number of items per page in GitHub API requests. Defaults to 100 for issues, 25 for pull requests."
+ type: number
+ page_limit:
+ description: "Maximum number of pages to download for training/testing the model. Defaults to 1000 for issues, 4000 for pull requests."
+ type: number
cache_key_suffix:
- description: "The cache key suffix to use for staging data/models (use 'LIVE' to bypass staging)"
- type: string
+ description: "The cache key suffix to use for staged data/models (use 'ACTIVE' to bypass staging). Defaults to 'staged'."
required: true
- default: "staging"
+ default: "staged"
+
+env:
+ CACHE_KEY: ${{ inputs.cache_key_suffix }}  # shared by the download, train and test jobs
+ REPOSITORY: ${{ inputs.repository || github.repository }}  # falls back to the repository running the workflow
+ LABEL_PREFIX: "area-"
+ THRESHOLD: "0.40"  # quoted so the value stays a string rather than a YAML float
+ LIMIT: ${{ inputs.limit }}
+ PAGE_SIZE: ${{ inputs.page_size }}
+ PAGE_LIMIT: ${{ inputs.page_limit }}
jobs:
- labeler-train:
+ download-issues:  # runs when type is Both/Issues and steps is All/Download Data
+ if: ${{ contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Download Data"]'), inputs.steps) }}
+ runs-on: ubuntu-latest
permissions:
issues: read
+ steps:
+ - name: "Download Issues"
+ uses: dotnet/issue-labeler/download@main  # NOTE(review): '@main' is a branch ref, not a pinned commit - confirm this is intended
+ with:
+ type: "issues"
+ cache_key: ${{ env.CACHE_KEY }}
+ repository: ${{ env.REPOSITORY }}
+ label_prefix: ${{ env.LABEL_PREFIX }}
+ limit: ${{ env.LIMIT }}
+ page_size: ${{ env.PAGE_SIZE }}
+ page_limit: ${{ env.PAGE_LIMIT }}
+ env:
+ GITHUB_TOKEN: ${{ github.token }}  # exposes the workflow token to the action as an environment variable
+
+ download-pulls:  # runs when type is Both/Pull Requests and steps is All/Download Data
+ if: ${{ contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Download Data"]'), inputs.steps) }}
+ runs-on: ubuntu-latest
+ permissions:
+ pull-requests: read
+ steps:
+ - name: "Download Pull Requests"
+ uses: dotnet/issue-labeler/download@main  # NOTE(review): '@main' is a branch ref, not a pinned commit - confirm this is intended
+ with:
+ type: "pulls"
+ cache_key: ${{ env.CACHE_KEY }}
+ repository: ${{ env.REPOSITORY }}
+ label_prefix: ${{ env.LABEL_PREFIX }}
+ limit: ${{ env.LIMIT }}
+ page_size: ${{ env.PAGE_SIZE }}
+ page_limit: ${{ env.PAGE_LIMIT }}
+ env:
+ GITHUB_TOKEN: ${{ github.token }}  # exposes the workflow token to the action as an environment variable
+
+ train-issues:  # always() lets this be evaluated after a skipped download; a failed download still blocks it
+ if: ${{ always() && contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Train Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.download-issues.result) }}
+ runs-on: ubuntu-latest
+ permissions: {}  # no GITHUB_TOKEN scopes are granted to this job
+ needs: download-issues
+ steps:
+ - name: "Train Model for Issues"
+ uses: dotnet/issue-labeler/train@main  # NOTE(review): '@main' is a branch ref, not a pinned commit - confirm this is intended
+ with:
+ type: "issues"
+ data_cache_key: ${{ env.CACHE_KEY }}  # data and model share the same key suffix
+ model_cache_key: ${{ env.CACHE_KEY }}
+
+ train-pulls:  # always() lets this be evaluated after a skipped download; a failed download still blocks it
+ if: ${{ always() && contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Train Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.download-pulls.result) }}
+ runs-on: ubuntu-latest
+ permissions: {}  # no GITHUB_TOKEN scopes are granted to this job
+ needs: download-pulls
+ steps:
+ - name: "Train Model for Pull Requests"
+ uses: dotnet/issue-labeler/train@main  # NOTE(review): '@main' is a branch ref, not a pinned commit - confirm this is intended
+ with:
+ type: "pulls"
+ data_cache_key: ${{ env.CACHE_KEY }}  # data and model share the same key suffix
+ model_cache_key: ${{ env.CACHE_KEY }}
+
+ test-issues:  # always() lets this be evaluated after a skipped training job; a failed training job still blocks it
+ if: ${{ always() && contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Test Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.train-issues.result) }}
+ runs-on: ubuntu-latest
+ permissions:
+ issues: read
+ needs: train-issues
+ steps:
+ - name: "Test Model for Issues"
+ uses: dotnet/issue-labeler/test@main  # NOTE(review): '@main' is a branch ref, not a pinned commit - confirm this is intended
+ with:
+ type: "issues"
+ cache_key: ${{ env.CACHE_KEY }}
+ repository: ${{ env.REPOSITORY }}
+ label_prefix: ${{ env.LABEL_PREFIX }}
+ threshold: ${{ env.THRESHOLD }}
+ limit: ${{ env.LIMIT }}
+ page_size: ${{ env.PAGE_SIZE }}
+ page_limit: ${{ env.PAGE_LIMIT }}
+ env:
+ GITHUB_TOKEN: ${{ github.token }}  # exposes the workflow token to the action as an environment variable
+
+ test-pulls:  # always() lets this be evaluated after a skipped training job; a failed training job still blocks it
+ if: ${{ always() && contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Test Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.train-pulls.result) }}
+ runs-on: ubuntu-latest
+ permissions:
pull-requests: read
- actions: write
- uses: dotnet/issue-labeler/.github/workflows/train.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1
- with:
- download_issues: ${{ inputs.download_issues }}
- train_issues: ${{ inputs.train_issues }}
- test_issues: ${{ inputs.test_issues }}
- download_pulls: ${{ inputs.download_pulls }}
- train_pulls: ${{ inputs.train_pulls }}
- test_pulls: ${{ inputs.test_pulls }}
- data_limit: ${{ inputs.data_limit && fromJSON(inputs.data_limit) || 0 }}
- github_token: ${{ inputs.github_token }}
- repository: ${{ inputs.repository }}
- cache_key_suffix: ${{ inputs.cache_key_suffix }}
- label_prefix: "area-"
- threshold: 0.40
+ needs: train-pulls
+ steps:
+ - name: "Test Model for Pull Requests"
+ uses: dotnet/issue-labeler/test@main  # NOTE(review): '@main' is a branch ref; the replaced reusable workflow was pinned to a commit SHA
+ with:
+ type: "pulls"
+ cache_key: ${{ env.CACHE_KEY }}
+ repository: ${{ env.REPOSITORY }}
+ label_prefix: ${{ env.LABEL_PREFIX }}
+ threshold: ${{ env.THRESHOLD }}
+ limit: ${{ env.LIMIT }}
+ page_size: ${{ env.PAGE_SIZE }}
+ page_limit: ${{ env.PAGE_LIMIT }}
+ env:
+ GITHUB_TOKEN: ${{ github.token }}  # exposes the workflow token to the action as an environment variable
diff --git a/.github/workflows/predict-issues.yml b/.github/workflows/predict-issues.yml
deleted file mode 100644
index f97eab9..0000000
--- a/.github/workflows/predict-issues.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: "Predict Issue Labels"
-
-on:
- workflow_call:
- inputs:
- issue_numbers:
- description: "Issue Numbers"
- type: string
- required: true
- label_prefix:
- description: "Label Prefix"
- type: string
- required: true
- threshold:
- description: "The minimum confidence score for a label prediction"
- type: number
- required: true
- default_label:
- description: "Default Label (leave blank for no default label)"
- type: string
- model_cache_key:
- description: "The cache key suffix to use for loading the model"
- type: string
- required: true
- default: "LIVE"
-
-permissions:
- issues: write
-
-env:
- MODEL_PATH: labeler-cache/issue-model.zip
- MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }}
- BUILD_CACHE_KEY: "issue-labeler/predictor-app"
-
-jobs:
- predict-issues:
- runs-on: ubuntu-24.04
- steps:
- - name: "Restore the Predictor app from cache"
- id: restore-predictor-app
- uses: actions/cache/restore@v4
- with:
- path: labeler-build/Predictor
- key: ${{ env.BUILD_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Restore model from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Run Predictor"
- run: |
- ./labeler-build/Predictor/Predictor \
- ${{ format('--token "{0}"', secrets.GITHUB_TOKEN) }} \
- ${{ format('--repo "{0}"', github.repository) }} \
- ${{ format('--issue-model "{0}"', env.MODEL_PATH) }} \
- ${{ format('--issue-numbers "{0}"', inputs.issue_numbers) }} \
- ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
- ${{ format('--threshold {0}', inputs.threshold) }} \
- ${{ inputs.default_label && format('--default-label "{0}"', inputs.default_label) }}
diff --git a/.github/workflows/predict-pulls.yml b/.github/workflows/predict-pulls.yml
deleted file mode 100644
index 3ef239a..0000000
--- a/.github/workflows/predict-pulls.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: "Predict Pull Labels"
-
-on:
- workflow_call:
- inputs:
- pull_numbers:
- description: "Pull Numbers"
- type: string
- required: true
- label_prefix:
- description: "Label Prefix"
- type: string
- required: true
- threshold:
- description: "The minimum confidence score for a label prediction"
- type: number
- required: true
- default_label:
- description: "Default Label (leave blank for no default label)"
- type: string
- model_cache_key:
- description: "The cache key suffix to use for loading the model"
- type: string
- required: true
- default: "LIVE"
-
-permissions:
- pull-requests: write
-
-env:
- MODEL_PATH: labeler-cache/pull-model.zip
- MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }}
- BUILD_CACHE_KEY: "issue-labeler/predictor-app"
-
-jobs:
- predict-pulls:
- runs-on: ubuntu-24.04
- steps:
- - name: "Restore the Predictor app from cache"
- id: restore-predictor-app
- uses: actions/cache/restore@v4
- with:
- path: labeler-build/Predictor
- key: ${{ env.BUILD_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Restore model from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Run Predictor"
- run: |
- ./labeler-build/Predictor/Predictor \
- ${{ format('--token "{0}"', secrets.GITHUB_TOKEN) }} \
- ${{ format('--repo "{0}"', github.repository) }} \
- ${{ format('--pull-model "{0}"', env.MODEL_PATH) }} \
- ${{ format('--pull-numbers "{0}"', inputs.pull_numbers) }} \
- ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
- ${{ format('--threshold {0}', inputs.threshold) }} \
- ${{ inputs.default_label && format('--default-label "{0}"', inputs.default_label) }}
diff --git a/.github/workflows/promote-issues.yml b/.github/workflows/promote-issues.yml
deleted file mode 100644
index 86f500a..0000000
--- a/.github/workflows/promote-issues.yml
+++ /dev/null
@@ -1,88 +0,0 @@
-name: "Promote Issues Model"
-
-on:
- workflow_call:
- inputs:
- model_cache_key:
- description: "The cache key suffix to promote from staging"
- type: string
- required: true
- backup_cache_key:
- description: "The cache key suffix to use for backing up the currently promoted model"
- type: string
- default: "backup"
-
-env:
- MODEL_PATH: labeler-cache/issue-model.zip
- MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }}
- PROMOTION_CACHE_KEY: issue-labeler/issues/model/LIVE
- BACKUP_CACHE_KEY: issue-labeler/issues/model/${{ inputs.backup_cache_key }}
- GH_TOKEN: ${{ github.token }}
-
-permissions:
- actions: write
-
-jobs:
- promote-issues:
- runs-on: ubuntu-24.04
-
- steps:
- - name: "Check for existing backup cache entry"
- id: check-backup
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Abort if backup cache entry already exists"
- if: ${{ steps.check-backup.outputs.cache-hit == 'true' }}
- run: |
- echo "Cannot save backup of currently promoted model. Backup cache key already exists."
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}"
-
- exit 1
-
- - name: "Restore existing promotion cache entry if one exists"
- if: ${{ steps.check-backup.outputs.cache-hit != 'true'}}
- id: check-promotion
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.PROMOTION_CACHE_KEY }}
- fail-on-cache-miss: false
-
- - name: "Cache backup of existing promotion model"
- if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }}
- id: backup-model
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
-
- - name: "Remove local copy of currently promoted model"
- if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }}
- run: rm ${{ env.MODEL_PATH }}
-
- - name: "Restore model to be promoted from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Delete existing cache entry"
- if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.PROMOTION_CACHE_KEY }}
-
- - name: "Save promoted model to cache"
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.PROMOTION_CACHE_KEY }}
diff --git a/.github/workflows/promote-pulls.yml b/.github/workflows/promote-pulls.yml
deleted file mode 100644
index b1c847c..0000000
--- a/.github/workflows/promote-pulls.yml
+++ /dev/null
@@ -1,88 +0,0 @@
-name: "Promote Pulls Model"
-
-on:
- workflow_call:
- inputs:
- model_cache_key:
- description: "The cache key suffix to promote from staging"
- type: string
- required: true
- backup_cache_key:
- description: "The cache key suffix to use for backing up the currently promoted model"
- type: string
- default: "backup"
-
-env:
- MODEL_PATH: labeler-cache/pull-model.zip
- MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }}
- PROMOTION_CACHE_KEY: issue-labeler/pulls/model/LIVE
- BACKUP_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.backup_cache_key }}
- GH_TOKEN: ${{ github.token }}
-
-permissions:
- actions: write
-
-jobs:
- promote-pulls:
- runs-on: ubuntu-24.04
-
- steps:
- - name: "Check for existing backup cache entry"
- id: check-backup
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Abort if backup cache entry already exists"
- if: ${{ steps.check-backup.outputs.cache-hit == 'true' }}
- run: |
- echo "Cannot save backup of currently promoted model. Backup cache key already exists."
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}"
-
- exit 1
-
- - name: "Restore existing promotion cache entry if one exists"
- if: ${{ steps.check-backup.outputs.cache-hit != 'true'}}
- id: check-promotion
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.PROMOTION_CACHE_KEY }}
- fail-on-cache-miss: false
-
- - name: "Cache backup of existing promotion model"
- if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }}
- id: backup-model
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
-
- - name: "Remove local copy of currently promoted model"
- if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }}
- run: rm ${{ env.MODEL_PATH }}
-
- - name: "Restore model to be promoted from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Delete existing cache entry"
- if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.PROMOTION_CACHE_KEY }}
-
- - name: "Save promoted model to cache"
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.PROMOTION_CACHE_KEY }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..6ba0934
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,107 @@
+# Create a new release of the Issue Labeler, publishing the predictor Docker container image to the GitHub container registry
+name: "Release"
+
+on:
+ workflow_dispatch:  # Triggered manually; the ref it is run on supplies github.ref_name
+ inputs:
+ image_tags:
+ description: "The optional semicolon separated list of tags to apply to the published Docker container image. The ref name is added automatically."
+
+env:
+ BASE_IMAGE: mcr.microsoft.com/dotnet/runtime:9.0-noble-chiseled  # Passed to `dotnet publish` as ContainerBaseImage
+ IMAGE_TAGS: ${{ inputs.image_tags && format('{0};{1}', github.ref_name, inputs.image_tags) || github.ref_name }}  # The ref name, followed by the image_tags input when one is provided
+ PREDICTOR_IMAGE_NAME: ${{ github.repository }}/predictor  # Passed as ContainerRepository and used to build the ghcr.io image reference
+ PACKAGE_NAME_ESCAPED: issue-labeler%2Fpredictor  # Package name segment of the packages API path; NOTE(review): hard-coded rather than derived from github.repository
+ GITHUB_API_PACKAGE_OWNER: /orgs/dotnet  # Owner prefix of the packages API path; NOTE(review): hard-coded to the dotnet org
+
+jobs:
+ publish-predictor:  # Publishes the Predictor container image to ghcr.io and outputs its digest
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ packages: write
+ outputs:  # Read by the update-predictor-action job
+ digest: ${{ steps.published-image.outputs.digest }}
+ published_image_digest: ${{ steps.published-image.outputs.published_image_digest }}
+
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+ - name: "Set up the .NET SDK"
+ uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+ with:
+ dotnet-version: 9.0.x
+
+ - name: "Log in to the GitHub Container Registry"
+ uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: "Publish Predictor"  # Builds and pushes the image with the PublishContainer target; NOTE(review): ContainerLicenseExpression is given a LICENSE file URL rather than a license identifier - confirm
+ run: |
+ dotnet publish IssueLabeler/src/Predictor/Predictor.csproj \
+ /t:PublishContainer \
+ -p DebugType=none \
+ -p ContainerBaseImage=${{ env.BASE_IMAGE }} \
+ -p ContainerRegistry=ghcr.io \
+ -p ContainerImageTags='"${{ env.IMAGE_TAGS }}"' \
+ -p ContainerRepository=${{ env.PREDICTOR_IMAGE_NAME }} \
+ -p ContainerAuthors=${{ github.repository_owner }} \
+ -p ContainerInformationUrl=${{ format('{0}/{1}', github.server_url, github.repository) }} \
+ -p ContainerDocumentationUrl=${{ format('{0}/{1}/wiki', github.server_url, github.repository) }} \
+ -p ContainerLicenseExpression=${{ format('{0}/{1}/blob/main/LICENSE.TXT', github.server_url, github.repository) }}
+
+ - name: "Capture and output the Docker image digest to the workflow summary"
+ id: published-image
+ env:
+ GH_TOKEN: ${{ github.token }}
+ run: |
+ # Find the package version tagged with this release's ref name (always part of IMAGE_TAGS); its name is used as the image digest
+ DIGEST=$(gh api \
+ -H "Accept: application/vnd.github+json" \
+ -H "X-GitHub-Api-Version: 2022-11-28" \
+ ${{ format('{0}/packages/container/{1}/versions', env.GITHUB_API_PACKAGE_OWNER, env.PACKAGE_NAME_ESCAPED) }} \
+ | jq -r '.[] | select(.metadata.container.tags[] == "${{ github.ref_name }}") | .name' \
+ )
+ PUBLISHED_IMAGE_DIGEST=ghcr.io/${{ env.PREDICTOR_IMAGE_NAME }}@${DIGEST}
+ # Expose both values as step outputs (surfaced as job outputs for later jobs)
+ echo "digest=$DIGEST" >> $GITHUB_OUTPUT
+ echo "published_image_digest=$PUBLISHED_IMAGE_DIGEST" >> $GITHUB_OUTPUT
+ # Add a note with the digest to the workflow run summary
+ echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY
+ echo "> **Docker container image published.**" >> $GITHUB_STEP_SUMMARY
+ echo "> Digest: \`$DIGEST\`" >> $GITHUB_STEP_SUMMARY
+ echo "> Published: \`$PUBLISHED_IMAGE_DIGEST\`" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+
+ update-predictor-action:  # Points predict/action.yml at the newly published image digest and pushes the change
+ runs-on: ubuntu-latest
+ needs: publish-predictor
+ permissions:
+ contents: write
+ packages: read
+
+ steps:
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+ - name: "Update the `predict` action to use the published image digest"
+ run: |
+ PREDICT_ACTION="predict/action.yml"
+ sed -i "s|ghcr.io/${{ env.PREDICTOR_IMAGE_NAME }}@.*|${{ needs.publish-predictor.outputs.published_image_digest }} # ${{ env.IMAGE_TAGS }}|" $PREDICT_ACTION
+ # Commit the edit and push it to the ref the workflow ran on. NOTE(review): assumes that ref is a branch - confirm behavior for tag refs
+ git config user.name "GitHub Actions"
+ git config user.email "actions@github.com"
+ git add $PREDICT_ACTION
+ git commit -m "Release '${{ github.ref_name }}' with predictor digest '${{ needs.publish-predictor.outputs.digest }}'"
+ git push origin ${{ github.ref_name }}
+ # Link to the updated file and show the new image reference in the workflow run summary
+ echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY
+ echo "> Updated [\`predict/action.yml\` (${{ github.ref_name }})](${{ format('{0}/{1}/blob/{2}/predict/action.yml', github.server_url, github.repository, github.ref_name) }}) to:" >> $GITHUB_STEP_SUMMARY
+ echo "> \`${{ needs.publish-predictor.outputs.published_image_digest }}\`" >> $GITHUB_STEP_SUMMARY
+ echo "" >> $GITHUB_STEP_SUMMARY
+ # Append the lines around `using: docker` (1 before, 10 after) from the action file as a yml code block
+ echo "\`\`\`yml" >> $GITHUB_STEP_SUMMARY
+ grep -i -B1 -A10 '^\s*using:\s*docker' $PREDICT_ACTION >> $GITHUB_STEP_SUMMARY
+ echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
diff --git a/.github/workflows/test-issues.yml b/.github/workflows/test-issues.yml
deleted file mode 100644
index cfeccc9..0000000
--- a/.github/workflows/test-issues.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "Test Issues Model"
-
-on:
- workflow_call:
- inputs:
- github_token:
- description: "The GitHub token (defaults to action token)"
- type: string
- repository:
- description: "The org/repo to download data from (defaults to current repository)"
- type: string
-
- label_prefix:
- description: "Label Prefix"
- type: string
- required: true
- threshold:
- description: "The minimum confidence score for a label prediction"
- type: number
- required: true
-
- issue_limit:
- description: "Max number of items to include in the test"
- type: number
- model_cache_key:
- description: "The cache key suffix to use for loading the model"
- type: string
- required: true
-
-env:
- MODEL_PATH: labeler-cache/issue-model.zip
- MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }}
-
-jobs:
- test-issues:
- runs-on: ubuntu-24.04
- steps:
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- # Restore from cache after checkout out the repo to prevent
- # the restored files from getting removed during checkout
- - name: "Restore model from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Run Tester"
- run: |
- dotnet run -c Release --project ./src/Tester -- \
- ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \
- ${{ format('--repo "{0}"',inputs.repository || github.repository) }} \
- ${{ format('--issue-model "{0}"', env.MODEL_PATH) }} \
- ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
- ${{ format('--threshold {0}', inputs.threshold) }} \
- ${{ inputs.issue_limit && format('--issue-limit {0}', inputs.issue_limit) || '' }}
diff --git a/.github/workflows/test-pulls.yml b/.github/workflows/test-pulls.yml
deleted file mode 100644
index 0cd8afb..0000000
--- a/.github/workflows/test-pulls.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: "Test Pulls Model"
-
-on:
- workflow_call:
- inputs:
- github_token:
- description: "The GitHub token (defaults to action token)"
- type: string
- repository:
- description: "The org/repo to download data from (defaults to current repository)"
- type: string
-
- label_prefix:
- description: "Label Prefix"
- type: string
- required: true
- threshold:
- description: "The minimum confidence score for a label prediction"
- type: number
- required: true
-
- pull_limit:
- description: "Max number of items to include in the test"
- type: number
- model_cache_key:
- description: "The cache key suffix to use for loading the model"
- type: string
- required: true
-
-env:
- MODEL_PATH: labeler-cache/pull-model.zip
- MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }}
-
-jobs:
- test-pulls:
- runs-on: ubuntu-24.04
- steps:
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- # Restore from cache after checkout out the repo to prevent
- # the restored files from getting removed during checkout
- - name: "Restore model from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Run Tester"
- run: |
- dotnet run -c Release --project ./src/Tester -- \
- ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \
- ${{ format('--repo "{0}"',inputs.repository || github.repository) }} \
- ${{ format('--pull-model "{0}"', env.MODEL_PATH) }} \
- ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
- ${{ format('--threshold {0}', inputs.threshold) }} \
- ${{ inputs.pull_limit && format('--pull-limit {0}', inputs.pull_limit) || '' }}
diff --git a/.github/workflows/train-issues.yml b/.github/workflows/train-issues.yml
deleted file mode 100644
index 333df32..0000000
--- a/.github/workflows/train-issues.yml
+++ /dev/null
@@ -1,106 +0,0 @@
-name: "Train Issues Model"
-
-on:
- workflow_call:
- inputs:
- data_cache_key:
- description: "The optional cache key suffix to use for loading the data"
- type: string
- model_cache_key:
- description: "The cache key suffix to use for saving the model"
- type: string
- required: true
-
-permissions:
- actions: write
-
-env:
- DATA_PATH: labeler-cache/issue-data.tsv
- DATA_CACHE_KEY: issue-labeler/issues/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}
- MODEL_PATH: labeler-cache/issue-model.zip
- MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }}
- BACKUP_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }}/backup
- GH_TOKEN: ${{ github.token }}
-
-jobs:
- train-issues:
- runs-on: ubuntu-24.04
- steps:
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- # Restore from cache after checkout out the repo to prevent
- # the restored files from getting removed during checkout
- - name: "Restore data from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.DATA_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Restore existing model cache entry if one exists"
- id: check-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: false
-
- - name: "Check for existing backup cache entry"
- id: check-backup
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Abort if backup cache entry already exists"
- if: ${{ steps.check-backup.outputs.cache-hit == 'true' }}
- run: |
- echo "Cannot save backup of existing model. Backup cache key already exists."
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}"
-
- exit 1
-
- - name: "Cache backup of existing model"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- id: backup-model
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
-
- - name: "Delete restored model"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- run: |
- rm ${{ env.MODEL_PATH }}
-
- - name: "Run Trainer"
- run: |
- dotnet run -c Release --project ./src/Trainer -- \
- ${{ format('--issue-data "{0}"', env.DATA_PATH) }} \
- ${{ format('--issue-model "{0}"', env.MODEL_PATH) }}
-
- - name: "Delete existing model cache entry"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.MODEL_CACHE_KEY }}
-
- - name: "Save model to cache"
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
diff --git a/.github/workflows/train-pulls.yml b/.github/workflows/train-pulls.yml
deleted file mode 100644
index a2855f5..0000000
--- a/.github/workflows/train-pulls.yml
+++ /dev/null
@@ -1,106 +0,0 @@
-name: "Train Pulls Model"
-
-on:
- workflow_call:
- inputs:
- data_cache_key:
- description: "The optional cache key suffix to use for loading the data"
- type: string
- model_cache_key:
- description: "The cache key suffix to use for saving the model"
- type: string
- required: true
-
-permissions:
- actions: write
-
-env:
- DATA_PATH: labeler-cache/pull-data.tsv
- DATA_CACHE_KEY: issue-labeler/pulls/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}
- MODEL_PATH: labeler-cache/pull-model.zip
- MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }}
- BACKUP_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }}/backup
- GH_TOKEN: ${{ github.token }}
-
-jobs:
- train-pulls:
- runs-on: ubuntu-24.04
- steps:
- - name: "Check out the 'dotnet/issue-labeler' repo"
- uses: actions/checkout@v4
- with:
- repository: dotnet/issue-labeler
- ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1
-
- - uses: actions/setup-dotnet@v4
- with:
- dotnet-version: "9.0.x"
-
- # Restore from cache after checkout out the repo to prevent
- # the restored files from getting removed during checkout
- - name: "Restore data from cache"
- id: restore-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.DATA_PATH }}
- key: ${{ env.DATA_CACHE_KEY }}
- fail-on-cache-miss: true
-
- - name: "Restore existing model cache entry if one exists"
- id: check-cache
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
- fail-on-cache-miss: false
-
- - name: "Check for existing backup cache entry"
- id: check-backup
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- uses: actions/cache/restore@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
- lookup-only: true
- fail-on-cache-miss: false
-
- - name: "Abort if backup cache entry already exists"
- if: ${{ steps.check-backup.outputs.cache-hit == 'true' }}
- run: |
- echo "Cannot save backup of existing model. Backup cache key already exists."
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}"
-
- exit 1
-
- - name: "Cache backup of existing model"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- id: backup-model
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.BACKUP_CACHE_KEY }}
-
- - name: "Delete restored model"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- run: |
- rm ${{ env.MODEL_PATH }}
-
- - name: "Run Trainer"
- run: |
- dotnet run -c Release --project ./src/Trainer -- \
- ${{ format('--pull-data "{0}"', env.DATA_PATH) }} \
- ${{ format('--pull-model "{0}"', env.MODEL_PATH) }}
-
- - name: "Delete existing model cache entry"
- if: ${{ steps.check-cache.outputs.cache-hit == 'true' }}
- run: |
- gh api --method DELETE \
- -H "Accept: application/vnd.github+json" \
- -H "X-GitHub-Api-Version: 2022-11-28" \
- /repos/${{ github.repository }}/actions/caches?key=${{ env.MODEL_CACHE_KEY }}
-
- - name: "Save model to cache"
- uses: actions/cache/save@v4
- with:
- path: ${{ env.MODEL_PATH }}
- key: ${{ env.MODEL_CACHE_KEY }}
diff --git a/.github/workflows/train.yml b/.github/workflows/train.yml
deleted file mode 100644
index d98ef0f..0000000
--- a/.github/workflows/train.yml
+++ /dev/null
@@ -1,121 +0,0 @@
-name: "Train Models"
-
-on:
- workflow_call:
- inputs:
- download_issues:
- description: "Issues: Download Data"
- type: boolean
- train_issues:
- description: "Issues: Train Model"
- type: boolean
- test_issues:
- description: "Issues: Test Model"
- type: boolean
- download_pulls:
- description: "Pulls: Download Data"
- type: boolean
- train_pulls:
- description: "Pulls: Train Model"
- type: boolean
- test_pulls:
- description: "Pulls: Test Model"
- type: boolean
-
- label_prefix:
- description: "Label Prefix"
- type: string
- required: true
- threshold:
- description: "The minimum confidence score for a label prediction"
- type: number
- required: true
-
- data_limit:
- description: "Max number of items to include in the model"
- type: number
-
- github_token:
- description: "The GitHub token (defaults to action token)"
- type: string
- repository:
- description: "The org/repo to download data from (defaults to current repository)"
- type: string
- cache_key_suffix:
- description: "The cache key suffix to use for staging data/models (use 'LIVE' to bypass staging)"
- type: string
- required: true
-
-jobs:
- build-predictor:
- uses: dotnet/issue-labeler/.github/workflows/build-predictor.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
-
- labeler-download-issues:
- needs: build-predictor
- if: ${{ inputs.download_issues }}
- permissions:
- issues: read
- actions: write
- uses: dotnet/issue-labeler/.github/workflows/download-issues.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
- with:
- github_token: ${{ inputs.github_token || github.token }}
- repository: ${{ inputs.repository || github.repository }}
- data_cache_key: ${{ inputs.cache_key_suffix }}
- issue_limit: ${{ inputs.data_limit && fromJSON(inputs.data_limit) || 0 }}
- label_prefix: ${{ inputs.label_prefix }}
-
- labeler-train-issues:
- needs: labeler-download-issues
- if: ${{ inputs.train_issues && always() && (needs.labeler-download-issues.result == 'success' || needs.labeler-download-issues.result == 'skipped') }}
- permissions:
- actions: write
- uses: dotnet/issue-labeler/.github/workflows/train-issues.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
- with:
- data_cache_key: ${{ inputs.cache_key_suffix }}
- model_cache_key: ${{ inputs.cache_key_suffix }}
-
- labeler-test-issues:
- needs: [labeler-download-issues, labeler-train-issues]
- if: ${{ inputs.test_issues && always() && (needs.labeler-download-issues.result == 'success' || needs.labeler-download-issues.result == 'skipped') && (needs.labeler-train-issues.result == 'success' || needs.labeler-train-issues.result == 'skipped') }}
- uses: dotnet/issue-labeler/.github/workflows/test-issues.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
- with:
- github_token: ${{ inputs.github_token || github.token }}
- repository: ${{ inputs.repository || github.repository }}
- model_cache_key: ${{ inputs.cache_key_suffix }}
- label_prefix: ${{ inputs.label_prefix }}
- threshold: ${{ inputs.threshold }}
-
- labeler-download-pulls:
- needs: build-predictor
- if: ${{ inputs.download_pulls }}
- permissions:
- pull-requests: read
- actions: write
- uses: dotnet/issue-labeler/.github/workflows/download-pulls.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
- with:
- github_token: ${{ inputs.github_token || github.token }}
- repository: ${{ inputs.repository || github.repository }}
- data_cache_key: ${{ inputs.cache_key_suffix }}
- pull_limit: ${{ inputs.data_limit && fromJSON(inputs.data_limit) || 0 }}
- label_prefix: ${{ inputs.label_prefix }}
-
- labeler-train-pulls:
- needs: labeler-download-pulls
- if: ${{ inputs.train_pulls && always() && (needs.labeler-download-pulls.result == 'success' || needs.labeler-download-pulls.result == 'skipped') }}
- permissions:
- actions: write
- uses: dotnet/issue-labeler/.github/workflows/train-pulls.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
- with:
- data_cache_key: ${{ inputs.cache_key_suffix }}
- model_cache_key: ${{ inputs.cache_key_suffix }}
-
- labeler-test-pulls:
- needs: [labeler-download-pulls, labeler-train-pulls]
- if: ${{ inputs.test_pulls && always() && (needs.labeler-download-pulls.result == 'success' || needs.labeler-download-pulls.result == 'skipped') && (needs.labeler-train-pulls.result == 'success' || needs.labeler-train-pulls.result == 'skipped') }}
- uses: dotnet/issue-labeler/.github/workflows/test-pulls.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1
- with:
- github_token: ${{ inputs.github_token || github.token }}
- repository: ${{ inputs.repository || github.repository }}
- model_cache_key: ${{ inputs.cache_key_suffix }}
- label_prefix: ${{ inputs.label_prefix }}
- threshold: ${{ inputs.threshold }}
diff --git a/Directory.Build.props b/Directory.Build.props
deleted file mode 100644
index d74c6ef..0000000
--- a/Directory.Build.props
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
- net9.0
-
-
diff --git a/Directory.Packages.props b/Directory.Packages.props
deleted file mode 100644
index ab51761..0000000
--- a/Directory.Packages.props
+++ /dev/null
@@ -1,20 +0,0 @@
-
-
- true
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/IssueLabeler/Directory.Build.props b/IssueLabeler/Directory.Build.props
new file mode 100644
index 0000000..8272204
--- /dev/null
+++ b/IssueLabeler/Directory.Build.props
@@ -0,0 +1,15 @@
+
+
+
+
+ net9.0
+ true
+ true
+ $(MSBuildThisFileDirectory)artifacts
+
+
+
+
+ root
+
+
diff --git a/IssueLabeler/Directory.Packages.props b/IssueLabeler/Directory.Packages.props
new file mode 100644
index 0000000..5c936eb
--- /dev/null
+++ b/IssueLabeler/Directory.Packages.props
@@ -0,0 +1,24 @@
+
+
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/IssueLabeler.sln b/IssueLabeler/IssueLabeler.sln
similarity index 58%
rename from IssueLabeler.sln
rename to IssueLabeler/IssueLabeler.sln
index b412f13..14cbb47 100644
--- a/IssueLabeler.sln
+++ b/IssueLabeler/IssueLabeler.sln
@@ -3,6 +3,8 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "src\Common\Common.csproj", "{3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}"
+EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Downloader", "src\Downloader\Downloader.csproj", "{AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Trainer", "src\Trainer\Trainer.csproj", "{F1FE4054-C44E-487F-90F9-2F111AB7BD9C}"
@@ -13,14 +15,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tester", "src\Tester\Tester
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GitHubClient", "src\GitHubClient\GitHubClient.csproj", "{57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}"
EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "1. Downloader", "1. Downloader", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "2. Trainer", "2. Trainer", "{871B398D-3AB6-4F8B-9BC8-64646BDA0B75}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "3. Tester", "3. Tester", "{48C9A18F-FB08-41D5-9832-492AEFF6B2B2}"
-EndProject
-Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "4. Predictor", "4. Predictor", "{79FFE9CC-3518-4A4E-8FAB-DB121EE93AB8}"
-EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{A2C54AC3-3D94-4CD3-885E-D1892063CC58}"
ProjectSection(SolutionItems) = preProject
.github\copilot-instructions.md = .github\copilot-instructions.md
@@ -28,27 +22,16 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{A2C5
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{43E392F7-70F3-471D-A96A-E413E4387CA6}"
ProjectSection(SolutionItems) = preProject
- .github\workflows\build-predictor.yml = .github\workflows\build-predictor.yml
- .github\workflows\build.yml = .github\workflows\build.yml
- .github\workflows\download-issues.yml = .github\workflows\download-issues.yml
- .github\workflows\download-pulls.yml = .github\workflows\download-pulls.yml
- .github\workflows\labeler-build-predictor.yml = .github\workflows\labeler-build-predictor.yml
- .github\workflows\labeler-predict-issues.yml = .github\workflows\labeler-predict-issues.yml
- .github\workflows\labeler-predict-pulls.yml = .github\workflows\labeler-predict-pulls.yml
- .github\workflows\labeler-promote.yml = .github\workflows\labeler-promote.yml
- .github\workflows\labeler-train.yml = .github\workflows\labeler-train.yml
- .github\workflows\predict-issues.yml = .github\workflows\predict-issues.yml
- .github\workflows\predict-pulls.yml = .github\workflows\predict-pulls.yml
- .github\workflows\promote-issues.yml = .github\workflows\promote-issues.yml
- .github\workflows\promote-pulls.yml = .github\workflows\promote-pulls.yml
- .github\workflows\test-issues.yml = .github\workflows\test-issues.yml
- .github\workflows\test-pulls.yml = .github\workflows\test-pulls.yml
- .github\workflows\train-issues.yml = .github\workflows\train-issues.yml
- .github\workflows\train-pulls.yml = .github\workflows\train-pulls.yml
- .github\workflows\train.yml = .github\workflows\train.yml
+ ..\.github\workflows\build.yml = ..\.github\workflows\build.yml
+ ..\.github\workflows\labeler-cache-retention.yml = ..\.github\workflows\labeler-cache-retention.yml
+ ..\.github\workflows\labeler-predict-issues.yml = ..\.github\workflows\labeler-predict-issues.yml
+ ..\.github\workflows\labeler-predict-pulls.yml = ..\.github\workflows\labeler-predict-pulls.yml
+ ..\.github\workflows\labeler-promote.yml = ..\.github\workflows\labeler-promote.yml
+ ..\.github\workflows\labeler-train.yml = ..\.github\workflows\labeler-train.yml
+ ..\.github\workflows\release.yml = ..\.github\workflows\release.yml
EndProjectSection
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Shared.Tests", "tests\Shared.Tests\Shared.Tests.csproj", "{DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}"
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Tests", "tests\Common.Tests\Common.Tests.csproj", "{D3F816D3-5CAE-4CF1-8977-F92AE96B481B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -56,6 +39,10 @@ Global
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Release|Any CPU.Build.0 = Release|Any CPU
{AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Release|Any CPU.ActiveCfg = Release|Any CPU
@@ -76,19 +63,15 @@ Global
{57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Debug|Any CPU.Build.0 = Debug|Any CPU
{57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Release|Any CPU.ActiveCfg = Release|Any CPU
{57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Release|Any CPU.Build.0 = Release|Any CPU
- {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
- {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C} = {02EA681E-C7D8-13C7-8484-4AC65E1B71E8}
- {F1FE4054-C44E-487F-90F9-2F111AB7BD9C} = {871B398D-3AB6-4F8B-9BC8-64646BDA0B75}
- {2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E} = {79FFE9CC-3518-4A4E-8FAB-DB121EE93AB8}
- {BEA133F4-5686-49DF-83E4-641C26B3CC25} = {48C9A18F-FB08-41D5-9832-492AEFF6B2B2}
{43E392F7-70F3-471D-A96A-E413E4387CA6} = {A2C54AC3-3D94-4CD3-885E-D1892063CC58}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
diff --git a/NuGet.config b/IssueLabeler/NuGet.config
similarity index 100%
rename from NuGet.config
rename to IssueLabeler/NuGet.config
diff --git a/IssueLabeler/src/Common/App.cs b/IssueLabeler/src/Common/App.cs
new file mode 100644
index 0000000..c9dd8e4
--- /dev/null
+++ b/IssueLabeler/src/Common/App.cs
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Markdown;
+using Actions.Core.Services;
+
+/// <summary>
+/// This class contains methods to run tasks and handle exceptions.
+/// </summary>
+public static class App
+{
+    /// <summary>
+    /// Runs a list of tasks, catching and handling exceptions by logging them to the action's output and summary.
+    /// </summary>
+    /// <remarks>Upon completion, the persistent summary is written.</remarks>
+    /// <param name="tasks">The list of tasks to run, waiting for all tasks to complete.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A boolean indicating whether all tasks were completed successfully.</returns>
+    public async static Task<bool> RunTasks(List<Task> tasks, ICoreService action)
+    {
+        var allTasks = Task.WhenAll(tasks);
+        var success = await RunTasks(allTasks, action);
+
+        return success;
+    }
+
+    /// <summary>
+    /// Runs a list of tasks, catching and handling exceptions by logging them to the action's output and summary.
+    /// </summary>
+    /// <typeparam name="TResult">The Task result type.</typeparam>
+    /// <param name="tasks">The list of tasks to run, waiting for all tasks to complete.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A tuple containing the results of the tasks (an empty array when any task failed) and a boolean indicating whether all tasks were completed successfully.</returns>
+    public async static Task<(TResult[], bool)> RunTasks<TResult>(List<Task<TResult>> tasks, ICoreService action)
+    {
+        var allTasks = Task.WhenAll(tasks);
+        var success = await RunTasks(allTasks, action);
+        // Reading Result from a faulted task would rethrow the exception that was just handled and logged.
+        return (success ? allTasks.Result : Array.Empty<TResult>(), success);
+    }
+
+    /// <summary>
+    /// Runs a single task, catching and handling exceptions by logging them to the action's output and summary.
+    /// </summary>
+    /// <param name="task">The task to run, waiting for it to complete.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A boolean indicating whether the task was completed successfully.</returns>
+    private async static Task<bool> RunTasks(Task task, ICoreService action)
+    {
+        var success = false;
+        // Wait() reports failures as an AggregateException (handled below); await would rethrow only the first inner exception.
+        try
+        {
+            task.Wait();
+            success = true;
+        }
+        catch (AggregateException ex)
+        {
+            action.WriteError($"Exception occurred: {ex.Message}");
+
+            action.Summary.AddPersistent(summary =>
+            {
+                summary.AddAlert("Exception occurred", AlertType.Caution);
+                summary.AddNewLine();
+                summary.AddNewLine();
+                summary.AddMarkdownCodeBlock(ex.Message);
+            });
+        }
+
+        await action.Summary.WritePersistentAsync();
+        return success;
+    }
+}
diff --git a/IssueLabeler/src/Common/ArgUtils.cs b/IssueLabeler/src/Common/ArgUtils.cs
new file mode 100644
index 0000000..e728648
--- /dev/null
+++ b/IssueLabeler/src/Common/ArgUtils.cs
@@ -0,0 +1,423 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Text.RegularExpressions;
+using Actions.Core.Services;
+
+public class ArgUtils
+{
+    private ICoreService action; // GitHub action service used to read inputs when no argument queue is supplied.
+    private Action<string?> showUsage; // Reports an invalid-input message through the caller-supplied usage callback.
+    private Queue<string>? arguments { get; } // Command-line arguments to dequeue values from; null when reading action inputs.
+
+    /// <summary>
+    /// Create an arguments utility class instance for a GitHub action, with input values retrieved from the GitHub action.
+    /// </summary>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="showUsage">A method to show usage information for the application.</param>
+    public ArgUtils(ICoreService action, Action<string?, ICoreService> showUsage)
+    {
+        this.action = action;
+        this.showUsage = message => showUsage(message, action);
+    }
+
+    /// <summary>
+    /// Create an arguments utility class instance for a GitHub action, with input values retrieved from a queue of command-line arguments.
+    /// </summary>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="showUsage">A method to show usage information for the application.</param>
+    /// <param name="arguments">The queue of command-line arguments to extract argument values from.</param>
+    public ArgUtils(ICoreService action, Action<string?, ICoreService> showUsage, Queue<string> arguments) : this(action, showUsage)
+    {
+        this.arguments = arguments;
+    }
+
+ /// <summary>
+ /// Gets the input string for the specified input.
+ /// </summary>
+ /// <remarks>
+ /// When running as a GitHub action, this method will retrieve the input value from the action's inputs.
+ /// </remarks>
+ /// <remarks>
+ /// When using the constructor with a queue of command-line arguments, this method will dequeue the next argument from the queue.
+ /// </remarks>
+ /// <param name="inputName">The name of the input to retrieve.</param>
+ /// <returns>A nullable string containing the input value if retrieved, or null if there is no value specified or it is blank.</returns>
+ private string? GetInputString(string inputName)
+ {
+ string? input = null;
+ // In queue mode the next argument is consumed regardless of inputName.
+ if (arguments is not null)
+ {
+ if (arguments.TryDequeue(out string? argValue))
+ {
+ input = argValue;
+ }
+ }
+ else
+ {
+ input = action.GetInput(inputName);
+ }
+ // Empty or whitespace-only values are reported as null.
+ return string.IsNullOrWhiteSpace(input) ? null : input;
+ }
+
+ /// <summary>
+ /// Try to get a string input value, guarding against null values.
+ /// </summary>
+ /// <param name="inputName">The name of the input to retrieve.</param>
+ /// <param name="value">The output string value if retrieved, or null if there is no value specified or it was empty.</param>
+ /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+ public bool TryGetString(string inputName, [NotNullWhen(true)] out string? value)
+ {
+ value = GetInputString(inputName);
+ return value is not null;
+ }
+
+ /// <summary>
+ /// Determine if the specified flag is provided and set to true.
+ /// </summary>
+ /// <param name="inputName">The name of the flag to retrieve.</param>
+ /// <param name="value"><c>true</c> if the flag is provided and set to true, <c>false</c> otherwise.</param>
+ /// <returns>A boolean indicating if the flag was checked successfully, only returning <c>false</c> if specified as an invalid value.</returns>
+ public bool TryGetFlag(string inputName, [NotNullWhen(true)] out bool? value)
+ {
+ string? input = GetInputString(inputName);
+ // A flag that is not specified is treated as false rather than as an error.
+ if (input is null)
+ {
+ value = false;
+ return true;
+ }
+ // Anything that bool.TryParse rejects is reported through the usage callback.
+ if (!bool.TryParse(input, out bool parsedValue))
+ {
+ showUsage($"Input '{inputName}' must be 'true', 'false', 'TRUE', or 'FALSE'.");
+ value = null;
+ return false;
+ }
+
+ value = parsedValue;
+ return true;
+ }
+
+    /// <summary>
+    /// Try to get the GitHub repository name from the input or environment variable.
+    /// </summary>
+    /// <remarks>
+    /// Defaults to the GITHUB_REPOSITORY environment variable if the input is not specified.
+    /// </remarks>
+    /// <param name="inputName">The name of the input to retrieve.</param>
+    /// <param name="org">The GitHub organization name, extracted from the specified {org}/{repo} value.</param>
+    /// <param name="repo">The GitHub repository name, extracted from the specified {org}/{repo} value.</param>
+    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+    public bool TryGetRepo(string inputName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out string? repo)
+    {
+        string? orgRepo = GetInputString(inputName) ?? Environment.GetEnvironmentVariable("GITHUB_REPOSITORY");
+        string[] parts = orgRepo?.Split('/') ?? Array.Empty<string>();
+        // Exactly two non-blank segments are required, so 'org/', '/repo', and 'a/b/c' are all rejected.
+        if (parts.Length != 2 || parts.Any(string.IsNullOrWhiteSpace))
+        {
+            showUsage($$"""Input '{{inputName}}' has an empty value or is not in the format of '{org}/{repo}'. Value defaults to GITHUB_REPOSITORY environment variable if not specified.""");
+            org = null;
+            repo = null;
+            return false;
+        }
+
+        org = parts[0];
+        repo = parts[1];
+        return true;
+    }
+
+    /// <summary>
+    /// Try to get the GitHub repository list from the input or environment variable.
+    /// </summary>
+    /// <remarks>
+    /// Defaults to the GITHUB_REPOSITORY environment variable if the input is not specified.
+    /// </remarks>
+    /// <remarks>
+    /// All repositories must be from the same organization.
+    /// </remarks>
+    /// <param name="inputName">The name of the input to retrieve.</param>
+    /// <param name="org">The GitHub organization name, extracted from the specified {org}/{repo} value.</param>
+    /// <param name="repos">The list of GitHub repository names, extracted from the specified {org}/{repo} value.</param>
+    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+    public bool TryGetRepoList(string inputName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out List<string>? repos)
+    {
+        string? orgRepos = GetInputString(inputName) ?? Environment.GetEnvironmentVariable("GITHUB_REPOSITORY");
+        org = null;
+        repos = null;
+
+        if (orgRepos is null)
+        {
+            showUsage($$"""Input '{{inputName}}' has an empty value or is not in the format of '{org}/{repo}': {{orgRepos}}""");
+            return false;
+        }
+
+        foreach (var orgRepo in orgRepos.Split(',').Select(r => r.Trim()))
+        {
+            string[] parts = orgRepo.Split('/');
+            // Exactly two non-blank segments are required, so 'org/', '/repo', and 'a/b/c' are all rejected.
+            if (parts.Length != 2 || parts.Any(string.IsNullOrWhiteSpace))
+            {
+                showUsage($$"""Input '{{inputName}}' contains a value that is not in the format of '{org}/{repo}': {{orgRepo}}""");
+                return false;
+            }
+
+            if (org is not null && org != parts[0])
+            {
+                showUsage($"All '{inputName}' values must be from the same org.");
+                return false;
+            }
+
+            org ??= parts[0];
+            repos ??= [];
+            repos.Add(parts[1]);
+        }
+
+        return (org is not null && repos is not null);
+    }
+
+    /// <summary>
+    /// Try to get the label prefix from the input.
+    /// </summary>
+    /// <remarks>
+    /// The label prefix must end with a non-alphanumeric character.
+    /// </remarks>
+    /// <param name="inputName">The name of the input to retrieve.</param>
+    /// <param name="labelPredicate">The label predicate function that checks if a label starts with the specified prefix.</param>
+    /// <returns><c>true</c> if the label prefix was retrieved successfully, <c>false</c> otherwise.</returns>
+    public bool TryGetLabelPrefix(string inputName, [NotNullWhen(true)] out Func<string, bool>? labelPredicate)
+    {
+        string? labelPrefix = GetInputString(inputName);
+
+        if (labelPrefix is null)
+        {
+            labelPredicate = null;
+            return false;
+        }
+
+        // Require that the label prefix end in something other than a letter or number
+        // This promotes the pattern of prefixes that are clear, rather than a prefix that
+        // could be matched as the beginning of another word in the label
+        if (char.IsAsciiLetterOrDigit(labelPrefix[^1]))
+        {
+            showUsage($"""
+                Input '{inputName}' must end in a non-alphanumeric character.
+
+                The recommended label prefix terminating character is '-'.
+                The recommended label prefix for applying area labels is 'area-'.
+                """);
+
+            labelPredicate = null;
+            return false;
+        }
+
+        labelPredicate = (label) => label.StartsWith(labelPrefix, StringComparison.OrdinalIgnoreCase);
+        return true;
+    }
+
+    /// <summary>
+    /// Try to get a file path from the input.
+    /// </summary>
+    /// <remarks>
+    /// The file path is converted to an absolute path if it is not already absolute.
+    /// </remarks>
+    /// <param name="inputName">The name of the input to retrieve.</param>
+    /// <param name="path">The output file path if retrieved, or null if there is no value specified.</param>
+    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+    public bool TryGetPath(string inputName, [NotNullWhen(true)] out string? path)
+    {
+        path = GetInputString(inputName);
+
+        if (path is null)
+        {
+            return false;
+        }
+
+        if (!Path.IsPathRooted(path))
+        {
+            path = Path.GetFullPath(path);
+        }
+
+        return true;
+    }
+
+ /// <summary>
+ /// Try to get a string array from the input.
+ /// </summary>
+ /// <remarks>
+ /// The string array is split by commas and trimmed of whitespace.
+ /// </remarks>
+ /// <param name="inputName">The name of the input to retrieve.</param>
+ /// <param name="values">The output string array if retrieved, or null if there is no value specified.</param>
+ /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+ public bool TryGetStringArray(string inputName, [NotNullWhen(true)] out string[]? values)
+ {
+ string? input = GetInputString(inputName);
+
+ if (input is null)
+ {
+ values = null;
+ return false;
+ }
+ // Entries are trimmed and empty entries are dropped.
+ values = input.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
+ return true;
+ }
+
+ /// <summary>
+ /// Try to get an integer from the input.
+ /// </summary>
+ /// <param name="inputName">The name of the input to retrieve.</param>
+ /// <param name="value">The output integer value if retrieved, or null if there is no value specified.</param>
+ /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+ public bool TryGetInt(string inputName, [NotNullWhen(true)] out int? value) =>
+ TryParseInt(inputName, GetInputString(inputName), out value);
+
+ /// <summary>
+ /// Try to parse an integer from the input string.
+ /// </summary>
+ /// <param name="inputName">The name of the input to retrieve.</param>
+ /// <param name="input">The input string to parse.</param>
+ /// <param name="value">The output integer value if parsed successfully, or null if the input is invalid.</param>
+ /// <returns><c>true</c> if the input value was parsed successfully, <c>false</c> otherwise.</returns>
+ private bool TryParseInt(string inputName, string? input, [NotNullWhen(true)] out int? value)
+ {
+ if (input is null || !int.TryParse(input, out int parsedValue))
+ {
+ showUsage($"Input '{inputName}' must be an integer.");
+ value = null;
+ return false;
+ }
+
+ value = parsedValue;
+ return true;
+ }
+
+ /// <summary>
+ /// Try to get an integer array from the input.
+ /// </summary>
+ /// <param name="inputName">The name of the input to retrieve.</param>
+ /// <param name="values">The output integer array if retrieved, or null if there is no value specified.</param>
+ /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+ public bool TryGetIntArray(string inputName, [NotNullWhen(true)] out int[]? values)
+ {
+ string? input = GetInputString(inputName);
+
+ if (input is not null)
+ {
+ string[] inputValues = input.Split(',');
+ // Invalid entries contribute no elements, so a length mismatch below signals a parse failure.
+ int[] parsedValues = inputValues.SelectMany(v => {
+ if (!TryParseInt(inputName, v, out int? value))
+ {
+ return new int[0];
+ }
+
+ return [value.Value];
+ }).ToArray();
+
+ if (parsedValues.Length == inputValues.Length)
+ {
+ values = parsedValues;
+ return true;
+ }
+ }
+
+ values = null;
+ return false;
+ }
+
+    /// <summary>
+    /// Try to get a float from the input.
+    /// </summary>
+    /// <param name="inputName">The name of the input to retrieve.</param>
+    /// <param name="value">The output float value if retrieved, or null if there is no value specified.</param>
+    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+    public bool TryGetFloat(string inputName, [NotNullWhen(true)] out float? value)
+    {
+        string? input = GetInputString(inputName);
+        // Parse with the invariant culture so a value such as '0.4' means the same thing regardless of the host's locale.
+        if (input is null || !float.TryParse(input, System.Globalization.CultureInfo.InvariantCulture, out float parsedValue))
+        {
+            showUsage($"Input '{inputName}' must be a decimal value.");
+            value = null;
+            return false;
+        }
+
+        value = parsedValue;
+        return true;
+    }
+
+    /// <summary>
+    /// Try to get a list of number ranges from the input.
+    /// </summary>
+    /// <remarks>
+    /// The input is a comma-separated list of numbers and/or dash-separated ranges.
+    /// </remarks>
+    /// <param name="inputName">The name of the input to retrieve.</param>
+    /// <param name="values">The output list of ulong values if retrieved, or null if there is no value specified.</param>
+    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
+    public bool TryGetNumberRanges(string inputName, [NotNullWhen(true)] out List<ulong>? values)
+    {
+        string? input = GetInputString(inputName);
+
+        if (input is not null)
+        {
+            var showUsageError = () => showUsage($"Input '{inputName}' must be comma-separated list of numbers and/or dash-separated ranges. Example: 1-3,5,7-9.");
+            List<ulong> numbers = [];
+
+            foreach (var range in input.Split(','))
+            {
+                var beginEnd = range.Split('-');
+
+                if (beginEnd.Length == 1)
+                {
+                    if (!ulong.TryParse(beginEnd[0], out ulong number))
+                    {
+                        showUsageError();
+                        values = null;
+                        return false;
+                    }
+
+                    numbers.Add(number);
+                }
+                else if (beginEnd.Length == 2)
+                {
+                    if (!ulong.TryParse(beginEnd[0], out ulong begin))
+                    {
+                        showUsageError();
+                        values = null;
+                        return false;
+                    }
+                    // The end must parse and must not precede the beginning; a reversed range would otherwise be silently dropped.
+                    if (!ulong.TryParse(beginEnd[1], out ulong end) || end < begin)
+                    {
+                        showUsageError();
+                        values = null;
+                        return false;
+                    }
+
+                    for (var number = begin; number <= end; number++)
+                    {
+                        numbers.Add(number);
+                    }
+                }
+                else
+                {
+                    showUsageError();
+                    values = null;
+                    return false;
+                }
+            }
+
+            values = numbers;
+            return true;
+        }
+
+        values = null;
+        return false;
+    }
+}
diff --git a/IssueLabeler/src/Common/Common.csproj b/IssueLabeler/src/Common/Common.csproj
new file mode 100644
index 0000000..e093af2
--- /dev/null
+++ b/IssueLabeler/src/Common/Common.csproj
@@ -0,0 +1,12 @@
+
+
+
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/src/Common/DataFileUtils.cs b/IssueLabeler/src/Common/DataFileUtils.cs
similarity index 100%
rename from src/Common/DataFileUtils.cs
rename to IssueLabeler/src/Common/DataFileUtils.cs
diff --git a/IssueLabeler/src/Common/GitHubActionSummary.cs b/IssueLabeler/src/Common/GitHubActionSummary.cs
new file mode 100644
index 0000000..bdc927c
--- /dev/null
+++ b/IssueLabeler/src/Common/GitHubActionSummary.cs
@@ -0,0 +1,72 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Summaries;
+
+namespace Actions.Core.Services;
+
+/// <summary>
+/// This class provides methods to manage the GitHub action summary.
+/// </summary>
+public static class GitHubActionSummary
+{
+    private static readonly List<Action<Summary>> persistentSummaryWrites = []; // Replayed in order on every rewrite of the summary.
+
+    /// <summary>
+    /// Add persistent writes to the GitHub action summary, emitting them immediately
+    /// and storing them for future rewrites when the summary is updated.
+    /// </summary>
+    /// <param name="summary">The GitHub action summary.</param>
+    /// <param name="writeToSummary">The invocation that results in adding content to the summary, to be replayed whenever the persistent summary is rewritten.</param>
+    public static void AddPersistent(this Summary summary, Action<Summary> writeToSummary)
+    {
+        persistentSummaryWrites.Add(writeToSummary);
+        writeToSummary(summary);
+    }
+
+    /// <summary>
+    /// Writes a status message to the GitHub action summary and emits it immediately, always printing
+    /// the status at the top of the summary, with other persistent writes below it.
+    /// </summary>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="message">The status message to write.</param>
+    /// <returns>The async task.</returns>
+    public static async Task WriteStatusAsync(this ICoreService action, string message)
+    {
+        action.WriteInfo(message);
+
+        await action.Summary.WritePersistentAsync(summary =>
+        {
+            summary.AddMarkdownHeading("Status", 3);
+            summary.AddRaw(message);
+            // The "Results" heading is only emitted when there are persistent writes to show beneath it.
+            if (persistentSummaryWrites.Count > 0)
+            {
+                summary.AddMarkdownHeading("Results", 3);
+            }
+        });
+    }
+
+    /// <summary>
+    /// Writes the persistent summary to the GitHub action summary, clearing it first.
+    /// </summary>
+    /// <param name="summary">The GitHub action summary.</param>
+    /// <param name="writeStatus">An optional action to write a status message to the summary.</param>
+    /// <returns>The async task.</returns>
+    public static async Task WritePersistentAsync(this Summary summary, Action<Summary>? writeStatus = null)
+    {
+        await summary.ClearAsync();
+
+        if (writeStatus is not null)
+        {
+            writeStatus(summary);
+        }
+
+        foreach (var write in persistentSummaryWrites)
+        {
+            write(summary);
+        }
+
+        await summary.WriteAsync();
+    }
+}
diff --git a/src/Common/ModelType.cs b/IssueLabeler/src/Common/ModelType.cs
similarity index 100%
rename from src/Common/ModelType.cs
rename to IssueLabeler/src/Common/ModelType.cs
diff --git a/IssueLabeler/src/Downloader/Args.cs b/IssueLabeler/src/Downloader/Args.cs
new file mode 100644
index 0000000..c18705d
--- /dev/null
+++ b/IssueLabeler/src/Downloader/Args.cs
@@ -0,0 +1,176 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Services;
+
+public struct Args
+{
+    public readonly string GitHubToken => Environment.GetEnvironmentVariable("GITHUB_TOKEN")!; // Read from the environment on every access.
+    public string Org { get; set; }
+    public List<string> Repos { get; set; }
+    public string? IssuesDataPath { get; set; }
+    public int? IssuesLimit { get; set; }
+    public string? PullsDataPath { get; set; }
+    public int? PullsLimit { get; set; }
+    public int? PageSize { get; set; }
+    public int? PageLimit { get; set; }
+    public int[] Retries { get; set; }
+    public string[]? ExcludedAuthors { get; set; }
+    public Predicate<string> LabelPredicate { get; set; }
+    public bool Verbose { get; set; }
+    // Writes the usage text (with an optional detail message) as a notice, then exits the process with code 1.
+    static void ShowUsage(string? message, ICoreService action)
+    {
+        action.WriteNotice($$"""
+            ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
+
+            Required environment variables:
+            GITHUB_TOKEN GitHub token to be used for API calls.
+
+            Required arguments:
+            --repo The GitHub repositories in format org/repo (comma separated for multiple).
+            --label-prefix Prefix for label predictions. Must end with a character other than a letter or number.
+
+            Required for downloading issue data:
+            --issues-data Path for issue data file to create (TSV file).
+
+            Required for downloading pull request data:
+            --pulls-data Path for pull request data file to create (TSV file).
+
+            Optional arguments:
+            --issues-limit Maximum number of issues to download. Defaults to: No limit.
+            --pulls-limit Maximum number of pull requests to download. Defaults to: No limit.
+            --page-size Number of items per page in GitHub API requests.
+            --page-limit Maximum number of pages to retrieve.
+            --excluded-authors Comma-separated list of authors to exclude.
+            --retries Comma-separated retry delays in seconds.
+            Defaults to: 30,30,300,300,3000,3000.
+            --verbose Enable verbose output.
+            """);
+
+        Environment.Exit(1);
+    }
+    // Parses command-line arguments into an Args value; returns null (after showing usage) when they are invalid or incomplete.
+    public static Args? Parse(string[] args, ICoreService action)
+    {
+        Queue<string> arguments = new(args);
+        ArgUtils argUtils = new(action, ShowUsage, arguments);
+
+        Args argsData = new()
+        {
+            Retries = [30, 30, 300, 300, 3000, 3000]
+        };
+
+        if (string.IsNullOrEmpty(argsData.GitHubToken))
+        {
+            ShowUsage("Environment variable GITHUB_TOKEN is empty.", action);
+            return null;
+        }
+
+        while (arguments.Count > 0)
+        {
+            string argument = arguments.Dequeue();
+            // Each value-taking switch dequeues its value from the same queue through argUtils.
+            switch (argument)
+            {
+                case "--repo":
+                    if (!argUtils.TryGetRepoList("--repo", out string? org, out List<string>? repos))
+                    {
+                        return null;
+                    }
+                    argsData.Org = org;
+                    argsData.Repos = repos;
+                    break;
+
+                case "--label-prefix":
+                    if (!argUtils.TryGetLabelPrefix("--label-prefix", out Func<string, bool>? labelPredicate))
+                    {
+                        return null;
+                    }
+                    argsData.LabelPredicate = new(labelPredicate);
+                    break;
+
+                case "--excluded-authors":
+                    if (!argUtils.TryGetStringArray("--excluded-authors", out string[]? excludedAuthors))
+                    {
+                        return null;
+                    }
+                    argsData.ExcludedAuthors = excludedAuthors;
+                    break;
+
+                case "--issues-data":
+                    if (!argUtils.TryGetPath("--issues-data", out string? issuesDataPath))
+                    {
+                        return null;
+                    }
+                    argsData.IssuesDataPath = issuesDataPath;
+                    break;
+
+                case "--issues-limit":
+                    if (!argUtils.TryGetInt("--issues-limit", out int? issuesLimit))
+                    {
+                        return null;
+                    }
+                    argsData.IssuesLimit = issuesLimit;
+                    break;
+
+                case "--pulls-data":
+                    if (!argUtils.TryGetPath("--pulls-data", out string? pullsDataPath))
+                    {
+                        return null;
+                    }
+                    argsData.PullsDataPath = pullsDataPath;
+                    break;
+
+                case "--pulls-limit":
+                    if (!argUtils.TryGetInt("--pulls-limit", out int? pullsLimit))
+                    {
+                        return null;
+                    }
+                    argsData.PullsLimit = pullsLimit;
+                    break;
+
+                case "--page-size":
+                    if (!argUtils.TryGetInt("--page-size", out int? pageSize))
+                    {
+                        return null;
+                    }
+                    argsData.PageSize = pageSize;
+                    break;
+
+                case "--page-limit":
+                    if (!argUtils.TryGetInt("--page-limit", out int? pageLimit))
+                    {
+                        return null;
+                    }
+                    argsData.PageLimit = pageLimit;
+                    break;
+
+                case "--retries":
+                    if (!argUtils.TryGetIntArray("--retries", out int[]? retries))
+                    {
+                        return null;
+                    }
+                    argsData.Retries = retries;
+                    break;
+
+                case "--verbose":
+                    argsData.Verbose = true;
+                    break;
+
+                default:
+                    ShowUsage($"Unrecognized argument: {argument}", action);
+                    return null;
+            }
+        }
+        // The org, repos and label predicate are required, along with at least one of the issue or pull data paths.
+        if (argsData.Org is null || argsData.Repos is null || argsData.LabelPredicate is null ||
+            (argsData.IssuesDataPath is null && argsData.PullsDataPath is null))
+        {
+            ShowUsage(null, action);
+            return null;
+        }
+
+        return argsData;
+    }
+}
diff --git a/src/Downloader/Downloader.cs b/IssueLabeler/src/Downloader/Downloader.cs
similarity index 59%
rename from src/Downloader/Downloader.cs
rename to IssueLabeler/src/Downloader/Downloader.cs
index 5e68173..4e4df1c 100644
--- a/src/Downloader/Downloader.cs
+++ b/IssueLabeler/src/Downloader/Downloader.cs
@@ -1,33 +1,39 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-using static DataFileUtils;
+using Actions.Core.Extensions;
+using Actions.Core.Services;
using GitHubClient;
+using Microsoft.Extensions.DependencyInjection;
+using static DataFileUtils;
-if (Args.Parse(args) is not Args argsData)
-{
- return;
-}
+using var provider = new ServiceCollection()
+    .AddGitHubActionsCore()
+    .BuildServiceProvider();
+// Resolve the core service that the rest of this program uses for argument parsing and log output.
+var action = provider.GetRequiredService<ICoreService>();
+if (Args.Parse(args, action) is not Args argsData) return 1;
List tasks = [];
-if (!string.IsNullOrEmpty(argsData.IssueDataPath))
+if (!string.IsNullOrEmpty(argsData.IssuesDataPath))
{
- EnsureOutputDirectory(argsData.IssueDataPath);
- tasks.Add(Task.Run(() => DownloadIssues(argsData.IssueDataPath)));
+ EnsureOutputDirectory(argsData.IssuesDataPath);
+ tasks.Add(Task.Run(() => DownloadIssues(argsData.IssuesDataPath)));
}
-if (!string.IsNullOrEmpty(argsData.PullDataPath))
+if (!string.IsNullOrEmpty(argsData.PullsDataPath))
{
- EnsureOutputDirectory(argsData.PullDataPath);
- tasks.Add(Task.Run(() => DownloadPullRequests(argsData.PullDataPath)));
+ EnsureOutputDirectory(argsData.PullsDataPath);
+ tasks.Add(Task.Run(() => DownloadPullRequests(argsData.PullsDataPath)));
}
-await Task.WhenAll(tasks);
+var success = await App.RunTasks(tasks, action);
+return success ? 0 : 1;
async Task DownloadIssues(string outputPath)
{
- Console.WriteLine($"Issues Data Path: {outputPath}");
+ action.WriteInfo($"Issues Data Path: {outputPath}");
byte perFlushCount = 0;
@@ -36,9 +42,9 @@ async Task DownloadIssues(string outputPath)
foreach (var repo in argsData.Repos)
{
- await foreach (var result in GitHubApi.DownloadIssues(argsData.GithubToken, argsData.Org, repo, argsData.LabelPredicate,
- argsData.IssueLimit, argsData.PageSize ?? 100, argsData.PageLimit ?? 1000,
- argsData.Retries, argsData.ExcludedAuthors ?? [], argsData.Verbose))
+ await foreach (var result in GitHubApi.DownloadIssues(argsData.GitHubToken, argsData.Org, repo, argsData.LabelPredicate,
+ argsData.IssuesLimit, argsData.PageSize, argsData.PageLimit,
+ argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose))
{
writer.WriteLine(FormatIssueRecord(result.Label, result.Issue.Title, result.Issue.Body));
@@ -55,7 +61,7 @@ async Task DownloadIssues(string outputPath)
async Task DownloadPullRequests(string outputPath)
{
- Console.WriteLine($"Pulls Data Path: {outputPath}");
+ action.WriteInfo($"Pulls Data Path: {outputPath}");
byte perFlushCount = 0;
@@ -64,9 +70,9 @@ async Task DownloadPullRequests(string outputPath)
foreach (var repo in argsData.Repos)
{
- await foreach (var result in GitHubApi.DownloadPullRequests(argsData.GithubToken, argsData.Org, repo, argsData.LabelPredicate,
- argsData.PullLimit, argsData.PageSize ?? 25, argsData.PageLimit ?? 4000,
- argsData.Retries, argsData.ExcludedAuthors ?? [], argsData.Verbose))
+ await foreach (var result in GitHubApi.DownloadPullRequests(argsData.GitHubToken, argsData.Org, repo, argsData.LabelPredicate,
+ argsData.PullsLimit, argsData.PageSize, argsData.PageLimit,
+ argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose))
{
writer.WriteLine(FormatPullRequestRecord(result.Label, result.PullRequest.Title, result.PullRequest.Body, result.PullRequest.FileNames, result.PullRequest.FolderNames));
diff --git a/src/Downloader/Downloader.csproj b/IssueLabeler/src/Downloader/Downloader.csproj
similarity index 57%
rename from src/Downloader/Downloader.csproj
rename to IssueLabeler/src/Downloader/Downloader.csproj
index 497184a..8074961 100644
--- a/src/Downloader/Downloader.csproj
+++ b/IssueLabeler/src/Downloader/Downloader.csproj
@@ -7,15 +7,17 @@
-
+
-
+
+
-
+
+
diff --git a/IssueLabeler/src/GitHubClient/GitHubApi.cs b/IssueLabeler/src/GitHubClient/GitHubApi.cs
new file mode 100644
index 0000000..2f5d5b3
--- /dev/null
+++ b/IssueLabeler/src/GitHubClient/GitHubApi.cs
@@ -0,0 +1,573 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Services;
+using GraphQL;
+using GraphQL.Client.Http;
+using GraphQL.Client.Serializer.SystemTextJson;
+using System.Collections.Concurrent;
+using System.Net.Http.Json;
+
+namespace GitHubClient;
+
+public class GitHubApi
+{
+ // Client caches keyed by GitHub token, so calls that share a token reuse one client instance.
+ private static ConcurrentDictionary _graphQLClients = new();
+ private static ConcurrentDictionary _restClients = new();
+ // Upper bound, in seconds, applied to each retry delay by AddLabel and RemoveLabel.
+ private const int MaxLabelDelaySeconds = 30;
+
+    /// <summary>
+    /// Returns the GraphQL client cached for the given token, building and caching one when none exists.
+    /// </summary>
+    /// <remarks>New clients target the GitHub GraphQL endpoint and use a 2 minute HTTP timeout.</remarks>
+    /// <param name="githubToken">The GitHub token sent as the bearer credential.</param>
+    /// <returns>The GraphQLHttpClient associated with <paramref name="githubToken"/>.</returns>
+    private static GraphQLHttpClient GetGraphQLClient(string githubToken)
+    {
+        return _graphQLClients.GetOrAdd(githubToken, CreateClient);
+
+        // Factory handed to GetOrAdd to build the client for a token that is not cached yet.
+        static GraphQLHttpClient CreateClient(string bearerToken)
+        {
+            var graphQLClient = new GraphQLHttpClient("https://api.github.com/graphql", new SystemTextJsonSerializer());
+            var httpClient = graphQLClient.HttpClient;
+
+            httpClient.Timeout = TimeSpan.FromMinutes(2);
+            httpClient.DefaultRequestHeaders.Authorization =
+                new System.Net.Http.Headers.AuthenticationHeaderValue("bearer", bearerToken);
+
+            return graphQLClient;
+        }
+    }
+
+    /// <summary>
+    /// Returns the REST client cached for the given token, building and caching one when none exists.
+    /// </summary>
+    /// <remarks>
+    /// New clients carry the bearer credential plus the Accept, X-GitHub-Api-Version and User-Agent
+    /// headers on every request.
+    /// </remarks>
+    /// <param name="githubToken">The GitHub token sent as the bearer credential.</param>
+    /// <returns>The HttpClient associated with <paramref name="githubToken"/>.</returns>
+    private static HttpClient GetRestClient(string githubToken)
+    {
+        return _restClients.GetOrAdd(githubToken, CreateClient);
+
+        // Factory handed to GetOrAdd to build the client for a token that is not cached yet.
+        static HttpClient CreateClient(string bearerToken)
+        {
+            var restClient = new HttpClient();
+            var headers = restClient.DefaultRequestHeaders;
+
+            headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("bearer", bearerToken);
+            headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/vnd.github+json"));
+            headers.Add("X-GitHub-Api-Version", "2022-11-28");
+            headers.Add("User-Agent", "Issue-Labeler");
+
+            return restClient;
+        }
+    }
+
+ /// <summary>
+ /// Downloads issues from a GitHub repository, filtering them by label and other criteria.
+ /// </summary>
+ /// <param name="githubToken">The GitHub token to use for authentication.</param>
+ /// <param name="org">The GitHub organization name.</param>
+ /// <param name="repo">The GitHub repository name.</param>
+ /// <param name="labelPredicate">A predicate function to filter labels.</param>
+ /// <param name="issuesLimit">The maximum number of issues to download.</param>
+ /// <param name="pageSize">The number of items per page in GitHub API requests. Defaults to 100 when null.</param>
+ /// <param name="pageLimit">The maximum number of pages to retrieve. Defaults to 1000 when null.</param>
+ /// <param name="retries">An array of retry delays in seconds.</param>
+ /// <param name="excludedAuthors">An array of authors to exclude from the results.</param>
+ /// <param name="action">The GitHub action service.</param>
+ /// <param name="verbose">Emit verbose output into the action log.</param>
+ /// <returns>The downloaded issues as an async enumerable collection of tuples containing the issue and its predicate-matched label (when only one matching label is found).</returns>
+ public static async IAsyncEnumerable<(Issue Issue, string Label)> DownloadIssues(
+ string githubToken,
+ string org, string repo,
+ Predicate labelPredicate,
+ int? issuesLimit,
+ int? pageSize,
+ int? pageLimit,
+ int[] retries,
+ string[]? excludedAuthors,
+ ICoreService action,
+ bool verbose = false)
+ {
+ // Delegates to DownloadItems with the "issues" query name and re-yields each (item, label) pair.
+ await foreach (var item in DownloadItems("issues", githubToken, org, repo, labelPredicate, issuesLimit, pageSize ?? 100, pageLimit ?? 1000, retries, excludedAuthors, action, verbose))
+ {
+ yield return (item.Item, item.Label);
+ }
+ }
+
+ /// <summary>
+ /// Downloads pull requests from a GitHub repository, filtering them by label and other criteria.
+ /// </summary>
+ /// <param name="githubToken">The GitHub token to use for authentication.</param>
+ /// <param name="org">The GitHub organization name.</param>
+ /// <param name="repo">The GitHub repository name.</param>
+ /// <param name="labelPredicate">A predicate function to filter labels.</param>
+ /// <param name="pullsLimit">The maximum number of pull requests to download.</param>
+ /// <param name="pageSize">The number of items per page in GitHub API requests. Defaults to 25 when null.</param>
+ /// <param name="pageLimit">The maximum number of pages to retrieve. Defaults to 4000 when null.</param>
+ /// <param name="retries">An array of retry delays in seconds.</param>
+ /// <param name="excludedAuthors">An array of authors to exclude from the results.</param>
+ /// <param name="action">The GitHub action service.</param>
+ /// <param name="verbose">Emit verbose output into the action log.</param>
+ /// <returns>The downloaded pull requests as an async enumerable collection of tuples containing the pull request and its predicate-matched label (when only one matching label is found).</returns>
+ public static async IAsyncEnumerable<(PullRequest PullRequest, string Label)> DownloadPullRequests(
+ string githubToken,
+ string org,
+ string repo,
+ Predicate labelPredicate,
+ int? pullsLimit,
+ int? pageSize,
+ int? pageLimit,
+ int[] retries,
+ string[]? excludedAuthors,
+ ICoreService action,
+ bool verbose = false)
+ {
+ // Delegates to DownloadItems with the "pullRequests" query name.
+ var items = DownloadItems("pullRequests", githubToken, org, repo, labelPredicate, pullsLimit, pageSize ?? 25, pageLimit ?? 4000, retries, excludedAuthors, action, verbose);
+
+ // Re-yield each (item, label) pair under the pull-request tuple element names.
+ await foreach (var item in items)
+ {
+ yield return (item.Item, item.Label);
+ }
+ }
+
+    /// <summary>
+    /// Downloads items from a GitHub repository page by page, filtering them by author and label.
+    /// </summary>
+    /// <remarks>
+    /// A failed page request is retried once per entry in <paramref name="retries"/>, waiting that
+    /// entry's number of seconds before the retry. Once every entry has been used, the download aborts.
+    /// </remarks>
+    /// <typeparam name="T">The item type being downloaded (Issue or PullRequest).</typeparam>
+    /// <param name="itemQueryName">The GraphQL query name for the item type (e.g., "issues" or "pullRequests").</param>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="labelPredicate">A predicate function to filter labels.</param>
+    /// <param name="itemLimit">The maximum number of items to include in the output, or null for no limit.</param>
+    /// <param name="pageSize">The number of items per page in GitHub API requests (capped at 100).</param>
+    /// <param name="pageLimit">The maximum number of pages to retrieve.</param>
+    /// <param name="retries">An array of retry delays in seconds; each entry allows one retry.</param>
+    /// <param name="excludedAuthors">An array of authors to exclude from the results.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="verbose">Emit verbose output into the action log.</param>
+    /// <returns>The downloaded items as an async enumerable collection of tuples containing the item and its predicate-matched label (when only one matching label is found).</returns>
+    /// <exception cref="ApplicationException">When a page request still fails after every retry has been used.</exception>
+    private static async IAsyncEnumerable<(T Item, string Label)> DownloadItems(
+        string itemQueryName,
+        string githubToken,
+        string org,
+        string repo,
+        Predicate labelPredicate,
+        int? itemLimit,
+        int pageSize,
+        int pageLimit,
+        int[] retries,
+        string[]? excludedAuthors,
+        ICoreService action,
+        bool verbose) where T : Issue
+    {
+        pageSize = Math.Min(pageSize, 100);
+
+        string typeNames = typeof(T) == typeof(PullRequest) ? "Pull Requests" : "Issues";
+        string typeName = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
+
+        int pageNumber = 0;
+        string? after = null;
+        bool hasNextPage = true;
+        int loadedCount = 0;    // items downloaded so far
+        int includedCount = 0;  // items yielded to the caller so far
+        int? totalCount = null; // total reported by the first successful page
+        byte retry = 0;         // retries already used for the current page
+        bool finished = false;
+
+        do
+        {
+            action.WriteInfo($"Downloading {typeNames} page {pageNumber + 1} from {org}/{repo}...{(retry > 0 ? $" (retry {retry} of {retries.Length}) " : "")}{(after is not null ? $" (cursor: '{after}')" : "")}");
+
+            Page page;
+
+            try
+            {
+                page = await GetItemsPage(githubToken, org, repo, pageSize, after, itemQueryName);
+            }
+            catch (Exception ex) when (
+                ex is HttpIOException ||
+                ex is HttpRequestException ||
+                ex is GraphQLHttpRequestException ||
+                ex is TaskCanceledException
+            )
+            {
+                action.WriteInfo($"Exception caught during query.\n {ex.Message}");
+
+                // Every entry in 'retries' buys one retry, so abort only after all of them
+                // have been used (previously the last entry was never reached).
+                if (retry >= retries.Length)
+                {
+                    await action.WriteStatusAsync($"Retry limit of {retries.Length} reached. Aborting.");
+
+                    // Report the downloaded and included counters under their matching labels.
+                    throw new ApplicationException($"""
+                        Retry limit of {retries.Length} reached. Aborting.
+
+                        {ex.Message}
+
+                        Total Downloaded: {loadedCount}
+                        Applicable for Training: {includedCount}
+                        Page Number: {pageNumber}
+                        """
+                    );
+                }
+                else
+                {
+                    await action.WriteStatusAsync($"Waiting {retries[retry]} seconds before retry {retry + 1} of {retries.Length}...");
+                    await Task.Delay(retries[retry] * 1000);
+                    retry++;
+
+                    // Re-request the same page; 'after' and 'pageNumber' are unchanged.
+                    continue;
+                }
+            }
+
+            // Guard against requesting the same page forever when the cursor does not advance.
+            if (after == page.EndCursor)
+            {
+                action.WriteError($"Paging did not progress. Cursor: '{after}'. Aborting.");
+                break;
+            }
+
+            pageNumber++;
+            after = page.EndCursor;
+            hasNextPage = page.HasNextPage;
+            loadedCount += page.Nodes.Length;
+            totalCount ??= page.TotalCount;
+            retry = 0; // a successful page resets the retry budget
+
+            foreach (T item in page.Nodes)
+            {
+                if (excludedAuthors is not null && item.Author?.Login is not null && excludedAuthors.Contains(item.Author.Login, StringComparer.InvariantCultureIgnoreCase))
+                {
+                    if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Excluded from output. Author '{item.Author.Login}' is in excluded list.");
+                    continue;
+                }
+
+                // If there are more labels, there might be other applicable
+                // labels that were not loaded and the model is incomplete.
+                if (item.Labels.HasNextPage)
+                {
+                    if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Excluded from output. Not all labels were loaded.");
+                    continue;
+                }
+
+                // Only items with exactly one applicable label are used for the model.
+                string[] labels = Array.FindAll(item.LabelNames, labelPredicate);
+                if (labels.Length != 1)
+                {
+                    if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Excluded from output. {labels.Length} applicable labels found.");
+                    continue;
+                }
+
+                // Exactly one applicable label was found on the item. Include it in the model.
+                if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Included in output. Applicable label: '{labels[0]}'.");
+
+                yield return (item, labels[0]);
+
+                includedCount++;
+
+                if (itemLimit.HasValue && includedCount >= itemLimit)
+                {
+                    break;
+                }
+            }
+
+            finished = (!hasNextPage || pageNumber >= pageLimit || (itemLimit.HasValue && includedCount >= itemLimit));
+
+            await action.WriteStatusAsync(
+                $"Items to Include: {includedCount} (limit: {(itemLimit.HasValue ? itemLimit : "none")}) | " +
+                $"Items Downloaded: {loadedCount} (total: {totalCount}) | " +
+                $"Pages Downloaded: {pageNumber} (limit: {pageLimit})");
+
+            if (finished)
+            {
+                action.Summary.AddPersistent(summary => {
+                    summary.AddMarkdownHeading($"Finished Downloading {typeNames} from {org}/{repo}", 2);
+                    summary.AddMarkdownList([
+                        $"Items to Include: {includedCount} (limit: {(itemLimit.HasValue ? itemLimit : "none")})",
+                        $"Items Downloaded: {loadedCount} (total: {totalCount})",
+                        $"Pages Downloaded: {pageNumber} (limit: {pageLimit})"
+                    ]);
+                });
+            }
+        }
+        while (!finished);
+    }
+
+ /// <summary>
+ /// Retrieves a page of items from a GitHub repository using GraphQL.
+ /// </summary>
+ /// <typeparam name="T">The type of items to retrieve (e.g., Issue or PullRequest).</typeparam>
+ /// <param name="githubToken">The GitHub token to use for authentication.</param>
+ /// <param name="org">The GitHub organization name.</param>
+ /// <param name="repo">The GitHub repository name.</param>
+ /// <param name="pageSize">The number of items per page in GitHub API requests.</param>
+ /// <param name="after">The cursor for pagination (null for the first page).</param>
+ /// <param name="itemQueryName">The GraphQL query name for the item type (e.g., "issues" or "pullRequests").</param>
+ /// <returns>The page of items retrieved from the GitHub repository.</returns>
+ /// <exception cref="ApplicationException">When the GraphQL request returns errors or the response does not include the expected data.</exception>
+ private static async Task> GetItemsPage(string githubToken, string org, string repo, int pageSize, string? after, string itemQueryName) where T : Issue
+ {
+ GraphQLHttpClient client = GetGraphQLClient(githubToken);
+
+ // Only pull requests request the changed file paths; for issues the fragment is empty.
+ string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : "";
+
+ // The item query name, page size and files fragment are interpolated into the query text;
+ // owner, repo and cursor are passed as GraphQL variables.
+ GraphQLRequest query = new GraphQLRequest
+ {
+ Query = $$"""
+ query ($owner: String!, $repo: String!, $after: String) {
+ repository (owner: $owner, name: $repo) {
+ result:{{itemQueryName}} (after: $after, first: {{pageSize}}, orderBy: {field: CREATED_AT, direction: DESC}) {
+ nodes {
+ number
+ title
+ author { login }
+ body: bodyText
+ labels (first: 25) {
+ nodes { name },
+ pageInfo { hasNextPage }
+ }
+ {{files}}
+ }
+ pageInfo {
+ hasNextPage
+ endCursor
+ }
+ totalCount
+ }
+ }
+ }
+ """,
+ Variables = new
+ {
+ Owner = org,
+ Repo = repo,
+ After = after
+ }
+ };
+
+ var response = await client.SendQueryAsync>>(query);
+
+ // GraphQL-level errors are surfaced as a single exception listing every numbered error message.
+ if (response.Errors?.Any() ?? false)
+ {
+ string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. {e.Message}").ToArray());
+ throw new ApplicationException($"GraphQL request returned errors.\n\n{errors}");
+ }
+ // A response without errors but with any part of the data path missing is also treated as a failure.
+ else if (response.Data is null || response.Data.Repository is null || response.Data.Repository.Result is null)
+ {
+ throw new ApplicationException("GraphQL response did not include the repository result data");
+ }
+
+ return response.Data.Repository.Result;
+ }
+
+ /// <summary>
+ /// Gets an issue from a GitHub repository using GraphQL.
+ /// </summary>
+ /// <param name="githubToken">The GitHub token to use for authentication.</param>
+ /// <param name="org">The GitHub organization name.</param>
+ /// <param name="repo">The GitHub repository name.</param>
+ /// <param name="number">The issue number.</param>
+ /// <param name="retries">An array of retry delays in seconds.</param>
+ /// <param name="action">The GitHub action service.</param>
+ /// <param name="verbose">Emit verbose output into the action log.</param>
+ /// <returns>The issue retrieved from the GitHub repository, or null if not found.</returns>
+ public static async Task GetIssue(string githubToken, string org, string repo, ulong number, int[] retries, ICoreService action, bool verbose) =>
+ // Thin wrapper: forwards to GetItem with the singular "issue" GraphQL field name.
+ await GetItem(githubToken, org, repo, number, retries, verbose, "issue", action);
+
+ /// <summary>
+ /// Gets a pull request from a GitHub repository using GraphQL.
+ /// </summary>
+ /// <param name="githubToken">The GitHub token to use for authentication.</param>
+ /// <param name="org">The GitHub organization name.</param>
+ /// <param name="repo">The GitHub repository name.</param>
+ /// <param name="number">The pull request number.</param>
+ /// <param name="retries">An array of retry delays in seconds.</param>
+ /// <param name="action">The GitHub action service.</param>
+ /// <param name="verbose">Emit verbose output into the action log.</param>
+ /// <returns>The pull request retrieved from the GitHub repository, or null if not found.</returns>
+ public static async Task GetPullRequest(string githubToken, string org, string repo, ulong number, int[] retries, ICoreService action, bool verbose) =>
+ // Thin wrapper: forwards to GetItem with the singular "pullRequest" GraphQL field name.
+ await GetItem(githubToken, org, repo, number, retries, verbose, "pullRequest", action);
+
+    /// <summary>
+    /// Gets a single issue or pull request from a GitHub repository using GraphQL.
+    /// </summary>
+    /// <remarks>
+    /// The request is sent once and then retried once per entry in <paramref name="retries"/>
+    /// (waiting that entry's number of seconds first) when it fails with a network exception
+    /// or a rate-limit error. Other GraphQL errors and missing result data are not retried.
+    /// </remarks>
+    /// <typeparam name="T">The item type to retrieve (Issue or PullRequest).</typeparam>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="number">The issue or pull request number.</param>
+    /// <param name="retries">An array of retry delays in seconds; each entry allows one retry.</param>
+    /// <param name="verbose">Emit verbose output into the action log (not read by this method).</param>
+    /// <param name="itemQueryName">The GraphQL field name for the item type ("issue" or "pullRequest").</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>The retrieved item, or null when it could not be retrieved.</returns>
+    private static async Task GetItem(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose, string itemQueryName, ICoreService action) where T : Issue
+    {
+        GraphQLHttpClient client = GetGraphQLClient(githubToken);
+
+        // Only pull requests request the changed file paths; for issues the fragment is empty.
+        string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : "";
+
+        GraphQLRequest query = new GraphQLRequest
+        {
+            Query = $$"""
+                query ($owner: String!, $repo: String!, $number: Int!) {
+                repository (owner: $owner, name: $repo) {
+                result:{{itemQueryName}} (number: $number) {
+                number
+                title
+                author { login }
+                body: bodyText
+                labels (first: 25) {
+                nodes { name },
+                pageInfo { hasNextPage }
+                }
+                {{files}}
+                }
+                }
+                }
+                """,
+            Variables = new
+            {
+                Owner = org,
+                Repo = repo,
+                Number = number
+            }
+        };
+
+        byte retry = 0; // retries already used
+        string typeName = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
+
+        // One initial attempt plus one retry per entry in 'retries'. The loop ends through the
+        // returns below, so the final delay is never slept without a following attempt.
+        while (true)
+        {
+            try
+            {
+                var response = await client.SendQueryAsync>(query);
+
+                if (!(response.Errors?.Any() ?? false) && response.Data?.Repository?.Result is not null)
+                {
+                    return response.Data.Repository.Result;
+                }
+
+                if (response.Errors?.Any() ?? false)
+                {
+                    // These errors occur when an issue/pull does not exist or when the API rate limit has been exceeded
+                    if (response.Errors.Any(e => e.Message.StartsWith("API rate limit exceeded")))
+                    {
+                        action.WriteInfo($"""
+                            [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                            Rate limit has been reached.
+                            {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                            """);
+                    }
+                    else
+                    {
+                        // Could not detect this as a rate limit issue. Do not retry.
+                        string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. {e.Message}").ToArray());
+
+                        action.WriteInfo($"""
+                            [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                            GraphQL request returned errors:
+
+                            {errors}
+                            """);
+
+                        return null;
+                    }
+                }
+                else
+                {
+                    // Do not retry as these errors are not recoverable
+                    // This is usually a bug during development when the query/response model is incorrect
+                    action.WriteInfo($"""
+                        [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                        GraphQL response did not include the repository result data.
+                        """);
+
+                    return null;
+                }
+            }
+            catch (Exception ex) when (
+                ex is HttpIOException ||
+                ex is HttpRequestException ||
+                ex is GraphQLHttpRequestException ||
+                ex is TaskCanceledException
+            )
+            {
+                // Retry on exceptions as they can be temporary network issues
+                action.WriteInfo($"""
+                    [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                    Exception caught during query.
+
+                    {ex.Message}
+
+                    {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                    """);
+            }
+
+            // Reached only for retryable failures. Give up once every delay has been used.
+            if (retry >= retries.Length)
+            {
+                return null;
+            }
+
+            await Task.Delay(retries[retry++] * 1000);
+        }
+    }
+
+    /// <summary>
+    /// Adds a label to an issue or pull request in a GitHub repository.
+    /// </summary>
+    /// <remarks>
+    /// The request is sent once and then retried once per entry in <paramref name="retries"/>
+    /// for as long as the response is not successful.
+    /// </remarks>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="type">The type of item (e.g., "issue" or "pull request"); used only in log output.</param>
+    /// <param name="number">The issue or pull request number.</param>
+    /// <param name="label">The label to add.</param>
+    /// <param name="retries">An array of retry delays in seconds. A maximum delay of 30 seconds is enforced.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A string describing a failure, or null if successful.</returns>
+    public static async Task AddLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries, ICoreService action)
+    {
+        var client = GetRestClient(githubToken);
+        byte retry = 0; // retries already used
+
+        // One initial attempt plus one retry per entry in 'retries'. The loop ends through the
+        // returns below, so no delay is slept without a following attempt.
+        while (true)
+        {
+            // Dispose each response so its connection is released before the next attempt.
+            using var response = await client.PostAsJsonAsync(
+                $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels",
+                new string[] { label },
+                CancellationToken.None);
+
+            if (response.IsSuccessStatusCode)
+            {
+                return null;
+            }
+
+            bool canRetry = retry < retries.Length;
+
+            // Clamp the configured delay so the logged value matches the time actually waited.
+            int delay = canRetry ? Math.Min(retries[retry], MaxLabelDelaySeconds) : 0;
+
+            action.WriteInfo($"""
+                [{type} {org}/{repo}#{number}] Failed to add label '{label}'. {response.ReasonPhrase} ({response.StatusCode})
+                {(canRetry ? $"Will proceed with retry {retry + 1} of {retries.Length} after {delay} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                """);
+
+            if (!canRetry)
+            {
+                return $"Failed to add label '{label}' after {retries.Length} retries.";
+            }
+
+            retry++;
+            await Task.Delay(delay * 1000);
+        }
+    }
+
+    /// <summary>
+    /// Removes a label from an issue or pull request in a GitHub repository.
+    /// </summary>
+    /// <remarks>
+    /// The request is sent once and then retried once per entry in <paramref name="retries"/>
+    /// for as long as the response is not successful.
+    /// </remarks>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="type">The type of item (e.g., "issue" or "pull request"); used only in log output.</param>
+    /// <param name="number">The issue or pull request number.</param>
+    /// <param name="label">The label to remove.</param>
+    /// <param name="retries">An array of retry delays in seconds. A maximum delay of 30 seconds is enforced.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A string describing a failure, or null if successful.</returns>
+    public static async Task RemoveLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries, ICoreService action)
+    {
+        var client = GetRestClient(githubToken);
+        byte retry = 0; // retries already used
+
+        // The label is a URL path segment here, so characters such as spaces, '/', '?' or '#'
+        // must be percent-encoded or the request would target a different resource.
+        string requestUri = $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels/{Uri.EscapeDataString(label)}";
+
+        // One initial attempt plus one retry per entry in 'retries'. The loop ends through the
+        // returns below, so no delay is slept without a following attempt.
+        while (true)
+        {
+            // Dispose each response so its connection is released before the next attempt.
+            using var response = await client.DeleteAsync(requestUri, CancellationToken.None);
+
+            if (response.IsSuccessStatusCode)
+            {
+                return null;
+            }
+
+            bool canRetry = retry < retries.Length;
+
+            // Clamp the configured delay so the logged value matches the time actually waited.
+            int delay = canRetry ? Math.Min(retries[retry], MaxLabelDelaySeconds) : 0;
+
+            action.WriteInfo($"""
+                [{type} {org}/{repo}#{number}] Failed to remove label '{label}'. {response.ReasonPhrase} ({response.StatusCode})
+                {(canRetry ? $"Will proceed with retry {retry + 1} of {retries.Length} after {delay} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                """);
+
+            if (!canRetry)
+            {
+                return $"Failed to remove label '{label}' after {retries.Length} retries.";
+            }
+
+            retry++;
+            await Task.Delay(delay * 1000);
+        }
+    }
+}
diff --git a/src/GitHubClient/GitHubClient.csproj b/IssueLabeler/src/GitHubClient/GitHubClient.csproj
similarity index 78%
rename from src/GitHubClient/GitHubClient.csproj
rename to IssueLabeler/src/GitHubClient/GitHubClient.csproj
index bd43aac..ce9e773 100644
--- a/src/GitHubClient/GitHubClient.csproj
+++ b/IssueLabeler/src/GitHubClient/GitHubClient.csproj
@@ -6,6 +6,7 @@
+
@@ -15,4 +16,8 @@
+
+
+
+
diff --git a/src/GitHubClient/QueryModel.cs b/IssueLabeler/src/GitHubClient/QueryModel.cs
similarity index 100%
rename from src/GitHubClient/QueryModel.cs
rename to IssueLabeler/src/GitHubClient/QueryModel.cs
diff --git a/IssueLabeler/src/Predictor/Args.cs b/IssueLabeler/src/Predictor/Args.cs
new file mode 100644
index 0000000..24389ce
--- /dev/null
+++ b/IssueLabeler/src/Predictor/Args.cs
@@ -0,0 +1,113 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Services;
+
+/// <summary>
+/// Inputs for the Predictor app, read through ArgUtils plus the GITHUB_TOKEN environment variable.
+/// </summary>
+public struct Args
+{
+    // Read from the environment on every access; Parse rejects a null or empty value.
+    public string GitHubToken => Environment.GetEnvironmentVariable("GITHUB_TOKEN")!;
+    public string Org { get; set; }
+    public string Repo { get; set; }
+    public float Threshold { get; set; }
+    public Func LabelPredicate { get; set; }
+    public string[]? ExcludedAuthors { get; set; }
+    public string? IssuesModelPath { get; set; }
+    public List? Issues { get; set; }
+    public string? PullsModelPath { get; set; }
+    public List? Pulls { get; set; }
+    public string? DefaultLabel { get; set; }
+    public int[] Retries { get; set; }
+    public bool Verbose { get; set; }
+    public bool Test { get; set; }
+
+    /// <summary>
+    /// Writes the usage text (optionally prefixed by a specific error) as a notice and exits the process with code 1.
+    /// </summary>
+    /// <param name="message">An optional message describing the specific problem, or null.</param>
+    /// <param name="action">The GitHub action service used to write the notice.</param>
+    static void ShowUsage(string? message, ICoreService action)
+    {
+        action.WriteNotice($$"""
+            ERROR: Invalid or missing inputs.{{(message is null ? "" : " " + message)}}
+
+            Required environment variables:
+            GITHUB_TOKEN GitHub token to be used for API calls.
+
+            Inputs are specified as ALL_CAPS environment variables prefixed with 'INPUT_'.
+
+            Required inputs:
+            REPO GitHub repository in the format {org}/{repo}.
+            Defaults to: GITHUB_REPOSITORY environment variable.
+            LABEL_PREFIX Prefix for label predictions.
+            Must end with a non-alphanumeric character.
+
+            Required inputs for predicting issue labels:
+            ISSUES_MODEL Path to the issue prediction model file (ZIP file).
+            ISSUES Comma-separated list of issue number ranges.
+            Example: 1-3,7,5-9.
+
+            Required inputs for predicting pull request labels:
+            PULLS_MODEL Path to the pull request prediction model file (ZIP file).
+            PULLS Comma-separated list of pull request number ranges.
+            Example: 1-3,7,5-9.
+
+            Optional inputs:
+            THRESHOLD Minimum prediction confidence threshold. Range (0,1].
+            Defaults to: 0.4.
+            DEFAULT_LABEL Label to apply if no label is predicted.
+            EXCLUDED_AUTHORS Comma-separated list of authors to exclude.
+            RETRIES Comma-separated retry delays in seconds.
+            Defaults to: 30,30,300,300,3000,3000.
+            TEST Run in test mode, outputting predictions without applying labels.
+            Must be one of: true, false, TRUE, FALSE
+            VERBOSE Enable verbose output.
+            Must be one of: true, false, TRUE, FALSE
+            """);
+
+        Environment.Exit(1);
+    }
+
+    /// <summary>
+    /// Reads every input through ArgUtils and builds the <see cref="Args"/> value.
+    /// </summary>
+    /// <param name="args">The command-line arguments (not read by this method).</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>The parsed inputs, or null when a required input is missing.</returns>
+    public static Args? Parse(string[] args, ICoreService action)
+    {
+        ArgUtils argUtils = new(action, ShowUsage);
+        argUtils.TryGetRepo("repo", out var org, out var repo);
+        argUtils.TryGetLabelPrefix("label_prefix", out var labelPredicate);
+        argUtils.TryGetPath("issues_model", out var issuesModelPath);
+        argUtils.TryGetNumberRanges("issues", out var issues);
+        argUtils.TryGetPath("pulls_model", out var pullsModelPath);
+        argUtils.TryGetNumberRanges("pulls", out var pulls);
+        argUtils.TryGetStringArray("excluded_authors", out var excludedAuthors);
+        argUtils.TryGetFloat("threshold", out var threshold);
+        argUtils.TryGetIntArray("retries", out var retries);
+        argUtils.TryGetString("default_label", out var defaultLabel);
+        argUtils.TryGetFlag("test", out var test);
+        argUtils.TryGetFlag("verbose", out var verbose);
+
+        // THRESHOLD is documented as optional (default 0.4), so a missing value must not be
+        // reported as a usage error; the default is applied below.
+        // NOTE(review): assumes ArgUtils reports malformed values itself - confirm.
+        if (org is null || repo is null || labelPredicate is null ||
+            (issues is null && pulls is null))
+        {
+            ShowUsage(null, action);
+            return null;
+        }
+
+        Args argsData = new()
+        {
+            Org = org,
+            Repo = repo,
+            LabelPredicate = labelPredicate,
+            DefaultLabel = defaultLabel,
+            IssuesModelPath = issuesModelPath,
+            Issues = issues,
+            PullsModelPath = pullsModelPath,
+            Pulls = pulls,
+            ExcludedAuthors = excludedAuthors,
+            Threshold = threshold ?? 0.4f,
+            Retries = retries ?? [30, 30, 300, 300, 3000, 3000],
+            Test = test ?? false,
+            Verbose = verbose ?? false
+        };
+
+        if (string.IsNullOrEmpty(argsData.GitHubToken))
+        {
+            ShowUsage("Environment variable GITHUB_TOKEN is empty.", action);
+            return null;
+        }
+
+        return argsData;
+    }
+}
diff --git a/src/Predictor/Models.cs b/IssueLabeler/src/Predictor/Models.cs
similarity index 100%
rename from src/Predictor/Models.cs
rename to IssueLabeler/src/Predictor/Models.cs
diff --git a/IssueLabeler/src/Predictor/Predictor.cs b/IssueLabeler/src/Predictor/Predictor.cs
new file mode 100644
index 0000000..f032487
--- /dev/null
+++ b/IssueLabeler/src/Predictor/Predictor.cs
@@ -0,0 +1,291 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Extensions;
+using Actions.Core.Services;
+using Actions.Core.Summaries;
+using GitHubClient;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+// Entry point: parse the inputs, queue one prediction task per requested issue and pull request,
+// wait for all of them, log each result in item-number order, and write the persistent summary.
+using var provider = new ServiceCollection()
+    .AddGitHubActionsCore()
+    .BuildServiceProvider();
+
+var action = provider.GetRequiredService();
+if (Args.Parse(args, action) is not Args argsData) return 1;
+
+List> tasks = new();
+
+if (argsData.IssuesModelPath is not null && argsData.Issues is not null)
+{
+    await action.WriteStatusAsync($"Loading prediction engine for issues model...");
+    var issueContext = new MLContext();
+    var issueModel = issueContext.Model.Load(argsData.IssuesModelPath, out _);
+    var issuePredictor = issueContext.Model.CreatePredictionEngine(issueModel);
+    await action.WriteStatusAsync($"Issues prediction engine ready.");
+
+    foreach (ulong issueNumber in argsData.Issues)
+    {
+        var result = await GitHubApi.GetIssue(argsData.GitHubToken, argsData.Org, argsData.Repo, issueNumber, argsData.Retries, action, argsData.Verbose);
+
+        if (result is null)
+        {
+            action.WriteNotice($"[Issue {argsData.Org}/{argsData.Repo}#{issueNumber}] could not be found or downloaded. Skipped.");
+            continue;
+        }
+
+        if (argsData.ExcludedAuthors is not null && result.Author?.Login is not null && argsData.ExcludedAuthors.Contains(result.Author.Login, StringComparer.InvariantCultureIgnoreCase))
+        {
+            action.WriteNotice($"[Issue {argsData.Org}/{argsData.Repo}#{issueNumber}] Author '{result.Author.Login}' is in excluded list. Skipped.");
+            continue;
+        }
+
+        // Items are downloaded one at a time; each prediction is handed to the thread pool.
+        tasks.Add(Task.Run(() => ProcessPrediction(
+            issuePredictor,
+            issueNumber,
+            new Issue(result),
+            argsData.LabelPredicate,
+            argsData.DefaultLabel,
+            ModelType.Issue,
+            argsData.Retries,
+            argsData.Test
+        )));
+
+        action.WriteInfo($"[Issue {argsData.Org}/{argsData.Repo}#{issueNumber}] Queued for prediction.");
+    }
+}
+
+if (argsData.PullsModelPath is not null && argsData.Pulls is not null)
+{
+    await action.WriteStatusAsync($"Loading prediction engine for pulls model...");
+    var pullContext = new MLContext();
+    var pullModel = pullContext.Model.Load(argsData.PullsModelPath, out _);
+    var pullPredictor = pullContext.Model.CreatePredictionEngine(pullModel);
+    await action.WriteStatusAsync($"Pulls prediction engine ready.");
+
+    foreach (ulong pullNumber in argsData.Pulls)
+    {
+        var result = await GitHubApi.GetPullRequest(argsData.GitHubToken, argsData.Org, argsData.Repo, pullNumber, argsData.Retries, action, argsData.Verbose);
+
+        if (result is null)
+        {
+            action.WriteNotice($"[Pull Request {argsData.Org}/{argsData.Repo}#{pullNumber}] could not be found or downloaded. Skipped.");
+            continue;
+        }
+
+        // Compare author logins case-insensitively, exactly as the issue path above does,
+        // so the same excluded-authors list behaves identically for issues and pull requests.
+        if (argsData.ExcludedAuthors is not null && result.Author?.Login is not null && argsData.ExcludedAuthors.Contains(result.Author.Login, StringComparer.InvariantCultureIgnoreCase))
+        {
+            action.WriteNotice($"[Pull Request {argsData.Org}/{argsData.Repo}#{pullNumber}] Author '{result.Author.Login}' is in excluded list. Skipped.");
+            continue;
+        }
+
+        // Items are downloaded one at a time; each prediction is handed to the thread pool.
+        tasks.Add(Task.Run(() => ProcessPrediction(
+            pullPredictor,
+            pullNumber,
+            new PullRequest(result),
+            argsData.LabelPredicate,
+            argsData.DefaultLabel,
+            ModelType.PullRequest,
+            argsData.Retries,
+            argsData.Test
+        )));
+
+        action.WriteInfo($"[Pull Request {argsData.Org}/{argsData.Repo}#{pullNumber}] Queued for prediction.");
+    }
+}
+
+var (predictionResults, success) = await App.RunTasks(tasks, action);
+
+// Log the per-item result messages ordered by issue/pull request number.
+foreach (var prediction in predictionResults.OrderBy(p => p.Number))
+{
+    action.WriteInfo(prediction.ResultMessage);
+}
+
+await action.Summary.WritePersistentAsync();
+return success ? 0 : 1;
+
+// Predicts and applies a label for one issue or pull request.
+//
+// Flow:
+//   1. Skip when the item has more labels than were loaded (HasMoreLabels).
+//   2. When an applicable label already exists, only remove the default label if it is present.
+//   3. Otherwise predict, and apply the highest-scoring applicable label (out of the top 3)
+//      that meets the threshold, then remove the default label if it is present.
+//   4. When nothing meets the threshold, apply the default label if one is configured.
+//
+// In test mode no label is added or removed; the outcome is reported as if the change succeeded.
+// Returns the item number, a one-line result message, and whether processing succeeded.
+async Task<(ulong Number, string ResultMessage, bool Success)> ProcessPrediction(PredictionEngine predictor, ulong number, T issueOrPull, Func labelPredicate, string? defaultLabel, ModelType type, int[] retries, bool test) where T : Issue
+{
+    List> predictionResults = [];
+    string typeName = type == ModelType.PullRequest ? "Pull Request" : "Issue";
+    List resultMessageParts = [];
+    string? error = null;
+
+    // Flushes the collected summary writes and builds the result tuple.
+    (ulong, string, bool) GetResult(bool success)
+    {
+        foreach (var summaryWrite in predictionResults)
+        {
+            action.Summary.AddPersistent(summaryWrite);
+        }
+
+        return (number, $"[{typeName} {argsData.Org}/{argsData.Repo}#{number}] {string.Join(' ', resultMessageParts)}", success);
+    }
+
+    (ulong, string, bool) Success() => GetResult(true);
+    (ulong, string, bool) Failure() => GetResult(false);
+
+    predictionResults.Add(summary => summary.AddRawMarkdown($"- **{argsData.Org}/{argsData.Repo}#{number}**", true));
+
+    if (issueOrPull.HasMoreLabels)
+    {
+        predictionResults.Add(summary => summary.AddRawMarkdown($" - Skipping prediction. Too many labels applied already; cannot be sure no applicable label is already applied.", true));
+        resultMessageParts.Add("Too many labels applied already.");
+
+        return Success();
+    }
+
+    var applicableLabel = issueOrPull.Labels?.FirstOrDefault(labelPredicate);
+
+    bool hasDefaultLabel =
+        (defaultLabel is not null) &&
+        (issueOrPull.Labels?.Any(l => l.Equals(defaultLabel, StringComparison.OrdinalIgnoreCase)) ?? false);
+
+    if (applicableLabel is not null)
+    {
+        predictionResults.Add(summary => summary.AddRawMarkdown($" - No prediction needed. Applicable label `{applicableLabel}` already exists.", true));
+
+        if (hasDefaultLabel && defaultLabel is not null)
+        {
+            if (!test)
+            {
+                error = await GitHubApi.RemoveLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, defaultLabel, retries, action);
+            }
+
+            if (error is null)
+            {
+                predictionResults.Add(summary => summary.AddRawMarkdown($" - Removed default label `{defaultLabel}`.", true));
+                resultMessageParts.Add($"Default label '{defaultLabel}' removed.");
+                return Success();
+            }
+            else
+            {
+                predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error removing default label `{defaultLabel}`**: {error}", true));
+                resultMessageParts.Add($"Error occurred removing default label '{defaultLabel}'");
+                return Failure();
+            }
+        }
+
+        resultMessageParts.Add($"No prediction needed. Applicable label '{applicableLabel}' already exists.");
+        return Success();
+    }
+
+    // PredictionEngine is not thread-safe, and the same engine instance is shared by every
+    // Task.Run worker queued for this model, so calls to Predict are serialized.
+    LabelPrediction prediction;
+    lock (predictor)
+    {
+        prediction = predictor.Predict(issueOrPull);
+    }
+
+    if (prediction.Score is null || prediction.Score.Length == 0)
+    {
+        predictionResults.Add(summary => summary.AddRawMarkdown($" - No prediction was made. The prediction engine did not return any possible predictions.", true));
+        resultMessageParts.Add("No prediction was made. The prediction engine did not return any possible predictions.");
+        return Success();
+    }
+
+    // Slot names of the Score column give the label for each score index.
+    VBuffer> labels = default;
+    predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels);
+
+    var predictions = prediction.Score
+        .Select((score, index) => new
+        {
+            Score = score,
+            Label = labels.GetItemOrDefault(index).ToString()
+        })
+        // Ensure predicted labels match the expected predicate
+        .Where(prediction => labelPredicate(prediction.Label))
+        // Capture the top 3 for including in the output
+        .OrderByDescending(p => p.Score)
+        .Take(3);
+
+    var bestScore = predictions.FirstOrDefault(p => p.Score >= argsData.Threshold);
+
+    if (bestScore is not null)
+    {
+        predictionResults.Add(summary => summary.AddRawMarkdown($" - Predicted label: `{bestScore.Label}` meets the threshold of {argsData.Threshold}.", true));
+    }
+    else
+    {
+        predictionResults.Add(summary => summary.AddRawMarkdown($" - No label prediction met the threshold of {argsData.Threshold}.", true));
+    }
+
+    foreach (var labelPrediction in predictions)
+    {
+        predictionResults.Add(summary => summary.AddRawMarkdown($" - `{labelPrediction.Label}` - Score: {labelPrediction.Score}", true));
+    }
+
+    if (bestScore is not null)
+    {
+        if (!test)
+        {
+            error = await GitHubApi.AddLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, bestScore.Label, retries, action);
+        }
+
+        if (error is null)
+        {
+            predictionResults.Add(summary => summary.AddRawMarkdown($" - **`{bestScore.Label}` applied**", true));
+            resultMessageParts.Add($"Label '{bestScore.Label}' applied.");
+
+            if (hasDefaultLabel && defaultLabel is not null)
+            {
+                if (!test)
+                {
+                    error = await GitHubApi.RemoveLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, defaultLabel, retries, action);
+                }
+
+                if (error is null)
+                {
+                    predictionResults.Add(summary => summary.AddRawMarkdown($" - **Removed default label `{defaultLabel}`**", true));
+                    resultMessageParts.Add($"Default label '{defaultLabel}' removed.");
+                    return Success();
+                }
+                else
+                {
+                    predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error removing default label `{defaultLabel}`**: {error}", true));
+                    resultMessageParts.Add($"Error occurred removing default label '{defaultLabel}'");
+                    return Failure();
+                }
+            }
+            else
+            {
+                return Success();
+            }
+        }
+        else
+        {
+            predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error applying label `{bestScore.Label}`**: {error}", true));
+            resultMessageParts.Add($"Error occurred applying label '{bestScore.Label}'");
+            return Failure();
+        }
+    }
+
+    if (defaultLabel is not null)
+    {
+        if (hasDefaultLabel)
+        {
+            predictionResults.Add(summary => summary.AddRawMarkdown($" - Default label `{defaultLabel}` is already applied.", true));
+            resultMessageParts.Add($"No prediction made. Default label '{defaultLabel}' is already applied.");
+            return Success();
+        }
+        else
+        {
+            if (!test)
+            {
+                error = await GitHubApi.AddLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, defaultLabel, retries, action);
+            }
+
+            if (error is null)
+            {
+                predictionResults.Add(summary => summary.AddRawMarkdown($" - **Default label `{defaultLabel}` applied.**", true));
+                resultMessageParts.Add($"No prediction made. Default label '{defaultLabel}' applied.");
+                return Success();
+            }
+            else
+            {
+                predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error applying default label `{defaultLabel}`**: {error}", true));
+                resultMessageParts.Add($"Error occurred applying default label '{defaultLabel}'");
+                return Failure();
+            }
+        }
+    }
+
+    resultMessageParts.Add("No prediction made. No applicable label found. No action taken.");
+    return GetResult(error is null);
+}
diff --git a/IssueLabeler/src/Predictor/Predictor.csproj b/IssueLabeler/src/Predictor/Predictor.csproj
new file mode 100644
index 0000000..c15109e
--- /dev/null
+++ b/IssueLabeler/src/Predictor/Predictor.csproj
@@ -0,0 +1,31 @@
+
+
+
+
+ Exe
+ enable
+ enable
+
+
+
+
+ true
+ true
+ Predict labels for GitHub issues and pull requests using a machine learning model.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/IssueLabeler/src/Tester/Args.cs b/IssueLabeler/src/Tester/Args.cs
new file mode 100644
index 0000000..722d901
--- /dev/null
+++ b/IssueLabeler/src/Tester/Args.cs
@@ -0,0 +1,190 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Services;
+
+public struct Args // Command-line and environment configuration for the Tester app.
+{
+ public readonly string GitHubToken => Environment.GetEnvironmentVariable("GITHUB_TOKEN")!; // Re-read from the environment on each access; can still be null/empty despite the '!' (Parse checks for that).
+ public string Org { get; set; }
+ public List Repos { get; set; }
+ public float Threshold { get; set; }
+ public Predicate LabelPredicate { get; set; }
+ public string[]? ExcludedAuthors { get; set; }
+ public string? IssuesModelPath { get; set; }
+ public int? IssuesLimit { get; set; }
+ public string? PullsModelPath { get; set; }
+ public int? PullsLimit { get; set; }
+ public int? PageSize { get; set; }
+ public int? PageLimit { get; set; }
+ public int[] Retries { get; set; }
+ public bool Verbose { get; set; }
+
+ static void ShowUsage(string? message, ICoreService action) // Writes the usage text (plus an optional detail message) as a notice, then exits the process with code 1.
+ {
+ action.WriteNotice($$"""
+ ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
+
+ Required environment variables:
+ GITHUB_TOKEN GitHub token to be used for API calls.
+
+ Required arguments:
+ --repo The GitHub repositories in format org/repo (comma separated for multiple).
+ --label-prefix Prefix for label predictions. Must end with a character other than a letter or number.
+
+ Required for testing the issues model:
+ --issues-model Path to existing issue prediction model file (ZIP file).
+
+ Required for testing the pull requests model:
+ --pulls-model Path to existing pull request prediction model file (ZIP file).
+
+ Optional arguments:
+ --excluded-authors Comma-separated list of authors to exclude.
+ --threshold Minimum prediction confidence threshold. Range (0,1].
+ Defaults to: 0.4.
+ --issues-limit Maximum number of issues to download. Defaults to: No limit.
+ --pulls-limit Maximum number of pull requests to download. Defaults to: No limit.
+ --page-size Number of items per page in GitHub API requests.
+ Defaults to: 100 for issues, 25 for pull requests.
+ --page-limit Maximum number of pages to retrieve.
+ Defaults to: 1000 for issues, 4000 for pull requests.
+ --retries Comma-separated retry delays in seconds.
+ Defaults to: 30,30,300,300,3000,3000.
+ --verbose Enable verbose output.
+ """);
+
+ Environment.Exit(1);
+ }
+
+ public static Args? Parse(string[] args, ICoreService action) // Parses args into an Args value; returns null when an argument fails to parse, is unrecognized, or a required value is missing.
+ {
+ Queue arguments = new(args);
+ ArgUtils argUtils = new(action, ShowUsage, arguments);
+
+ Args argsData = new()
+ {
+ Threshold = 0.4f, // Defaults; overridden by --threshold and --retries below.
+ Retries = [30, 30, 300, 300, 3000, 3000]
+ };
+
+ if (string.IsNullOrEmpty(argsData.GitHubToken))
+ {
+ ShowUsage("Environment variable GITHUB_TOKEN is empty.", action);
+ return null; // Only reached if ShowUsage returns; as written above it calls Environment.Exit(1).
+ }
+
+ while (arguments.Count > 0) // Each iteration consumes one switch; the argUtils helpers are expected to consume the switch's value from the same queue.
+ {
+ string argument = arguments.Dequeue();
+
+ switch (argument)
+ {
+ case "--repo":
+ if (!argUtils.TryGetRepoList("--repo", out string? org, out List? repos))
+ {
+ return null;
+ }
+ argsData.Org = org;
+ argsData.Repos = repos;
+ break;
+
+ case "--label-prefix":
+ if (!argUtils.TryGetLabelPrefix("--label-prefix", out Func? labelPredicate))
+ {
+ return null;
+ }
+ argsData.LabelPredicate = new(labelPredicate);
+ break;
+
+ case "--excluded-authors":
+ if (!argUtils.TryGetStringArray("--excluded-authors", out string[]? excludedAuthors))
+ {
+ return null;
+ }
+ argsData.ExcludedAuthors = excludedAuthors;
+ break;
+
+ case "--threshold":
+ if (!argUtils.TryGetFloat("--threshold", out float? threshold))
+ {
+ return null;
+ }
+ argsData.Threshold = threshold.Value;
+ break;
+
+ case "--issues-model":
+ if (!argUtils.TryGetPath("--issues-model", out string? IssuesModelPath))
+ {
+ return null;
+ }
+ argsData.IssuesModelPath = IssuesModelPath;
+ break;
+
+ case "--issues-limit":
+ if (!argUtils.TryGetInt("--issues-limit", out int? IssuesLimit))
+ {
+ return null;
+ }
+ argsData.IssuesLimit = IssuesLimit;
+ break;
+
+ case "--pulls-model":
+ if (!argUtils.TryGetPath("--pulls-model", out string? PullsModelPath))
+ {
+ return null;
+ }
+ argsData.PullsModelPath = PullsModelPath;
+ break;
+
+ case "--pulls-limit":
+ if (!argUtils.TryGetInt("--pulls-limit", out int? PullsLimit))
+ {
+ return null;
+ }
+ argsData.PullsLimit = PullsLimit;
+ break;
+
+ case "--page-size":
+ if (!argUtils.TryGetInt("--page-size", out int? pageSize))
+ {
+ return null;
+ }
+ argsData.PageSize = pageSize;
+ break;
+
+ case "--page-limit":
+ if (!argUtils.TryGetInt("--page-limit", out int? pageLimit))
+ {
+ return null;
+ }
+ argsData.PageLimit = pageLimit;
+ break;
+
+ case "--retries":
+ if (!argUtils.TryGetIntArray("--retries", out int[]? retries))
+ {
+ return null;
+ }
+ argsData.Retries = retries;
+ break;
+
+ case "--verbose":
+ argsData.Verbose = true;
+ break;
+
+ default:
+ ShowUsage($"Unrecognized argument: {argument}", action);
+ return null;
+ }
+ }
+
+ if (argsData.Org is null || argsData.Repos.Count == 0 || argsData.LabelPredicate is null || // Org and Repos are only ever assigned together (by --repo), so Repos is non-null once Org is.
+ (argsData.IssuesModelPath is null && argsData.PullsModelPath is null)) // At least one of the two model paths is required.
+ {
+ ShowUsage(null, action);
+ return null;
+ }
+
+ return argsData;
+ }
+}
diff --git a/src/Tester/Models.cs b/IssueLabeler/src/Tester/Models.cs
similarity index 79%
rename from src/Tester/Models.cs
rename to IssueLabeler/src/Tester/Models.cs
index c8f7b07..a616bdb 100644
--- a/src/Tester/Models.cs
+++ b/IssueLabeler/src/Tester/Models.cs
@@ -3,6 +3,7 @@
public class Issue
{
+ public string Repo { get; set; }
public ulong Number { get; set; }
public string? Label { get; set; }
public string? Title { get; set; }
@@ -13,10 +14,9 @@ public class Issue
public string? Area { get => Label; }
public string? Description { get => Body; }
- public Issue() { }
-
- public Issue(GitHubClient.Issue issue, Predicate labelPredicate)
+ public Issue(string repo, GitHubClient.Issue issue, Predicate labelPredicate)
{
+ Repo = repo;
Number = issue.Number;
Title = issue.Title;
Body = issue.Body;
@@ -31,9 +31,7 @@ public class PullRequest : Issue
public string? FileNames { get; set; }
public string? FolderNames { get; set; }
- public PullRequest() { }
-
- public PullRequest(GitHubClient.PullRequest pull, Predicate labelPredicate) : base(pull, labelPredicate)
+ public PullRequest(string repo, GitHubClient.PullRequest pull, Predicate labelPredicate) : base(repo, pull, labelPredicate)
{
FileNames = string.Join(' ', pull.FileNames);
FolderNames = string.Join(' ', pull.FolderNames);
diff --git a/IssueLabeler/src/Tester/Tester.cs b/IssueLabeler/src/Tester/Tester.cs
new file mode 100644
index 0000000..65c9f7d
--- /dev/null
+++ b/IssueLabeler/src/Tester/Tester.cs
@@ -0,0 +1,259 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Extensions;
+using Actions.Core.Markdown;
+using Actions.Core.Services;
+using Actions.Core.Summaries;
+using GitHubClient;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+using var provider = new ServiceCollection() // Entry point of the Tester app: build the service provider that supplies the Actions core service.
+ .AddGitHubActionsCore()
+ .BuildServiceProvider();
+
+var action = provider.GetRequiredService();
+var config = Args.Parse(args, action);
+if (config is not Args argsData) return 1; // Parse returned null: exit with code 1.
+
+List> tasks = []; // One test task per model path that was supplied.
+
+if (argsData.IssuesModelPath is not null)
+{
+ tasks.Add(Task.Run(() => TestIssues()));
+}
+
+if (argsData.PullsModelPath is not null)
+{
+ tasks.Add(Task.Run(() => TestPullRequests()));
+}
+
+var (results, success) = await App.RunTasks(tasks, action); // results: (item type, stats) pairs consumed by the summary loop; success drives the exit code.
+
+foreach (var (itemType, stats) in results) // Adds one persistent summary section per tested item type (issues / pull requests).
+{
+ AlertType resultAlert = (stats.MatchesPercentage >= 0.65f && stats.MismatchesPercentage < 0.10f) ? AlertType.Note : AlertType.Warning; // Must agree with the Tip alert text below: at least 65% matches and less than 10% mismatches.
+
+ action.Summary.AddPersistent(summary =>
+ {
+ summary.AddMarkdownHeading($"Finished Testing {(itemType == typeof(PullRequest) ? "Pull Requests" : "Issues")}", 2);
+ summary.AddAlert($"**{stats.Total}** items were tested with **{stats.MatchesPercentage:P2} matches** and **{stats.MismatchesPercentage:P2} mismatches**.", resultAlert);
+ summary.AddRawMarkdown($"Testing complete. **{stats.Total}** items tested, with the following results.", true);
+ summary.AddNewLine();
+
+ SummaryTableRow headerRow = new([
+ new("", Header: true),
+ new("Total", Header: true, Alignment: TableColumnAlignment.Right),
+ new("Matches", Header: true, Alignment: TableColumnAlignment.Right),
+ new("Mismatches", Header: true, Alignment: TableColumnAlignment.Right),
+ new("No Prediction", Header: true, Alignment: TableColumnAlignment.Right),
+ new("No Existing Label", Header: true, Alignment: TableColumnAlignment.Right)
+ ]);
+
+ SummaryTableRow countsRow = new([
+ new("Count"),
+ new($"{stats.Total:N0}"),
+ new($"{stats.Matches:N0}"),
+ new($"{stats.Mismatches:N0}"),
+ new($"{stats.NoPrediction:N0}"),
+ new($"{stats.NoExisting:N0}")
+ ]);
+
+ SummaryTableRow percentageRow = new([
+ new("Percentage", Header: true),
+ new($""),
+ new($"{stats.MatchesPercentage:P2}"),
+ new($"{stats.MismatchesPercentage:P2}"),
+ new($"{stats.NoPredictionPercentage:P2}"),
+ new($"{stats.NoExistingPercentage:P2}")
+ ]);
+
+ summary.AddMarkdownTable(new(headerRow, [countsRow, percentageRow]));
+ summary.AddNewLine();
+ summary.AddMarkdownList([
+ "**Matches**: The predicted label matches the existing label, including when no prediction is made and there is no existing label. Correct prediction.",
+ "**Mismatches**: The predicted label _does not match_ the existing label. Incorrect prediction.",
+ "**No Prediction**: No prediction was made, but the existing item had a label. Incorrect prediction.",
+ "**No Existing Label**: A prediction was made, but there was no existing label. Incorrect prediction."
+ ]);
+ summary.AddNewLine();
+ summary.AddAlert($"If the **Matches** percentage is **at least 65%** and the **Mismatches** percentage is **less than 10%**, the model testing is considered favorable.", AlertType.Tip);
+ });
+}
+
+await action.Summary.WritePersistentAsync(); // Flush all sections queued by the loop above.
+return success ? 0 : 1; // Process exit code: 0 only when every test task succeeded.
+
+async Task<(Type, TestStats)> TestIssues() // Downloads issues from every configured repo, tests each against the issues model, and returns (typeof(Issue), accumulated stats).
+{
+ var predictor = GetPredictionEngine(argsData.IssuesModelPath);
+ var stats = new TestStats(); // Shared across all repos, so the returned stats are cumulative.
+
+ async IAsyncEnumerable DownloadIssues(string githubToken, string repo) // Streams downloaded issues, wrapping each in a new model object that also carries its repo name.
+ {
+ await foreach (var result in GitHubApi.DownloadIssues(githubToken, argsData.Org, repo, argsData.LabelPredicate, argsData.IssuesLimit, argsData.PageSize, argsData.PageLimit, argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose))
+ {
+ yield return new(repo, result.Issue, argsData.LabelPredicate);
+ }
+ }
+
+ action.WriteInfo($"Testing issues from {argsData.Repos.Count} repositories.");
+
+ foreach (var repo in argsData.Repos) // Repos are processed one after another; each item is tested as soon as it is yielded.
+ {
+ await action.WriteStatusAsync($"Downloading and testing issues from {argsData.Org}/{repo}.");
+
+ await foreach (var issue in DownloadIssues(argsData.GitHubToken, repo))
+ {
+ TestPrediction(issue, predictor, stats);
+ }
+
+ await action.WriteStatusAsync($"Finished Testing Issues from {argsData.Org}/{repo}.");
+ }
+
+ return (typeof(Issue), stats);
+}
+
+async Task<(Type, TestStats)> TestPullRequests() // Downloads pull requests from every configured repo, tests each against the pulls model, and returns (typeof(PullRequest), accumulated stats).
+{
+ var predictor = GetPredictionEngine(argsData.PullsModelPath);
+ var stats = new TestStats(); // Shared across all repos, so the returned stats are cumulative.
+
+ async IAsyncEnumerable DownloadPullRequests(string githubToken, string repo) // Streams downloaded pull requests, wrapping each in a new model object that also carries its repo name.
+ {
+ await foreach (var result in GitHubApi.DownloadPullRequests(githubToken, argsData.Org, repo, argsData.LabelPredicate, argsData.PullsLimit, argsData.PageSize, argsData.PageLimit, argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose))
+ {
+ yield return new(repo, result.PullRequest, argsData.LabelPredicate);
+ }
+ }
+
+ foreach (var repo in argsData.Repos) // Repos are processed one after another; each item is tested as soon as it is yielded.
+ {
+ await action.WriteStatusAsync($"Downloading and testing pull requests from {argsData.Org}/{repo}.");
+
+ await foreach (var pull in DownloadPullRequests(argsData.GitHubToken, repo))
+ {
+ TestPrediction(pull, predictor, stats);
+ }
+
+ await action.WriteStatusAsync($"Finished Testing Pull Requests from {argsData.Org}/{repo}.");
+ }
+
+ return (typeof(PullRequest), stats);
+}
+
+static string GetStats(List values) // Formats "min | average | max | standard deviation" for the given scores, or "N/A" when the list is empty.
+{
+ if (values.Count == 0)
+ {
+ return "N/A"; // Min/Average/Max would throw on an empty sequence.
+ }
+
+ float min = values.Min();
+ float average = values.Average();
+ float max = values.Max();
+ double deviation = Math.Sqrt(values.Average(v => Math.Pow(v - average, 2))); // Population standard deviation: square root of the mean squared distance from the average.
+
+ return $"{min} | {average} | {max} | {deviation}";
+}
+
+PredictionEngine GetPredictionEngine(string modelPath) where T : Issue // Loads the saved model at modelPath and creates a prediction engine for it.
+{
+ var context = new MLContext();
+ var model = context.Model.Load(modelPath, out _); // The second out value returned by Load is not needed here and is discarded.
+
+ return context.Model.CreatePredictionEngine(model);
+}
+
+void TestPrediction(T result, PredictionEngine predictor, TestStats stats) where T : Issue // Predicts a label for one item, records the outcome in exactly one of the four stats counters, and logs the running totals.
+{
+ var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
+
+ (string? predictedLabel, float? score) = GetPrediction(
+ predictor,
+ result,
+ argsData.Threshold);
+
+ if (predictedLabel is null && result.Label is not null) // Item has a label but the model predicted nothing.
+ {
+ stats.NoPrediction++;
+ }
+ else if (predictedLabel is not null && result.Label is null) // Model predicted a label for an unlabeled item.
+ {
+ stats.NoExisting++;
+ }
+ else if (predictedLabel?.ToLower() == result.Label?.ToLower()) // Case-insensitive match; also true when both are null (no prediction and no existing label).
+ {
+ stats.Matches++;
+
+ if (score.HasValue)
+ {
+ stats.MatchScores.Add(score.Value);
+ }
+ }
+ else // Both labels are present but differ.
+ {
+ stats.Mismatches++;
+
+ if (score.HasValue)
+ {
+ stats.MismatchScores.Add(score.Value);
+ }
+ }
+
+ action.StartGroup($"{itemType} {argsData.Org}/{result.Repo}#{result.Number} - Predicted: {(predictedLabel ?? "")} - Existing: {(result.Label ?? "")}"); // One log group per item, containing the cumulative stats so far.
+ action.WriteInfo($"Total : {stats.Total}");
+ action.WriteInfo($"Matches : {stats.Matches} ({stats.MatchesPercentage:P2}) - Min | Avg | Max | StdDev: {GetStats(stats.MatchScores)}");
+ action.WriteInfo($"Mismatches : {stats.Mismatches} ({stats.MismatchesPercentage:P2}) - Min | Avg | Max | StdDev: {GetStats(stats.MismatchScores)}");
+ action.WriteInfo($"No Prediction: {stats.NoPrediction} ({stats.NoPredictionPercentage:P2})");
+ action.WriteInfo($"No Existing : {stats.NoExisting} ({stats.NoExistingPercentage:P2})");
+ action.EndGroup();
+}
+
+(string? PredictedLabel, float? PredictionScore) GetPrediction(PredictionEngine predictor, T issueOrPull, float? threshold) where T : Issue // Returns the highest-scoring label whose score is >= threshold (the top label when threshold is null), or (null, null) when none qualifies.
+{
+ var prediction = predictor.Predict(issueOrPull);
+ var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
+
+ if (prediction.Score is null || prediction.Score.Length == 0) // The model produced no scores at all.
+ {
+ action.WriteInfo($"No prediction was made for {itemType} {argsData.Org}/{issueOrPull.Repo}#{issueOrPull.Number}.");
+ return (null, null);
+ }
+
+ VBuffer> labels = default;
+ predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels); // Slot names of the Score column supply the label text for each score index.
+
+ var bestScore = prediction.Score
+ .Select((score, index) => new
+ {
+ Score = score,
+ Label = labels.GetItemOrDefault(index).ToString()
+ })
+ .OrderByDescending(p => p.Score) // Highest score first, so the first element passing the threshold is the best candidate.
+ .FirstOrDefault(p => threshold is null || p.Score >= threshold);
+
+ return bestScore is not null ? (bestScore.Label, bestScore.Score) : ((string?)null, (float?)null);
+}
+
+class TestStats // Running tallies of prediction outcomes for one item type, plus the scores behind matches and mismatches.
+{
+ public TestStats() { }
+
+ public int Matches { get; set; } = 0;
+ public int Mismatches { get; set; } = 0;
+ public int NoPrediction { get; set; } = 0;
+ public int NoExisting { get; set; } = 0;
+
+ public float Total => Matches + Mismatches + NoPrediction + NoExisting; // Sum of the four counters (float so the divisions below are floating-point).
+
+ public float MatchesPercentage => Total > 0 ? (float)Matches / Total : 0f; // Fractions in [0, 1]; 0 rather than NaN (0/0) when nothing has been counted yet.
+ public float MismatchesPercentage => Total > 0 ? (float)Mismatches / Total : 0f;
+ public float NoPredictionPercentage => Total > 0 ? (float)NoPrediction / Total : 0f;
+ public float NoExistingPercentage => Total > 0 ? (float)NoExisting / Total : 0f;
+
+ public List<float> MatchScores { get; } = []; // Created once per instance; an expression body (=> []) would return a fresh empty list on every access and discard each Add().
+ public List<float> MismatchScores { get; } = [];
+}
diff --git a/src/Tester/Tester.csproj b/IssueLabeler/src/Tester/Tester.csproj
similarity index 61%
rename from src/Tester/Tester.csproj
rename to IssueLabeler/src/Tester/Tester.csproj
index 497184a..aed9a8b 100644
--- a/src/Tester/Tester.csproj
+++ b/IssueLabeler/src/Tester/Tester.csproj
@@ -7,15 +7,18 @@
-
+
-
+
+
+
-
+
+
diff --git a/IssueLabeler/src/Trainer/Args.cs b/IssueLabeler/src/Trainer/Args.cs
new file mode 100644
index 0000000..f9a3a49
--- /dev/null
+++ b/IssueLabeler/src/Trainer/Args.cs
@@ -0,0 +1,93 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Services;
+
+public struct Args // Command-line configuration for the Trainer app: data/model path pairs for issues and pull requests.
+{
+ public string? IssuesDataPath { get; set; }
+ public string? IssuesModelPath { get; set; }
+ public string? PullsDataPath { get; set; }
+ public string? PullsModelPath { get; set; }
+
+ static void ShowUsage(string? message, ICoreService action) // Writes the usage text (plus an optional detail message) as a notice, then exits the process with code 1.
+ {
+ // If you provide a path for issue data, you must also provide a path for the issue model, and vice versa.
+ // If you provide a path for pull data, you must also provide a path for the pull model, and vice versa.
+ // At least one pair of paths (either issue or pull) must be provided.
+ action.WriteNotice($$"""
+ ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
+
+ Required for training the issues model:
+ --issues-data Path to existing issue data file (TSV file).
+ --issues-model Path to existing issue prediction model file (ZIP file).
+
+ Required for training the pull requests model:
+ --pulls-data Path to existing pull request data file (TSV file).
+ --pulls-model Path to existing pull request prediction model file (ZIP file).
+ """);
+
+ Environment.Exit(1);
+ }
+
+ public static Args? Parse(string[] args, ICoreService action) // Parses args into an Args value; returns null when an argument fails to parse, is unrecognized, or the path pairs are incomplete.
+ {
+ Queue arguments = new(args);
+ ArgUtils argUtils = new(action, ShowUsage, arguments);
+ Args argsData = new();
+
+ while (arguments.Count > 0) // Each iteration consumes one switch; the argUtils helpers are expected to consume the switch's value from the same queue.
+ {
+ string argument = arguments.Dequeue();
+
+ switch (argument)
+ {
+ case "--issues-data":
+ if (!argUtils.TryGetPath("--issues-data", out string? IssuesDataPath))
+ {
+ return null;
+ }
+ argsData.IssuesDataPath = IssuesDataPath;
+ break;
+
+ case "--issues-model":
+ if (!argUtils.TryGetPath("--issues-model", out string? IssuesModelPath))
+ {
+ return null;
+ }
+ argsData.IssuesModelPath = IssuesModelPath;
+ break;
+
+ case "--pulls-data":
+ if (!argUtils.TryGetPath("--pulls-data", out string? PullsDataPath))
+ {
+ return null;
+ }
+ argsData.PullsDataPath = PullsDataPath;
+ break;
+
+ case "--pulls-model":
+ if (!argUtils.TryGetPath("--pulls-model", out string? PullsModelPath))
+ {
+ return null;
+ }
+ argsData.PullsModelPath = PullsModelPath;
+ break;
+
+ default:
+ ShowUsage($"Unrecognized argument: {argument}", action);
+ return null; // Only reached if ShowUsage returns; as written above it calls Environment.Exit(1).
+ }
+ }
+
+ if ((argsData.IssuesDataPath is null != argsData.IssuesModelPath is null) || // Issue data and model paths must be given together.
+ (argsData.PullsDataPath is null != argsData.PullsModelPath is null) || // Pull data and model paths must be given together.
+ (argsData.IssuesModelPath is null && argsData.PullsModelPath is null)) // At least one pair is required.
+ {
+ ShowUsage(null, action);
+ return null;
+ }
+
+ return argsData;
+ }
+}
diff --git a/IssueLabeler/src/Trainer/Trainer.cs b/IssueLabeler/src/Trainer/Trainer.cs
new file mode 100644
index 0000000..8efc117
--- /dev/null
+++ b/IssueLabeler/src/Trainer/Trainer.cs
@@ -0,0 +1,144 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using Actions.Core.Extensions;
+using Actions.Core.Markdown;
+using Actions.Core.Services;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Transforms.Text;
+using static DataFileUtils;
+
+using var provider = new ServiceCollection() // Entry point of the Trainer app: build the service provider that supplies the Actions core service.
+ .AddGitHubActionsCore()
+ .BuildServiceProvider();
+
+var action = provider.GetRequiredService();
+
+var config = Args.Parse(args, action);
+if (config is not Args argsData) return 1; // Parse returned null: exit with code 1.
+
+List tasks = new(); // One training task per complete data/model path pair.
+
+if (argsData.IssuesDataPath is not null && argsData.IssuesModelPath is not null)
+{
+ tasks.Add(Task.Run(() => CreateModel(argsData.IssuesDataPath, argsData.IssuesModelPath, ModelType.Issue, action)));
+}
+
+if (argsData.PullsDataPath is not null && argsData.PullsModelPath is not null)
+{
+ tasks.Add(Task.Run(() => CreateModel(argsData.PullsDataPath, argsData.PullsModelPath, ModelType.PullRequest, action)));
+}
+
+var success = await App.RunTasks(tasks, action);
+return success ? 0 : 1; // Process exit code: 0 only when RunTasks reports success.
+
+static async Task CreateModel(string dataPath, string modelPath, ModelType type, ICoreService action) // Trains a multiclass label model from the TSV at dataPath, reports its evaluation metrics, and saves it to modelPath; throws InvalidOperationException when the data file is missing or too small.
+{
+ if (!File.Exists(dataPath))
+ {
+ action.WriteNotice($"The data file '{dataPath}' does not exist.");
+ action.Summary.AddPersistent(summary => summary.AddAlert("The data file does not exist. Training cannot proceed.", AlertType.Caution));
+ await action.Summary.WriteAsync();
+
+ throw new InvalidOperationException($"The data file '{dataPath}' does not exist.");
+ }
+
+ int recordsCounted = File.ReadLines(dataPath).Skip(1).Take(10).Count(); // Skip the header row (the loader below uses HasHeader = true) so only data records count; Take(10) stops reading once the minimum is reached.
+ if (recordsCounted < 10)
+ {
+ action.WriteNotice($"The data file '{dataPath}' does not contain enough data for training. A minimum of 10 records is required, but only {recordsCounted} exist.");
+ action.Summary.AddPersistent(summary => summary.AddAlert($"Only {recordsCounted} items were found to be used for training. A minimum of 10 records is required. Cannot proceed with training.", AlertType.Caution));
+ await action.Summary.WriteAsync();
+
+ throw new InvalidOperationException($"The data file '{dataPath}' does not contain enough data for training. A minimum of 10 records is required, but only {recordsCounted} exist.");
+ }
+
+ await action.WriteStatusAsync("Loading data into train/test sets...");
+ MLContext mlContext = new();
+
+ TextLoader.Column[] columns = type == ModelType.Issue ? [ // Pull request data carries two extra columns: FileNames and FolderNames.
+ new("Label", DataKind.String, 0),
+ new("Title", DataKind.String, 1),
+ new("Body", DataKind.String, 2),
+ ] : [
+ new("Label", DataKind.String, 0),
+ new("Title", DataKind.String, 1),
+ new("Body", DataKind.String, 2),
+ new("FileNames", DataKind.String, 3),
+ new("FolderNames", DataKind.String, 4)
+ ];
+
+ TextLoader.Options textLoaderOptions = new() // Tab-separated, one record per line, first line is a header.
+ {
+ AllowQuoting = false,
+ AllowSparse = false,
+ EscapeChar = '"',
+ HasHeader = true,
+ ReadMultilines = false,
+ Separators = ['\t'],
+ TrimWhitespace = true,
+ UseThreads = true,
+ Columns = columns
+ };
+
+ var loader = mlContext.Data.CreateTextLoader(textLoaderOptions);
+ var data = loader.Load(dataPath);
+ var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2); // Hold out 20% of the rows for evaluation.
+
+ await action.WriteStatusAsync("Building pipeline...");
+
+ var xf = mlContext.Transforms;
+ var pipeline = xf.Conversion.MapValueToKey(inputColumnName: "Label", outputColumnName: "LabelKey")
+ .Append(xf.Text.FeaturizeText(
+ "Features",
+ new TextFeaturizingEstimator.Options(),
+ columns.Select(c => c.Name).ToArray())) // NOTE(review): this passes every loaded column, including "Label", to the featurizer - confirm that is intended.
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("LabelKey"))
+ .Append(xf.Conversion.MapKeyToValue("PredictedLabel"));
+
+ await action.WriteStatusAsync("Fitting the model with the training data set...");
+ var trainedModel = pipeline.Fit(split.TrainSet);
+ var testModel = trainedModel.Transform(split.TestSet);
+
+ await action.WriteStatusAsync("Evaluating against the test set...");
+ var metrics = mlContext.MulticlassClassification.Evaluate(testModel, labelColumnName: "LabelKey");
+
+ action.Summary.AddPersistent(summary =>
+ {
+ summary.AddMarkdownHeading($"Finished Training {(type == ModelType.Issue ? "Issues" : "Pull Requests")} Model", 2);
+
+ summary.AddRawMarkdown($"""
+ * MacroAccuracy: {metrics.MacroAccuracy:0.####} (a value between 0 and 1; the closer to 1, the better)
+ * MicroAccuracy: {metrics.MicroAccuracy:0.####} (a value between 0 and 1; the closer to 1, the better)
+ * LogLoss: {metrics.LogLoss:0.####} (the closer to 0, the better)
+ {(metrics.PerClassLogLoss.Count() > 0 ? $" * Class 1: {metrics.PerClassLogLoss[0]:0.####}" : "")}
+ {(metrics.PerClassLogLoss.Count() > 1 ? $" * Class 2: {metrics.PerClassLogLoss[1]:0.####}" : "")}
+ {(metrics.PerClassLogLoss.Count() > 2 ? $" * Class 3: {metrics.PerClassLogLoss[2]:0.####}" : "")}
+ """);
+ });
+
+ await action.Summary.WriteAsync();
+
+ action.WriteInfo($"************************************************************"); // The same metrics are repeated in the log; only the first three per-class log-loss values are shown.
+ action.WriteInfo($"MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
+ action.WriteInfo($"MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
+ action.WriteInfo($"LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better");
+
+ if (metrics.PerClassLogLoss.Count() > 0)
+ action.WriteInfo($"LogLoss for class 1 = {metrics.PerClassLogLoss[0]:0.####}, the closer to 0, the better");
+
+ if (metrics.PerClassLogLoss.Count() > 1)
+ action.WriteInfo($"LogLoss for class 2 = {metrics.PerClassLogLoss[1]:0.####}, the closer to 0, the better");
+
+ if (metrics.PerClassLogLoss.Count() > 2)
+ action.WriteInfo($"LogLoss for class 3 = {metrics.PerClassLogLoss[2]:0.####}, the closer to 0, the better");
+
+ action.WriteInfo($"************************************************************");
+
+ action.WriteInfo($"Saving model to '{modelPath}'...");
+ EnsureOutputDirectory(modelPath);
+ mlContext.Model.Save(trainedModel, split.TrainSet.Schema, modelPath);
+}
diff --git a/src/Predictor/Predictor.csproj b/IssueLabeler/src/Trainer/Trainer.csproj
similarity index 56%
rename from src/Predictor/Predictor.csproj
rename to IssueLabeler/src/Trainer/Trainer.csproj
index 497184a..9fe4e21 100644
--- a/src/Predictor/Predictor.csproj
+++ b/IssueLabeler/src/Trainer/Trainer.csproj
@@ -7,15 +7,17 @@
-
+
-
+
+
+
-
+
diff --git a/IssueLabeler/tests/Common.Tests/ArgUtils.Tests.cs b/IssueLabeler/tests/Common.Tests/ArgUtils.Tests.cs
new file mode 100644
index 0000000..16e488b
--- /dev/null
+++ b/IssueLabeler/tests/Common.Tests/ArgUtils.Tests.cs
@@ -0,0 +1,166 @@
+using Actions.Core;
+using Actions.Core.Services;
+using Actions.Core.Summaries;
+
+namespace Common.Tests
+{
+ public class ArgUtilsTests // Tests for the ArgUtils Try* parsing helpers, driven by an in-memory ICoreService stub.
+ {
+ private class TestCoreService : ICoreService // Stub: only GetInput and the one-argument WriteNotice are usable; every other ICoreService member throws NotImplementedException.
+ {
+ private readonly Dictionary _inputs = new();
+
+ public void SetInput(string name, string? value) // Test helper: registers the value that GetInput will return for this name.
+ {
+ _inputs[name] = value;
+ }
+
+ public string? GetInput(string name) // Returns the registered value, or null when the name was never set.
+ {
+ return _inputs.TryGetValue(name, out var value) ? value : null;
+ }
+
+ string ICoreService.GetInput(string name, InputOptions? options) => GetInput(name)!; // Interface entry point; ignores options and forwards to the lookup above.
+
+ Summary ICoreService.Summary => throw new NotImplementedException();
+ bool ICoreService.IsDebug => throw new NotImplementedException();
+ public void WriteNotice(string message) { } // Notices are swallowed.
+ ValueTask ICoreService.ExportVariableAsync(string name, string value) { throw new NotImplementedException(); }
+ void ICoreService.SetSecret(string secret) { throw new NotImplementedException(); }
+ ValueTask ICoreService.AddPathAsync(string inputPath) { throw new NotImplementedException(); }
+ string[] ICoreService.GetMultilineInput(string name, InputOptions? options) { throw new NotImplementedException(); }
+ bool ICoreService.GetBoolInput(string name, InputOptions? options) { throw new NotImplementedException(); }
+ ValueTask ICoreService.SetOutputAsync(string name, T value, System.Text.Json.Serialization.Metadata.JsonTypeInfo? typeInfo) { throw new NotImplementedException(); }
+ void ICoreService.SetCommandEcho(bool enabled) { throw new NotImplementedException(); }
+ void ICoreService.SetFailed(string message) { throw new NotImplementedException(); }
+ void ICoreService.WriteDebug(string message) { throw new NotImplementedException(); }
+ void ICoreService.WriteError(string message, AnnotationProperties? properties) { throw new NotImplementedException(); }
+ void ICoreService.WriteWarning(string message, AnnotationProperties? properties) { throw new NotImplementedException(); }
+ void ICoreService.WriteNotice(string message, AnnotationProperties? properties) { throw new NotImplementedException(); }
+ void ICoreService.WriteInfo(string message) { throw new NotImplementedException(); }
+ void ICoreService.StartGroup(string name) { throw new NotImplementedException(); }
+ void ICoreService.EndGroup() { throw new NotImplementedException(); }
+ ValueTask ICoreService.GroupAsync(string name, Func> action) { throw new NotImplementedException(); }
+ ValueTask ICoreService.SaveStateAsync(string name, T value, System.Text.Json.Serialization.Metadata.JsonTypeInfo? typeInfo) { throw new NotImplementedException(); }
+ string ICoreService.GetState(string name) { throw new NotImplementedException(); }
+ }
+
+ private readonly TestCoreService _testCoreService;
+ private readonly Action _showUsage;
+
+ public ArgUtilsTests() // Creates a fresh stub and usage callback for each test instance.
+ {
+ _testCoreService = new TestCoreService();
+ _showUsage = (message, action) => { }; // No-op usage callback.
+ }
+
+ [Fact]
+ public void TryGetString_ShouldReturnTrue_WhenInputIsValid() // A registered string input is returned unchanged.
+ {
+ _testCoreService.SetInput("testInput", "testValue");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetString("testInput", out var value);
+
+ Assert.True(result);
+ Assert.Equal("testValue", value);
+ }
+
+ [Fact]
+ public void TryGetFlag_ShouldReturnTrue_WhenInputIsTrue() // The input "true" yields a true flag.
+ {
+ _testCoreService.SetInput("testFlag", "true");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetFlag("testFlag", out var value);
+
+ Assert.True(result);
+ Assert.True(value);
+ }
+
+ [Fact]
+ public void TryGetRepo_ShouldReturnTrue_WhenInputIsValid() // "org/repo" is split into its org and repo parts.
+ {
+ _testCoreService.SetInput("TEST_REPO", "TEST_ORG/TEST_REPO");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetRepo("TEST_REPO", out var org, out var repo);
+
+ Assert.True(result);
+ Assert.Equal("TEST_ORG", org);
+ Assert.Equal("TEST_REPO", repo);
+ }
+
+ [Fact]
+ public void TryGetPath_ShouldReturnTrue_WhenInputIsValid() // The returned path equals Path.GetFullPath of the input.
+ {
+ _testCoreService.SetInput("testPath", "C:\\test\\path");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetPath("testPath", out var path);
+
+ Assert.True(result);
+ Assert.Equal(Path.GetFullPath("C:\\test\\path"), path); // Expected value is computed the same way, so the assertion does not hard-code a platform-specific result.
+ }
+
+ [Fact]
+ public void TryGetStringArray_ShouldReturnTrue_WhenInputIsValid() // A comma-separated input becomes a string array.
+ {
+ _testCoreService.SetInput("testArray", "value1,value2,value3");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetStringArray("testArray", out var values);
+
+ Assert.True(result);
+ Assert.Equal(new[] { "value1", "value2", "value3" }, values);
+ }
+
+ [Fact]
+ public void TryGetInt_ShouldReturnTrue_WhenInputIsValid() // "42" parses to the integer 42.
+ {
+ _testCoreService.SetInput("testInt", "42");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetInt("testInt", out var value);
+
+ Assert.True(result);
+ Assert.Equal(42, value);
+ }
+
+ [Fact]
+ public void TryGetIntArray_ShouldReturnTrue_WhenInputIsValid() // A comma-separated input becomes an int array.
+ {
+ _testCoreService.SetInput("testIntArray", "1,2,3");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetIntArray("testIntArray", out var values);
+
+ Assert.True(result);
+ Assert.Equal(new[] { 1, 2, 3 }, values);
+ }
+
+ [Fact]
+ public void TryGetFloat_ShouldReturnTrue_WhenInputIsValid() // "3.14" parses to the float 3.14f.
+ {
+ _testCoreService.SetInput("testFloat", "3.14");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetFloat("testFloat", out var value);
+
+ Assert.True(result);
+ Assert.Equal(3.14f, value);
+ }
+
+ [Fact]
+ public void TryGetNumberRanges_ShouldReturnTrue_WhenInputIsValid() // Ranges ("1-3") and single numbers ("5") expand into one flat list.
+ {
+ _testCoreService.SetInput("testRanges", "1-3,5,7-9");
+ var argUtils = new ArgUtils(_testCoreService, _showUsage);
+
+ var result = argUtils.TryGetNumberRanges("testRanges", out var values);
+
+ Assert.True(result);
+ Assert.Equal(new List { 1, 2, 3, 5, 7, 8, 9 }, values);
+ }
+ }
+}
diff --git a/IssueLabeler/tests/Common.Tests/Common.Tests.csproj b/IssueLabeler/tests/Common.Tests/Common.Tests.csproj
new file mode 100644
index 0000000..cee86d4
--- /dev/null
+++ b/IssueLabeler/tests/Common.Tests/Common.Tests.csproj
@@ -0,0 +1,25 @@
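+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <TargetFramework>net9.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <IsPackable>false</IsPackable>
+  </PropertyGroup>
+
+  <!-- NOTE(review): the item groups that followed (presumably the test package references, a global Xunit using, and the project reference to Common) were lost in extraction; restore them from the repository. -->
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+</Project>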
diff --git a/IssueLabeler/tests/Common.Tests/DataFileUtilsTests.cs b/IssueLabeler/tests/Common.Tests/DataFileUtilsTests.cs
new file mode 100644
index 0000000..3ca5fd9
--- /dev/null
+++ b/IssueLabeler/tests/Common.Tests/DataFileUtilsTests.cs
@@ -0,0 +1,63 @@
+using System;
+using System.IO;
+
+namespace Common.Tests
+{
+    /// <summary>
+    /// Tests for the static <c>DataFileUtils</c> helpers: output directory creation,
+    /// text sanitization, and issue record formatting.
+    /// </summary>
+    public class DataFileUtilsTests
+    {
+        [Fact]
+        public void EnsureOutputDirectory_ShouldCreateDirectory_WhenDirectoryDoesNotExist()
+        {
+            // A unique directory name keeps this test independent of leftovers from
+            // earlier runs and of other tests sharing the temp folder, and ensures
+            // the cleanup below only ever deletes a directory this test created.
+            string tempDirPath = Path.Combine(Path.GetTempPath(), $"testDir-{Guid.NewGuid():N}");
+            string tempFilePath = Path.Combine(tempDirPath, "testFile.txt");
+
+            try
+            {
+                // Precondition: without this the assertion below could pass trivially.
+                Assert.False(Directory.Exists(tempDirPath));
+
+                DataFileUtils.EnsureOutputDirectory(tempFilePath);
+
+                Assert.True(Directory.Exists(tempDirPath));
+            }
+            finally
+            {
+                if (Directory.Exists(tempDirPath))
+                {
+                    Directory.Delete(tempDirPath, recursive: true);
+                }
+            }
+        }
+
+        [Fact]
+        public void SanitizeText_ShouldReplaceSpecialCharacters()
+        {
+            // Line breaks and tabs are expected to become spaces; double quotes become backticks.
+            string input = "Line1\r\nLine2\t\"Quoted\"";
+            string expected = "Line1 Line2 `Quoted`";
+
+            string result = DataFileUtils.SanitizeText(input);
+
+            Assert.Equal(expected, result);
+        }
+
+        [Fact]
+        public void SanitizeTextArray_ShouldJoinAndSanitizeStrings()
+        {
+            // Note that "Quo\ted" contains a tab escape, which is why the expected text reads "Quo ed".
+            string[] input = ["\tLine1\r\n", "Line2\t", "\" Quo\ted\""];
+            string expected = "Line1 Line2 ` Quo ed`";
+
+            string result = DataFileUtils.SanitizeTextArray(input);
+
+            Assert.Equal(expected, result);
+        }
+
+        [Fact]
+        public void FormatIssueRecord_ShouldReturnTabSeparatedString()
+        {
+            string label = "area-testing";
+            string title = "Issue title";
+            string body = "Issue body\r\nwith new line";
+            string[] expected = ["area-testing","Issue title","Issue body with new line"];
+
+            // Splitting on tabs recovers the columns of the formatted record.
+            string[] result = DataFileUtils.FormatIssueRecord(label, title, body).Split('\t');
+
+            Assert.Equal(expected, result);
+        }
+    }
+}
diff --git a/download/action.yml b/download/action.yml
new file mode 100644
index 0000000..c1bfb5e
--- /dev/null
+++ b/download/action.yml
@@ -0,0 +1,107 @@
+# Composite action: downloads issue or pull request data with the Downloader
+# project and stores the resulting TSV file in the GitHub Actions cache.
+name: "Download Data"
+description: "Download GitHub issues or pull requests and cache the data."
+
+branding:
+  color: "purple"
+  icon: "tag"
+
+inputs:
+  type:
+    description: "The type of data to download. Must be either 'issues' or 'pulls'."
+    required: true
+  label_prefix:
+    description: "The label prefix to be used for model training. Must end in a non-alphanumeric character."
+    required: true
+  excluded_authors:
+    description: "Comma-separated list of authors to exclude."
+  limit:
+    description: "Max number of items to download (newest items are used). Defaults to the max number of pages times the page size."
+  page_size:
+    description: "Number of items per page in GitHub API requests. Defaults to 100 for issues, 25 for pull requests."
+  page_limit:
+    description: "Maximum number of pages to retrieve. Defaults to 1000 for issues, 4000 for pull requests."
+  retries:
+    description: "Comma-separated list of retry delays in seconds. Defaults to '30,30,300,300,3000,3000'."
+  repository:
+    description: "The org/repo to download data from. Defaults to current repository."
+  cache_key:
+    description: "The cache key suffix to use for saving data."
+    default: "staged"
+
+runs:
+  using: "composite"
+  steps:
+    # Reject unsupported `type` values up front, then export the data file path
+    # and the cache key so that every later step reads the same values.
+    - name: "Validate inputs and set cache variables"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'"
+          echo "> [!CAUTION]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> \`type\` must be either 'issues' or 'pulls'." >> "$GITHUB_STEP_SUMMARY"
+          exit 1
+        fi
+
+        echo "DATA_PATH=${{ format('labeler-cache/{0}-data.tsv', inputs.type) }}" >> "$GITHUB_ENV"
+        echo "CACHE_KEY=${{ format('issue-labeler/data/{0}/{1}', inputs.type, inputs.cache_key || 'staged') }}" >> "$GITHUB_ENV"
+
+    # lookup-only: only checks whether the key exists; nothing is downloaded.
+    - name: "Check for Existing Cache Entry"
+      id: check-cache
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.DATA_PATH }}
+        key: ${{ env.CACHE_KEY }}
+        lookup-only: true
+        fail-on-cache-miss: false
+
+    # The final save step cannot write to a key that already exists, so stop
+    # before spending time on the download.
+    - name: "Abort if Existing Cache Exists"
+      shell: bash
+      run: |
+        if [[ "${{ steps.check-cache.outputs.cache-hit }}" == "true" ]]; then
+          echo "::error::Cache key '${{ env.CACHE_KEY }}' already exists. Cannot proceed with downloading."
+          echo "> [!CAUTION]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> Cache key '${{ env.CACHE_KEY }}' already exists. Cannot proceed with downloading." >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "> [!TIP]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> Either use a different \`cache_key\` value or delete the existing cache entry from the [Action Caches](/${{ github.repository }}/actions/caches) page and run the workflow again." >> "$GITHUB_STEP_SUMMARY"
+          exit 1
+        fi
+
+    # NOTE(review): the repository and ref are routed through step-level `env`
+    # rather than read directly in `with`; presumably this works around how the
+    # `github.action_*` contexts resolve inside a `uses` step - confirm before
+    # simplifying.
+    - name: "Clone the ${{ github.action_repository }} repository with ref '${{ github.action_ref }}'"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      env:
+        ISSUE_LABELER_REPO: ${{ github.action_repository }}
+        ISSUE_LABELER_REF: ${{ github.action_ref }}
+      with:
+        repository: ${{ env.ISSUE_LABELER_REPO }}
+        ref: ${{ env.ISSUE_LABELER_REF }}
+
+    - name: "Set up the .NET SDK"
+      uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+      with:
+        dotnet-version: "9.0.x"
+
+    # Optional flags expand to an empty string when their input is not set, so
+    # the Downloader falls back to its own defaults.
+    - name: "Run Downloader"
+      shell: bash
+      run: |
+        dotnet run -c Release --project IssueLabeler/src/Downloader -- \
+          ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \
+          ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
+          ${{ format('--{0}-data "{1}"', inputs.type, env.DATA_PATH) }} \
+          ${{ (inputs.excluded_authors && format('--excluded-authors "{0}"', inputs.excluded_authors)) || '' }} \
+          ${{ (inputs.limit && format('--{0}-limit {1}', inputs.type, inputs.limit)) || '' }} \
+          ${{ (inputs.page_size && format('--page-size {0}', inputs.page_size)) || '' }} \
+          ${{ (inputs.page_limit && format('--page-limit {0}', inputs.page_limit)) || '' }} \
+          ${{ (inputs.retries && format('--retries "{0}"', inputs.retries)) || '' }}
+
+    - name: "Save the Downloaded Data to Cache"
+      uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.DATA_PATH }}
+        key: ${{ env.CACHE_KEY }}
+
+    # Report the same key suffix that was used to build CACHE_KEY above.
+    - name: "Write Final Summary"
+      shell: bash
+      run: |
+        echo "" >> "$GITHUB_STEP_SUMMARY"
+        echo "## ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} Data Available as '${{ inputs.cache_key || 'staged' }}'" >> "$GITHUB_STEP_SUMMARY"
+        echo "The '${{ inputs.cache_key || 'staged' }}' data is saved to cache and available for training a model." >> "$GITHUB_STEP_SUMMARY"
diff --git a/predict/action.yml b/predict/action.yml
new file mode 100644
index 0000000..ef33b1b
--- /dev/null
+++ b/predict/action.yml
@@ -0,0 +1,53 @@
+# Docker action: runs the published predictor image against the models that
+# were restored to the labeler-cache folder.
+name: "Predict Labels"
+description: "Predict labels for Issues and Pull Requests using models already restored from cache."
+
+inputs:
+  issues:
+    description: "Issue Numbers (comma-separated list of ranges)."
+    required: true
+
+  pulls:
+    description: "Pull Request Numbers (comma-separated list of ranges)."
+    required: true
+
+  label_prefix:
+    description: "The label prefix used for prediction. Must end with a non-alphanumeric character. Defaults to 'area-'."
+    required: false
+    default: "area-"
+
+  threshold:
+    description: "The minimum confidence score for a label prediction, as a decimal between 0.00 and 1.00. Defaults to 0.40."
+    required: false
+    default: "0.40"
+
+  default_label:
+    description: "The default label to apply if no prediction meets the threshold. Leave blank for no default label."
+    required: false
+
+  excluded_authors:
+    description: "Comma-separated list of authors to exclude. Defaults to none."
+    required: false
+
+  retries:
+    description: "Comma-separated list of retry delays in seconds. Defaults to '30,30,300,300,3000,3000'."
+    required: false
+    default: "30,30,300,300,3000,3000"
+
+  test:
+    description: "Run in test mode, outputting predictions without applying labels."
+    required: false
+
+  verbose:
+    description: "Enable verbose output."
+    required: false
+
+branding:
+  color: "purple"
+  icon: "tag"
+
+runs:
+  using: docker
+  # Reference the docker container image using a published sha256 digest
+  # to ensure an immutable version is always used.
+  # NOTE(review): the digest after 'sha256:' is empty here; the image reference
+  # cannot resolve until a published digest is filled in.
+  image: docker://ghcr.io/dotnet/issue-labeler/predictor@sha256:
+  env:
+    # Model file locations handed to the container alongside the action inputs.
+    INPUT_ISSUES_MODEL: "labeler-cache/issues-model.zip"
+    INPUT_PULLS_MODEL: "labeler-cache/pulls-model.zip"
diff --git a/promote/action.yml b/promote/action.yml
new file mode 100644
index 0000000..0eb2139
--- /dev/null
+++ b/promote/action.yml
@@ -0,0 +1,130 @@
+# Composite action: promotes a staged model cache entry to 'ACTIVE'. When an
+# 'ACTIVE' entry already exists it is first saved under the backup key.
+name: "Promote Model"
+description: "Promote a model from staging to 'ACTIVE', backing up the currently 'ACTIVE' model."
+
+inputs:
+  type:
+    description: "The model to promote. Must be 'issues' or 'pulls'."
+    required: true
+
+  staged_key:
+    description: "The suffix for the staged cache entry to promote. Defaults to 'staged'."
+    required: false
+    default: "staged"
+
+  backup_key:
+    description: "The suffix for the backup cache entry. Defaults to 'backup'."
+    required: false
+    default: "backup"
+
+branding:
+  color: "purple"
+  icon: "arrow-up"
+
+runs:
+  using: "composite"
+  steps:
+    - name: "Validate Inputs"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'."
+          echo "> [!CAUTION]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> \`type\` must be either 'issues' or 'pulls'." >> "$GITHUB_STEP_SUMMARY"
+          exit 1
+        fi
+
+    # Both key suffixes fall back to their documented defaults so that an
+    # explicitly empty input cannot produce a key ending in '/'.
+    - name: "Set Environment Variables"
+      shell: bash
+      run: |
+        echo "CACHE_PATH=labeler-cache/${{ inputs.type }}-model.zip" >> "$GITHUB_ENV"
+        echo "STAGED_KEY=issue-labeler/model/${{ inputs.type }}/${{ inputs.staged_key || 'staged' }}" >> "$GITHUB_ENV"
+        echo "ACTIVE_KEY=issue-labeler/model/${{ inputs.type }}/ACTIVE" >> "$GITHUB_ENV"
+        echo "BACKUP_KEY=issue-labeler/model/${{ inputs.type }}/${{ inputs.backup_key || 'backup' }}" >> "$GITHUB_ENV"
+
+    # Fails the run when there is nothing staged to promote.
+    - name: "Check for Existing Staged Cache Entry"
+      id: check-staged
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.CACHE_PATH }}
+        key: ${{ env.STAGED_KEY }}
+        lookup-only: true
+        fail-on-cache-miss: true
+
+    - name: "Check for Existing Backup Cache Entry"
+      if: ${{ steps.check-staged.outputs.cache-hit == 'true' }}
+      id: check-backup
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.CACHE_PATH }}
+        key: ${{ env.BACKUP_KEY }}
+        lookup-only: true
+        fail-on-cache-miss: false
+
+    # Not lookup-only: the active model file is needed locally for the backup.
+    - name: "Restore Existing Active Cache Entry"
+      if: ${{ steps.check-staged.outputs.cache-hit == 'true' }}
+      id: check-active
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.CACHE_PATH }}
+        key: ${{ env.ACTIVE_KEY }}
+        fail-on-cache-miss: false
+
+    # A backup is only written when an active model exists, so an existing
+    # backup entry is only a conflict in that case.
+    - name: "Abort if Backup Cache Entry Already Exists"
+      if: ${{ steps.check-active.outputs.cache-hit == 'true' && steps.check-backup.outputs.cache-hit == 'true' }}
+      shell: bash
+      run: |
+        echo "::error::Backup cache key '${{ env.BACKUP_KEY }}' already exists. Cannot proceed with promotion."
+        echo "> [!CAUTION]" >> "$GITHUB_STEP_SUMMARY"
+        echo "> Backup cache key '${{ env.BACKUP_KEY }}' already exists. Cannot proceed with promotion." >> "$GITHUB_STEP_SUMMARY"
+        echo "" >> "$GITHUB_STEP_SUMMARY"
+        echo "> [!TIP]" >> "$GITHUB_STEP_SUMMARY"
+        echo "> Either use a different \`backup_key\` value or delete the existing cache entry from the [Action Caches](/${{ github.repository }}/actions/caches) page and run the workflow again." >> "$GITHUB_STEP_SUMMARY"
+        exit 1
+
+    - name: "Cache Backup of Current Active Cache Entry"
+      if: ${{ steps.check-active.outputs.cache-hit == 'true' }}
+      id: backup-file
+      uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.CACHE_PATH }}
+        key: ${{ env.BACKUP_KEY }}
+
+    # Clear the path so the staged model restored next is the only file there.
+    - name: "Remove Local Copy of Current Active Cache Entry"
+      if: ${{ steps.check-active.outputs.cache-hit == 'true' }}
+      shell: bash
+      run: |
+        rm "${{ env.CACHE_PATH }}"
+
+    - name: "Restore the Staged Cache Entry to Promote"
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.CACHE_PATH }}
+        key: ${{ env.STAGED_KEY }}
+        fail-on-cache-miss: true
+
+    # The existing ACTIVE entry is deleted so the key can be saved again below.
+    # GH_REPO gives the GitHub CLI a repository to act on even when the
+    # workspace is not a git checkout.
+    - name: "Delete Existing Active Cache Entry"
+      if: ${{ steps.check-active.outputs.cache-hit == 'true' }}
+      shell: bash
+      run: |
+        gh cache delete "${{ env.ACTIVE_KEY }}"
+      env:
+        GH_TOKEN: ${{ github.token }}
+        GH_REPO: ${{ github.repository }}
+
+    - name: "Save the Staged Cache Entry as the ACTIVE Cache Entry"
+      uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.CACHE_PATH }}
+        key: ${{ env.ACTIVE_KEY }}
+
+    - name: "Write Summary"
+      shell: bash
+      run: |
+        echo "> [!NOTE]" >> "$GITHUB_STEP_SUMMARY"
+        echo "> The ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was promoted from '${{ env.STAGED_KEY }}' to 'ACTIVE'." >> "$GITHUB_STEP_SUMMARY"
+
+        if [[ "${{ steps.check-active.outputs.cache-hit }}" == "true" ]]; then
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "> [!NOTE]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> The previous 'ACTIVE' ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was backed up as '${{ env.BACKUP_KEY }}'." >> "$GITHUB_STEP_SUMMARY"
+          echo "> If the previous model needs to be restored, promote '${{ env.BACKUP_KEY }}' and supply a different \`backup_key\`." >> "$GITHUB_STEP_SUMMARY"
+        fi
diff --git a/restore/action.yml b/restore/action.yml
new file mode 100644
index 0000000..3d05a5e
--- /dev/null
+++ b/restore/action.yml
@@ -0,0 +1,72 @@
+# Composite action: restores a model file from the GitHub Actions cache and
+# reports whether the restore succeeded.
+name: "Restore Model from Cache"
+description: "Restore a model from cache for label prediction or cache retention."
+
+inputs:
+  type:
+    description: "The model to restore. Must be 'issues' or 'pulls'."
+    required: true
+
+  cache_key:
+    description: "The cache key suffix to use for loading the model. Defaults to 'ACTIVE'."
+    required: false
+    default: "ACTIVE"
+
+  fail-on-cache-miss:
+    description: "Set to 'true' to fail the job if the model cannot be restored from cache. Defaults to 'false'."
+    required: false
+
+  quiet:
+    description: "Set to 'true' to suppress output into the GitHub action summary. Defaults to 'false'."
+    required: false
+
+outputs:
+  cache-hit:
+    description: "A string value ('true' or 'false') indicating whether the model was successfully restored from cache."
+    value: ${{ steps.restore-cache.outputs.cache-hit }}
+
+branding:
+  color: "purple"
+  icon: "arrow-down"
+
+runs:
+  using: "composite"
+  steps:
+    - name: "Validate Inputs"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'."
+          echo "> [!CAUTION]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> \`type\` must be either 'issues' or 'pulls'." >> "$GITHUB_STEP_SUMMARY"
+          exit 1
+        fi
+
+    # The restore itself never fails on a miss; the miss is reported in the
+    # summary first and only then turned into a failure by the last step.
+    - name: "Restore Model from Cache"
+      id: restore-cache
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: "labeler-cache/${{ inputs.type }}-model.zip"
+        key: "issue-labeler/model/${{ inputs.type }}/${{ inputs.cache_key || 'ACTIVE' }}"
+        fail-on-cache-miss: false
+
+    # Summary output only; skipped entirely when 'quiet' is set.
+    - name: "Produce Success Output"
+      if: ${{ inputs.quiet != 'true' }}
+      shell: bash
+      run: |
+        if [[ "${{ steps.restore-cache.outputs.cache-hit }}" == "true" ]]; then
+          echo "> [!NOTE]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> The ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was successfully restored from cache." >> "$GITHUB_STEP_SUMMARY"
+        else
+          echo "> [!CAUTION]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> The ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was not restored from cache. Label prediction cannot proceed." >> "$GITHUB_STEP_SUMMARY"
+
+          if [[ "${{ inputs.fail-on-cache-miss }}" != "true" ]]; then
+            echo "> The workflow is gracefully exiting without failure." >> "$GITHUB_STEP_SUMMARY"
+          fi
+        fi
+        echo "" >> "$GITHUB_STEP_SUMMARY"
+        echo "> [!TIP]" >> "$GITHUB_STEP_SUMMARY"
+        echo "> Refer to the [GitHub documentation](https://docs.github.com/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction) for details about cache retention policies." >> "$GITHUB_STEP_SUMMARY"
+
+    # Kept separate from the summary step so that 'quiet' cannot disable the
+    # failure requested through 'fail-on-cache-miss'.
+    - name: "Fail on Cache Miss"
+      if: ${{ inputs.fail-on-cache-miss == 'true' && steps.restore-cache.outputs.cache-hit != 'true' }}
+      shell: bash
+      run: |
+        echo "::error::The ${{ inputs.type }} model could not be restored from cache."
+        exit 1
diff --git a/src/Common/ArgUtils.cs b/src/Common/ArgUtils.cs
deleted file mode 100644
index e2ca961..0000000
--- a/src/Common/ArgUtils.cs
+++ /dev/null
@@ -1,213 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics.CodeAnalysis;
-using System.Text.RegularExpressions;
-
-/// <summary>
-/// Helpers for reading typed values from a queue of command-line arguments.
-/// Every <c>TryDequeue*</c> method consumes the next entry of the queue, returns
-/// <c>true</c> with the parsed value on success, and otherwise reports the
-/// problem through the supplied <c>showUsage</c> callback and returns <c>false</c>.
-/// </summary>
-public static class ArgUtils
-{
-    /// <summary>Dequeues the next argument as a non-empty string.</summary>
-    /// <param name="args">Remaining command-line arguments.</param>
-    /// <param name="showUsage">Callback that receives the error message.</param>
-    /// <param name="argName">Name of the argument, used in error messages.</param>
-    /// <param name="argValue">The value, or <c>null</c> when missing or whitespace.</param>
-    public static bool TryDequeueString(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out string? argValue)
-    {
-        argValue = Dequeue(args);
-        if (argValue is null)
-        {
-            showUsage($"Argument '{argName}' has an empty value.");
-            return false;
-        }
-
-        return true;
-    }
-
-    /// <summary>Dequeues the next argument as a single <c>{org}/{repo}</c> value.</summary>
-    public static bool TryDequeueRepo(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out string? repo)
-    {
-        if (!TrySplitOrgRepo(Dequeue(args), out org, out repo))
-        {
-            showUsage($$"""Argument '{{argName}}' has an empty value or is not in the format of '{org}/{repo}'.""");
-            return false;
-        }
-
-        return true;
-    }
-
-    /// <summary>
-    /// Dequeues the next argument as a comma-separated list of <c>{org}/{repo}</c>
-    /// values that must all belong to the same org.
-    /// </summary>
-    public static bool TryDequeueRepoList(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out List<string>? repos)
-    {
-        org = null;
-        repos = null;
-
-        string? orgRepos = Dequeue(args);
-        if (orgRepos is null)
-        {
-            // '$$' raw strings interpolate with double braces; single braces are literal text.
-            showUsage($$"""Argument '{{argName}}' has an empty value or is not in the format of '{org}/{repo}'.""");
-            return false;
-        }
-
-        // Results are collected locally so that a failure part-way through the
-        // list never leaves partially filled out parameters behind.
-        string? parsedOrg = null;
-        List<string> parsedRepos = [];
-
-        foreach (var orgRepo in orgRepos.Split(',').Select(r => r.Trim()))
-        {
-            if (!TrySplitOrgRepo(orgRepo, out string? itemOrg, out string? itemRepo))
-            {
-                showUsage($$"""Argument '{{argName}}' is not in the format of '{org}/{repo}': {{orgRepo}}""");
-                return false;
-            }
-
-            if (parsedOrg is not null && parsedOrg != itemOrg)
-            {
-                showUsage($"All '{argName}' values must be from the same org.");
-                return false;
-            }
-
-            parsedOrg = itemOrg;
-            parsedRepos.Add(itemRepo);
-        }
-
-        if (parsedOrg is null)
-        {
-            return false;
-        }
-
-        org = parsedOrg;
-        repos = parsedRepos;
-        return true;
-    }
-
-    /// <summary>
-    /// Dequeues a label prefix and produces a case-insensitive predicate that
-    /// matches labels starting with that prefix.
-    /// </summary>
-    public static bool TryDequeueLabelPrefix(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out Func<string, bool>? labelPredicate)
-    {
-        if (!TryDequeueString(args, showUsage, argName, out string? labelPrefix))
-        {
-            labelPredicate = null;
-            return false;
-        }
-
-        // Require that the label prefix end in something other than a letter or number
-        // This promotes the pattern of prefixes that are clear, rather than a prefix that
-        // could be matched as the beginning of another word in the label
-        if (Regex.IsMatch(labelPrefix.AsSpan(^1),"[a-zA-Z0-9]"))
-        {
-            showUsage($"""
-                Argument '{argName}' must end in something other than a letter or number.
-
-                The recommended label prefix terminating character is '-'.
-                The recommended label prefix for applying area labels is 'area-'.
-                """);
-            labelPredicate = null;
-            return false;
-        }
-
-        labelPredicate = (label) => label.StartsWith(labelPrefix, StringComparison.OrdinalIgnoreCase);
-        return true;
-    }
-
-    /// <summary>Dequeues a path; a path that is not rooted is resolved to a full path.</summary>
-    public static bool TryDequeuePath(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out string? path)
-    {
-        if (!TryDequeueString(args, showUsage, argName, out path))
-        {
-            return false;
-        }
-
-        if (!Path.IsPathRooted(path))
-        {
-            path = Path.GetFullPath(path);
-        }
-
-        return true;
-    }
-
-    /// <summary>Dequeues a comma-separated list of strings; entries are trimmed and empty entries dropped.</summary>
-    public static bool TryDequeueStringArray(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out string[]? argValues)
-    {
-        if (TryDequeueString(args, showUsage, argName, out string? argString))
-        {
-            argValues = argString.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
-            return true;
-        }
-
-        argValues = null;
-        return false;
-    }
-
-    /// <summary>Dequeues an integer; a non-numeric value is reported through <paramref name="showUsage"/>.</summary>
-    public static bool TryDequeueInt(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out int? argValue)
-    {
-        argValue = null;
-
-        // TryDequeueString has already reported a missing value.
-        if (!TryDequeueString(args, showUsage, argName, out string? argString))
-        {
-            return false;
-        }
-
-        if (!int.TryParse(argString, out int parsedValue))
-        {
-            showUsage($"Argument '{argName}' must be an integer. Value provided: '{argString}'.");
-            return false;
-        }
-
-        argValue = parsedValue;
-        return true;
-    }
-
-    /// <summary>
-    /// Dequeues a comma-separated list of integers. Invalid entries are reported
-    /// through <paramref name="showUsage"/> instead of raising a FormatException.
-    /// </summary>
-    public static bool TryDequeueIntArray(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out int[]? argValues)
-    {
-        argValues = null;
-
-        if (!TryDequeueString(args, showUsage, argName, out string? argString))
-        {
-            return false;
-        }
-
-        List<int> parsedValues = [];
-
-        foreach (var item in argString.Split(','))
-        {
-            if (!int.TryParse(item, out int parsedValue))
-            {
-                showUsage($"Argument '{argName}' must be a comma-separated list of integers. Value provided: '{argString}'.");
-                return false;
-            }
-
-            parsedValues.Add(parsedValue);
-        }
-
-        argValues = parsedValues.ToArray();
-        return true;
-    }
-
-    /// <summary>
-    /// Dequeues a floating-point number. Parsing uses the invariant culture so a
-    /// value such as "0.40" is read the same way regardless of machine locale.
-    /// </summary>
-    public static bool TryDequeueFloat(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out float? argValue)
-    {
-        argValue = null;
-
-        if (!TryDequeueString(args, showUsage, argName, out string? argString))
-        {
-            return false;
-        }
-
-        // Same number styles as the default float.TryParse overload; only the culture is pinned.
-        if (!float.TryParse(
-                argString,
-                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
-                System.Globalization.CultureInfo.InvariantCulture,
-                out float parsedValue))
-        {
-            showUsage($"Argument '{argName}' must be a number. Value provided: '{argString}'.");
-            return false;
-        }
-
-        argValue = parsedValue;
-        return true;
-    }
-
-    /// <summary>
-    /// Dequeues a comma-separated list of numbers and dash-separated inclusive
-    /// ranges (for example <c>1-3,5,7-9</c>) and expands it into a flat list.
-    /// </summary>
-    public static bool TryDequeueNumberRanges(Queue<string> args, Action<string> showUsage, string argName, [NotNullWhen(true)] out List<ulong>? argValues)
-    {
-        argValues = null;
-
-        if (!TryDequeueString(args, showUsage, argName, out string? argString))
-        {
-            return false;
-        }
-
-        List<ulong> numbers = new();
-
-        foreach (var range in argString.Split(','))
-        {
-            var beginEnd = range.Split('-');
-
-            // A single number is treated as a range of one; anything that is not
-            // one or two valid numbers is rejected with the usage message.
-            bool valid = beginEnd.Length is 1 or 2;
-            ulong begin = 0;
-            ulong end = 0;
-
-            if (valid)
-            {
-                valid = ulong.TryParse(beginEnd[0], out begin);
-                end = begin;
-            }
-
-            if (valid && beginEnd.Length == 2)
-            {
-                valid = ulong.TryParse(beginEnd[1], out end);
-            }
-
-            if (!valid)
-            {
-                showUsage($"Argument '{argName}' must be comma-separated list of numbers and/or dash-separated ranges. Example: 1-3,5,7-9.");
-                return false;
-            }
-
-            for (var number = begin; number <= end; number++)
-            {
-                numbers.Add(number);
-            }
-        }
-
-        argValues = numbers;
-        return true;
-    }
-
-    /// <summary>
-    /// Removes and returns the next argument, or <c>null</c> when the queue is
-    /// empty or the argument is null or whitespace.
-    /// </summary>
-    public static string? Dequeue(Queue<string> args)
-    {
-        if (args.TryDequeue(out string? argValue))
-        {
-            return string.IsNullOrWhiteSpace(argValue) ? null : argValue;
-        }
-
-        return null;
-    }
-
-    /// <summary>Splits <c>{org}/{repo}</c> into its two parts; both must be present and non-empty.</summary>
-    private static bool TrySplitOrgRepo(string? orgRepo, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out string? repo)
-    {
-        org = null;
-        repo = null;
-
-        string[]? parts = orgRepo?.Split('/');
-        if (parts is not { Length: 2 } || parts[0].Length == 0 || parts[1].Length == 0)
-        {
-            return false;
-        }
-
-        org = parts[0];
-        repo = parts[1];
-        return true;
-    }
-}
diff --git a/src/Downloader/Args.cs b/src/Downloader/Args.cs
deleted file mode 100644
index 7483dc4..0000000
--- a/src/Downloader/Args.cs
+++ /dev/null
@@ -1,190 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-
-/// <summary>
-/// Command-line arguments accepted by the Downloader, together with the parser
-/// that fills them in and the usage text shown when parsing fails.
-/// </summary>
-public struct Args
-{
-    public string Org { get; set; }
-    public List<string> Repos { get; set; }
-    public string GithubToken { get; set; }
-    public string? IssueDataPath { get; set; }
-    public int? IssueLimit { get; set; }
-    public string? PullDataPath { get; set; }
-    public int? PullLimit { get; set; }
-    public int? PageSize { get; set; }
-    public int? PageLimit { get; set; }
-    public int[] Retries { get; set; }
-    public string[]? ExcludedAuthors { get; set; }
-    public Predicate<string> LabelPredicate { get; set; }
-    public bool Verbose { get; set; }
-
-    /// <summary>
-    /// Prints the usage text, optionally preceded by a specific error message,
-    /// and terminates the process with exit code 1.
-    /// </summary>
-    static void ShowUsage(string? message = null)
-    {
-        string executableName = Process.GetCurrentProcess().ProcessName;
-
-        Console.WriteLine($$"""
-            ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
-
-            Usage:
-            {{executableName}} --repo {org/repo1}[,{org/repo2},...] --label-prefix {label-prefix} [options]
-
-            Required arguments:
-            --repo The GitHub repositories in format org/repo (comma separated for multiple).
-            --label-prefix Prefix for label predictions. Must end with a character other than a letter or number.
-
-            Required for downloading issue data:
-            --issue-data Path for issue data file to create (TSV file).
-
-            Required for downloading pull request data:
-            --pull-data Path for pull request data file to create (TSV file).
-
-            Optional arguments:
-            --issue-limit Maximum number of issues to download.
-            --pull-limit Maximum number of pull requests to download.
-            --page-size Number of items per page in GitHub API requests.
-            --page-limit Maximum number of pages to retrieve.
-            --excluded-authors Comma-separated list of authors to exclude.
-            --retries Comma-separated retry delays in seconds. Default: 30,30,300,300,3000,3000.
-            --token GitHub access token. Default: Read from GITHUB_TOKEN env var.
-            --verbose Enable verbose output.
-            """);
-
-        Environment.Exit(1);
-    }
-
-    /// <summary>
-    /// Parses the raw command-line arguments.
-    /// </summary>
-    /// <param name="args">The arguments as passed to the program.</param>
-    /// <returns>
-    /// The populated arguments, or <c>null</c> when an argument is invalid,
-    /// unrecognized, or a required argument is missing.
-    /// </returns>
-    public static Args? Parse(string[] args)
-    {
-        Args argsData = new()
-        {
-            Retries = [30, 30, 300, 300, 3000, 3000]
-        };
-
-        Queue<string> arguments = new(args);
-        while (arguments.Count > 0)
-        {
-            string argument = arguments.Dequeue();
-
-            switch (argument)
-            {
-                case "--token":
-                    if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--token", out string? token))
-                    {
-                        return null;
-                    }
-                    argsData.GithubToken = token;
-                    break;
-
-                case "--repo":
-                    if (!ArgUtils.TryDequeueRepoList(arguments, ShowUsage, "--repo", out string? org, out List<string>? repos))
-                    {
-                        return null;
-                    }
-                    argsData.Org = org;
-                    argsData.Repos = repos;
-                    break;
-
-                case "--issue-data":
-                    if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-data", out string? issueDataPath))
-                    {
-                        return null;
-                    }
-                    argsData.IssueDataPath = issueDataPath;
-                    break;
-
-                case "--issue-limit":
-                    if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--issue-limit", out int? issueLimit))
-                    {
-                        return null;
-                    }
-                    argsData.IssueLimit = issueLimit;
-                    break;
-
-                case "--pull-data":
-                    if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-data", out string? pullDataPath))
-                    {
-                        return null;
-                    }
-                    argsData.PullDataPath = pullDataPath;
-                    break;
-
-                case "--pull-limit":
-                    if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--pull-limit", out int? pullLimit))
-                    {
-                        return null;
-                    }
-                    argsData.PullLimit = pullLimit;
-                    break;
-
-                case "--page-size":
-                    if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--page-size", out int? pageSize))
-                    {
-                        return null;
-                    }
-                    argsData.PageSize = pageSize;
-                    break;
-
-                case "--page-limit":
-                    if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--page-limit", out int? pageLimit))
-                    {
-                        return null;
-                    }
-                    argsData.PageLimit = pageLimit;
-                    break;
-
-                case "--excluded-authors":
-                    if (!ArgUtils.TryDequeueStringArray(arguments, ShowUsage, "--excluded-authors", out string[]? excludedAuthors))
-                    {
-                        return null;
-                    }
-                    argsData.ExcludedAuthors = excludedAuthors;
-                    break;
-
-                case "--retries":
-                    if (!ArgUtils.TryDequeueIntArray(arguments, ShowUsage, "--retries", out int[]? retries))
-                    {
-                        return null;
-                    }
-                    argsData.Retries = retries;
-                    break;
-
-                case "--label-prefix":
-                    if (!ArgUtils.TryDequeueLabelPrefix(arguments, ShowUsage, "--label-prefix", out Func<string, bool>? labelPredicate))
-                    {
-                        return null;
-                    }
-                    argsData.LabelPredicate = new(labelPredicate);
-                    break;
-
-                case "--verbose":
-                    argsData.Verbose = true;
-                    break;
-                default:
-                    ShowUsage($"Unrecognized argument: {argument}");
-                    return null;
-            }
-        }
-
-        // The repo list and label prefix are always required, plus at least one data file path.
-        if (argsData.Org is null || argsData.Repos is null || argsData.LabelPredicate is null ||
-            (argsData.IssueDataPath is null && argsData.PullDataPath is null))
-        {
-            ShowUsage();
-            return null;
-        }
-
-        // Fall back to the GITHUB_TOKEN environment variable when no --token was given.
-        if (argsData.GithubToken is null)
-        {
-            string? token = Environment.GetEnvironmentVariable("GITHUB_TOKEN");
-
-            if (string.IsNullOrEmpty(token))
-            {
-                ShowUsage("Argument '--token' not specified and environment variable GITHUB_TOKEN is empty.");
-                return null;
-            }
-
-            argsData.GithubToken = token;
-        }
-
-        return argsData;
-    }
-}
diff --git a/src/GitHubClient/GitHubApi.cs b/src/GitHubClient/GitHubApi.cs
deleted file mode 100644
index fc70bd3..0000000
--- a/src/GitHubClient/GitHubApi.cs
+++ /dev/null
@@ -1,423 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Collections.Concurrent;
-using System.Net.Http.Json;
-using GraphQL;
-using GraphQL.Client.Http;
-using GraphQL.Client.Serializer.SystemTextJson;
-
-namespace GitHubClient;
-
-public class GitHubApi
-{
- private static ConcurrentDictionary _graphQLClients = new();
- private static ConcurrentDictionary _restClients = new();
-
- // Returns the GraphQL client cached for the given token, building and
- // caching a new one the first time that token is seen.
- private static GraphQLHttpClient GetGraphQLClient(string githubToken)
- {
-     return _graphQLClients.GetOrAdd(githubToken, CreateClient);
-
-     // Creates a client for GitHub's GraphQL endpoint that sends the token as a
-     // bearer credential and gives each request up to two minutes.
-     static GraphQLHttpClient CreateClient(string bearerToken)
-     {
-         var graphQLClient = new GraphQLHttpClient("https://api.github.com/graphql", new SystemTextJsonSerializer());
-         var httpClient = graphQLClient.HttpClient;
-
-         httpClient.DefaultRequestHeaders.Authorization =
-             new System.Net.Http.Headers.AuthenticationHeaderValue("bearer", bearerToken);
-         httpClient.Timeout = TimeSpan.FromMinutes(2);
-
-         return graphQLClient;
-     }
- }
-
- // Returns the REST client cached for the given token, building and caching
- // a new one the first time that token is seen.
- private static HttpClient GetRestClient(string githubToken)
- {
-     return _restClients.GetOrAdd(githubToken, CreateClient);
-
-     // Creates an HttpClient whose default headers carry the bearer token, the
-     // GitHub JSON media type, the pinned API version and a User-Agent.
-     static HttpClient CreateClient(string bearerToken)
-     {
-         var restClient = new HttpClient();
-         var headers = restClient.DefaultRequestHeaders;
-
-         headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("bearer", bearerToken);
-         headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/vnd.github+json"));
-         headers.Add("X-GitHub-Api-Version", "2022-11-28");
-         headers.Add("User-Agent", "Issue-Labeler");
-
-         return restClient;
-     }
- }
-
- // Streams (issue, label) pairs for a repository by delegating to
- // DownloadItems with the "issues" query name; all arguments are passed through.
- public static async IAsyncEnumerable<(Issue Issue, string Label)> DownloadIssues(
-     string githubToken,
-     string org, string repo,
-     Predicate labelPredicate,
-     int? issueLimit,
-     int pageSize,
-     int pageLimit,
-     int[] retries,
-     string[] excludedAuthors,
-     bool verbose = false)
- {
-     var downloaded = DownloadItems("issues", githubToken, org, repo, labelPredicate, issueLimit, pageSize, pageLimit, retries, excludedAuthors, verbose);
-
-     await foreach (var (issue, label) in downloaded)
-     {
-         yield return (issue, label);
-     }
- }
-
- // Streams (pull request, label) pairs for a repository by delegating to
- // DownloadItems with the "pullRequests" query name; all arguments are passed through.
- public static async IAsyncEnumerable<(PullRequest PullRequest, string Label)> DownloadPullRequests(
-     string githubToken,
-     string org,
-     string repo,
-     Predicate labelPredicate,
-     int? pullLimit,
-     int pageSize,
-     int pageLimit,
-     int[] retries,
-     string[] excludedAuthors,
-     bool verbose = false)
- {
-     await foreach (var (pullRequest, label) in DownloadItems("pullRequests", githubToken, org, repo, labelPredicate, pullLimit, pageSize, pageLimit, retries, excludedAuthors, verbose))
-     {
-         yield return (pullRequest, label);
-     }
- }
-
- // Pages through a repository's issues or pull requests (selected by
- // itemQueryName) and yields each item that has exactly one label accepted by
- // labelPredicate, together with that label.
- //
- //   itemLimit       - stop once this many items have been yielded (null = no limit).
- //   pageSize        - items requested per page; capped at 100 below.
- //   pageLimit       - stop after this many pages.
- //   retries         - delay, in seconds, before each successive retry of a failed
- //                     page request; its length is the number of retries allowed.
- //   excludedAuthors - logins whose items are skipped (case-insensitive match).
- //
- // If the retry budget is exhausted or paging stops progressing, the sequence
- // simply ends; items already yielded are not retracted.
- private static async IAsyncEnumerable<(T Item, string Label)> DownloadItems(
-     string itemQueryName,
-     string githubToken,
-     string org,
-     string repo,
-     Predicate labelPredicate,
-     int? itemLimit,
-     int pageSize,
-     int pageLimit,
-     int[] retries,
-     string[] excludedAuthors,
-     bool verbose) where T : Issue
- {
-     pageSize = Math.Min(pageSize, 100);
-
-     int pageNumber = 0;
-     string? after = null;
-     bool hasNextPage = true;
-     int loadedCount = 0;
-     int includedCount = 0;
-     int? totalCount = null;
-     byte retry = 0;
-     bool finished = false;
-
-     do
-     {
-         Console.WriteLine($"Downloading {itemQueryName} page {pageNumber + 1} from {org}/{repo}...{(retry > 0 ? $" (retry {retry} of {retries.Length}) " : "")}{(after is not null ? $" (cursor: '{after}')" : "")}");
-
-         Page page;
-
-         try
-         {
-             page = await GetItemsPage(githubToken, org, repo, pageSize, after, itemQueryName, excludedAuthors);
-         }
-         catch (Exception ex) when (
-             ex is HttpIOException ||
-             ex is HttpRequestException ||
-             ex is GraphQLHttpRequestException ||
-             ex is TaskCanceledException
-         )
-         {
-             Console.WriteLine($"Exception caught during query.\n {ex.Message}");
-
-             // Abort only after every configured delay has been used. The previous
-             // check (retries.Length - 1) gave up one retry early, so the last
-             // delay in the array was never reached.
-             if (retry >= retries.Length)
-             {
-                 Console.WriteLine($"Retry limit of {retries.Length} reached. Aborting.");
-                 break;
-             }
-             else
-             {
-                 Console.WriteLine($"Waiting {retries[retry]} seconds before retry {retry + 1} of {retries.Length}...");
-                 await Task.Delay(retries[retry] * 1000);
-                 retry++;
-
-                 // Re-request the same page: the cursor has not been advanced.
-                 continue;
-             }
-         }
-
-         // An unchanged cursor would make the loop request the same page forever.
-         if (after == page.EndCursor)
-         {
-             Console.WriteLine($"Paging did not progress. Cursor: '{after}'. Aborting.");
-             break;
-         }
-
-         pageNumber++;
-         after = page.EndCursor;
-         hasNextPage = page.HasNextPage;
-         loadedCount += page.Nodes.Length;
-         totalCount ??= page.TotalCount;
-         retry = 0; // the retry budget applies per page
-
-         foreach (T item in page.Nodes)
-         {
-             if (excludedAuthors.Contains(item.Author.Login, StringComparer.InvariantCultureIgnoreCase))
-             {
-                 if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Excluded from output. Author '{item.Author.Login}' is in excluded list.");
-                 continue;
-             }
-
-             // If there are more labels, there might be other applicable
-             // labels that were not loaded and the model is incomplete.
-             if (item.Labels.HasNextPage)
-             {
-                 if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Excluded from output. Not all labels were loaded.");
-                 continue;
-             }
-
-             // Only items with exactly one applicable label are used for the model.
-             string[] labels = Array.FindAll(item.LabelNames, labelPredicate);
-             if (labels.Length != 1)
-             {
-                 if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Excluded from output. {labels.Length} applicable labels found.");
-                 continue;
-             }
-
-             // Exactly one applicable label was found on the item. Include it in the model.
-             if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Included in output. Applicable label: '{labels[0]}'.");
-
-             yield return (item, labels[0]);
-
-             includedCount++;
-
-             if (itemLimit.HasValue && includedCount >= itemLimit)
-             {
-                 break;
-             }
-         }
-
-         finished = (!hasNextPage || pageNumber >= pageLimit || (itemLimit.HasValue && includedCount >= itemLimit));
-
-         Console.WriteLine(
-             $"Included: {includedCount} (limit: {(itemLimit.HasValue ? itemLimit : "none")}) | " +
-             $"Downloaded: {loadedCount} (total: {totalCount}) | " +
-             $"Pages: {pageNumber} (limit: {pageLimit})");
-     }
-     while (!finished);
- }
-
- // Requests one page of issues or pull requests (selected by itemQueryName)
- // from GitHub's GraphQL API, newest first, starting after the given cursor.
- //
- // Throws ApplicationException when the response carries GraphQL errors or
- // lacks the repository result; the retry filter in DownloadItems does not
- // list ApplicationException, so these propagate to the caller.
- //
- // Note: excludedAuthors is accepted but not referenced in this method.
- private static async Task> GetItemsPage(string githubToken, string org, string repo, int pageSize, string? after, string itemQueryName, string[] excludedAuthors) where T : Issue
- {
- GraphQLHttpClient client = GetGraphQLClient(githubToken);
-
- // Only pull requests are queried for their changed file paths (first 100).
- string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : "";
-
- // itemQueryName, pageSize and files are spliced into the query text; the
- // owner, repo and cursor are passed as GraphQL variables.
- GraphQLRequest query = new GraphQLRequest
- {
- Query = $$"""
- query ($owner: String!, $repo: String!, $after: String) {
- repository (owner: $owner, name: $repo) {
- result:{{itemQueryName}} (after: $after, first: {{pageSize}}, orderBy: {field: CREATED_AT, direction: DESC}) {
- nodes {
- number
- title
- author { login }
- body: bodyText
- labels (first: 25) {
- nodes { name },
- pageInfo { hasNextPage }
- }
- {{files}}
- }
- pageInfo {
- hasNextPage
- endCursor
- }
- totalCount
- }
- }
- }
- """,
- Variables = new
- {
- Owner = org,
- Repo = repo,
- After = after
- }
- };
-
- var response = await client.SendQueryAsync>>(query);
-
- if (response.Errors?.Any() ?? false)
- {
- // Number the error messages and surface them all in a single exception.
- string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. {e.Message}").ToArray());
- throw new ApplicationException($"GraphQL request returned errors.\n\n{errors}");
- }
- else if (response.Data is null || response.Data.Repository is null || response.Data.Repository.Result is null)
- {
- throw new ApplicationException("GraphQL response did not include the repository result data");
- }
-
- return response.Data.Repository.Result;
- }
-
- // Fetches a single issue by number; forwards to GetItem with the "issue" query name.
- public static async Task GetIssue(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose)
- {
-     return await GetItem(githubToken, org, repo, number, retries, verbose, "issue");
- }
-
- // Fetches a single pull request by number; forwards to GetItem with the "pullRequest" query name.
- public static async Task GetPullRequest(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose)
- {
-     return await GetItem(githubToken, org, repo, number, retries, verbose, "pullRequest");
- }
-
- // Fetches one issue or pull request (selected by itemQueryName) by number
- // through GitHub's GraphQL API.
- //
- // Returns the item, or null when it could not be retrieved. Failures are
- // written to the console rather than thrown:
- //   - rate-limit errors and the network exceptions listed in the catch filter
- //     are retried, waiting retries[i] seconds before retry i + 1;
- //   - any other GraphQL error, or a response without result data, is not retried.
- //
- // One initial attempt is made plus one retry per entry in retries. Previously
- // the loop stopped after retries.Length attempts in total, so the last
- // announced retry never ran, the final delay was wasted, and an empty retries
- // array meant no request was sent at all.
- //
- // Note: verbose is accepted but not referenced in this method.
- private static async Task GetItem(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose, string itemQueryName) where T : Issue
- {
-     GraphQLHttpClient client = GetGraphQLClient(githubToken);
-
-     // Only pull requests are queried for their changed file paths (first 100).
-     string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : "";
-
-     GraphQLRequest query = new GraphQLRequest
-     {
-         Query = $$"""
-             query ($owner: String!, $repo: String!, $number: Int!) {
-             repository (owner: $owner, name: $repo) {
-             result:{{itemQueryName}} (number: $number) {
-             number
-             title
-             author { login }
-             body: bodyText
-             labels (first: 25) {
-             nodes { name },
-             pageInfo { hasNextPage }
-             }
-             {{files}}
-             }
-             }
-             }
-             """,
-         Variables = new
-         {
-             Owner = org,
-             Repo = repo,
-             Number = number
-         }
-     };
-
-     byte retry = 0;
-
-     while (true)
-     {
-         // Decide up front whether a failure of this attempt can be retried, so
-         // the log message and the control flow below always agree.
-         bool retriesRemain = retry < retries.Length;
-         string retryNote = retriesRemain
-             ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..."
-             : $"Retry limit of {retries.Length} reached.";
-
-         try
-         {
-             var response = await client.SendQueryAsync>(query);
-
-             if (!(response.Errors?.Any() ?? false) && response.Data?.Repository?.Result is not null)
-             {
-                 return response.Data.Repository.Result;
-             }
-
-             if (response.Errors?.Any() ?? false)
-             {
-                 // These errors occur when an issue/pull does not exist or when the API rate limit has been exceeded
-                 if (response.Errors.Any(e => e.Message.StartsWith("API rate limit exceeded")))
-                 {
-                     Console.WriteLine($"""
-                         [{itemQueryName} #{number}] Failed to retrieve data.
-                         Rate limit has been reached.
-                         {retryNote}
-                         """);
-                 }
-                 else
-                 {
-                     // Could not detect this as a rate limit issue. Do not retry.
-
-                     string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. {e.Message}").ToArray());
-
-                     Console.WriteLine($"""
-                         [{itemQueryName} #{number}] Failed to retrieve data.
-                         GraphQL request returned errors:
-
-                         {errors}
-                         """);
-
-                     return null;
-                 }
-             }
-             else
-             {
-                 // Do not retry as these errors are not recoverable
-                 // This is usually a bug during development when the query/response model is incorrect
-                 Console.WriteLine($"""
-                     [{itemQueryName} #{number}] Failed to retrieve data.
-                     GraphQL response did not include the repository result data.
-                     """);
-
-                 return null;
-             }
-         }
-         catch (Exception ex) when (
-             ex is HttpIOException ||
-             ex is HttpRequestException ||
-             ex is GraphQLHttpRequestException ||
-             ex is TaskCanceledException
-         )
-         {
-             // Retry on exceptions as they can be temporary network issues
-             Console.WriteLine($"""
-                 [{itemQueryName} #{number}] Failed to retrieve data.
-                 Exception caught during query.
-
-                 {ex.Message}
-
-                 {retryNote}
-                 """);
-         }
-
-         // Only retryable failures reach this point.
-         if (!retriesRemain)
-         {
-             return null;
-         }
-
-         await Task.Delay(retries[retry++] * 1000);
-     }
- }
-
- // Adds a label to an issue or pull request through the REST API.
- //
- //   type    - used only in console messages (e.g. which kind of item failed).
- //   retries - delay, in seconds, before each successive retry; its length is
- //             the number of retries allowed after the initial attempt.
- //
- // Returns null on success, or an error message once every retry has failed.
- // Only non-success HTTP status codes are retried; exceptions thrown by the
- // request itself are not caught here.
- //
- // Previously the loop made retries.Length attempts in total: the last
- // announced retry never ran, and an empty retries array sent no request.
- public static async Task AddLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries)
- {
-     var client = GetRestClient(githubToken);
-     byte retry = 0;
-
-     while (true)
-     {
-         // Dispose each response so its connection is released before waiting.
-         using var response = await client.PostAsJsonAsync(
-             $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels",
-             new string[] { label },
-             CancellationToken.None);
-
-         if (response.IsSuccessStatusCode)
-         {
-             return null;
-         }
-
-         bool retriesRemain = retry < retries.Length;
-
-         Console.WriteLine($"""
-             [{type} #{number}] Failed to add label '{label}'. {response.ReasonPhrase} ({response.StatusCode})
-             {(retriesRemain ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
-             """);
-
-         if (!retriesRemain)
-         {
-             return $"Failed to add label '{label}' after {retries.Length} retries.";
-         }
-
-         await Task.Delay(retries[retry++] * 1000);
-     }
- }
-
- // Removes a label from an issue or pull request through the REST API.
- //
- //   type    - used only in console messages (e.g. which kind of item failed).
- //   retries - delay, in seconds, before each successive retry; its length is
- //             the number of retries allowed after the initial attempt.
- //
- // Returns null on success, or an error message once every retry has failed.
- // Only non-success HTTP status codes are retried; exceptions thrown by the
- // request itself are not caught here.
- //
- // Previously the loop made retries.Length attempts in total: the last
- // announced retry never ran, and an empty retries array sent no request.
- public static async Task RemoveLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries)
- {
-     var client = GetRestClient(githubToken);
-     byte retry = 0;
-
-     // The label name is a URL path segment, so characters such as spaces,
-     // '/', '?' or '#' must be escaped or the request targets the wrong resource.
-     string requestUri = $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels/{Uri.EscapeDataString(label)}";
-
-     while (true)
-     {
-         // Dispose each response so its connection is released before waiting.
-         using var response = await client.DeleteAsync(requestUri, CancellationToken.None);
-
-         if (response.IsSuccessStatusCode)
-         {
-             return null;
-         }
-
-         bool retriesRemain = retry < retries.Length;
-
-         Console.WriteLine($"""
-             [{type} #{number}] Failed to remove label '{label}'. {response.ReasonPhrase} ({response.StatusCode})
-             {(retriesRemain ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
-             """);
-
-         if (!retriesRemain)
-         {
-             return $"Failed to remove label '{label}' after {retries.Length} retries.";
-         }
-
-         await Task.Delay(retries[retry++] * 1000);
-     }
- }
-}
diff --git a/src/Predictor/Args.cs b/src/Predictor/Args.cs
deleted file mode 100644
index 7117fda..0000000
--- a/src/Predictor/Args.cs
+++ /dev/null
@@ -1,210 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-
-public struct Args
-{
- public string Org { get; set; }
- public string Repo { get; set; }
- public string GithubToken { get; set; }
- public string? IssueModelPath { get; set; }
- public List? IssueNumbers { get; set; }
- public string? PullModelPath { get; set; }
- public List? PullNumbers { get; set; }
- public float Threshold { get; set; }
- public Func LabelPredicate { get; set; }
- public string? DefaultLabel { get; set; }
- public int[] Retries { get; set; }
- public bool Verbose { get; set; }
- public string[]? ExcludedAuthors { get; set; }
- public bool Test { get; set; }
-
- // Prints the usage text to the console, appending the optional message to
- // the error line, and then terminates the process with exit code 1.
- // Because of the Environment.Exit call this method does not return to its caller.
- static void ShowUsage(string? message = null)
- {
- // The running executable's process name is shown in the usage line.
- string executableName = Process.GetCurrentProcess().ProcessName;
-
- Console.WriteLine($$"""
- ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
-
- Usage:
- {{executableName}} --repo {org/repo} --label-prefix {label-prefix} [options]
-
- Required arguments:
- --repo GitHub repository in the format {org}/{repo}.
- --label-prefix Prefix for label predictions. Must end with a character other than a letter or number.
-
- Required for predicting issue labels:
- --issue-model Path to existing issue prediction model file (ZIP file).
- --issue-numbers Comma-separated list of issue number ranges. Example: 1-3,7,5-9.
-
- Required for predicting pull request labels:
- --pull-model Path to existing pull request prediction model file (ZIP file).
- --pull-numbers Comma-separated list of pull request number ranges. Example: 1-3,7,5-9.
-
- Optional arguments:
- --default-label Default label to use if no label is predicted.
- --threshold Minimum prediction confidence threshold. Range (0,1]. Default 0.4.
- --retries Comma-separated retry delays in seconds. Default: 30,30,300,300,3000,3000.
- --excluded-authors Comma-separated list of authors to exclude.
- --token GitHub token. Default: read from GITHUB_TOKEN env var.
- --test Run in test mode, outputting predictions without applying labels.
- --verbose Enable verbose output.
- """);
-
- Environment.Exit(1);
- }
-
- // Parses the Predictor's command-line arguments into an Args value.
- //
- // Starts from the defaults (Threshold 0.4 and the built-in retry delays),
- // consumes the arguments left to right, then validates the combination.
- // On any problem ShowUsage is called; the `return null` that follows each
- // call is only reached if ShowUsage returns.
- public static Args? Parse(string[] args)
- {
- Args argsData = new()
- {
- Threshold = 0.4f,
- Retries = [30, 30, 300, 300, 3000, 3000]
- };
-
- // Each option dequeues its own value (if it takes one) via ArgUtils.
- Queue arguments = new(args);
- while (arguments.Count > 0)
- {
- string argument = arguments.Dequeue();
-
- switch (argument)
- {
- case "--token":
- if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--token", out string? token))
- {
- return null;
- }
- argsData.GithubToken = token;
- break;
-
- case "--repo":
- if (!ArgUtils.TryDequeueRepo(arguments, ShowUsage, "--repo", out string? org, out string? repo))
- {
- return null;
- }
- argsData.Org = org;
- argsData.Repo = repo;
- break;
-
- case "--issue-model":
- if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-model", out string? issueModelPath))
- {
- return null;
- }
- argsData.IssueModelPath = issueModelPath;
- break;
-
- case "--issue-numbers":
- if (!ArgUtils.TryDequeueNumberRanges(arguments, ShowUsage, "--issue-numbers", out List? issueNumbers))
- {
- return null;
- }
- argsData.IssueNumbers = issueNumbers;
- break;
-
- case "--pull-model":
- if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-model", out string? pullModelPath))
- {
- return null;
- }
- argsData.PullModelPath = pullModelPath;
- break;
-
- case "--pull-numbers":
- if (!ArgUtils.TryDequeueNumberRanges(arguments, ShowUsage, "--pull-numbers", out List? pullNumbers))
- {
- return null;
- }
- argsData.PullNumbers = pullNumbers;
- break;
-
- case "--label-prefix":
- if (!ArgUtils.TryDequeueLabelPrefix(arguments, ShowUsage, "--label-prefix", out Func? labelPredicate))
- {
- return null;
- }
- argsData.LabelPredicate = labelPredicate;
- break;
-
- case "--threshold":
- if (!ArgUtils.TryDequeueFloat(arguments, ShowUsage, "--threshold", out float? threshold))
- {
- return null;
- }
- argsData.Threshold = threshold.Value;
- break;
-
- case "--default-label":
- if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--default-label", out string? defaultLabel))
- {
- return null;
- }
- argsData.DefaultLabel = defaultLabel;
- break;
-
- case "--retries":
- if (!ArgUtils.TryDequeueIntArray(arguments, ShowUsage, "--retries", out int[]? retries))
- {
- return null;
- }
- argsData.Retries = retries;
- break;
-
- case "--excluded-authors":
- if (!ArgUtils.TryDequeueStringArray(arguments, ShowUsage, "--excluded-authors", out string[]? excludedAuthors))
- {
- return null;
- }
- argsData.ExcludedAuthors = excludedAuthors;
- break;
-
- case "--test":
- argsData.Test = true;
- break;
-
- case "--verbose":
- argsData.Verbose = true;
- break;
-
- default:
- ShowUsage($"Unrecognized argument: {argument}");
- return null;
- }
- }
-
- // Check if any required configuration properties are missing or invalid.
- // The conditions are:
- // - Org is null
- // - Repo is null
- // - Threshold is 0
- // - LabelPredicate is null
- // - IssueModelPath is null while IssueNumbers is not null, or vice versa
- // - PullModelPath is null while PullNumbers is not null, or vice versa
- // - Both IssueModelPath and PullModelPath are null
- // A missing GitHub token is handled separately below, after this check.
- if (argsData.Org is null || argsData.Repo is null || argsData.Threshold == 0 || argsData.LabelPredicate is null ||
- (argsData.IssueModelPath is null != argsData.IssueNumbers is null) ||
- (argsData.PullModelPath is null != argsData.PullNumbers is null) ||
- (argsData.IssueModelPath is null && argsData.PullModelPath is null))
- {
- ShowUsage();
- return null;
- }
-
- // Fall back to the GITHUB_TOKEN environment variable when --token was not given.
- if (argsData.GithubToken is null)
- {
- string? token = Environment.GetEnvironmentVariable("GITHUB_TOKEN");
-
- if (string.IsNullOrEmpty(token))
- {
- ShowUsage("Argument '--token' not specified and environment variable GITHUB_TOKEN is empty.");
- return null;
- }
-
- argsData.GithubToken = token;
- }
-
- return argsData;
- }
-}
diff --git a/src/Predictor/Predictor.cs b/src/Predictor/Predictor.cs
deleted file mode 100644
index 2ac2557..0000000
--- a/src/Predictor/Predictor.cs
+++ /dev/null
@@ -1,218 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Microsoft.ML;
-using Microsoft.ML.Data;
-using GitHubClient;
-
-// Entry point of the Predictor: parse the arguments, download each requested
-// issue / pull request, queue a prediction task for it, then print every result.
-var config = Args.Parse(args);
-if (config is not Args argsData) return;
-
-// One task per queued item; each yields (Type, Number, Success, Output).
-List> tasks = new();
-
-if (argsData.IssueModelPath is not null && argsData.IssueNumbers is not null)
-{
-    Console.WriteLine("Loading issues model...");
-    var issueContext = new MLContext();
-    var issueModel = issueContext.Model.Load(argsData.IssueModelPath, out _);
-    var issuePredictor = issueContext.Model.CreatePredictionEngine(issueModel);
-    Console.WriteLine("Issues prediction engine ready.");
-
-    foreach (ulong issueNumber in argsData.IssueNumbers)
-    {
-        var result = await GitHubApi.GetIssue(argsData.GithubToken, argsData.Org, argsData.Repo, issueNumber, argsData.Retries, argsData.Verbose);
-
-        if (result is null)
-        {
-            Console.WriteLine($"[Issue #{issueNumber}] could not be found or downloaded. Skipped.");
-            continue;
-        }
-
-        if (argsData.ExcludedAuthors is not null && argsData.ExcludedAuthors.Contains(result.Author.Login, StringComparer.InvariantCultureIgnoreCase))
-        {
-            Console.WriteLine($"[Issue #{issueNumber}] Author '{result.Author.Login}' is in excluded list. Skipped.");
-            continue;
-        }
-
-        tasks.Add(Task.Run(() => ProcessPrediction(
-            issuePredictor,
-            issueNumber,
-            new Issue(result),
-            argsData.LabelPredicate,
-            argsData.DefaultLabel,
-            ModelType.Issue,
-            argsData.Retries,
-            argsData.Test
-        )));
-
-        Console.WriteLine($"[Issue #{issueNumber}] Queued for prediction.");
-    }
-}
-
-if (argsData.PullModelPath is not null && argsData.PullNumbers is not null)
-{
-    Console.WriteLine("Loading pulls model...");
-    var pullContext = new MLContext();
-    var pullModel = pullContext.Model.Load(argsData.PullModelPath, out _);
-    var pullPredictor = pullContext.Model.CreatePredictionEngine(pullModel);
-    Console.WriteLine("Pulls prediction engine ready.");
-
-    foreach (ulong pullNumber in argsData.PullNumbers)
-    {
-        var result = await GitHubApi.GetPullRequest(argsData.GithubToken, argsData.Org, argsData.Repo, pullNumber, argsData.Retries, argsData.Verbose);
-
-        if (result is null)
-        {
-            Console.WriteLine($"[Pull Request #{pullNumber}] could not be found or downloaded. Skipped.");
-            continue;
-        }
-
-        // Match logins case-insensitively, exactly as the issue branch above does;
-        // the comparer was missing here, so differently-cased logins slipped through.
-        if (argsData.ExcludedAuthors is not null && argsData.ExcludedAuthors.Contains(result.Author.Login, StringComparer.InvariantCultureIgnoreCase))
-        {
-            Console.WriteLine($"[Pull Request #{pullNumber}] Author '{result.Author.Login}' is in excluded list. Skipped.");
-            continue;
-        }
-
-        tasks.Add(Task.Run(() => ProcessPrediction(
-            pullPredictor,
-            pullNumber,
-            new PullRequest(result),
-            argsData.LabelPredicate,
-            argsData.DefaultLabel,
-            ModelType.PullRequest,
-            argsData.Retries,
-            argsData.Test
-        )));
-
-        Console.WriteLine($"[Pull Request #{pullNumber}] Queued for prediction.");
-    }
-}
-
-// Wait for every queued prediction. Failures are reported per task below, so
-// the exception surfaced by the await is deliberately ignored here.
-try
-{
-    await Task.WhenAll(tasks);
-}
-catch (Exception) { }
-
-foreach (var task in tasks)
-{
-    // Reading Result from a faulted or canceled task rethrows. Previously that
-    // happened on the combined task, so one failure discarded every result.
-    if (!task.IsCompletedSuccessfully)
-    {
-        Console.WriteLine($"""
-            [Prediction task FAILURE]
-            {task.Exception?.GetBaseException().Message ?? "The task was canceled."}
-
-            """);
-        continue;
-    }
-
-    var prediction = task.Result;
-
-    Console.WriteLine($"""
-        [{prediction.Type} #{prediction.Number}{(prediction.Success ? "" : " FAILURE")}]
-        {string.Join("\n ", prediction.Output)}
-
-        """);
-}
-
-// Decides which label action to take for one issue or pull request and,
-// unless `test` is set, applies it through GitHubApi.
-//
-// Returns (type, number, success, output lines). `success` is false only when
-// an AddLabel / RemoveLabel call reported an error.
-//
-// Flow:
-//   1. If not all labels were loaded, take no action.
-//   2. If a label matching labelPredicate is already applied, only remove the
-//      default label (when present).
-//   3. Otherwise predict; if the best matching label meets the threshold, add it
-//      and remove the default label (when present).
-//   4. Otherwise apply the default label, if one is configured and not yet applied.
-//
-// The org, repo, token and threshold are read from the enclosing argsData.
-// Retry delays now always come from the `retries` parameter; two of the four
-// API calls previously bypassed it and read argsData.Retries directly.
-async Task<(ModelType, ulong, bool, string[])> ProcessPrediction(PredictionEngine predictor, ulong number, T issueOrPull, Func labelPredicate, string? defaultLabel, ModelType type, int[] retries, bool test) where T : Issue
-{
-    List output = new();
-    string? error = null;
-
-    if (issueOrPull.HasMoreLabels)
-    {
-        output.Add($"[{type} #{number}] No action taken. Too many labels applied already; cannot be sure no applicable label is already applied.");
-        return (type, number, true, output.ToArray());
-    }
-
-    var applicableLabel = issueOrPull.Labels?.FirstOrDefault(labelPredicate);
-
-    bool hasDefaultLabel =
-        (defaultLabel is not null) &&
-        (issueOrPull.Labels?.Any(l => l.Equals(defaultLabel, StringComparison.OrdinalIgnoreCase)) ?? false);
-
-    if (applicableLabel is not null)
-    {
-        output.Add($"Applicable label '{applicableLabel}' already exists.");
-
-        if (hasDefaultLabel && defaultLabel is not null)
-        {
-            if (!test)
-            {
-                error = await GitHubApi.RemoveLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, defaultLabel, retries);
-            }
-
-            output.Add(error ?? $"Removed default label '{defaultLabel}'.");
-        }
-
-        return (type, number, error is null, output.ToArray());
-    }
-
-    var prediction = predictor.Predict(issueOrPull);
-
-    if (prediction.Score is null || prediction.Score.Length == 0)
-    {
-        output.Add("No prediction was made.");
-        return (type, number, true, output.ToArray());
-    }
-
-    // The score vector's slot names are the label names, in the same order as the scores.
-    VBuffer> labels = default;
-    predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels);
-
-    var predictions = prediction.Score
-        .Select((score, index) => new
-        {
-            Score = score,
-            Label = labels.GetItemOrDefault(index).ToString()
-        })
-        // Ensure predicted labels match the expected predicate
-        .Where(prediction => labelPredicate(prediction.Label))
-        // Capture the top 3 for including in the output
-        .OrderByDescending(p => p.Score)
-        .Take(3)
-        // Materialize once: the list is read twice below.
-        .ToList();
-
-    output.Add("Label predictions:");
-    output.AddRange(predictions.Select(p => $" '{p.Label}' - Score: {p.Score}"));
-
-    // The list is sorted by descending score, so the first match is the best one.
-    var bestScore = predictions.FirstOrDefault(p => p.Score >= argsData.Threshold);
-    output.Add(bestScore is not null ?
-        $"Label '{bestScore.Label}' meets threshold of {argsData.Threshold}." :
-        $"No label meets the threshold of {argsData.Threshold}.");
-
-    if (bestScore is not null)
-    {
-        if (!test)
-        {
-            error = await GitHubApi.AddLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, bestScore.Label, retries);
-        }
-
-        output.Add(error ?? $"Added label '{bestScore.Label}'");
-
-        // Keep the default label in place when the predicted label could not be added.
-        if (error is not null)
-        {
-            return (type, number, false, output.ToArray());
-        }
-
-        if (hasDefaultLabel && defaultLabel is not null)
-        {
-            if (!test)
-            {
-                error = await GitHubApi.RemoveLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, defaultLabel, retries);
-            }
-
-            output.Add(error ?? $"Removed default label '{defaultLabel}'");
-        }
-
-        return (type, number, error is null, output.ToArray());
-    }
-
-    if (defaultLabel is not null)
-    {
-        if (hasDefaultLabel)
-        {
-            output.Add($"Default label '{defaultLabel}' is already applied.");
-        }
-        else
-        {
-            if (!test)
-            {
-                error = await GitHubApi.AddLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, defaultLabel, retries);
-            }
-
-            output.Add(error ?? $"Applied default label '{defaultLabel}'.");
-        }
-    }
-
-    return (type, number, error is null, output.ToArray());
-}
diff --git a/src/Tester/Args.cs b/src/Tester/Args.cs
deleted file mode 100644
index c402b55..0000000
--- a/src/Tester/Args.cs
+++ /dev/null
@@ -1,184 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-
-public struct Args
-{
- public string? Org { get; set; }
- public List Repos { get; set; }
- public string? GithubToken { get; set; }
- public string? IssueDataPath { get; set; }
- public string? IssueModelPath { get; set; }
- public int? IssueLimit { get; set; }
- public string? PullDataPath { get; set; }
- public string? PullModelPath { get; set; }
- public int? PullLimit { get; set; }
- public float? Threshold { get; set; }
- public Predicate LabelPredicate { get; set; }
- public string[]? ExcludedAuthors { get; set; }
-
- // Prints the usage text to the console, appending the optional message to
- // the error line, and then terminates the process with exit code 1.
- // Because of the Environment.Exit call this method does not return to its caller.
- static void ShowUsage(string? message = null)
- {
- // For reference, Parse calls this method without a message when its final
- // validation finds the configuration invalid, i.e. when any of these is true:
- // • The LabelPredicate is null.
- // • Both IssueDataPath and PullDataPath are null, and either Org, Repos, or GithubToken is null.
- // • Both IssueModelPath and PullModelPath are null.
-
- // The running executable's process name is shown in the usage line.
- string executableName = Process.GetCurrentProcess().ProcessName;
-
- Console.WriteLine($$"""
- ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
-
- Usage:
- {{executableName}} --repo {org/repo1}[,{org/repo2},...] --label-prefix {label-prefix} [options]
-
- Required arguments:
- --repo The GitHub repositories in format org/repo (comma separated for multiple).
- --label-prefix Prefix for label predictions. Must end with a character other than a letter or number.
-
- Required for testing the issue model:
- --issue-data Path to existing issue data file (TSV file).
- --issue-model Path to existing issue prediction model file (ZIP file).
-
- Required for testing the pull request model:
- --pull-data Path to existing pull request data file (TSV file).
- --pull-model Path to existing pull request prediction model file (ZIP file).
-
- Optional arguments:
- --threshold Minimum prediction confidence threshold. Range (0,1]. Default 0.4.
- --issue-limit Maximum number of issues to download. Default: No limit.
- --pull-limit Maximum number of pull requests to download. Default: No limit.
- --excluded-authors Comma-separated list of authors to exclude.
- --token GitHub access token. Default: read from GITHUB_TOKEN env var.
- """);
-
-
- Environment.Exit(1);
- }
-
- // Parses the Tester's command-line arguments into an Args value.
- //
- // Starts from the default Threshold of 0.4, consumes the arguments left to
- // right, then validates the combination. On any problem ShowUsage is called;
- // the `return null` that follows each call is only reached if ShowUsage returns.
- //
- // When --token is not given, the token is taken from the GITHUB_TOKEN
- // environment variable, as the usage text promises. Previously the variable
- // was never read, so download mode without --token always failed validation.
- public static Args? Parse(string[] args)
- {
-     Args argsData = new()
-     {
-         Threshold = 0.4f
-     };
-
-     // Each option dequeues its own value via ArgUtils.
-     Queue arguments = new(args);
-     while (arguments.Count > 0)
-     {
-         string argument = arguments.Dequeue();
-
-         switch (argument)
-         {
-             case "--token":
-                 if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--token", out string? token))
-                 {
-                     return null;
-                 }
-                 argsData.GithubToken = token;
-                 break;
-
-             case "--repo":
-                 if (!ArgUtils.TryDequeueRepoList(arguments, ShowUsage, "--repo", out string? org, out List? repos))
-                 {
-                     return null;
-                 }
-                 argsData.Org = org;
-                 argsData.Repos = repos;
-                 break;
-
-             case "--issue-data":
-                 if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-data", out string? issueDataPath))
-                 {
-                     return null;
-                 }
-                 argsData.IssueDataPath = issueDataPath;
-                 break;
-
-             case "--issue-model":
-                 if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-model", out string? issueModelPath))
-                 {
-                     return null;
-                 }
-                 argsData.IssueModelPath = issueModelPath;
-                 break;
-
-             case "--issue-limit":
-                 if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--issue-limit", out int? issueLimit))
-                 {
-                     return null;
-                 }
-                 argsData.IssueLimit = issueLimit;
-                 break;
-
-             case "--pull-data":
-                 if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-data", out string? pullDataPath))
-                 {
-                     return null;
-                 }
-                 argsData.PullDataPath = pullDataPath;
-                 break;
-
-             case "--pull-model":
-                 if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-model", out string? pullModelPath))
-                 {
-                     return null;
-                 }
-                 argsData.PullModelPath = pullModelPath;
-                 break;
-
-             case "--pull-limit":
-                 if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--pull-limit", out int? pullLimit))
-                 {
-                     return null;
-                 }
-                 argsData.PullLimit = pullLimit;
-                 break;
-
-             case "--label-prefix":
-                 if (!ArgUtils.TryDequeueLabelPrefix(arguments, ShowUsage, "--label-prefix", out Func? labelPredicate))
-                 {
-                     return null;
-                 }
-                 argsData.LabelPredicate = new(labelPredicate);
-                 break;
-
-             case "--threshold":
-                 if (!ArgUtils.TryDequeueFloat(arguments, ShowUsage, "--threshold", out float? threshold))
-                 {
-                     return null;
-                 }
-                 argsData.Threshold = threshold.Value;
-                 break;
-
-             case "--excluded-authors":
-                 if (!ArgUtils.TryDequeueStringArray(arguments, ShowUsage, "--excluded-authors", out string[]? excludedAuthors))
-                 {
-                     return null;
-                 }
-                 argsData.ExcludedAuthors = excludedAuthors;
-                 break;
-
-             default:
-                 ShowUsage($"Unrecognized argument: {argument}");
-                 return null;
-         }
-     }
-
-     // Documented default: use GITHUB_TOKEN when --token was not supplied.
-     // The token stays null if the variable is unset or empty; that is only an
-     // error in download mode, which the validation below enforces.
-     if (argsData.GithubToken is null)
-     {
-         string? environmentToken = Environment.GetEnvironmentVariable("GITHUB_TOKEN");
-
-         if (!string.IsNullOrEmpty(environmentToken))
-         {
-             argsData.GithubToken = environmentToken;
-         }
-     }
-
-     // The configuration is invalid when:
-     //  - no label prefix was given, or
-     //  - no data file was given (download mode) and the org, repos or token is missing, or
-     //  - neither model path was given.
-     // Repos is null-checked before Count so a missing list cannot throw.
-     if (argsData.LabelPredicate is null ||
-         (
-             argsData.IssueDataPath is null && argsData.PullDataPath is null &&
-             (argsData.Org is null || argsData.Repos is null || argsData.Repos.Count == 0 || argsData.GithubToken is null)
-         ) ||
-         (argsData.IssueModelPath is null && argsData.PullModelPath is null)
-     )
-     {
-         ShowUsage();
-         return null;
-     }
-
-     return argsData;
- }
-}
diff --git a/src/Tester/Tester.cs b/src/Tester/Tester.cs
deleted file mode 100644
index 71717b1..0000000
--- a/src/Tester/Tester.cs
+++ /dev/null
@@ -1,223 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using Microsoft.ML;
-using Microsoft.ML.Data;
-using GitHubClient;
-
-var config = Args.Parse(args);
-if (config is not Args argsData) return;
-
-List tasks = [];
-
-if (argsData.IssueModelPath is not null)
-{
- tasks.Add(Task.Run(() => TestIssues()));
-}
-
-if (argsData.PullModelPath is not null)
-{
- tasks.Add(Task.Run(() => TestPullRequests()));
-}
-
-await Task.WhenAll(tasks);
-
-async IAsyncEnumerable ReadData(string dataPath, Func readLine, int? rowLimit)
-{
- var allLines = File.ReadLinesAsync(dataPath);
- ulong rowNum = 0;
- rowLimit ??= 50000;
-
- await foreach (var line in allLines)
- {
- // Skip the header row
- if (rowNum == 0)
- {
- rowNum++;
- continue;
- }
-
- string[] columns = line.Split('\t');
- yield return readLine(rowNum, columns);
-
- if ((int)rowNum++ >= rowLimit)
- {
- break;
- }
- }
-}
-
-async IAsyncEnumerable DownloadIssues(string githubToken, string org, string repo)
-{
- await foreach (var result in GitHubApi.DownloadIssues(githubToken, org, repo, argsData.LabelPredicate, argsData.IssueLimit, 100, 1000, [30, 30, 30], argsData.ExcludedAuthors ?? []))
- {
- yield return new(result.Issue, argsData.LabelPredicate);
- }
-}
-
-async Task TestIssues()
-{
- if (argsData.IssueDataPath is not null)
- {
- var issueList = ReadData(argsData.IssueDataPath, (num, columns) => new Issue()
- {
- Number = num,
- Label = columns[0],
- Title = columns[1],
- Body = columns[2]
- }, argsData.IssueLimit);
-
- await TestPredictions(issueList, argsData.IssueModelPath);
- return;
- }
-
- if (argsData.GithubToken is not null && argsData.Org is not null && argsData.Repos is not null)
- {
- foreach (var repo in argsData.Repos)
- {
- Console.WriteLine($"Downloading and testing issues from {argsData.Org}/{repo}.");
-
- var issueList = DownloadIssues(argsData.GithubToken, argsData.Org, repo);
- await TestPredictions(issueList, argsData.IssueModelPath);
- }
- }
-}
-
-async IAsyncEnumerable DownloadPullRequests(string githubToken, string org, string repo)
-{
- await foreach (var result in GitHubApi.DownloadPullRequests(githubToken, org, repo, argsData.LabelPredicate, argsData.PullLimit, 25, 4000, [30, 30, 30], argsData.ExcludedAuthors ?? []))
- {
- yield return new(result.PullRequest, argsData.LabelPredicate);
- }
-}
-
-async Task TestPullRequests()
-{
- if (argsData.PullDataPath is not null)
- {
- var pullList = ReadData(argsData.PullDataPath, (num, columns) => new PullRequest()
- {
- Number = num,
- Label = columns[0],
- Title = columns[1],
- Body = columns[2],
- FileNames = columns[3],
- FolderNames = columns[4]
- }, argsData.PullLimit);
-
- await TestPredictions(pullList, argsData.PullModelPath);
- return;
- }
-
- if (argsData.GithubToken is not null && argsData.Org is not null && argsData.Repos is not null)
- {
- foreach (var repo in argsData.Repos)
- {
- Console.WriteLine($"Downloading and testing pull requests from {argsData.Org}/{repo}.");
-
- var pullList = DownloadPullRequests(argsData.GithubToken, argsData.Org, repo);
- await TestPredictions(pullList, argsData.PullModelPath);
- }
- }
-}
-
-static string GetStats(List values)
-{
- if (values.Count == 0)
- {
- return "N/A";
- }
-
- float min = values.Min();
- float average = values.Average();
- float max = values.Max();
- double deviation = Math.Sqrt(values.Average(v => Math.Pow(v - average, 2)));
-
- return $"{min} | {average} | {max} | {deviation}";
-}
-
-async Task TestPredictions(IAsyncEnumerable results, string modelPath) where T : Issue
-{
- var context = new MLContext();
- var model = context.Model.Load(modelPath, out _);
- var predictor = context.Model.CreatePredictionEngine(model);
- var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
-
- int matches = 0;
- int mismatches = 0;
- int noPrediction = 0;
- int noExisting = 0;
-
- List matchScores = [];
- List mismatchScores = [];
-
- await foreach (var result in results)
- {
- (string? predictedLabel, float? score) = GetPrediction(
- predictor,
- result,
- argsData.Threshold);
-
- if (predictedLabel is null && result.Label is not null)
- {
- noPrediction++;
- }
- else if (predictedLabel is not null && result.Label is null)
- {
- noExisting++;
- }
- else if (predictedLabel?.ToLower() == result.Label?.ToLower())
- {
- matches++;
-
- if (score.HasValue)
- {
- matchScores.Add(score.Value);
- }
- }
- else
- {
- mismatches++;
-
- if (score.HasValue)
- {
- mismatchScores.Add(score.Value);
- }
- }
-
- float total = matches + mismatches + noPrediction + noExisting;
- Console.WriteLine($"{itemType} #{result.Number} - Predicted: {(predictedLabel ?? "")} - Existing: {(result.Label ?? "")}");
- Console.WriteLine($" Matches : {matches} ({(float)matches / total:P2}) - Min | Avg | Max | StdDev: {GetStats(matchScores)}");
- Console.WriteLine($" Mismatches : {mismatches} ({(float)mismatches / total:P2}) - Min | Avg | Max | StdDev: {GetStats(mismatchScores)}");
- Console.WriteLine($" No Prediction: {noPrediction} ({(float)noPrediction / total:P2})");
- Console.WriteLine($" No Existing : {noExisting} ({(float)noExisting / total:P2})");
- }
-
- Console.WriteLine("Test Complete");
-}
-
-(string? PredictedLabel, float? PredictionScore) GetPrediction(PredictionEngine predictor, T issueOrPull, float? threshold) where T : Issue
-{
- var prediction = predictor.Predict(issueOrPull);
- var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
-
- if (prediction.Score is null || prediction.Score.Length == 0)
- {
- Console.WriteLine($"No prediction was made for {itemType} #{issueOrPull.Number}.");
- return (null, null);
- }
-
- VBuffer> labels = default;
- predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels);
-
- var bestScore = prediction.Score
- .Select((score, index) => new
- {
- Score = score,
- Label = labels.GetItemOrDefault(index).ToString()
- })
- .OrderByDescending(p => p.Score)
- .FirstOrDefault(p => threshold is null || p.Score >= threshold);
-
- return bestScore is not null ? (bestScore.Label, bestScore.Score) : ((string?)null, (float?)null);
-}
diff --git a/src/Trainer/Args.cs b/src/Trainer/Args.cs
deleted file mode 100644
index 90d1d53..0000000
--- a/src/Trainer/Args.cs
+++ /dev/null
@@ -1,97 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using System.Diagnostics;
-
-public struct Args
-{
- public string? IssueDataPath { get; set; }
- public string? IssueModelPath { get; set; }
- public string? PullDataPath { get; set; }
- public string? PullModelPath { get; set; }
-
- static void ShowUsage(string? message = null)
- {
- // If you provide a path for issue data, you must also provide a path for the issue model, and vice versa.
- // If you provide a path for pull data, you must also provide a path for the pull model, and vice versa.
- // At least one pair of paths(either issue or pull) must be provided.
- string executableName = Process.GetCurrentProcess().ProcessName;
-
- Console.WriteLine($$"""
- ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}}
-
- Usage:
- {{executableName}} [options]
-
- Required for training the issue model:
- --issue-data Path to existing issue data file (TSV file).
- --issue-model Path for issue prediction model file to create (ZIP file).
-
- Required for training the pull request model:
- --pull-data Path to existing pull request data file (TSV file).
- --pull-model Path for pull request prediction model file to create (ZIP file).
- """);
-
- Environment.Exit(1);
- }
-
- public static Args? Parse(string[] args)
- {
- Args argsData = new();
-
- Queue arguments = new(args);
- while (arguments.Count > 0)
- {
- string argument = arguments.Dequeue();
-
- switch (argument)
- {
- case "--issue-data":
- if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-data", out string? issueDataPath))
- {
- return null;
- }
- argsData.IssueDataPath = issueDataPath;
- break;
-
- case "--issue-model":
- if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-model", out string? issueModelPath))
- {
- return null;
- }
- argsData.IssueModelPath = issueModelPath;
- break;
-
- case "--pull-data":
- if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-data", out string? pullDataPath))
- {
- return null;
- }
- argsData.PullDataPath = pullDataPath;
- break;
-
- case "--pull-model":
- if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-model", out string? pullModelPath))
- {
- return null;
- }
- argsData.PullModelPath = pullModelPath;
- break;
-
- default:
- ShowUsage($"Unrecognized argument: {argument}");
- return null;
- }
- }
-
- if ((argsData.IssueDataPath is null != argsData.IssueModelPath is null) ||
- (argsData.PullDataPath is null != argsData.PullModelPath is null) ||
- (argsData.IssueModelPath is null && argsData.PullModelPath is null))
- {
- ShowUsage();
- return null;
- }
-
- return argsData;
- }
-}
diff --git a/src/Trainer/Trainer.cs b/src/Trainer/Trainer.cs
deleted file mode 100644
index f6b780f..0000000
--- a/src/Trainer/Trainer.cs
+++ /dev/null
@@ -1,97 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-using static DataFileUtils;
-using Microsoft.ML;
-using Microsoft.ML.Data;
-using Microsoft.ML.Transforms.Text;
-
-var config = Args.Parse(args);
-if (config is not Args argsData)
-{
- return;
-}
-
-if (argsData.IssueDataPath is not null && argsData.IssueModelPath is not null)
-{
- CreateModel(argsData.IssueDataPath, argsData.IssueModelPath, ModelType.Issue);
-}
-
-if (argsData.PullDataPath is not null && argsData.PullModelPath is not null)
-{
- CreateModel(argsData.PullDataPath, argsData.PullModelPath, ModelType.PullRequest);
-}
-
-static void CreateModel(string dataPath, string modelPath, ModelType type)
-{
- Console.WriteLine("Loading data into train/test sets...");
- MLContext mlContext = new();
-
- TextLoader.Column[] columns = type == ModelType.Issue ? [
- new("Label", DataKind.String, 0),
- new("Title", DataKind.String, 1),
- new("Body", DataKind.String, 2),
- ] : [
- new("Label", DataKind.String, 0),
- new("Title", DataKind.String, 1),
- new("Body", DataKind.String, 2),
- new("FileNames", DataKind.String, 3),
- new("FolderNames", DataKind.String, 4)
- ];
-
- TextLoader.Options textLoaderOptions = new()
- {
- AllowQuoting = false,
- AllowSparse = false,
- EscapeChar = '"',
- HasHeader = true,
- ReadMultilines = false,
- Separators = ['\t'],
- TrimWhitespace = true,
- UseThreads = true,
- Columns = columns
- };
-
- var loader = mlContext.Data.CreateTextLoader(textLoaderOptions);
- var data = loader.Load(dataPath);
- var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
-
- Console.WriteLine("Building pipeline...");
-
- var xf = mlContext.Transforms;
- var pipeline = xf.Conversion.MapValueToKey(inputColumnName: "Label", outputColumnName: "LabelKey")
- .Append(xf.Text.FeaturizeText(
- "Features",
- new TextFeaturizingEstimator.Options(),
- columns.Select(c => c.Name).ToArray()))
- .AppendCacheCheckpoint(mlContext)
- .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("LabelKey"))
- .Append(xf.Conversion.MapKeyToValue("PredictedLabel"));
-
- Console.WriteLine("Fitting the model with the training data set...");
- var trainedModel = pipeline.Fit(split.TrainSet);
- var testModel = trainedModel.Transform(split.TestSet);
-
- Console.WriteLine("Evaluating against the test set...");
- var metrics = mlContext.MulticlassClassification.Evaluate(testModel, labelColumnName: "LabelKey");
-
- Console.WriteLine($"************************************************************");
- Console.WriteLine($"MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
- Console.WriteLine($"MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
- Console.WriteLine($"LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better");
-
- if (metrics.PerClassLogLoss.Count() > 0)
- Console.WriteLine($"LogLoss for class 1 = {metrics.PerClassLogLoss[0]:0.####}, the closer to 0, the better");
-
- if (metrics.PerClassLogLoss.Count() > 1)
- Console.WriteLine($"LogLoss for class 2 = {metrics.PerClassLogLoss[1]:0.####}, the closer to 0, the better");
-
- if (metrics.PerClassLogLoss.Count() > 2)
- Console.WriteLine($"LogLoss for class 3 = {metrics.PerClassLogLoss[2]:0.####}, the closer to 0, the better");
-
- Console.WriteLine($"************************************************************");
-
- Console.WriteLine($"Saving model to '{modelPath}'...");
- EnsureOutputDirectory(modelPath);
- mlContext.Model.Save(trainedModel, split.TrainSet.Schema, modelPath);
-}
diff --git a/src/Trainer/Trainer.csproj b/src/Trainer/Trainer.csproj
deleted file mode 100644
index 5739801..0000000
--- a/src/Trainer/Trainer.csproj
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
- Exe
- enable
- enable
-
-
-
-
-
-
-
-
-
-
-
diff --git a/test/action.yml b/test/action.yml
new file mode 100644
index 0000000..ca32c69
--- /dev/null
+++ b/test/action.yml
@@ -0,0 +1,93 @@
+# Composite action: restores a previously trained model from the Actions cache and
+# runs the Tester project against it for the requested item type.
+name: "Test Model"
+description: "Test predictions against the Issues and/or Pull Requests model by downloading data and comparing predictions against existing labels."
+
+branding:
+  color: "purple"
+  icon: "tag"
+
+inputs:
+  type:
+    description: "The model to test. Must be either 'issues' or 'pulls'."
+    required: true
+  label_prefix:
+    description: "The label prefix to use for model training. Must end with a non-alphanumeric character."
+    required: true
+  threshold:
+    description: "The minimum confidence score for a label prediction. Must be a number between 0.00 and 1.00. The recommended value is 0.40."
+    required: true
+  excluded_authors:
+    description: "A comma-separated list of authors to exclude."
+  limit:
+    description: "The maximum number of items to download. The newest items are downloaded."
+  page_size:
+    description: "The number of items per page in GitHub API requests. Defaults to 100 for Issues and 25 for Pull Requests."
+  page_limit:
+    description: "The maximum number of pages to retrieve. Defaults to 1000 for Issues and 4000 for Pull Requests."
+  retries:
+    description: "A comma-separated list of retry delays in seconds. Defaults to '30,30,300,300,3000,3000'."
+  cache_key:
+    description: "The cache key suffix to use for saving data. Defaults to 'staged'."
+    default: staged
+  repository:
+    description: "The org/repo to download data from. Defaults to the current repository."
+  verbose:
+    description: "Enable verbose output."
+
+runs:
+  using: "composite"
+  steps:
+    # Reject any 'type' other than 'issues' or 'pulls', reporting the problem both as
+    # an error annotation and in the step summary.
+    - name: "Validate Inputs"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'."
+          echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY
+          echo "\`type\` must be either 'issues' or 'pulls'." >> $GITHUB_STEP_SUMMARY
+          exit 1
+        fi
+
+    # NOTE(review): the repository and ref are passed through step-level env instead of
+    # being referenced directly under 'with' - presumably deliberate; confirm before simplifying.
+    - name: "Clone the ${{ github.action_repository }} repository with ref '${{ github.action_ref }}'"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      env:
+        ISSUE_LABELER_REPO: ${{ github.action_repository }}
+        ISSUE_LABELER_REF: ${{ github.action_ref }}
+      with:
+        repository: ${{ env.ISSUE_LABELER_REPO }}
+        ref: ${{ env.ISSUE_LABELER_REF }}
+
+    # The model must already be cached under this key: a cache miss fails the step.
+    - name: "Restore model from cache"
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: "labeler-cache/${{ inputs.type }}-model.zip"
+        key: "issue-labeler/model/${{ inputs.type }}/${{ inputs.cache_key || 'staged' }}"
+        fail-on-cache-miss: true
+
+    - name: "Set up the .NET SDK"
+      uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+      with:
+        dotnet-version: "9.0.x"
+
+    # Optional inputs add their argument only when a value was supplied.
+    # Action inputs are strings, so the literal 'false' is non-empty and would
+    # otherwise be truthy; it is excluded explicitly for 'verbose'.
+    - name: "Run Tester"
+      shell: bash
+      run: |
+        dotnet run -c Release --project IssueLabeler/src/Tester -- \
+          ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \
+          ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
+          ${{ format('--threshold {0}', inputs.threshold) }} \
+          ${{ format('--{0}-model "labeler-cache/{0}-model.zip"', inputs.type) || '' }} \
+          ${{ inputs.excluded_authors && format('--excluded-authors "{0}"', inputs.excluded_authors) || '' }} \
+          ${{ inputs.limit && format('--{0}-limit {1}', inputs.type, inputs.limit) || '' }} \
+          ${{ inputs.page_size && format('--page-size {0}', inputs.page_size) || '' }} \
+          ${{ inputs.page_limit && format('--page-limit {0}', inputs.page_limit) || '' }} \
+          ${{ inputs.retries && format('--retries {0}', inputs.retries) || '' }} \
+          ${{ inputs.verbose && inputs.verbose != 'false' && '--verbose' || '' }}
diff --git a/tests/Shared.Tests/ArgUtilsTests.cs b/tests/Shared.Tests/ArgUtilsTests.cs
deleted file mode 100644
index 91a3299..0000000
--- a/tests/Shared.Tests/ArgUtilsTests.cs
+++ /dev/null
@@ -1,239 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text.RegularExpressions;
-using NSubstitute;
-
-namespace Shared.Tests;
-
-[TestClass]
-public class ArgUtilsTests
-{
- [TestMethod]
- public void TryDequeueString_ShouldReturnTrue_WhenValueIsPresent()
- {
- var args = new Queue(["value"]);
- var showUsage = Substitute.For>();
- string? argValue;
-
- var result = ArgUtils.TryDequeueString(args, showUsage, "test-arg-name", out argValue);
-
- Assert.IsTrue(result);
- Assert.AreEqual("value", argValue);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueString_ShouldReturnFalse_WhenValueIsNull()
- {
- var args = new Queue([""]);
- var showUsage = Substitute.For>();
- string? argValue;
-
- var result = ArgUtils.TryDequeueString(args, showUsage, "test-arg-name", out argValue);
-
- Assert.IsFalse(result);
- Assert.IsNull(argValue);
- showUsage.Received(1).Invoke("Argument 'test-arg-name' has an empty value.");
- }
-
- [TestMethod]
- public void TryDequeueRepo_ShouldReturnTrue_WhenValueIsValid()
- {
- var args = new Queue(["org/repo"]);
- var showUsage = Substitute.For>();
- string? org;
- string? repo;
-
- var result = ArgUtils.TryDequeueRepo(args, showUsage, "test-arg-name", out org, out repo);
-
- Assert.IsTrue(result);
- Assert.AreEqual("org", org);
- Assert.AreEqual("repo", repo);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueRepo_ShouldReturnFalse_WhenValueIsInvalid()
- {
- var args = new Queue(["invalid"]);
- var showUsage = Substitute.For>();
- string? org;
- string? repo;
-
- var result = ArgUtils.TryDequeueRepo(args, showUsage, "test-arg-name", out org, out repo);
-
- Assert.IsFalse(result);
- Assert.IsNull(org);
- Assert.IsNull(repo);
- showUsage.Received(1).Invoke("Argument 'test-arg-name' has an empty value or is not in the format of '{org}/{repo}'.");
- }
-
- [TestMethod]
- public void TryDequeueRepoList_ShouldReturnTrue_WhenValuesAreValid()
- {
- var args = new Queue(["org/repo1,org/repo2"]);
- var showUsage = Substitute.For>();
- string? org;
- List? repos;
-
- var result = ArgUtils.TryDequeueRepoList(args, showUsage, "test-arg-name", out org, out repos);
-
- Assert.IsTrue(result);
- Assert.AreEqual("org", org);
- CollectionAssert.AreEqual(new List { "repo1", "repo2" }, repos);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueRepoList_ShouldReturnFalse_WhenValuesAreInvalid()
- {
- var args = new Queue(["invalid"]);
- var showUsage = Substitute.For>();
- string? org;
- List? repos;
-
- var result = ArgUtils.TryDequeueRepoList(args, showUsage, "test-arg-name", out org, out repos);
-
- Assert.IsFalse(result);
- Assert.IsNull(org);
- Assert.IsNull(repos);
- showUsage.Received(1).Invoke("Argument '--repo' is not in the format of '{org}/{repo}': invalid");
- }
-
- [TestMethod]
- public void TryDequeueLabelPrefix_ShouldReturnTrue_WhenValueIsValid()
- {
- var args = new Queue(["area-"]);
- var showUsage = Substitute.For>();
- Func? labelPredicate;
-
- var result = ArgUtils.TryDequeueLabelPrefix(args, showUsage, "test-arg-name", out labelPredicate);
-
- Assert.IsTrue(result);
- Assert.IsNotNull(labelPredicate);
- Assert.IsTrue(labelPredicate("area-label"));
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueLabelPrefix_ShouldReturnFalse_WhenValueIsInvalid()
- {
- var args = new Queue(["area"]);
- var showUsage = Substitute.For>();
- Func? labelPredicate;
-
- var result = ArgUtils.TryDequeueLabelPrefix(args, showUsage, "test-arg-name", out labelPredicate);
-
- Assert.IsFalse(result);
- Assert.IsNull(labelPredicate);
- showUsage.Received(1).Invoke(Arg.Is(s => s.Contains("Argument 'test-arg-name' must end in something other than a letter or number.")));
- }
-
- [TestMethod]
- public void TryDequeuePath_ShouldReturnTrue_WhenValueIsValid()
- {
- var args = new Queue(new[] { "/mnt/c/path/to/file" });
- var showUsage = Substitute.For>();
- string? path;
-
- var result = ArgUtils.TryDequeuePath(args, showUsage, "test-arg-name", out path);
-
- Assert.IsTrue(result);
- Assert.AreEqual("/mnt/c/path/to/file", path);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeuePath_ShouldReturnFalse_WhenValueIsInvalid()
- {
- var args = new Queue([""]);
- var showUsage = Substitute.For>();
- string? path;
-
- var result = ArgUtils.TryDequeuePath(args, showUsage, "test-arg-name", out path);
-
- Assert.IsFalse(result);
- Assert.IsNull(path);
- showUsage.Received(1).Invoke("Argument 'test-arg-name' has an empty value.");
- }
-
- [TestMethod]
- public void TryDequeueStringArray_ValidInput_ReturnsTrue()
- {
- var args = new Queue(["value1,value2,value3"]);
- var showUsage = Substitute.For>();
- bool result = ArgUtils.TryDequeueStringArray(args, showUsage, "test-arg-name", out string[]? argValues);
-
- Assert.IsTrue(result);
- Assert.IsNotNull(argValues);
- Assert.AreEqual(3, argValues.Length);
- CollectionAssert.Contains(argValues, "value1");
- CollectionAssert.Contains(argValues, "value2");
- CollectionAssert.Contains(argValues, "value3");
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueInt_ValidInput_ReturnsTrue()
- {
- var args = new Queue(["123"]);
- var showUsage = Substitute.For>();
- bool result = ArgUtils.TryDequeueInt(args, showUsage, "test-arg-name", out int? argValue);
-
- Assert.IsTrue(result);
- Assert.IsNotNull(argValue);
- Assert.AreEqual(123, argValue);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueIntArray_ValidInput_ReturnsTrue()
- {
- var args = new Queue(["1,2,3"]);
- var showUsage = Substitute.For>();
- bool result = ArgUtils.TryDequeueIntArray(args, showUsage, "test-arg-name", out int[]? argValues);
-
- Assert.IsTrue(result);
- Assert.IsNotNull(argValues);
- Assert.AreEqual(3, argValues.Length);
- CollectionAssert.Contains(argValues, 1);
- CollectionAssert.Contains(argValues, 2);
- CollectionAssert.Contains(argValues, 3);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueFloat_ValidInput_ReturnsTrue()
- {
- var args = new Queue(["123.45"]);
- var showUsage = Substitute.For>();
- bool result = ArgUtils.TryDequeueFloat(args, showUsage, "test-arg-name", out float? argValue);
-
- Assert.IsTrue(result);
- Assert.IsNotNull(argValue);
- Assert.AreEqual(123.45f, argValue);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-
- [TestMethod]
- public void TryDequeueNumberRanges_ValidInput_ReturnsTrue()
- {
- var args = new Queue(["1-3,5,7-9"]);
- var showUsage = Substitute.For>();
- bool result = ArgUtils.TryDequeueNumberRanges(args, showUsage, "test-arg-name", out List? argValues);
-
- Assert.IsTrue(result);
- Assert.IsNotNull(argValues);
- Assert.AreEqual(7, argValues.Count);
- CollectionAssert.Contains(argValues, (ulong)1);
- CollectionAssert.Contains(argValues, (ulong)2);
- CollectionAssert.Contains(argValues, (ulong)3);
- CollectionAssert.Contains(argValues, (ulong)5);
- CollectionAssert.Contains(argValues, (ulong)7);
- CollectionAssert.Contains(argValues, (ulong)8);
- CollectionAssert.Contains(argValues, (ulong)9);
- showUsage.DidNotReceive().Invoke(Arg.Any());
- }
-}
diff --git a/tests/Shared.Tests/DataFileUtilsTests.cs b/tests/Shared.Tests/DataFileUtilsTests.cs
deleted file mode 100644
index e07a777..0000000
--- a/tests/Shared.Tests/DataFileUtilsTests.cs
+++ /dev/null
@@ -1,73 +0,0 @@
-using System;
-using System.IO;
-using Microsoft.VisualStudio.TestTools.UnitTesting;
-using NSubstitute;
-
-namespace Shared.Tests;
-
-[TestClass]
-public class DataFileUtilsTests
-{
- [TestMethod]
- public void EnsureOutputDirectory_ShouldCreateDirectory_WhenDirectoryDoesNotExist()
- {
- var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
- var outputFile = Path.Combine(tempPath, "file.txt");
-
- DataFileUtils.EnsureOutputDirectory(outputFile);
-
- Assert.IsTrue(Directory.Exists(tempPath));
-
- Directory.Delete(tempPath, true);
- }
-
- [TestMethod]
- public void SanitizeText_ShouldReplaceSpecialCharacters()
- {
- var input = "text\rwith\nspecial\tcharacters\"";
- var expected = "text with special characters`";
-
- var result = DataFileUtils.SanitizeText(input);
-
- Assert.AreEqual(expected, result);
- }
-
- [TestMethod]
- public void SanitizeTextArray_ShouldJoinSanitizedTexts()
- {
- var input = new[] { "text1", "text2\r\n", "text3\t" };
- var expected = "text1 text2 text3";
-
- var result = DataFileUtils.SanitizeTextArray(input);
-
- Assert.AreEqual(expected, result);
- }
-
- [TestMethod]
- public void FormatIssueRecord_ShouldFormatCorrectly()
- {
- var label = "bug";
- var title = "Issue title";
- var body = "Issue body";
- var expected = "bug\tIssue title\tIssue body";
-
- var result = DataFileUtils.FormatIssueRecord(label, title, body);
-
- Assert.AreEqual(expected, result);
- }
-
- [TestMethod]
- public void FormatPullRequestRecord_ShouldFormatCorrectly()
- {
- var label = "enhancement";
- var title = "PR title";
- var body = "PR body";
- var fileNames = new[] { "file1.cs", "file2.cs" };
- var folderNames = new[] { "folder1", "folder2" };
- var expected = "enhancement\tPR title\tPR body\tfile1.cs file2.cs\tfolder1 folder2";
-
- var result = DataFileUtils.FormatPullRequestRecord(label, title, body, fileNames, folderNames);
-
- Assert.AreEqual(expected, result);
- }
-}
diff --git a/tests/Shared.Tests/Shared.Tests.csproj b/tests/Shared.Tests/Shared.Tests.csproj
deleted file mode 100644
index 448119d..0000000
--- a/tests/Shared.Tests/Shared.Tests.csproj
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
- enable
- enable
-
-
-
-
-
-
-
-
-
-
-
diff --git a/train/action.yml b/train/action.yml
new file mode 100644
index 0000000..14f33c7
--- /dev/null
+++ b/train/action.yml
@@ -0,0 +1,116 @@
+# Composite action: trains the model for the requested item type from cached data
+# and saves the resulting model file to the Actions cache.
+name: "Train Model"
+description: "Train the Issues or Pull Requests model for label prediction."
+
+inputs:
+  type:
+    description: "The model to train. Must be either 'issues' or 'pulls'."
+    required: true
+  data_cache_key:
+    description: "The cache key suffix to use for the downloaded data. Defaults to 'staged'."
+    default: staged
+  model_cache_key:
+    description: "The cache key suffix to use for the trained model. Defaults to 'staged'."
+    default: staged
+
+branding:
+  color: "purple"
+  icon: "tag"
+
+runs:
+  using: "composite"
+  steps:
+    # Reject any 'type' other than 'issues' or 'pulls', reporting the problem both as
+    # an error annotation and in the step summary.
+    - name: "Validate Inputs"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'."
+          echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY
+          echo "\`type\` must be either 'issues' or 'pulls'." >> $GITHUB_STEP_SUMMARY
+          exit 1
+        fi
+
+    # Export the data/model paths and cache keys through GITHUB_ENV; later steps read
+    # them back via the env context.
+    - name: "Set Cache Variables"
+      shell: bash
+      run: |
+        echo "DATA_PATH=labeler-cache/${{ inputs.type }}-data.tsv" >> $GITHUB_ENV
+        echo "DATA_CACHE_KEY=${{ format('issue-labeler/data/{0}/{1}', inputs.type, inputs.data_cache_key) }}" >> $GITHUB_ENV
+        echo "MODEL_PATH=labeler-cache/${{ inputs.type }}-model.zip" >> $GITHUB_ENV
+        echo "MODEL_CACHE_KEY=${{ format('issue-labeler/model/{0}/{1}', inputs.type, inputs.model_cache_key) }}" >> $GITHUB_ENV
+
+    # Existence check only (lookup-only); the result is consumed by the next step.
+    - name: "Check for an existing model"
+      id: check-cache
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.MODEL_PATH }}
+        key: ${{ env.MODEL_CACHE_KEY }}
+        lookup-only: true
+
+    # Refuse to train when a model is already cached under the requested key.
+    - name: "Abort if there is an existing model with the specified cache key"
+      shell: bash
+      run: |
+        if [[ "${{ steps.check-cache.outputs.cache-hit }}" == "true" ]]; then
+          echo "::error::Cache key '${{ env.MODEL_CACHE_KEY }}' already exists. Cannot proceed with training."
+          echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY
+          echo "Cache key '${{ env.MODEL_CACHE_KEY }}' already exists. Cannot proceed with training." >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "> [!TIP]" >> $GITHUB_STEP_SUMMARY
+          echo "Use a different \`model_cache_key\` value or delete the existing cache entry from the [Action Caches](/${{ github.repository }}/actions/caches) page and run the workflow again." >> $GITHUB_STEP_SUMMARY
+          exit 1
+        fi
+
+    # NOTE(review): the repository and ref are passed through step-level env instead of
+    # being referenced directly under 'with' - presumably deliberate; confirm before simplifying.
+    - name: "Clone the ${{ github.action_repository }} repository with ref '${{ github.action_ref }}'"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      env:
+        ISSUE_LABELER_REPO: ${{ github.action_repository }}
+        ISSUE_LABELER_REF: ${{ github.action_ref }}
+      with:
+        repository: ${{ env.ISSUE_LABELER_REPO }}
+        ref: ${{ env.ISSUE_LABELER_REF }}
+
+    # Training data must already be cached under this key: a cache miss fails the step.
+    - name: "Restore Data from Cache"
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.DATA_PATH }}
+        key: ${{ env.DATA_CACHE_KEY }}
+        fail-on-cache-miss: true
+
+    - name: "Set up the .NET SDK"
+      uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+      with:
+        dotnet-version: "9.0.x"
+
+    - name: "Run Trainer"
+      shell: bash
+      run: |
+        dotnet run -c Release --project IssueLabeler/src/Trainer -- \
+          ${{ format('--{0}-data "{1}"', inputs.type, env.DATA_PATH) }} \
+          ${{ format('--{0}-model "{1}"', inputs.type, env.MODEL_PATH) }}
+
+    - name: "Save Model to Cache"
+      uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.MODEL_PATH }}
+        key: ${{ env.MODEL_CACHE_KEY }}
+
+    - name: "Write Final Summary"
+      shell: bash
+      run: |
+        echo "" >> $GITHUB_STEP_SUMMARY
+        echo "## ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} Model Available as '${{ inputs.model_cache_key }}'." >> $GITHUB_STEP_SUMMARY
+
+        if [[ "${{ inputs.model_cache_key }}" == "ACTIVE" ]]; then
+          echo "Label predictions will now use this model." >> $GITHUB_STEP_SUMMARY
+        else
+          echo "The '${{ inputs.model_cache_key }}' model is saved to cache and available to test or promote." >> $GITHUB_STEP_SUMMARY
+        fi