diff --git a/.github/workflows/build-predictor.yml b/.github/workflows/build-predictor.yml deleted file mode 100644 index 2073843..0000000 --- a/.github/workflows/build-predictor.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "Build Predictor App" - -on: - workflow_call: - inputs: - rebuild: - description: "Force a rebuild of the app" - type: boolean - -env: - BUILD_CACHE_KEY: "issue-labeler/predictor-app" - GH_TOKEN: ${{ github.token }} - -jobs: - check-cache: - runs-on: ubuntu-24.04 - permissions: - actions: write - steps: - - name: "Check the cache for an existing build of the Predictor" - id: restore-predictor-app - uses: actions/cache/restore@v4 - with: - path: labeler-build/Predictor - key: ${{ env.BUILD_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Show instructions for rebuilding" - if: ${{ steps.restore-predictor-app.outputs.cache-hit == 'true' && !inputs.rebuild }} - run: echo "To rebuild the predictor app, delete the '${{ env.BUILD_CACHE_KEY }}' action cache entry or rerun the 'build-predictor' workflow with 'rebuild' set to true." 
- - - name: "Delete existing cache entry" - if: ${{ steps.restore-predictor-app.outputs.cache-hit == 'true' && inputs.rebuild }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.BUILD_CACHE_KEY }} - - outputs: - needs-build: ${{ steps.restore-predictor-app.outputs.cache-hit != 'true' || inputs.rebuild }} - - build-predictor: - runs-on: ubuntu-24.04 - needs: check-cache - if: ${{ needs.check-cache.outputs.needs-build == 'true' }} - steps: - - name: "Check out the 'dotnet/issue-labeler' repo" - uses: actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - - name: "Build Predictor" - run: dotnet publish --self-contained -r linux-x64 -c Release -o ./labeler-build/Predictor ./src/Predictor - - - name: "Save Predictor app to cache" - uses: actions/cache/save@v4 - with: - path: labeler-build/Predictor - key: ${{ env.BUILD_CACHE_KEY }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 72d04bc..0000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Build and Test - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Setup .NET - uses: actions/setup-dotnet@v2 - with: - dotnet-version: '9.x' - - - name: Build solution - run: dotnet build --configuration Release - - - name: Run tests - run: dotnet test --configuration Release --no-build --verbosity minimal diff --git a/.github/workflows/cache-retention.yml b/.github/workflows/cache-retention.yml deleted file mode 100644 index 1777ece..0000000 --- a/.github/workflows/cache-retention.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: "Cache 
Retention" - -on: - workflow_call: - inputs: - skip_issue_model: - description: "Skip cache retention of the issue model" - type: boolean - skip_pull_model: - description: "Skip cache retention of the pull model" - type: boolean - -jobs: - restore-predictor: - runs-on: ubuntu-24.04 - steps: - - name: "Check the cache for an existing build of the Predictor" - uses: actions/cache/restore@v4 - with: - path: labeler-build/Predictor - key: issue-labeler/predictor-app - fail-on-cache-miss: true - - restore-issue-model: - if: ${{ !inputs.skip_issue_model }} - runs-on: ubuntu-24.04 - steps: - - name: "Restore issue model from cache" - uses: actions/cache/restore@v4 - with: - path: labeler-cache/issue-model.zip - key: issue-labeler/issues/model/LIVE - fail-on-cache-miss: true - - restore-pull-model: - if: ${{ !inputs.skip_pull_model }} - runs-on: ubuntu-24.04 - steps: - - name: "Restore pull model from cache" - uses: actions/cache/restore@v4 - with: - path: labeler-cache/pull-model.zip - key: issue-labeler/pulls/model/LIVE - fail-on-cache-miss: true diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml new file mode 100644 index 0000000..bc7d5ec --- /dev/null +++ b/.github/workflows/ci-build.yml @@ -0,0 +1,46 @@ +# CI Build and Test of the IssueLabeler solution +name: "CI Build" + +on: + push: + branches: + - main + paths: + - ".github/workflows/ci-*.yml" + - "IssueLabeler/**" + + pull_request: + branches: + - main + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: "Set up the .NET SDK" + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1 + with: + dotnet-version: 9.0.x + + - name: "Build the IssueLabeler solution" + run: dotnet build IssueLabeler/ --configuration Release + + test: + needs: build + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - 
name: "Set up the .NET SDK" + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1 + with: + dotnet-version: 9.0.x + + - name: "Run tests from the IssueLabeler solution" + run: dotnet test IssueLabeler/ diff --git a/.github/workflows/download-issues.yml b/.github/workflows/download-issues.yml deleted file mode 100644 index 2e4e7ad..0000000 --- a/.github/workflows/download-issues.yml +++ /dev/null @@ -1,122 +0,0 @@ -name: "Download Issues" - -on: - workflow_call: - inputs: - github_token: - description: "The GitHub token (defaults to action token)" - type: string - repository: - description: "The org/repo to download data from (defaults to current repository)" - type: string - - label_prefix: - description: "Label prefix" - type: string - required: true - - issue_limit: - description: "Max number of items to include in the model" - type: number - page_size: - description: "The number of items to include on each request (max 100)" - type: number - page_limit: - description: "Max pages of items to download" - type: number - retries: - description: "Comma-separated list of retry delays in seconds" - type: string - data_cache_key: - description: "The optional cache key suffix to use for saving the data" - type: string - backup_cache_key: - description: "The cache key suffix to use for backing up the last downloaded data" - type: string - default: "backup" - -permissions: - issues: read - actions: write - -env: - DATA_PATH: labeler-cache/issue-data.tsv - DATA_CACHE_KEY: issue-labeler/issues/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }} - BACKUP_CACHE_KEY: issue-labeler/issues/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}/${{ inputs.backup_cache_key }} - GH_TOKEN: ${{ github.token }} - -jobs: - download-issues: - runs-on: ubuntu-24.04 - steps: - - name: "Restore existing data from cache" - id: check-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ 
env.DATA_CACHE_KEY }} - fail-on-cache-miss: false - - - name: "Check for existing backup cache entry" - id: check-backup - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - uses: actions/cache/restore@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Abort if backup cache entry already exists" - if: ${{ steps.check-backup.outputs.cache-hit == 'true' }} - run: | - echo "Cannot save backup of existing data. Backup cache key already exists." - echo "Key: ${{ env.BACKUP_CACHE_KEY }}" - - exit 1 - - - name: "Cache backup of existing data" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - id: backup-data - uses: actions/cache/save@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - - - name: "Delete existing cache entry" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.DATA_CACHE_KEY }} - - rm ${{ env.DATA_PATH }} - - - name: "Check out the 'dotnet/issue-labeler' repo" - uses: actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - - name: "Run Downloader" - run: | - dotnet run -c Release --project ./src/Downloader -- \ - ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \ - ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \ - ${{ format('--issue-data "{0}"', env.DATA_PATH) }} \ - ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ - ${{ inputs.retries && format('--retries "{0}"', inputs.retries) }} \ - ${{ inputs.issue_limit && format('--issue-limit {0}', inputs.issue_limit) || '' }} \ - ${{ inputs.page_size && format('--page-size {0}', 
inputs.page_size) || '' }} \ - ${{ inputs.page_limit && format('--page-limit {0}', inputs.page_limit) || '' }} - - - name: "Save data to cache" - uses: actions/cache/save@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.DATA_CACHE_KEY }} diff --git a/.github/workflows/download-pulls.yml b/.github/workflows/download-pulls.yml deleted file mode 100644 index ef8173a..0000000 --- a/.github/workflows/download-pulls.yml +++ /dev/null @@ -1,122 +0,0 @@ -name: "Download Pulls" - -on: - workflow_call: - inputs: - github_token: - description: "The GitHub token (defaults to action token)" - type: string - repository: - description: "The org/repo to download data from (defaults to current repository)" - type: string - - label_prefix: - description: "Label prefix" - type: string - required: true - - pull_limit: - description: "Max number of items to include in the model" - type: number - page_size: - description: "The number of items to include on each request (max 100)" - type: number - page_limit: - description: "Max pages of items to download" - type: number - retries: - description: "Comma-separated list of retry delays in seconds" - type: string - data_cache_key: - description: "The optional cache key suffix to use for saving the data" - type: string - backup_cache_key: - description: "The cache key suffix to use for backing up the last downloaded data" - type: string - default: "backup" - -permissions: - pull-requests: read - actions: write - -env: - DATA_PATH: labeler-cache/pull-data.tsv - DATA_CACHE_KEY: issue-labeler/pulls/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }} - BACKUP_CACHE_KEY: issue-labeler/pulls/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }}/${{ inputs.backup_cache_key }} - GH_TOKEN: ${{ github.token }} - -jobs: - download-pulls: - runs-on: ubuntu-24.04 - steps: - - name: "Restore existing data from cache" - id: check-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.DATA_PATH }} - 
key: ${{ env.DATA_CACHE_KEY }} - fail-on-cache-miss: false - - - name: "Check for existing backup cache entry" - id: check-backup - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - uses: actions/cache/restore@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Abort if backup cache entry already exists" - if: ${{ steps.check-backup.outputs.cache-hit == 'true' }} - run: | - echo "Cannot save backup of existing data. Backup cache key already exists." - echo "Key: ${{ env.BACKUP_CACHE_KEY }}" - - exit 1 - - - name: "Cache backup of existing data" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - id: backup-data - uses: actions/cache/save@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - - - name: "Delete existing cache entry" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.DATA_CACHE_KEY }} - - rm ${{ env.DATA_PATH }} - - - name: "Check out the 'dotnet/issue-labeler' repo" - uses: actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - - name: "Run Downloader" - run: | - dotnet run -c Release --project ./src/Downloader -- \ - ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \ - ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \ - ${{ format('--pull-data "{0}"', env.DATA_PATH) }} \ - ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ - ${{ inputs.retries && format('--retries "{0}"', inputs.retries) }} \ - ${{ inputs.pull_limit && format('--pull-limit {0}', inputs.pull_limit) || '' }} \ - ${{ inputs.page_size && format('--page-size {0}', 
inputs.page_size) || '' }} \ - ${{ inputs.page_limit && format('--page-limit {0}', inputs.page_limit) || '' }} - - - name: "Save data to cache" - uses: actions/cache/save@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.DATA_CACHE_KEY }} diff --git a/.github/workflows/labeler-build-predictor.yml b/.github/workflows/labeler-build-predictor.yml deleted file mode 100644 index 8a12b31..0000000 --- a/.github/workflows/labeler-build-predictor.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: "Labeler: Build Predictor App" - -on: - # Allow dispatching the workflow via the Actions UI - workflow_dispatch: - inputs: - rebuild: - description: "Force a rebuild of the app" - type: boolean - -jobs: - build-predictor: - permissions: - actions: write - uses: dotnet/issue-labeler/.github/workflows/build-predictor.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 - with: - rebuild: ${{ inputs.rebuild }} diff --git a/.github/workflows/labeler-cache-retention.yml b/.github/workflows/labeler-cache-retention.yml index ea12d2b..ab3e0de 100644 --- a/.github/workflows/labeler-cache-retention.yml +++ b/.github/workflows/labeler-cache-retention.yml @@ -1,13 +1,35 @@ +# Regularly restore the prediction models from cache to prevent cache eviction name: "Labeler: Cache Retention" +# For more information about GitHub's action cache limits and eviction policy, see: +# https://docs.github.com/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy + on: schedule: - cron: "36 4 * * *" # 4:36 every day (arbitrary time daily) workflow_dispatch: + inputs: + cache_key: + description: "The cache key suffix to use for restoring the model from cache. Defaults to 'ACTIVE'." 
+ required: true + default: "ACTIVE" + +env: + CACHE_KEY: ${{ inputs.cache_key || 'ACTIVE' }} jobs: - cache-retention: - # Do not run the workflow on forks outside the 'dotnet' org - if: ${{ github.repository_owner == 'dotnet' }} - uses: dotnet/issue-labeler/.github/workflows/cache-retention.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 + restore-cache: + # Do not automatically run the workflow on forks outside the 'dotnet' org + if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + type: ["issues", "pulls"] + steps: + - uses: dotnet/issue-labeler/restore@main + with: + type: ${{ matrix.type }} + cache_key: ${{ env.CACHE_KEY }} + fail-on-cache-miss: true diff --git a/.github/workflows/labeler-predict-issues.yml b/.github/workflows/labeler-predict-issues.yml index f1783e7..7e10ba7 100644 --- a/.github/workflows/labeler-predict-issues.yml +++ b/.github/workflows/labeler-predict-issues.yml @@ -1,32 +1,55 @@ -name: "Labeler: Predict Issue Labels" +# Predict labels for Issues using a trained model +name: "Labeler: Predict (Issues)" on: - # Only automatically predict area labels when issues are originally opened + # Only automatically predict area labels when issues are first opened issues: types: opened # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers workflow_dispatch: inputs: - issue_numbers: - description: "Issue Numbers (comma-separated list of ranges)" - type: string - model_cache_key: - description: "The cache key suffix to use for loading the model" - type: string + issues: + description: "Issue Numbers (comma-separated list of ranges)." required: true - default: "LIVE" + cache_key: + description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'." 
+ required: true + default: "ACTIVE" + +env: + # Do not allow failure for jobs triggered automatically (as this causes red noise on the workflows list) + ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }} + + LABEL_PREFIX: "area-" + THRESHOLD: 0.40 + DEFAULT_LABEL: "needs-area-label" jobs: - predict-issues: - # Do not run the workflow on forks outside the 'dotnet' org - if: ${{ github.repository_owner == 'dotnet' && (inputs.issue_numbers || github.event.issue.number) }} + predict-issue-label: + # Do not automatically run the workflow on forks outside the 'dotnet' org + if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }} + runs-on: ubuntu-latest permissions: issues: write - uses: dotnet/issue-labeler/.github/workflows/predict-issues.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 - with: - model_cache_key: ${{ inputs.model_cache_key }} - issue_numbers: ${{ inputs.issue_numbers || github.event.issue.number }} - label_prefix: "area-" - threshold: 0.40 - default_label: "needs-area-label" + steps: + - name: "Restore issues model from cache" + id: restore-model + uses: dotnet/issue-labeler/restore@main + with: + type: issues + fail-on-cache-miss: ${{ env.ALLOW_FAILURE }} + quiet: true + + - name: "Predict issue labels" + id: prediction + if: ${{ steps.restore-model.outputs.cache-hit == 'true' }} + uses: dotnet/issue-labeler/predict@main + with: + issues: ${{ inputs.issues || github.event.issue.number }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + default_label: ${{ env.DEFAULT_LABEL }} + env: + GITHUB_TOKEN: ${{ github.token }} + continue-on-error: ${{ !env.ALLOW_FAILURE }} diff --git a/.github/workflows/labeler-predict-pulls.yml b/.github/workflows/labeler-predict-pulls.yml index 5b286a4..af2f68c 100644 --- a/.github/workflows/labeler-predict-pulls.yml +++ b/.github/workflows/labeler-predict-pulls.yml @@ -1,4 +1,5 @@ -name: "Labeler: Predict Pull Labels" +# Predict labels for Pull 
Requests using a trained model +name: "Labeler: Predict (Pulls)" on: # Per to the following documentation: @@ -17,25 +18,47 @@ on: # Allow dispatching the workflow via the Actions UI, specifying ranges of numbers workflow_dispatch: inputs: - pull_numbers: - description: "Pull Numbers (comma-separated list of ranges)" - type: string - model_cache_key: - description: "The cache key suffix to use for loading the model" - type: string + pulls: + description: "Pull Request Numbers (comma-separated list of ranges)." required: true - default: "LIVE" + cache_key: + description: "The cache key suffix to use for restoring the model. Defaults to 'ACTIVE'." + required: true + default: "ACTIVE" + +env: + # Do not allow failure for jobs triggered automatically (this can block PR merge) + ALLOW_FAILURE: ${{ github.event_name == 'workflow_dispatch' }} + + LABEL_PREFIX: "area-" + THRESHOLD: 0.40 + DEFAULT_LABEL: "needs-area-label" jobs: - predict-pulls: - # Do not run the workflow on forks outside the 'dotnet' org - if: ${{ github.repository_owner == 'dotnet' && (inputs.pull_numbers || github.event.number) }} + predict-pull-label: + # Do not automatically run the workflow on forks outside the 'dotnet' org + if: ${{ github.event_name == 'workflow_dispatch' || github.repository_owner == 'dotnet' }} + runs-on: ubuntu-latest permissions: pull-requests: write - uses: dotnet/issue-labeler/.github/workflows/predict-pulls.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 - with: - model_cache_key: ${{ inputs.model_cache_key }} - pull_numbers: ${{ inputs.pull_numbers || github.event.number }} - label_prefix: "area-" - threshold: 0.40 - default_label: "needs-area-label" + steps: + - name: "Restore pulls model from cache" + id: restore-model + uses: dotnet/issue-labeler/restore@main + with: + type: pulls + fail-on-cache-miss: ${{ env.ALLOW_FAILURE }} + quiet: true + + - name: "Predict pull labels" + id: prediction + if: ${{ steps.restore-model.outputs.cache-hit == 'true' }} + uses: 
dotnet/issue-labeler/predict@main + with: + pulls: ${{ inputs.pulls || github.event.number }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + default_label: ${{ env.DEFAULT_LABEL }} + env: + GITHUB_TOKEN: ${{ github.token }} + continue-on-error: ${{ !env.ALLOW_FAILURE }} diff --git a/.github/workflows/labeler-promote.yml b/.github/workflows/labeler-promote.yml index 97f40af..965d502 100644 --- a/.github/workflows/labeler-promote.yml +++ b/.github/workflows/labeler-promote.yml @@ -1,42 +1,49 @@ -name: "Labeler: Promote Models" +# Promote a model from staging to 'ACTIVE', backing up the currently 'ACTIVE' model +name: "Labeler: Promotion" on: # Dispatched via the Actions UI, promotes the staged models from - # a staging slot into the prediction environment + # a staged slot into the prediction environment workflow_dispatch: inputs: - promote_issues: + issues: description: "Issues: Promote Model" type: boolean required: true - promote_pulls: + pulls: description: "Pulls: Promote Model" type: boolean required: true - model_cache_key: - description: "The cache key suffix to promote into the 'LIVE' cache" - type: string + staged_key: + description: "The cache key suffix to use for promoting a staged model to 'ACTIVE'. Defaults to 'staged'." required: true - default: "staging" - backup_cache_key: - description: "The cache key suffix to use for backing up the currently promoted model" - type: string + default: "staged" + backup_key: + description: "The cache key suffix to use for backing up the currently active model. Defaults to 'backup'." 
default: "backup" permissions: actions: write jobs: - labeler-promote-issues: - if: ${{ inputs.promote_issues }} - uses: dotnet/issue-labeler/.github/workflows/promote-issues.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 - with: - model_cache_key: ${{ inputs.model_cache_key }} - backup_cache_key: ${{ inputs.backup_cache_key }} + promote-issues: + if: ${{ inputs.issues }} + runs-on: ubuntu-latest + steps: + - name: "Promote Model for Issues" + uses: dotnet/issue-labeler/promote@main + with: + type: "issues" + staged_key: ${{ inputs.staged_key }} + backup_key: ${{ inputs.backup_key }} - labeler-promote-pulls: - if: ${{ inputs.promote_pulls }} - uses: dotnet/issue-labeler/.github/workflows/promote-pulls.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 - with: - model_cache_key: ${{ inputs.model_cache_key }} - backup_cache_key: ${{ inputs.backup_cache_key }} + promote-pulls: + if: ${{ inputs.pulls }} + runs-on: ubuntu-latest + steps: + - name: "Promote Model for Pull Requests" + uses: dotnet/issue-labeler/promote@main + with: + type: "pulls" + staged_key: ${{ inputs.staged_key }} + backup_key: ${{ inputs.backup_key }} diff --git a/.github/workflows/labeler-train.yml b/.github/workflows/labeler-train.yml index bb56563..3d7f542 100644 --- a/.github/workflows/labeler-train.yml +++ b/.github/workflows/labeler-train.yml @@ -1,68 +1,158 @@ -name: "Labeler: Train Models" +# Train the Issues and Pull Requests models for label prediction +name: "Labeler: Training" on: - # Dispatched via the Actions UI, stages new models for promotion consideration - # Each step of the workflow can be run independently: Download, Train, and Test workflow_dispatch: inputs: - download_issues: - description: "Issues: Download Data" - type: boolean - default: true - train_issues: - description: "Issues: Train Model" - type: boolean - default: true - test_issues: - description: "Issues: Test Model" - type: boolean - default: true - download_pulls: - description: "Pulls: Download Data" - 
type: boolean - default: true - train_pulls: - description: "Pulls: Train Model" - type: boolean - default: true - test_pulls: - description: "Pulls: Test Model" - type: boolean - default: true + type: + description: "Issues or Pull Requests" + type: choice + required: true + default: "Both" + options: + - "Both" + - "Issues" + - "Pull Requests" - data_limit: - description: "Max number of items to include in the model" - type: number + steps: + description: "Training Steps" + type: choice + required: true + default: "All" + options: + - "All" + - "Download Data" + - "Train Model" + - "Test Model" - github_token: - description: "The GitHub token (defaults to action token)" - type: string repository: - description: "The org/repo to download data from (defaults to current repository)" - type: string + description: "The org/repo to download data from. Defaults to the current repository." + limit: + description: "Max number of items to download for training/testing the model (newest items are used). Defaults to the max number of pages times the page size." + type: number + page_size: + description: "Number of items per page in GitHub API requests. Defaults to 100 for issues, 25 for pull requests." + type: number + page_limit: + description: "Maximum number of pages to download for training/testing the model. Defaults to 1000 for issues, 4000 for pull requests." + type: number cache_key_suffix: - description: "The cache key suffix to use for staging data/models (use 'LIVE' to bypass staging)" - type: string + description: "The cache key suffix to use for staged data/models (use 'ACTIVE' to bypass staging). Defaults to 'staged'." 
required: true - default: "staging" + default: "staged" + +env: + CACHE_KEY: ${{ inputs.cache_key_suffix }} + REPOSITORY: ${{ inputs.repository || github.repository }} + LABEL_PREFIX: "area-" + THRESHOLD: "0.40" + LIMIT: ${{ inputs.limit }} + PAGE_SIZE: ${{ inputs.page_size }} + PAGE_LIMIT: ${{ inputs.page_limit }} jobs: - labeler-train: + download-issues: + if: ${{ contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Download Data"]'), inputs.steps) }} + runs-on: ubuntu-latest permissions: issues: read + steps: + - name: "Download Issues" + uses: dotnet/issue-labeler/download@main + with: + type: "issues" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} + + download-pulls: + if: ${{ contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Download Data"]'), inputs.steps) }} + runs-on: ubuntu-latest + permissions: + pull-requests: read + steps: + - name: "Download Pull Requests" + uses: dotnet/issue-labeler/download@main + with: + type: "pulls" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} + + train-issues: + if: ${{ always() && contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Train Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.download-issues.result) }} + runs-on: ubuntu-latest + permissions: {} + needs: download-issues + steps: + - name: "Train Model for Issues" + uses: dotnet/issue-labeler/train@main + with: + type: "issues" + data_cache_key: ${{ env.CACHE_KEY }} + model_cache_key: ${{ env.CACHE_KEY }} + + train-pulls: + if: ${{ 
always() && contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Train Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.download-pulls.result) }} + runs-on: ubuntu-latest + permissions: {} + needs: download-pulls + steps: + - name: "Train Model for Pull Requests" + uses: dotnet/issue-labeler/train@main + with: + type: "pulls" + data_cache_key: ${{ env.CACHE_KEY }} + model_cache_key: ${{ env.CACHE_KEY }} + + test-issues: + if: ${{ always() && contains(fromJSON('["Both", "Issues"]'), inputs.type) && contains(fromJSON('["All", "Test Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.train-issues.result) }} + runs-on: ubuntu-latest + permissions: + issues: read + needs: train-issues + steps: + - name: "Test Model for Issues" + uses: dotnet/issue-labeler/test@main + with: + type: "issues" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} + + test-pulls: + if: ${{ always() && contains(fromJSON('["Both", "Pull Requests"]'), inputs.type) && contains(fromJSON('["All", "Test Model"]'), inputs.steps) && contains(fromJSON('["success", "skipped"]'), needs.train-pulls.result) }} + runs-on: ubuntu-latest + permissions: pull-requests: read - actions: write - uses: dotnet/issue-labeler/.github/workflows/train.yml@f0c098669828a134c0313adf3f58c1909e555d86 # v1.0.1 - with: - download_issues: ${{ inputs.download_issues }} - train_issues: ${{ inputs.train_issues }} - test_issues: ${{ inputs.test_issues }} - download_pulls: ${{ inputs.download_pulls }} - train_pulls: ${{ inputs.train_pulls }} - test_pulls: ${{ inputs.test_pulls }} - data_limit: ${{ inputs.data_limit && fromJSON(inputs.data_limit) || 0 }} - github_token: ${{ inputs.github_token }} - repository: ${{ 
inputs.repository }} - cache_key_suffix: ${{ inputs.cache_key_suffix }} - label_prefix: "area-" - threshold: 0.40 + needs: train-pulls + steps: + - name: "Test Model for Pull Requests" + uses: dotnet/issue-labeler/test@main + with: + type: "pulls" + cache_key: ${{ env.CACHE_KEY }} + repository: ${{ env.REPOSITORY }} + label_prefix: ${{ env.LABEL_PREFIX }} + threshold: ${{ env.THRESHOLD }} + limit: ${{ env.LIMIT }} + page_size: ${{ env.PAGE_SIZE }} + page_limit: ${{ env.PAGE_LIMIT }} + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/predict-issues.yml b/.github/workflows/predict-issues.yml deleted file mode 100644 index f97eab9..0000000 --- a/.github/workflows/predict-issues.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: "Predict Issue Labels" - -on: - workflow_call: - inputs: - issue_numbers: - description: "Issue Numbers" - type: string - required: true - label_prefix: - description: "Label Prefix" - type: string - required: true - threshold: - description: "The minimum confidence score for a label prediction" - type: number - required: true - default_label: - description: "Default Label (leave blank for no default label)" - type: string - model_cache_key: - description: "The cache key suffix to use for loading the model" - type: string - required: true - default: "LIVE" - -permissions: - issues: write - -env: - MODEL_PATH: labeler-cache/issue-model.zip - MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }} - BUILD_CACHE_KEY: "issue-labeler/predictor-app" - -jobs: - predict-issues: - runs-on: ubuntu-24.04 - steps: - - name: "Restore the Predictor app from cache" - id: restore-predictor-app - uses: actions/cache/restore@v4 - with: - path: labeler-build/Predictor - key: ${{ env.BUILD_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Restore model from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Run 
Predictor" - run: | - ./labeler-build/Predictor/Predictor \ - ${{ format('--token "{0}"', secrets.GITHUB_TOKEN) }} \ - ${{ format('--repo "{0}"', github.repository) }} \ - ${{ format('--issue-model "{0}"', env.MODEL_PATH) }} \ - ${{ format('--issue-numbers "{0}"', inputs.issue_numbers) }} \ - ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ - ${{ format('--threshold {0}', inputs.threshold) }} \ - ${{ inputs.default_label && format('--default-label "{0}"', inputs.default_label) }} diff --git a/.github/workflows/predict-pulls.yml b/.github/workflows/predict-pulls.yml deleted file mode 100644 index 3ef239a..0000000 --- a/.github/workflows/predict-pulls.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: "Predict Pull Labels" - -on: - workflow_call: - inputs: - pull_numbers: - description: "Pull Numbers" - type: string - required: true - label_prefix: - description: "Label Prefix" - type: string - required: true - threshold: - description: "The minimum confidence score for a label prediction" - type: number - required: true - default_label: - description: "Default Label (leave blank for no default label)" - type: string - model_cache_key: - description: "The cache key suffix to use for loading the model" - type: string - required: true - default: "LIVE" - -permissions: - pull-requests: write - -env: - MODEL_PATH: labeler-cache/pull-model.zip - MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }} - BUILD_CACHE_KEY: "issue-labeler/predictor-app" - -jobs: - predict-pulls: - runs-on: ubuntu-24.04 - steps: - - name: "Restore the Predictor app from cache" - id: restore-predictor-app - uses: actions/cache/restore@v4 - with: - path: labeler-build/Predictor - key: ${{ env.BUILD_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Restore model from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Run Predictor" - run: | - 
./labeler-build/Predictor/Predictor \ - ${{ format('--token "{0}"', secrets.GITHUB_TOKEN) }} \ - ${{ format('--repo "{0}"', github.repository) }} \ - ${{ format('--pull-model "{0}"', env.MODEL_PATH) }} \ - ${{ format('--pull-numbers "{0}"', inputs.pull_numbers) }} \ - ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ - ${{ format('--threshold {0}', inputs.threshold) }} \ - ${{ inputs.default_label && format('--default-label "{0}"', inputs.default_label) }} diff --git a/.github/workflows/promote-issues.yml b/.github/workflows/promote-issues.yml deleted file mode 100644 index 86f500a..0000000 --- a/.github/workflows/promote-issues.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: "Promote Issues Model" - -on: - workflow_call: - inputs: - model_cache_key: - description: "The cache key suffix to promote from staging" - type: string - required: true - backup_cache_key: - description: "The cache key suffix to use for backing up the currently promoted model" - type: string - default: "backup" - -env: - MODEL_PATH: labeler-cache/issue-model.zip - MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }} - PROMOTION_CACHE_KEY: issue-labeler/issues/model/LIVE - BACKUP_CACHE_KEY: issue-labeler/issues/model/${{ inputs.backup_cache_key }} - GH_TOKEN: ${{ github.token }} - -permissions: - actions: write - -jobs: - promote-issues: - runs-on: ubuntu-24.04 - - steps: - - name: "Check for existing backup cache entry" - id: check-backup - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Abort if backup cache entry already exists" - if: ${{ steps.check-backup.outputs.cache-hit == 'true' }} - run: | - echo "Cannot save backup of currently promoted model. Backup cache key already exists." 
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}" - - exit 1 - - - name: "Restore existing promotion cache entry if one exists" - if: ${{ steps.check-backup.outputs.cache-hit != 'true'}} - id: check-promotion - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.PROMOTION_CACHE_KEY }} - fail-on-cache-miss: false - - - name: "Cache backup of existing promotion model" - if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }} - id: backup-model - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - - - name: "Remove local copy of currently promoted model" - if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }} - run: rm ${{ env.MODEL_PATH }} - - - name: "Restore model to be promoted from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Delete existing cache entry" - if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.PROMOTION_CACHE_KEY }} - - - name: "Save promoted model to cache" - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.PROMOTION_CACHE_KEY }} diff --git a/.github/workflows/promote-pulls.yml b/.github/workflows/promote-pulls.yml deleted file mode 100644 index b1c847c..0000000 --- a/.github/workflows/promote-pulls.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: "Promote Pulls Model" - -on: - workflow_call: - inputs: - model_cache_key: - description: "The cache key suffix to promote from staging" - type: string - required: true - backup_cache_key: - description: "The cache key suffix to use for backing up the currently promoted model" - type: string - default: "backup" - -env: - MODEL_PATH: labeler-cache/pull-model.zip - MODEL_CACHE_KEY: 
issue-labeler/pulls/model/${{ inputs.model_cache_key }} - PROMOTION_CACHE_KEY: issue-labeler/pulls/model/LIVE - BACKUP_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.backup_cache_key }} - GH_TOKEN: ${{ github.token }} - -permissions: - actions: write - -jobs: - promote-pulls: - runs-on: ubuntu-24.04 - - steps: - - name: "Check for existing backup cache entry" - id: check-backup - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Abort if backup cache entry already exists" - if: ${{ steps.check-backup.outputs.cache-hit == 'true' }} - run: | - echo "Cannot save backup of currently promoted model. Backup cache key already exists." - echo "Key: ${{ env.BACKUP_CACHE_KEY }}" - - exit 1 - - - name: "Restore existing promotion cache entry if one exists" - if: ${{ steps.check-backup.outputs.cache-hit != 'true'}} - id: check-promotion - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.PROMOTION_CACHE_KEY }} - fail-on-cache-miss: false - - - name: "Cache backup of existing promotion model" - if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }} - id: backup-model - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - - - name: "Remove local copy of currently promoted model" - if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }} - run: rm ${{ env.MODEL_PATH }} - - - name: "Restore model to be promoted from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Delete existing cache entry" - if: ${{ steps.check-promotion.outputs.cache-hit == 'true' }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.PROMOTION_CACHE_KEY 
}} - - - name: "Save promoted model to cache" - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.PROMOTION_CACHE_KEY }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..6ba0934 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,107 @@ +# Create a new release of the Issue Labeler, publishing the predictor Docker container image to the GitHub container registry +name: "Release" + +on: + workflow_dispatch: + inputs: + image_tags: + description: "The optional semicolon separated list of tags to apply to the published Docker container image. The ref name is added automatically." + +env: + BASE_IMAGE: mcr.microsoft.com/dotnet/runtime:9.0-noble-chiseled + IMAGE_TAGS: ${{ inputs.image_tags && format('{0};{1}', github.ref_name, inputs.image_tags) || github.ref_name }} + PREDICTOR_IMAGE_NAME: ${{ github.repository }}/predictor + PACKAGE_NAME_ESCAPED: issue-labeler%2Fpredictor + GITHUB_API_PACKAGE_OWNER: /orgs/dotnet + +jobs: + publish-predictor: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + outputs: + digest: ${{ steps.published-image.outputs.digest }} + published_image_digest: ${{ steps.published-image.outputs.published_image_digest }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: "Set up the .NET SDK" + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1 + with: + dotnet-version: 9.0.x + + - name: "Log in to the GitHub Container Registry" + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: "Publish Predictor" + run: | + dotnet publish IssueLabeler/src/Predictor/Predictor.csproj \ + /t:PublishContainer \ + -p DebugType=none \ + -p ContainerBaseImage=${{ env.BASE_IMAGE }} \ + -p ContainerRegistry=ghcr.io \ + -p 
ContainerImageTags='"${{ env.IMAGE_TAGS }}"' \ + -p ContainerRepository=${{ env.PREDICTOR_IMAGE_NAME }} \ + -p ContainerAuthors=${{ github.repository_owner }} \ + -p ContainerInformationUrl=${{ format('{0}/{1}', github.server_url, github.repository) }} \ + -p ContainerDocumentationUrl=${{ format('{0}/{1}/wiki', github.server_url, github.repository) }} \ + -p ContainerLicenseExpression=${{ format('{0}/{1}/blob/main/LICENSE.TXT', github.server_url, github.repository) }} + + - name: "Capture and output the Docker image digest to the workflow summary" + id: published-image + env: + GH_TOKEN: ${{ github.token }} + run: | + DIGEST=` \ + gh api \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + ${{ format('{0}/packages/container/{1}/versions', env.GITHUB_API_PACKAGE_OWNER, env.PACKAGE_NAME_ESCAPED) }} \ + | jq -r --arg tag "${{ github.ref_name }}" '.[] | select(.metadata.container.tags[] == $tag) | .name' \ + ` # .name of the package version whose tags include the ref being released (always part of IMAGE_TAGS) + PUBLISHED_IMAGE_DIGEST=ghcr.io/${{ env.PREDICTOR_IMAGE_NAME }}@${DIGEST} + + echo "digest=$DIGEST" >> $GITHUB_OUTPUT + echo "published_image_digest=$PUBLISHED_IMAGE_DIGEST" >> $GITHUB_OUTPUT + + echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY + echo "> **Docker container image published.**" >> $GITHUB_STEP_SUMMARY + echo "> Digest: \`$DIGEST\`" >> $GITHUB_STEP_SUMMARY + echo "> Published: \`$PUBLISHED_IMAGE_DIGEST\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + update-predictor-action: + runs-on: ubuntu-latest + needs: publish-predictor + permissions: + contents: write + packages: read + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + + - name: "Update the `predict` action to use the published image digest" + run: | + PREDICT_ACTION="predict/action.yml" + sed -i "s|ghcr.io/${{ env.PREDICTOR_IMAGE_NAME }}@.*|${{ needs.publish-predictor.outputs.published_image_digest }} # ${{ env.IMAGE_TAGS }}|" $PREDICT_ACTION + + git config user.name "GitHub Actions" + git config user.email "actions@github.com" + git add 
$PREDICT_ACTION + git commit -m "Release '${{ github.ref_name }}' with predictor digest '${{ needs.publish-predictor.outputs.digest }}'" + git push origin ${{ github.ref_name }} + + echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY + echo "> Updated [\`predict/action.yml\` (${{ github.ref_name }})](${{ format('{0}/{1}/blob/{2}/predict/action.yml', github.server_url, github.repository, github.ref_name) }}) to:" >> $GITHUB_STEP_SUMMARY + echo "> \`${{ needs.publish-predictor.outputs.published_image_digest }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + echo "\`\`\`yml" >> $GITHUB_STEP_SUMMARY + grep -i -B1 -A10 '^\s*using:\s*docker' $PREDICT_ACTION >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/test-issues.yml b/.github/workflows/test-issues.yml deleted file mode 100644 index cfeccc9..0000000 --- a/.github/workflows/test-issues.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "Test Issues Model" - -on: - workflow_call: - inputs: - github_token: - description: "The GitHub token (defaults to action token)" - type: string - repository: - description: "The org/repo to download data from (defaults to current repository)" - type: string - - label_prefix: - description: "Label Prefix" - type: string - required: true - threshold: - description: "The minimum confidence score for a label prediction" - type: number - required: true - - issue_limit: - description: "Max number of items to include in the test" - type: number - model_cache_key: - description: "The cache key suffix to use for loading the model" - type: string - required: true - -env: - MODEL_PATH: labeler-cache/issue-model.zip - MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }} - -jobs: - test-issues: - runs-on: ubuntu-24.04 - steps: - - name: "Check out the 'dotnet/issue-labeler' repo" - uses: actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: 
actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - # Restore from cache after checkout out the repo to prevent - # the restored files from getting removed during checkout - - name: "Restore model from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Run Tester" - run: | - dotnet run -c Release --project ./src/Tester -- \ - ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \ - ${{ format('--repo "{0}"',inputs.repository || github.repository) }} \ - ${{ format('--issue-model "{0}"', env.MODEL_PATH) }} \ - ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ - ${{ format('--threshold {0}', inputs.threshold) }} \ - ${{ inputs.issue_limit && format('--issue-limit {0}', inputs.issue_limit) || '' }} diff --git a/.github/workflows/test-pulls.yml b/.github/workflows/test-pulls.yml deleted file mode 100644 index 0cd8afb..0000000 --- a/.github/workflows/test-pulls.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "Test Pulls Model" - -on: - workflow_call: - inputs: - github_token: - description: "The GitHub token (defaults to action token)" - type: string - repository: - description: "The org/repo to download data from (defaults to current repository)" - type: string - - label_prefix: - description: "Label Prefix" - type: string - required: true - threshold: - description: "The minimum confidence score for a label prediction" - type: number - required: true - - pull_limit: - description: "Max number of items to include in the test" - type: number - model_cache_key: - description: "The cache key suffix to use for loading the model" - type: string - required: true - -env: - MODEL_PATH: labeler-cache/pull-model.zip - MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }} - -jobs: - test-pulls: - runs-on: ubuntu-24.04 - steps: - - name: "Check out the 'dotnet/issue-labeler' repo" - uses: 
actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - # Restore from cache after checkout out the repo to prevent - # the restored files from getting removed during checkout - - name: "Restore model from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Run Tester" - run: | - dotnet run -c Release --project ./src/Tester -- \ - ${{ format('--token "{0}"', inputs.github_token || secrets.GITHUB_TOKEN) }} \ - ${{ format('--repo "{0}"',inputs.repository || github.repository) }} \ - ${{ format('--pull-model "{0}"', env.MODEL_PATH) }} \ - ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ - ${{ format('--threshold {0}', inputs.threshold) }} \ - ${{ inputs.pull_limit && format('--pull-limit {0}', inputs.pull_limit) || '' }} diff --git a/.github/workflows/train-issues.yml b/.github/workflows/train-issues.yml deleted file mode 100644 index 333df32..0000000 --- a/.github/workflows/train-issues.yml +++ /dev/null @@ -1,106 +0,0 @@ -name: "Train Issues Model" - -on: - workflow_call: - inputs: - data_cache_key: - description: "The optional cache key suffix to use for loading the data" - type: string - model_cache_key: - description: "The cache key suffix to use for saving the model" - type: string - required: true - -permissions: - actions: write - -env: - DATA_PATH: labeler-cache/issue-data.tsv - DATA_CACHE_KEY: issue-labeler/issues/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }} - MODEL_PATH: labeler-cache/issue-model.zip - MODEL_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }} - BACKUP_CACHE_KEY: issue-labeler/issues/model/${{ inputs.model_cache_key }}/backup - GH_TOKEN: ${{ github.token }} - -jobs: - train-issues: - runs-on: ubuntu-24.04 - steps: - - name: 
"Check out the 'dotnet/issue-labeler' repo" - uses: actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - # Restore from cache after checkout out the repo to prevent - # the restored files from getting removed during checkout - - name: "Restore data from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.DATA_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Restore existing model cache entry if one exists" - id: check-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: false - - - name: "Check for existing backup cache entry" - id: check-backup - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Abort if backup cache entry already exists" - if: ${{ steps.check-backup.outputs.cache-hit == 'true' }} - run: | - echo "Cannot save backup of existing model. Backup cache key already exists." 
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}" - - exit 1 - - - name: "Cache backup of existing model" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - id: backup-model - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - - - name: "Delete restored model" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - run: | - rm ${{ env.MODEL_PATH }} - - - name: "Run Trainer" - run: | - dotnet run -c Release --project ./src/Trainer -- \ - ${{ format('--issue-data "{0}"', env.DATA_PATH) }} \ - ${{ format('--issue-model "{0}"', env.MODEL_PATH) }} - - - name: "Delete existing model cache entry" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.MODEL_CACHE_KEY }} - - - name: "Save model to cache" - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} diff --git a/.github/workflows/train-pulls.yml b/.github/workflows/train-pulls.yml deleted file mode 100644 index a2855f5..0000000 --- a/.github/workflows/train-pulls.yml +++ /dev/null @@ -1,106 +0,0 @@ -name: "Train Pulls Model" - -on: - workflow_call: - inputs: - data_cache_key: - description: "The optional cache key suffix to use for loading the data" - type: string - model_cache_key: - description: "The cache key suffix to use for saving the model" - type: string - required: true - -permissions: - actions: write - -env: - DATA_PATH: labeler-cache/pull-data.tsv - DATA_CACHE_KEY: issue-labeler/pulls/data${{ inputs.data_cache_key && format('/{0}', inputs.data_cache_key) }} - MODEL_PATH: labeler-cache/pull-model.zip - MODEL_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }} - BACKUP_CACHE_KEY: issue-labeler/pulls/model/${{ inputs.model_cache_key }}/backup - GH_TOKEN: ${{ github.token }} - -jobs: - train-pulls: - 
runs-on: ubuntu-24.04 - steps: - - name: "Check out the 'dotnet/issue-labeler' repo" - uses: actions/checkout@v4 - with: - repository: dotnet/issue-labeler - ref: d74b8e18f41673790be3d0ca87296a49e81ac19a # Staging v1.0.1 - - - uses: actions/setup-dotnet@v4 - with: - dotnet-version: "9.0.x" - - # Restore from cache after checkout out the repo to prevent - # the restored files from getting removed during checkout - - name: "Restore data from cache" - id: restore-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.DATA_PATH }} - key: ${{ env.DATA_CACHE_KEY }} - fail-on-cache-miss: true - - - name: "Restore existing model cache entry if one exists" - id: check-cache - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} - fail-on-cache-miss: false - - - name: "Check for existing backup cache entry" - id: check-backup - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - uses: actions/cache/restore@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - lookup-only: true - fail-on-cache-miss: false - - - name: "Abort if backup cache entry already exists" - if: ${{ steps.check-backup.outputs.cache-hit == 'true' }} - run: | - echo "Cannot save backup of existing model. Backup cache key already exists." 
- echo "Key: ${{ env.BACKUP_CACHE_KEY }}" - - exit 1 - - - name: "Cache backup of existing model" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - id: backup-model - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.BACKUP_CACHE_KEY }} - - - name: "Delete restored model" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - run: | - rm ${{ env.MODEL_PATH }} - - - name: "Run Trainer" - run: | - dotnet run -c Release --project ./src/Trainer -- \ - ${{ format('--pull-data "{0}"', env.DATA_PATH) }} \ - ${{ format('--pull-model "{0}"', env.MODEL_PATH) }} - - - name: "Delete existing model cache entry" - if: ${{ steps.check-cache.outputs.cache-hit == 'true' }} - run: | - gh api --method DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - /repos/${{ github.repository }}/actions/caches?key=${{ env.MODEL_CACHE_KEY }} - - - name: "Save model to cache" - uses: actions/cache/save@v4 - with: - path: ${{ env.MODEL_PATH }} - key: ${{ env.MODEL_CACHE_KEY }} diff --git a/.github/workflows/train.yml b/.github/workflows/train.yml deleted file mode 100644 index d98ef0f..0000000 --- a/.github/workflows/train.yml +++ /dev/null @@ -1,121 +0,0 @@ -name: "Train Models" - -on: - workflow_call: - inputs: - download_issues: - description: "Issues: Download Data" - type: boolean - train_issues: - description: "Issues: Train Model" - type: boolean - test_issues: - description: "Issues: Test Model" - type: boolean - download_pulls: - description: "Pulls: Download Data" - type: boolean - train_pulls: - description: "Pulls: Train Model" - type: boolean - test_pulls: - description: "Pulls: Test Model" - type: boolean - - label_prefix: - description: "Label Prefix" - type: string - required: true - threshold: - description: "The minimum confidence score for a label prediction" - type: number - required: true - - data_limit: - description: "Max number of items to include in the model" - type: number - - 
github_token: - description: "The GitHub token (defaults to action token)" - type: string - repository: - description: "The org/repo to download data from (defaults to current repository)" - type: string - cache_key_suffix: - description: "The cache key suffix to use for staging data/models (use 'LIVE' to bypass staging)" - type: string - required: true - -jobs: - build-predictor: - uses: dotnet/issue-labeler/.github/workflows/build-predictor.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - - labeler-download-issues: - needs: build-predictor - if: ${{ inputs.download_issues }} - permissions: - issues: read - actions: write - uses: dotnet/issue-labeler/.github/workflows/download-issues.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - with: - github_token: ${{ inputs.github_token || github.token }} - repository: ${{ inputs.repository || github.repository }} - data_cache_key: ${{ inputs.cache_key_suffix }} - issue_limit: ${{ inputs.data_limit && fromJSON(inputs.data_limit) || 0 }} - label_prefix: ${{ inputs.label_prefix }} - - labeler-train-issues: - needs: labeler-download-issues - if: ${{ inputs.train_issues && always() && (needs.labeler-download-issues.result == 'success' || needs.labeler-download-issues.result == 'skipped') }} - permissions: - actions: write - uses: dotnet/issue-labeler/.github/workflows/train-issues.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - with: - data_cache_key: ${{ inputs.cache_key_suffix }} - model_cache_key: ${{ inputs.cache_key_suffix }} - - labeler-test-issues: - needs: [labeler-download-issues, labeler-train-issues] - if: ${{ inputs.test_issues && always() && (needs.labeler-download-issues.result == 'success' || needs.labeler-download-issues.result == 'skipped') && (needs.labeler-train-issues.result == 'success' || needs.labeler-train-issues.result == 'skipped') }} - uses: dotnet/issue-labeler/.github/workflows/test-issues.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - 
with: - github_token: ${{ inputs.github_token || github.token }} - repository: ${{ inputs.repository || github.repository }} - model_cache_key: ${{ inputs.cache_key_suffix }} - label_prefix: ${{ inputs.label_prefix }} - threshold: ${{ inputs.threshold }} - - labeler-download-pulls: - needs: build-predictor - if: ${{ inputs.download_pulls }} - permissions: - pull-requests: read - actions: write - uses: dotnet/issue-labeler/.github/workflows/download-pulls.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - with: - github_token: ${{ inputs.github_token || github.token }} - repository: ${{ inputs.repository || github.repository }} - data_cache_key: ${{ inputs.cache_key_suffix }} - pull_limit: ${{ inputs.data_limit && fromJSON(inputs.data_limit) || 0 }} - label_prefix: ${{ inputs.label_prefix }} - - labeler-train-pulls: - needs: labeler-download-pulls - if: ${{ inputs.train_pulls && always() && (needs.labeler-download-pulls.result == 'success' || needs.labeler-download-pulls.result == 'skipped') }} - permissions: - actions: write - uses: dotnet/issue-labeler/.github/workflows/train-pulls.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - with: - data_cache_key: ${{ inputs.cache_key_suffix }} - model_cache_key: ${{ inputs.cache_key_suffix }} - - labeler-test-pulls: - needs: [labeler-download-pulls, labeler-train-pulls] - if: ${{ inputs.test_pulls && always() && (needs.labeler-download-pulls.result == 'success' || needs.labeler-download-pulls.result == 'skipped') && (needs.labeler-train-pulls.result == 'success' || needs.labeler-train-pulls.result == 'skipped') }} - uses: dotnet/issue-labeler/.github/workflows/test-pulls.yml@68a3df3b6444ec3e2a37af4d5e2569df1d45201d # Staging v1.0.1 - with: - github_token: ${{ inputs.github_token || github.token }} - repository: ${{ inputs.repository || github.repository }} - model_cache_key: ${{ inputs.cache_key_suffix }} - label_prefix: ${{ inputs.label_prefix }} - threshold: ${{ inputs.threshold }} diff --git 
a/Directory.Build.props b/Directory.Build.props deleted file mode 100644 index d74c6ef..0000000 --- a/Directory.Build.props +++ /dev/null @@ -1,6 +0,0 @@ - - - - net9.0 - - diff --git a/Directory.Packages.props b/Directory.Packages.props deleted file mode 100644 index ab51761..0000000 --- a/Directory.Packages.props +++ /dev/null @@ -1,20 +0,0 @@ - - - true - - - - - - - - - - - - - - - - - diff --git a/IssueLabeler/Directory.Build.props b/IssueLabeler/Directory.Build.props new file mode 100644 index 0000000..8272204 --- /dev/null +++ b/IssueLabeler/Directory.Build.props @@ -0,0 +1,15 @@ + + + + + net9.0 + true + true + $(MSBuildThisFileDirectory)artifacts + + + + + root + + diff --git a/IssueLabeler/Directory.Packages.props b/IssueLabeler/Directory.Packages.props new file mode 100644 index 0000000..5c936eb --- /dev/null +++ b/IssueLabeler/Directory.Packages.props @@ -0,0 +1,24 @@ + + + true + + + + + + + + + + + + + + + + + + + + + diff --git a/IssueLabeler.sln b/IssueLabeler/IssueLabeler.sln similarity index 58% rename from IssueLabeler.sln rename to IssueLabeler/IssueLabeler.sln index b412f13..14cbb47 100644 --- a/IssueLabeler.sln +++ b/IssueLabeler/IssueLabeler.sln @@ -3,6 +3,8 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.0.31903.59 MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "src\Common\Common.csproj", "{3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}" +EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Downloader", "src\Downloader\Downloader.csproj", "{AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Trainer", "src\Trainer\Trainer.csproj", "{F1FE4054-C44E-487F-90F9-2F111AB7BD9C}" @@ -13,14 +15,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tester", "src\Tester\Tester EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GitHubClient", 
"src\GitHubClient\GitHubClient.csproj", "{57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "1. Downloader", "1. Downloader", "{02EA681E-C7D8-13C7-8484-4AC65E1B71E8}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "2. Trainer", "2. Trainer", "{871B398D-3AB6-4F8B-9BC8-64646BDA0B75}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "3. Tester", "3. Tester", "{48C9A18F-FB08-41D5-9832-492AEFF6B2B2}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "4. Predictor", "4. Predictor", "{79FFE9CC-3518-4A4E-8FAB-DB121EE93AB8}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{A2C54AC3-3D94-4CD3-885E-D1892063CC58}" ProjectSection(SolutionItems) = preProject .github\copilot-instructions.md = .github\copilot-instructions.md @@ -28,27 +22,16 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".github", ".github", "{A2C5 EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{43E392F7-70F3-471D-A96A-E413E4387CA6}" ProjectSection(SolutionItems) = preProject - .github\workflows\build-predictor.yml = .github\workflows\build-predictor.yml - .github\workflows\build.yml = .github\workflows\build.yml - .github\workflows\download-issues.yml = .github\workflows\download-issues.yml - .github\workflows\download-pulls.yml = .github\workflows\download-pulls.yml - .github\workflows\labeler-build-predictor.yml = .github\workflows\labeler-build-predictor.yml - .github\workflows\labeler-predict-issues.yml = .github\workflows\labeler-predict-issues.yml - .github\workflows\labeler-predict-pulls.yml = .github\workflows\labeler-predict-pulls.yml - .github\workflows\labeler-promote.yml = .github\workflows\labeler-promote.yml - .github\workflows\labeler-train.yml = .github\workflows\labeler-train.yml - .github\workflows\predict-issues.yml = .github\workflows\predict-issues.yml - .github\workflows\predict-pulls.yml = 
.github\workflows\predict-pulls.yml - .github\workflows\promote-issues.yml = .github\workflows\promote-issues.yml - .github\workflows\promote-pulls.yml = .github\workflows\promote-pulls.yml - .github\workflows\test-issues.yml = .github\workflows\test-issues.yml - .github\workflows\test-pulls.yml = .github\workflows\test-pulls.yml - .github\workflows\train-issues.yml = .github\workflows\train-issues.yml - .github\workflows\train-pulls.yml = .github\workflows\train-pulls.yml - .github\workflows\train.yml = .github\workflows\train.yml + ..\.github\workflows\build.yml = ..\.github\workflows\build.yml + ..\.github\workflows\labeler-cache-retention.yml = ..\.github\workflows\labeler-cache-retention.yml + ..\.github\workflows\labeler-predict-issues.yml = ..\.github\workflows\labeler-predict-issues.yml + ..\.github\workflows\labeler-predict-pulls.yml = ..\.github\workflows\labeler-predict-pulls.yml + ..\.github\workflows\labeler-promote.yml = ..\.github\workflows\labeler-promote.yml + ..\.github\workflows\labeler-train.yml = ..\.github\workflows\labeler-train.yml + ..\.github\workflows\release.yml = ..\.github\workflows\release.yml EndProjectSection EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Shared.Tests", "tests\Shared.Tests\Shared.Tests.csproj", "{DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Tests", "tests\Common.Tests\Common.Tests.csproj", "{D3F816D3-5CAE-4CF1-8977-F92AE96B481B}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -56,6 +39,10 @@ Global Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Release|Any CPU.Build.0 
= Release|Any CPU {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Debug|Any CPU.Build.0 = Debug|Any CPU {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -76,19 +63,15 @@ Global {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Debug|Any CPU.Build.0 = Debug|Any CPU {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Release|Any CPU.ActiveCfg = Release|Any CPU {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Release|Any CPU.Build.0 = Release|Any CPU - {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Debug|Any CPU.Build.0 = Debug|Any CPU - {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Release|Any CPU.ActiveCfg = Release|Any CPU - {DCE6AA73-1E8A-4EB1-989C-235C11E5ECA4}.Release|Any CPU.Build.0 = Release|Any CPU + {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D3F816D3-5CAE-4CF1-8977-F92AE96B481B}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution - {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C} = {02EA681E-C7D8-13C7-8484-4AC65E1B71E8} - {F1FE4054-C44E-487F-90F9-2F111AB7BD9C} = {871B398D-3AB6-4F8B-9BC8-64646BDA0B75} - {2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E} = {79FFE9CC-3518-4A4E-8FAB-DB121EE93AB8} - {BEA133F4-5686-49DF-83E4-641C26B3CC25} = {48C9A18F-FB08-41D5-9832-492AEFF6B2B2} {43E392F7-70F3-471D-A96A-E413E4387CA6} = {A2C54AC3-3D94-4CD3-885E-D1892063CC58} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution diff --git a/NuGet.config b/IssueLabeler/NuGet.config similarity index 100% rename from NuGet.config rename to IssueLabeler/NuGet.config diff --git a/IssueLabeler/src/Common/App.cs 
/// <summary>
/// Runs an application's tasks, reporting any failure to the GitHub action's log and summary
/// instead of letting the exception escape.
/// </summary>
public static class App
{
    /// <summary>
    /// Runs a list of tasks, catching and handling exceptions by logging them to the action's output and summary.
    /// </summary>
    /// <remarks>Upon completion, the persistent summary is written.</remarks>
    /// <param name="tasks">The list of tasks to run, waiting for all tasks to complete.</param>
    /// <param name="action">The GitHub action service.</param>
    /// <returns>A boolean indicating whether all tasks were completed successfully.</returns>
    public async static Task<bool> RunTasks(List<Task> tasks, ICoreService action)
    {
        var allTasks = Task.WhenAll(tasks);
        var success = await RunTasks(allTasks, action);

        return success;
    }

    /// <summary>
    /// Runs a list of tasks, catching and handling exceptions by logging them to the action's output and summary.
    /// </summary>
    /// <typeparam name="TResult">The Task result type.</typeparam>
    /// <param name="tasks">The list of tasks to run, waiting for all tasks to complete.</param>
    /// <param name="action">The GitHub action service.</param>
    /// <returns>
    /// A tuple containing the results of the tasks and a boolean indicating whether all tasks were completed successfully.
    /// When any task failed, the results array is empty.
    /// </returns>
    public async static Task<(TResult[], bool)> RunTasks<TResult>(List<Task<TResult>> tasks, ICoreService action)
    {
        var allTasks = Task.WhenAll(tasks);
        var success = await RunTasks(allTasks, action);

        // Reading Result from a faulted or canceled task rethrows the failure that was
        // just logged and handled, so only touch it when every task succeeded.
        var results = success ? allTasks.Result : Array.Empty<TResult>();

        return (results, success);
    }

    /// <summary>
    /// Runs a single task, catching and handling exceptions by logging them to the action's output and summary.
    /// </summary>
    /// <param name="task">The task to run, waiting for it to complete.</param>
    /// <param name="action">The GitHub action service.</param>
    /// <returns>A boolean indicating whether the task was completed successfully.</returns>
    private async static Task<bool> RunTasks(Task task, ICoreService action)
    {
        var success = false;

        try
        {
            // Await rather than Wait() so the calling thread is not blocked while the work runs
            await task;
            success = true;
        }
        catch (Exception ex)
        {
            // Awaiting surfaces only the first failure; the task's own AggregateException
            // describes every failure. It is null when the task was canceled.
            var message = task.Exception?.Message ?? ex.Message;

            action.WriteError($"Exception occurred: {message}");

            action.Summary.AddPersistent(summary =>
            {
                summary.AddAlert("Exception occurred", AlertType.Caution);
                summary.AddNewLine();
                summary.AddNewLine();
                summary.AddMarkdownCodeBlock(message);
            });
        }

        await action.Summary.WritePersistentAsync();
        return success;
    }
}
/// <summary>
/// Retrieves and validates input values, reading them either from the GitHub action's inputs
/// or from a queue of command-line arguments.
/// </summary>
public class ArgUtils
{
    private ICoreService action;
    private Action<string?> showUsage;
    private Queue<string>? arguments { get; }

    /// <summary>
    /// Create an arguments utility class instance for a GitHub action, with input values retrieved from the GitHub action.
    /// </summary>
    /// <param name="action">The GitHub action service.</param>
    /// <param name="showUsage">A method to show usage information for the application.</param>
    public ArgUtils(ICoreService action, Action<string?, ICoreService> showUsage)
    {
        this.action = action;
        this.showUsage = message => showUsage(message, action);
    }

    /// <summary>
    /// Create an arguments utility class instance for a GitHub action, with input values retrieved from a queue of command-line arguments.
    /// </summary>
    /// <param name="action">The GitHub action service.</param>
    /// <param name="showUsage">A method to show usage information for the application.</param>
    /// <param name="arguments">The queue of command-line arguments to extract argument values from.</param>
    public ArgUtils(ICoreService action, Action<string?, ICoreService> showUsage, Queue<string> arguments) : this(action, showUsage)
    {
        this.arguments = arguments;
    }

    /// <summary>
    /// Gets the input string for the specified input.
    /// </summary>
    /// <remarks>
    /// When running as a GitHub action, this method will retrieve the input value from the action's inputs.
    /// When using the constructor with a queue of command-line arguments, this method will dequeue the next argument from the queue.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <returns>A nullable string containing the input value if retrieved, or <c>null</c> if there is no value specified.</returns>
    private string? GetInputString(string inputName)
    {
        string? input = null;

        if (arguments is not null)
        {
            if (arguments.TryDequeue(out string? argValue))
            {
                input = argValue;
            }
        }
        else
        {
            input = action.GetInput(inputName);
        }

        return string.IsNullOrWhiteSpace(input) ? null : input;
    }

    /// <summary>
    /// Splits a single <c>{org}/{repo}</c> value into its two parts.
    /// </summary>
    /// <param name="orgRepo">The value to split.</param>
    /// <param name="org">The organization name when the value is well-formed.</param>
    /// <param name="repo">The repository name when the value is well-formed.</param>
    /// <returns><c>true</c> only when the value has exactly two non-empty parts separated by a single '/'.</returns>
    private static bool TrySplitOrgRepo(string orgRepo, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out string? repo)
    {
        string[] parts = orgRepo.Split('/', StringSplitOptions.TrimEntries);

        if (parts.Length == 2 && parts[0].Length > 0 && parts[1].Length > 0)
        {
            org = parts[0];
            repo = parts[1];
            return true;
        }

        org = null;
        repo = null;
        return false;
    }

    /// <summary>
    /// Parses an unsigned number using the invariant culture, so results do not depend on the machine's regional settings.
    /// </summary>
    /// <param name="text">The text to parse.</param>
    /// <param name="number">The parsed number when successful.</param>
    /// <returns><c>true</c> if the text was parsed successfully, <c>false</c> otherwise.</returns>
    private static bool TryParseNumber(string text, out ulong number) =>
        ulong.TryParse(text, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out number);

    /// <summary>
    /// Try to get a string input value, guarding against null values.
    /// </summary>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="value">The output string value if retrieved, or <c>null</c> if there is no value specified or it was empty.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetString(string inputName, [NotNullWhen(true)] out string? value)
    {
        value = GetInputString(inputName);
        return value is not null;
    }

    /// <summary>
    /// Determine if the specified flag is provided and set to <c>true</c>.
    /// </summary>
    /// <param name="inputName">The name of the flag to retrieve.</param>
    /// <param name="value"><c>true</c> if the flag is provided and set to <c>true</c>, <c>false</c> otherwise.</param>
    /// <returns>A boolean indicating if the flag was checked successfully, only returning <c>false</c> if specified as an invalid value.</returns>
    public bool TryGetFlag(string inputName, [NotNullWhen(true)] out bool? value)
    {
        string? input = GetInputString(inputName);

        // An omitted flag is a valid "false", not an error
        if (input is null)
        {
            value = false;
            return true;
        }

        if (!bool.TryParse(input, out bool parsedValue))
        {
            showUsage($"Input '{inputName}' must be 'true', 'false', 'TRUE', or 'FALSE'.");
            value = null;
            return false;
        }

        value = parsedValue;
        return true;
    }

    /// <summary>
    /// Try to get the GitHub repository name from the input or environment variable.
    /// </summary>
    /// <remarks>
    /// Defaults to the GITHUB_REPOSITORY environment variable if the input is not specified.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="org">The GitHub organization name, extracted from the specified {org}/{repo} value.</param>
    /// <param name="repo">The GitHub repository name, extracted from the specified {org}/{repo} value.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetRepo(string inputName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out string? repo)
    {
        string? orgRepo = GetInputString(inputName) ?? Environment.GetEnvironmentVariable("GITHUB_REPOSITORY");

        // Values such as "org/", "/repo" or "a/b/c" are rejected rather than producing empty or truncated names
        if (orgRepo is null || !TrySplitOrgRepo(orgRepo, out org, out repo))
        {
            showUsage($$"""Input '{{inputName}}' has an empty value or is not in the format of '{org}/{repo}'. Value defaults to GITHUB_REPOSITORY environment variable if not specified.""");
            org = null;
            repo = null;
            return false;
        }

        return true;
    }

    /// <summary>
    /// Try to get the GitHub repository list from the input or environment variable.
    /// </summary>
    /// <remarks>
    /// Defaults to the GITHUB_REPOSITORY environment variable if the input is not specified.
    /// All repositories must be from the same organization; organization names are compared without regard to case.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="org">The GitHub organization name, extracted from the specified {org}/{repo} values.</param>
    /// <param name="repos">The list of GitHub repository names, extracted from the specified {org}/{repo} values.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetRepoList(string inputName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out List<string>? repos)
    {
        // The outputs stay null on every failure path; they are only assigned once the whole list is valid
        org = null;
        repos = null;

        string? orgRepos = GetInputString(inputName) ?? Environment.GetEnvironmentVariable("GITHUB_REPOSITORY");

        if (orgRepos is null)
        {
            showUsage($$"""Input '{{inputName}}' has an empty value or is not in the format of '{org}/{repo}'. Value defaults to GITHUB_REPOSITORY environment variable if not specified.""");
            return false;
        }

        string? commonOrg = null;
        List<string> repoNames = [];

        foreach (var orgRepo in orgRepos.Split(',', StringSplitOptions.TrimEntries))
        {
            if (!TrySplitOrgRepo(orgRepo, out string? entryOrg, out string? entryRepo))
            {
                showUsage($$"""Input '{{inputName}}' contains a value that is not in the format of '{org}/{repo}': {{orgRepo}}""");
                return false;
            }

            if (commonOrg is not null && !string.Equals(commonOrg, entryOrg, StringComparison.OrdinalIgnoreCase))
            {
                showUsage($"All '{inputName}' values must be from the same org.");
                return false;
            }

            commonOrg ??= entryOrg;
            repoNames.Add(entryRepo);
        }

        org = commonOrg;
        repos = repoNames;
        return (org is not null && repos is not null);
    }

    /// <summary>
    /// Try to get the label prefix from the input.
    /// </summary>
    /// <remarks>
    /// The label prefix must end with a non-alphanumeric character.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="labelPredicate">The label predicate function that checks if a label starts with the specified prefix.</param>
    /// <returns><c>true</c> if the label prefix was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetLabelPrefix(string inputName, [NotNullWhen(true)] out Func<string, bool>? labelPredicate)
    {
        string? labelPrefix = GetInputString(inputName);

        if (labelPrefix is null)
        {
            labelPredicate = null;
            return false;
        }

        // Require that the label prefix end in something other than a letter or number
        // This promotes the pattern of prefixes that are clear, rather than a prefix that
        // could be matched as the beginning of another word in the label
        if (char.IsAsciiLetterOrDigit(labelPrefix[^1]))
        {
            showUsage($"""
                Input '{inputName}' must end in a non-alphanumeric character.

                The recommended label prefix terminating character is '-'.
                The recommended label prefix for applying area labels is 'area-'.
                """);

            labelPredicate = null;
            return false;
        }

        labelPredicate = (label) => label.StartsWith(labelPrefix, StringComparison.OrdinalIgnoreCase);
        return true;
    }

    /// <summary>
    /// Try to get a file path from the input.
    /// </summary>
    /// <remarks>
    /// The file path is converted to an absolute path if it is not already absolute.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="path">The output file path if retrieved, or <c>null</c> if there is no value specified.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetPath(string inputName, [NotNullWhen(true)] out string? path)
    {
        path = GetInputString(inputName);

        if (path is null)
        {
            return false;
        }

        if (!Path.IsPathRooted(path))
        {
            path = Path.GetFullPath(path);
        }

        return true;
    }

    /// <summary>
    /// Try to get a string array from the input.
    /// </summary>
    /// <remarks>
    /// The string array is split by commas and trimmed of whitespace.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="values">The output string array if retrieved, or <c>null</c> if there is no value specified.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetStringArray(string inputName, [NotNullWhen(true)] out string[]? values)
    {
        string? input = GetInputString(inputName);

        if (input is null)
        {
            values = null;
            return false;
        }

        values = input.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
        return true;
    }

    /// <summary>
    /// Try to get an integer from the input.
    /// </summary>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="value">The output integer value if retrieved, or <c>null</c> if there is no value specified.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetInt(string inputName, [NotNullWhen(true)] out int? value) =>
        TryParseInt(inputName, GetInputString(inputName), out value);

    /// <summary>
    /// Try to parse an integer from the input string.
    /// </summary>
    /// <param name="inputName">The name of the input, used in the usage message.</param>
    /// <param name="input">The input string to parse.</param>
    /// <param name="value">The output integer value if parsed successfully, or <c>null</c> if the input is invalid.</param>
    /// <returns><c>true</c> if the input value was parsed successfully, <c>false</c> otherwise.</returns>
    private bool TryParseInt(string inputName, string? input, [NotNullWhen(true)] out int? value)
    {
        // Parse with the invariant culture so the same input is accepted on every machine
        if (input is null ||
            !int.TryParse(input, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out int parsedValue))
        {
            showUsage($"Input '{inputName}' must be an integer.");
            value = null;
            return false;
        }

        value = parsedValue;
        return true;
    }

    /// <summary>
    /// Try to get an integer array from the input.
    /// </summary>
    /// <remarks>
    /// The input is split by commas. Parsing stops at the first invalid element, which is reported once.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="values">The output integer array if retrieved, or <c>null</c> if there is no value specified.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetIntArray(string inputName, [NotNullWhen(true)] out int[]? values)
    {
        values = null;

        string? input = GetInputString(inputName);

        if (input is null)
        {
            return false;
        }

        string[] inputValues = input.Split(',');
        int[] parsedValues = new int[inputValues.Length];

        for (int index = 0; index < inputValues.Length; index++)
        {
            if (!TryParseInt(inputName, inputValues[index], out int? parsedValue))
            {
                return false;
            }

            parsedValues[index] = parsedValue.Value;
        }

        values = parsedValues;
        return true;
    }

    /// <summary>
    /// Try to get a float from the input.
    /// </summary>
    /// <remarks>
    /// The value is parsed with the invariant culture, so '.' is always the decimal separator.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="value">The output float value if retrieved, or <c>null</c> if there is no value specified.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetFloat(string inputName, [NotNullWhen(true)] out float? value)
    {
        string? input = GetInputString(inputName);

        // Same number styles as the default float.TryParse overload, but culture-independent:
        // under a comma-decimal culture "0.4" would otherwise be rejected or misread.
        var styles = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

        if (input is null || !float.TryParse(input, styles, System.Globalization.CultureInfo.InvariantCulture, out float parsedValue))
        {
            showUsage($"Input '{inputName}' must be a decimal value.");
            value = null;
            return false;
        }

        value = parsedValue;
        return true;
    }

    /// <summary>
    /// Try to get a list of number ranges from the input.
    /// </summary>
    /// <remarks>
    /// The input is a comma-separated list of numbers and/or dash-separated ranges.
    /// A range whose begin is greater than its end is reported as invalid.
    /// </remarks>
    /// <param name="inputName">The name of the input to retrieve.</param>
    /// <param name="values">The output list of ulong values if retrieved, or <c>null</c> if there is no value specified.</param>
    /// <returns><c>true</c> if the input value was retrieved successfully, <c>false</c> otherwise.</returns>
    public bool TryGetNumberRanges(string inputName, [NotNullWhen(true)] out List<ulong>? values)
    {
        values = null;

        string? input = GetInputString(inputName);

        if (input is null)
        {
            return false;
        }

        List<ulong> numbers = [];

        foreach (var range in input.Split(','))
        {
            var beginEnd = range.Split('-');

            // A single number is treated as a range that begins and ends on itself
            if (beginEnd.Length > 2 ||
                !TryParseNumber(beginEnd[0], out ulong begin) ||
                !TryParseNumber(beginEnd[^1], out ulong end) ||
                begin > end)
            {
                showUsage($"Input '{inputName}' must be comma-separated list of numbers and/or dash-separated ranges. Example: 1-3,5,7-9.");
                return false;
            }

            // Test for the end of the range inside the body: a 'number <= end' loop condition
            // can never become false when end is ulong.MaxValue because number wraps to zero.
            for (ulong number = begin; ; number++)
            {
                numbers.Add(number);

                if (number == end)
                {
                    break;
                }
            }
        }

        values = numbers;
        return true;
    }
}
/// <summary>
/// Extension methods that keep a replayable record of summary content, so the GitHub action
/// summary can be cleared and rewritten without losing earlier output.
/// </summary>
public static class GitHubActionSummary
{
    private static List<Action<Summary>> persistentSummaryWrites = [];

    /// <summary>
    /// Emits content to the summary right away and records the write so it is replayed
    /// each time the persistent summary is rewritten.
    /// </summary>
    /// <param name="summary">The GitHub action summary.</param>
    /// <param name="writeToSummary">The callback that adds content to the summary.</param>
    public static void AddPersistent(this Summary summary, Action<Summary> writeToSummary)
    {
        persistentSummaryWrites.Add(writeToSummary);
        writeToSummary(summary);
    }

    /// <summary>
    /// Logs a status message and rewrites the summary with that status at the top,
    /// followed by every recorded persistent write.
    /// </summary>
    /// <param name="action">The GitHub action service.</param>
    /// <param name="message">The status message to write.</param>
    /// <returns>The async task.</returns>
    public static async Task WriteStatusAsync(this ICoreService action, string message)
    {
        action.WriteInfo(message);
        await action.Summary.WritePersistentAsync(WriteStatusSection);

        // Writes the status block, plus a heading for the replayed content when there is any
        void WriteStatusSection(Summary target)
        {
            target.AddMarkdownHeading("Status", 3);
            target.AddRaw(message);

            if (persistentSummaryWrites.Any())
            {
                target.AddMarkdownHeading("Results", 3);
            }
        }
    }

    /// <summary>
    /// Clears the summary, optionally writes a status section, replays every recorded
    /// persistent write, and then writes the summary out.
    /// </summary>
    /// <param name="summary">The GitHub action summary.</param>
    /// <param name="writeStatus">An optional callback that writes a status section ahead of the replayed content.</param>
    /// <returns>The async task.</returns>
    public static async Task WritePersistentAsync(this Summary summary, Action<Summary>? writeStatus = null)
    {
        await summary.ClearAsync();

        writeStatus?.Invoke(summary);

        foreach (var replay in persistentSummaryWrites)
        {
            replay(summary);
        }

        await summary.WriteAsync();
    }
}
ExcludedAuthors { get; set; } + public Predicate LabelPredicate { get; set; } + public bool Verbose { get; set; } + + static void ShowUsage(string? message, ICoreService action) + { + action.WriteNotice($$""" + ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}} + + Required environment variables: + GITHUB_TOKEN GitHub token to be used for API calls. + + Required arguments: + --repo The GitHub repositories in format org/repo (comma separated for multiple). + --label-prefix Prefix for label predictions. Must end with a character other than a letter or number. + + Required for downloading issue data: + --issues-data Path for issue data file to create (TSV file). + + Required for downloading pull request data: + --pulls-data Path for pull request data file to create (TSV file). + + Optional arguments: + --issues-limit Maximum number of issues to download. Defaults to: No limit. + --pulls-limit Maximum number of pull requests to download. Defaults to: No limit. + --page-size Number of items per page in GitHub API requests. + --page-limit Maximum number of pages to retrieve. + --excluded-authors Comma-separated list of authors to exclude. + --retries Comma-separated retry delays in seconds. + Defaults to: 30,30,300,300,3000,3000. + --verbose Enable verbose output. + """); + + Environment.Exit(1); + } + + public static Args? Parse(string[] args, ICoreService action) + { + Queue arguments = new(args); + ArgUtils argUtils = new(action, ShowUsage, arguments); + + Args argsData = new() + { + Retries = [30, 30, 300, 300, 3000, 3000] + }; + + if (string.IsNullOrEmpty(argsData.GitHubToken)) + { + ShowUsage("Environment variable GITHUB_TOKEN is empty.", action); + return null; + } + + while (arguments.Count > 0) + { + string argument = arguments.Dequeue(); + + switch (argument) + { + case "--repo": + if (!argUtils.TryGetRepoList("--repo", out string? org, out List? 
repos)) + { + return null; + } + argsData.Org = org; + argsData.Repos = repos; + break; + + case "--label-prefix": + if (!argUtils.TryGetLabelPrefix("--label-prefix", out Func? labelPredicate)) + { + return null; + } + argsData.LabelPredicate = new(labelPredicate); + break; + + case "--excluded-authors": + if (!argUtils.TryGetStringArray("--excluded-authors", out string[]? excludedAuthors)) + { + return null; + } + argsData.ExcludedAuthors = excludedAuthors; + break; + + case "--issues-data": + if (!argUtils.TryGetPath("--issues-data", out string? IssuesDataPath)) + { + return null; + } + argsData.IssuesDataPath = IssuesDataPath; + break; + + case "--issues-limit": + if (!argUtils.TryGetInt("--issues-limit", out int? IssuesLimit)) + { + return null; + } + argsData.IssuesLimit = IssuesLimit; + break; + + case "--pulls-data": + if (!argUtils.TryGetPath("--pulls-data", out string? PullsDataPath)) + { + return null; + } + argsData.PullsDataPath = PullsDataPath; + break; + + case "--pulls-limit": + if (!argUtils.TryGetInt("--pulls-limit", out int? PullsLimit)) + { + return null; + } + argsData.PullsLimit = PullsLimit; + break; + + case "--page-size": + if (!argUtils.TryGetInt("--page-size", out int? pageSize)) + { + return null; + } + argsData.PageSize = pageSize; + break; + + case "--page-limit": + if (!argUtils.TryGetInt("--page-limit", out int? pageLimit)) + { + return null; + } + argsData.PageLimit = pageLimit; + break; + + case "--retries": + if (!argUtils.TryGetIntArray("--retries", out int[]? 
retries)) + { + return null; + } + argsData.Retries = retries; + break; + + case "--verbose": + argsData.Verbose = true; + break; + + default: + ShowUsage($"Unrecognized argument: {argument}", action); + return null; + } + } + + if (argsData.Org is null || argsData.Repos is null || argsData.LabelPredicate is null || + (argsData.IssuesDataPath is null && argsData.PullsDataPath is null)) + { + ShowUsage(null, action); + return null; + } + + return argsData; + } +} diff --git a/src/Downloader/Downloader.cs b/IssueLabeler/src/Downloader/Downloader.cs similarity index 59% rename from src/Downloader/Downloader.cs rename to IssueLabeler/src/Downloader/Downloader.cs index 5e68173..4e4df1c 100644 --- a/src/Downloader/Downloader.cs +++ b/IssueLabeler/src/Downloader/Downloader.cs @@ -1,33 +1,39 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using static DataFileUtils; +using Actions.Core.Extensions; +using Actions.Core.Services; using GitHubClient; +using Microsoft.Extensions.DependencyInjection; +using static DataFileUtils; -if (Args.Parse(args) is not Args argsData) -{ - return; -} +using var provider = new ServiceCollection() + .AddGitHubActionsCore() + .BuildServiceProvider(); + +var action = provider.GetRequiredService(); +if (Args.Parse(args, action) is not Args argsData) return 1; List tasks = []; -if (!string.IsNullOrEmpty(argsData.IssueDataPath)) +if (!string.IsNullOrEmpty(argsData.IssuesDataPath)) { - EnsureOutputDirectory(argsData.IssueDataPath); - tasks.Add(Task.Run(() => DownloadIssues(argsData.IssueDataPath))); + EnsureOutputDirectory(argsData.IssuesDataPath); + tasks.Add(Task.Run(() => DownloadIssues(argsData.IssuesDataPath))); } -if (!string.IsNullOrEmpty(argsData.PullDataPath)) +if (!string.IsNullOrEmpty(argsData.PullsDataPath)) { - EnsureOutputDirectory(argsData.PullDataPath); - tasks.Add(Task.Run(() => DownloadPullRequests(argsData.PullDataPath))); + 
EnsureOutputDirectory(argsData.PullsDataPath); + tasks.Add(Task.Run(() => DownloadPullRequests(argsData.PullsDataPath))); } -await Task.WhenAll(tasks); +var success = await App.RunTasks(tasks, action); +return success ? 0 : 1; async Task DownloadIssues(string outputPath) { - Console.WriteLine($"Issues Data Path: {outputPath}"); + action.WriteInfo($"Issues Data Path: {outputPath}"); byte perFlushCount = 0; @@ -36,9 +42,9 @@ async Task DownloadIssues(string outputPath) foreach (var repo in argsData.Repos) { - await foreach (var result in GitHubApi.DownloadIssues(argsData.GithubToken, argsData.Org, repo, argsData.LabelPredicate, - argsData.IssueLimit, argsData.PageSize ?? 100, argsData.PageLimit ?? 1000, - argsData.Retries, argsData.ExcludedAuthors ?? [], argsData.Verbose)) + await foreach (var result in GitHubApi.DownloadIssues(argsData.GitHubToken, argsData.Org, repo, argsData.LabelPredicate, + argsData.IssuesLimit, argsData.PageSize, argsData.PageLimit, + argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose)) { writer.WriteLine(FormatIssueRecord(result.Label, result.Issue.Title, result.Issue.Body)); @@ -55,7 +61,7 @@ async Task DownloadIssues(string outputPath) async Task DownloadPullRequests(string outputPath) { - Console.WriteLine($"Pulls Data Path: {outputPath}"); + action.WriteInfo($"Pulls Data Path: {outputPath}"); byte perFlushCount = 0; @@ -64,9 +70,9 @@ async Task DownloadPullRequests(string outputPath) foreach (var repo in argsData.Repos) { - await foreach (var result in GitHubApi.DownloadPullRequests(argsData.GithubToken, argsData.Org, repo, argsData.LabelPredicate, - argsData.PullLimit, argsData.PageSize ?? 25, argsData.PageLimit ?? 4000, - argsData.Retries, argsData.ExcludedAuthors ?? 
[], argsData.Verbose)) + await foreach (var result in GitHubApi.DownloadPullRequests(argsData.GitHubToken, argsData.Org, repo, argsData.LabelPredicate, + argsData.PullsLimit, argsData.PageSize, argsData.PageLimit, + argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose)) { writer.WriteLine(FormatPullRequestRecord(result.Label, result.PullRequest.Title, result.PullRequest.Body, result.PullRequest.FileNames, result.PullRequest.FolderNames)); diff --git a/src/Downloader/Downloader.csproj b/IssueLabeler/src/Downloader/Downloader.csproj similarity index 57% rename from src/Downloader/Downloader.csproj rename to IssueLabeler/src/Downloader/Downloader.csproj index 497184a..8074961 100644 --- a/src/Downloader/Downloader.csproj +++ b/IssueLabeler/src/Downloader/Downloader.csproj @@ -7,15 +7,17 @@ - + - + + - + + diff --git a/IssueLabeler/src/GitHubClient/GitHubApi.cs b/IssueLabeler/src/GitHubClient/GitHubApi.cs new file mode 100644 index 0000000..2f5d5b3 --- /dev/null +++ b/IssueLabeler/src/GitHubClient/GitHubApi.cs @@ -0,0 +1,573 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Services; +using GraphQL; +using GraphQL.Client.Http; +using GraphQL.Client.Serializer.SystemTextJson; +using System.Collections.Concurrent; +using System.Net.Http.Json; + +namespace GitHubClient; + +public class GitHubApi +{ + private static ConcurrentDictionary _graphQLClients = new(); + private static ConcurrentDictionary _restClients = new(); + private const int MaxLabelDelaySeconds = 30; + + /// + /// Gets or creates a GraphQL client for the GitHub API using the provided token. + /// + /// The timeout is set to 2 minutes and the client is cached for reuse. + /// The GitHub token to use for authentication. + /// A GraphQLHttpClient instance configured with the provided token and necessary headers. 
+ private static GraphQLHttpClient GetGraphQLClient(string githubToken) => + _graphQLClients.GetOrAdd(githubToken, token => + { + GraphQLHttpClient client = new("https://api.github.com/graphql", new SystemTextJsonSerializer()); + client.HttpClient.DefaultRequestHeaders.Authorization = + new System.Net.Http.Headers.AuthenticationHeaderValue( + scheme: "bearer", + parameter: token); + + client.HttpClient.Timeout = TimeSpan.FromMinutes(2); + + return client; + }); + + /// + /// Gets or creates a REST client for the GitHub API using the provided token. + /// + /// The client is cached for reuse. + /// The GitHub token to use for authentication. + /// An HttpClient instance configured with the provided token and necessary headers. + private static HttpClient GetRestClient(string githubToken) => + _restClients.GetOrAdd(githubToken, token => + { + HttpClient client = new(); + client.DefaultRequestHeaders.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue( + scheme: "bearer", + parameter: token); + client.DefaultRequestHeaders.Accept.Add(new("application/vnd.github+json")); + client.DefaultRequestHeaders.Add("X-GitHub-Api-Version", "2022-11-28"); + client.DefaultRequestHeaders.Add("User-Agent", "Issue-Labeler"); + + return client; + }); + + /// + /// Downloads issues from a GitHub repository, filtering them by label and other criteria. + /// + /// The GitHub token to use for authentication. + /// The GitHub organization name. + /// The GitHub repository name. + /// A predicate function to filter labels. + /// The maximum number of issues to download. + /// The number of items per page in GitHub API requests. + /// The maximum number of pages to retrieve. + /// An array of retry delays in seconds. + /// An array of authors to exclude from the results. + /// The GitHub action service. + /// Emit verbose output into the action log. 
+    /// <returns>The downloaded issues as an async enumerable collection of tuples containing the issue and its predicate-matched label (when only one matching label is found).</returns>
+    // NOTE(review): generic type arguments (on Predicate, DownloadItems, Page, ...) appear to have been
+    // stripped from this copy of the patch; the code tokens below are kept exactly as they appear here.
+    public static async IAsyncEnumerable<(Issue Issue, string Label)> DownloadIssues(
+        string githubToken,
+        string org, string repo,
+        Predicate labelPredicate,
+        int? issuesLimit,
+        int? pageSize,
+        int? pageLimit,
+        int[] retries,
+        string[]? excludedAuthors,
+        ICoreService action,
+        bool verbose = false)
+    {
+        // Issues default to 100 items per page and at most 1000 pages when the caller passes null.
+        await foreach (var item in DownloadItems("issues", githubToken, org, repo, labelPredicate, issuesLimit, pageSize ?? 100, pageLimit ?? 1000, retries, excludedAuthors, action, verbose))
+        {
+            yield return (item.Item, item.Label);
+        }
+    }
+
+    /// <summary>
+    /// Downloads pull requests from a GitHub repository, filtering them by label and other criteria.
+    /// </summary>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="labelPredicate">A predicate function to filter labels.</param>
+    /// <param name="pullsLimit">The maximum number of pull requests to download.</param>
+    /// <param name="pageSize">The number of items per page in GitHub API requests.</param>
+    /// <param name="pageLimit">The maximum number of pages to retrieve.</param>
+    /// <param name="retries">An array of retry delays in seconds.</param>
+    /// <param name="excludedAuthors">An array of authors to exclude from the results.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="verbose">Emit verbose output into the action log.</param>
+    /// <returns>The downloaded pull requests as an async enumerable collection of tuples containing the pull request and its predicate-matched label (when only one matching label is found).</returns>
+    public static async IAsyncEnumerable<(PullRequest PullRequest, string Label)> DownloadPullRequests(
+        string githubToken,
+        string org,
+        string repo,
+        Predicate labelPredicate,
+        int? pullsLimit,
+        int? pageSize,
+        int? pageLimit,
+        int[] retries,
+        string[]? excludedAuthors,
+        ICoreService action,
+        bool verbose = false)
+    {
+        // Pull requests default to 25 items per page and at most 4000 pages when the caller passes null.
+        var items = DownloadItems("pullRequests", githubToken, org, repo, labelPredicate, pullsLimit, pageSize ?? 25, pageLimit ?? 4000, retries, excludedAuthors, action, verbose);
+
+        await foreach (var item in items)
+        {
+            yield return (item.Item, item.Label);
+        }
+    }
+
+    /// <summary>
+    /// Downloads items from a GitHub repository page by page, yielding only the items that have
+    /// exactly one label accepted by <paramref name="labelPredicate"/>.
+    /// </summary>
+    /// <remarks>
+    /// A failed page request is retried once per entry in <paramref name="retries"/>, waiting that
+    /// entry's number of seconds first. When every retry has been used, an
+    /// <see cref="ApplicationException"/> is thrown. Paging stops when there is no next page, when
+    /// <paramref name="pageLimit"/> pages have been read, when <paramref name="itemLimit"/> items have
+    /// been yielded, or when the cursor fails to advance.
+    /// </remarks>
+    /// <param name="itemQueryName">The GraphQL query name for the item type (e.g., "issues" or "pullRequests").</param>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="labelPredicate">A predicate function to filter labels.</param>
+    /// <param name="itemLimit">The maximum number of items to yield, or null for no limit.</param>
+    /// <param name="pageSize">The number of items per page in GitHub API requests. Values above 100 are reduced to 100.</param>
+    /// <param name="pageLimit">The maximum number of pages to retrieve.</param>
+    /// <param name="retries">An array of retry delays in seconds.</param>
+    /// <param name="excludedAuthors">An array of authors to exclude from the results (compared case-insensitively).</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="verbose">Emit verbose output into the action log.</param>
+    /// <returns>The downloaded items as an async enumerable collection of tuples containing the item and its predicate-matched label (when only one matching label is found).</returns>
+    /// <exception cref="ApplicationException">When a page request keeps failing after all retries have been used.</exception>
+    private static async IAsyncEnumerable<(T Item, string Label)> DownloadItems(
+        string itemQueryName,
+        string githubToken,
+        string org,
+        string repo,
+        Predicate labelPredicate,
+        int? itemLimit,
+        int pageSize,
+        int pageLimit,
+        int[] retries,
+        string[]? excludedAuthors,
+        ICoreService action,
+        bool verbose) where T : Issue
+    {
+        pageSize = Math.Min(pageSize, 100);
+
+        string typeNames = typeof(T) == typeof(PullRequest) ? "Pull Requests" : "Issues";
+        string typeName = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
+
+        int pageNumber = 0;
+        string? after = null;
+        bool hasNextPage = true;
+        int loadedCount = 0;
+        int includedCount = 0;
+        int? totalCount = null;
+        byte retry = 0;
+        bool finished = false;
+
+        do
+        {
+            action.WriteInfo($"Downloading {typeNames} page {pageNumber + 1} from {org}/{repo}...{(retry > 0 ? $" (retry {retry} of {retries.Length}) " : "")}{(after is not null ? $" (cursor: '{after}')" : "")}");
+
+            Page page;
+
+            try
+            {
+                page = await GetItemsPage(githubToken, org, repo, pageSize, after, itemQueryName);
+            }
+            catch (Exception ex) when (
+                ex is HttpIOException ||
+                ex is HttpRequestException ||
+                ex is GraphQLHttpRequestException ||
+                ex is TaskCanceledException
+            )
+            {
+                action.WriteInfo($"Exception caught during query.\n {ex.Message}");
+
+                // `retry` counts the retries already used. Abort only once every configured delay has
+                // been consumed; the previous `retries.Length - 1` bound gave up one retry early and
+                // never used the last delay in the array.
+                if (retry >= retries.Length)
+                {
+                    await action.WriteStatusAsync($"Retry limit of {retries.Length} reached. Aborting.");
+
+                    // Report the counters under the labels that match what they actually count.
+                    throw new ApplicationException($"""
+                        Retry limit of {retries.Length} reached. Aborting.
+
+                        {ex.Message}
+
+                        Total Downloaded: {loadedCount}
+                        Applicable for Training: {includedCount}
+                        Page Number: {pageNumber}
+                        """
+                    );
+                }
+                else
+                {
+                    await action.WriteStatusAsync($"Waiting {retries[retry]} seconds before retry {retry + 1} of {retries.Length}...");
+                    await Task.Delay(retries[retry] * 1000);
+                    retry++;
+
+                    // Re-request the same page: `after` and `pageNumber` have not been advanced.
+                    continue;
+                }
+            }
+
+            // A cursor that does not move would make the loop request the same page forever.
+            if (after == page.EndCursor)
+            {
+                action.WriteError($"Paging did not progress. Cursor: '{after}'. Aborting.");
+                break;
+            }
+
+            pageNumber++;
+            after = page.EndCursor;
+            hasNextPage = page.HasNextPage;
+            loadedCount += page.Nodes.Length;
+            totalCount ??= page.TotalCount;
+            retry = 0; // a successful page resets the retry budget
+
+            foreach (T item in page.Nodes)
+            {
+                if (excludedAuthors is not null && item.Author?.Login is not null && excludedAuthors.Contains(item.Author.Login, StringComparer.InvariantCultureIgnoreCase))
+                {
+                    if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Excluded from output. Author '{item.Author.Login}' is in excluded list.");
+                    continue;
+                }
+
+                // If there are more labels, there might be other applicable
+                // labels that were not loaded and the model is incomplete.
+                if (item.Labels.HasNextPage)
+                {
+                    if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Excluded from output. Not all labels were loaded.");
+                    continue;
+                }
+
+                // Only items with exactly one applicable label are used for the model.
+                string[] labels = Array.FindAll(item.LabelNames, labelPredicate);
+                if (labels.Length != 1)
+                {
+                    if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Excluded from output. {labels.Length} applicable labels found.");
+                    continue;
+                }
+
+                // Exactly one applicable label was found on the item. Include it in the model.
+                if (verbose) action.WriteInfo($"{typeName} {org}/{repo}#{item.Number} - Included in output. Applicable label: '{labels[0]}'.");
+
+                yield return (item, labels[0]);
+
+                includedCount++;
+
+                if (itemLimit.HasValue && includedCount >= itemLimit)
+                {
+                    break;
+                }
+            }
+
+            finished = (!hasNextPage || pageNumber >= pageLimit || (itemLimit.HasValue && includedCount >= itemLimit));
+
+            await action.WriteStatusAsync(
+                $"Items to Include: {includedCount} (limit: {(itemLimit.HasValue ? itemLimit : "none")}) | " +
+                $"Items Downloaded: {loadedCount} (total: {totalCount}) | " +
+                $"Pages Downloaded: {pageNumber} (limit: {pageLimit})");
+
+            if (finished)
+            {
+                action.Summary.AddPersistent(summary => {
+                    summary.AddMarkdownHeading($"Finished Downloading {typeNames} from {org}/{repo}", 2);
+                    summary.AddMarkdownList([
+                        $"Items to Include: {includedCount} (limit: {(itemLimit.HasValue ? itemLimit : "none")})",
+                        $"Items Downloaded: {loadedCount} (total: {totalCount})",
+                        $"Pages Downloaded: {pageNumber} (limit: {pageLimit})"
+                    ]);
+                });
+            }
+        }
+        while (!finished);
+    }
+
+    /// <summary>
+    /// Retrieves a page of items from a GitHub repository using GraphQL.
+    /// </summary>
+    /// <typeparam name="T">The type of items to retrieve (e.g., Issue or PullRequest).</typeparam>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="pageSize">The number of items per page in GitHub API requests.</param>
+    /// <param name="after">The cursor for pagination (null for the first page).</param>
+    /// <param name="itemQueryName">The GraphQL query name for the item type (e.g., "issues" or "pullRequests").</param>
+    /// <returns>The page of items retrieved from the GitHub repository.</returns>
+    /// <exception cref="ApplicationException">When the GraphQL request returns errors or the response does not include the expected data.</exception>
+    // NOTE(review): generic type arguments (return type, SendQueryAsync) appear to have been stripped
+    // from this copy of the patch; the code tokens are kept exactly as they appear here.
+    private static async Task> GetItemsPage(string githubToken, string org, string repo, int pageSize, string? after, string itemQueryName) where T : Issue
+    {
+        // Changed file paths are only requested for pull requests.
+        string files = "";
+        if (typeof(T) == typeof(PullRequest))
+        {
+            files = "files (first: 100) { nodes { path } }";
+        }
+
+        var variables = new
+        {
+            Owner = org,
+            Repo = repo,
+            After = after
+        };
+
+        GraphQLRequest request = new GraphQLRequest
+        {
+            Query = $$"""
+                query ($owner: String!, $repo: String!, $after: String) {
+                  repository (owner: $owner, name: $repo) {
+                    result:{{itemQueryName}} (after: $after, first: {{pageSize}}, orderBy: {field: CREATED_AT, direction: DESC}) {
+                      nodes {
+                        number
+                        title
+                        author { login }
+                        body: bodyText
+                        labels (first: 25) {
+                          nodes { name },
+                          pageInfo { hasNextPage }
+                        }
+                        {{files}}
+                      }
+                      pageInfo {
+                        hasNextPage
+                        endCursor
+                      }
+                      totalCount
+                    }
+                  }
+                }
+                """,
+            Variables = variables
+        };
+
+        GraphQLHttpClient graphQL = GetGraphQLClient(githubToken);
+        var response = await graphQL.SendQueryAsync>>(request);
+
+        // Errors reported by the API take precedence over a missing payload.
+        if (response.Errors?.Any() ?? false)
+        {
+            var numbered = response.Errors.Select((error, index) => $"{index + 1}. {error.Message}");
+            string errors = string.Join("\n\n", numbered);
+            throw new ApplicationException($"GraphQL request returned errors.\n\n{errors}");
+        }
+
+        if (response.Data is null || response.Data.Repository is null || response.Data.Repository.Result is null)
+        {
+            throw new ApplicationException("GraphQL response did not include the repository result data");
+        }
+
+        return response.Data.Repository.Result;
+    }
+
+    /// <summary>
+    /// Gets an issue from a GitHub repository using GraphQL.
+    /// </summary>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="number">The issue number.</param>
+    /// <param name="retries">An array of retry delays in seconds.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="verbose">Emit verbose output into the action log.</param>
+    /// <returns>The issue retrieved from the GitHub repository, or null if not found.</returns>
+    // NOTE(review): generic type arguments (Task, GetItem, SendQueryAsync) appear to have been stripped
+    // from this copy of the patch; the code tokens are kept exactly as they appear here.
+    public static async Task GetIssue(string githubToken, string org, string repo, ulong number, int[] retries, ICoreService action, bool verbose) =>
+        await GetItem(githubToken, org, repo, number, retries, verbose, "issue", action);
+
+    /// <summary>
+    /// Gets a pull request from a GitHub repository using GraphQL.
+    /// </summary>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="number">The pull request number.</param>
+    /// <param name="retries">An array of retry delays in seconds.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <param name="verbose">Emit verbose output into the action log.</param>
+    /// <returns>The pull request retrieved from the GitHub repository, or null if not found.</returns>
+    public static async Task GetPullRequest(string githubToken, string org, string repo, ulong number, int[] retries, ICoreService action, bool verbose) =>
+        await GetItem(githubToken, org, repo, number, retries, verbose, "pullRequest", action);
+
+    /// <summary>
+    /// Gets a single issue or pull request from a GitHub repository using GraphQL.
+    /// </summary>
+    /// <remarks>
+    /// The request is sent once and then retried once per entry in <paramref name="retries"/> when it
+    /// fails with a rate-limit error or a network-level exception. Any other GraphQL error, or a
+    /// response without result data, returns null immediately without retrying.
+    /// </remarks>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="number">The issue or pull request number.</param>
+    /// <param name="retries">An array of retry delays in seconds.</param>
+    /// <param name="verbose">Not read by this method.</param>
+    /// <param name="itemQueryName">The GraphQL field name for the item type ("issue" or "pullRequest").</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>The item retrieved from the GitHub repository, or null if it could not be retrieved.</returns>
+    private static async Task GetItem(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose, string itemQueryName, ICoreService action) where T : Issue
+    {
+        GraphQLHttpClient client = GetGraphQLClient(githubToken);
+        string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : "";
+
+        GraphQLRequest query = new GraphQLRequest
+        {
+            Query = $$"""
+                query ($owner: String!, $repo: String!, $number: Int!) {
+                  repository (owner: $owner, name: $repo) {
+                    result:{{itemQueryName}} (number: $number) {
+                      number
+                      title
+                      author { login }
+                      body: bodyText
+                      labels (first: 25) {
+                        nodes { name },
+                        pageInfo { hasNextPage }
+                      }
+                      {{files}}
+                    }
+                  }
+                }
+                """,
+            Variables = new
+            {
+                Owner = org,
+                Repo = repo,
+                Number = number
+            }
+        };
+
+        string typeName = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue";
+
+        // `retry` is the number of retries already used: pass 0 is the initial request and passes
+        // 1..retries.Length are the retries. The previous loop slept for the final delay and then
+        // returned without sending the request that delay was meant to precede, which also left the
+        // "Retry limit ... reached" messages unreachable.
+        for (int retry = 0; retry <= retries.Length; retry++)
+        {
+            try
+            {
+                var response = await client.SendQueryAsync>(query);
+
+                if (!(response.Errors?.Any() ?? false) && response.Data?.Repository?.Result is not null)
+                {
+                    return response.Data.Repository.Result;
+                }
+
+                if (response.Errors?.Any() ?? false)
+                {
+                    // These errors occur when an issue/pull does not exist or when the API rate limit has been exceeded
+                    if (response.Errors.Any(e => e.Message.StartsWith("API rate limit exceeded")))
+                    {
+                        action.WriteInfo($"""
+                            [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                            Rate limit has been reached.
+                            {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                            """);
+                    }
+                    else
+                    {
+                        // Could not detect this as a rate limit issue. Do not retry.
+                        string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. {e.Message}").ToArray());
+
+                        action.WriteInfo($"""
+                            [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                            GraphQL request returned errors:
+
+                            {errors}
+                            """);
+
+                        return null;
+                    }
+                }
+                else
+                {
+                    // Do not retry as these errors are not recoverable
+                    // This is usually a bug during development when the query/response model is incorrect
+                    action.WriteInfo($"""
+                        [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                        GraphQL response did not include the repository result data.
+                        """);
+
+                    return null;
+                }
+            }
+            catch (Exception ex) when (
+                ex is HttpIOException ||
+                ex is HttpRequestException ||
+                ex is GraphQLHttpRequestException ||
+                ex is TaskCanceledException
+            )
+            {
+                // Retry on exceptions as they can be temporary network issues
+                action.WriteInfo($"""
+                    [{typeName} {org}/{repo}#{number}] Failed to retrieve data.
+                    Exception caught during query.
+
+                    {ex.Message}
+
+                    {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                    """);
+            }
+
+            if (retry >= retries.Length)
+            {
+                // Every retry has been used; do not sleep for a retry that will never run.
+                break;
+            }
+
+            await Task.Delay(retries[retry] * 1000);
+        }
+
+        return null;
+    }
+
+    /// <summary>
+    /// Adds a label to an issue or pull request in a GitHub repository.
+    /// </summary>
+    /// <remarks>
+    /// The request is sent once and then retried once per entry in <paramref name="retries"/> while
+    /// the response status is not successful.
+    /// </remarks>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="type">The type of item (e.g., "issue" or "pull request"); used only in log messages.</param>
+    /// <param name="number">The issue or pull request number.</param>
+    /// <param name="label">The label to add.</param>
+    /// <param name="retries">An array of retry delays in seconds. Each delay is capped at MaxLabelDelaySeconds.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A string describing a failure, or null if successful.</returns>
+    public static async Task AddLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries, ICoreService action)
+    {
+        var client = GetRestClient(githubToken);
+
+        // Pass 0 is the initial request; passes 1..retries.Length are the retries.
+        for (int retry = 0; retry <= retries.Length; retry++)
+        {
+            // Dispose each response so its connection and content are released between attempts.
+            using var response = await client.PostAsJsonAsync(
+                $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels",
+                new string[] { label },
+                CancellationToken.None);
+
+            if (response.IsSuccessStatusCode)
+            {
+                return null;
+            }
+
+            action.WriteInfo($"""
+                [{type} {org}/{repo}#{number}] Failed to add label '{label}'. {response.ReasonPhrase} ({response.StatusCode})
+                {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                """);
+
+            if (retry >= retries.Length)
+            {
+                // Every retry has been used; do not sleep for a retry that will never run.
+                break;
+            }
+
+            int delay = Math.Min(retries[retry], MaxLabelDelaySeconds);
+            await Task.Delay(delay * 1000);
+        }
+
+        return $"Failed to add label '{label}' after {retries.Length} retries.";
+    }
+
+    /// <summary>
+    /// Removes a label from an issue or pull request in a GitHub repository.
+    /// </summary>
+    /// <param name="githubToken">The GitHub token to use for authentication.</param>
+    /// <param name="org">The GitHub organization name.</param>
+    /// <param name="repo">The GitHub repository name.</param>
+    /// <param name="type">The type of item (e.g., "issue" or "pull request").</param>
+    /// <param name="number">The issue or pull request number.</param>
+    /// <param name="label">The label to remove.</param>
+    /// <param name="retries">An array of retry delays in seconds. Each delay is capped at MaxLabelDelaySeconds.</param>
+    /// <param name="action">The GitHub action service.</param>
+    /// <returns>A string describing a failure, or null if successful.</returns>
+    public static async Task RemoveLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries, ICoreService action)
+    {
+        var client = GetRestClient(githubToken);
+
+        // The label name is a URL path segment, so characters such as spaces, '/', '?' and '#'
+        // must be percent-encoded or the request targets the wrong resource.
+        string requestUri = $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels/{Uri.EscapeDataString(label)}";
+
+        // Pass 0 is the initial request; passes 1..retries.Length are the retries. The previous loop
+        // slept for the final delay and then returned without sending another request.
+        for (int retry = 0; retry <= retries.Length; retry++)
+        {
+            // Dispose each response so its connection and content are released between attempts.
+            using var response = await client.DeleteAsync(
+                requestUri,
+                CancellationToken.None);
+
+            if (response.IsSuccessStatusCode)
+            {
+                return null;
+            }
+
+            action.WriteInfo($"""
+                [{type} {org}/{repo}#{number}] Failed to remove label '{label}'. {response.ReasonPhrase} ({response.StatusCode})
+                {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")}
+                """);
+
+            if (retry >= retries.Length)
+            {
+                // Every retry has been used; do not sleep for a retry that will never run.
+                break;
+            }
+
+            int delay = Math.Min(retries[retry], MaxLabelDelaySeconds);
+            await Task.Delay(delay * 1000);
+        }
+
+        return $"Failed to remove label '{label}' after {retries.Length} retries.";
+    }
+}
diff --git a/src/GitHubClient/GitHubClient.csproj b/IssueLabeler/src/GitHubClient/GitHubClient.csproj
similarity index 78%
rename from src/GitHubClient/GitHubClient.csproj
rename to IssueLabeler/src/GitHubClient/GitHubClient.csproj
index bd43aac..ce9e773 100644
--- a/src/GitHubClient/GitHubClient.csproj
+++ b/IssueLabeler/src/GitHubClient/GitHubClient.csproj
@@ -6,6 +6,7 @@
+
@@ -15,4 +16,8 @@
+
+
+
+
diff --git a/src/GitHubClient/QueryModel.cs b/IssueLabeler/src/GitHubClient/QueryModel.cs
similarity index 100%
rename from src/GitHubClient/QueryModel.cs
rename to IssueLabeler/src/GitHubClient/QueryModel.cs
diff --git a/IssueLabeler/src/Predictor/Args.cs b/IssueLabeler/src/Predictor/Args.cs
new file mode 100644
index 0000000..24389ce
--- /dev/null
+++ b/IssueLabeler/src/Predictor/Args.cs
@@ -0,0 +1,113 @@
+// Licensed to the
.NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Services; + +public struct Args +{ + public string GitHubToken => Environment.GetEnvironmentVariable("GITHUB_TOKEN")!; + public string Org { get; set; } + public string Repo { get; set; } + public float Threshold { get; set; } + public Func LabelPredicate { get; set; } + public string[]? ExcludedAuthors { get; set; } + public string? IssuesModelPath { get; set; } + public List? Issues { get; set; } + public string? PullsModelPath { get; set; } + public List? Pulls { get; set; } + public string? DefaultLabel { get; set; } + public int[] Retries { get; set; } + public bool Verbose { get; set; } + public bool Test { get; set; } + + static void ShowUsage(string? message, ICoreService action) + { + action.WriteNotice($$""" + ERROR: Invalid or missing inputs.{{(message is null ? "" : " " + message)}} + + Required environment variables: + GITHUB_TOKEN GitHub token to be used for API calls. + + Inputs are specified as ALL_CAPS environment variables prefixed with 'INPUT_'. + + Required inputs: + REPO GitHub repository in the format {org}/{repo}. + Defaults to: GITHUB_REPOSITORY environment variable. + LABEL_PREFIX Prefix for label predictions. + Must end with a non-alphanumeric character. + + Required inputs for predicting issue labels: + ISSUES_MODEL Path to the issue prediction model file (ZIP file). + ISSUES Comma-separated list of issue number ranges. + Example: 1-3,7,5-9. + + Required inputs for predicting pull request labels: + PULLS_MODEL Path to the pull request prediction model file (ZIP file). + PULLS Comma-separated list of pull request number ranges. + Example: 1-3,7,5-9. + + Optional inputs: + THRESHOLD Minimum prediction confidence threshold. Range (0,1]. + Defaults to: 0.4. + DEFAULT_LABEL Label to apply if no label is predicted. + EXCLUDED_AUTHORS Comma-separated list of authors to exclude. 
+ RETRIES Comma-separated retry delays in seconds. + Defaults to: 30,30,300,300,3000,3000. + TEST Run in test mode, outputting predictions without applying labels. + Must be one of: true, false, TRUE, FALSE + VERBOSE Enable verbose output. + Must be one of: true, false, TRUE, FALSE + """); + + Environment.Exit(1); + } + + public static Args? Parse(string[] args, ICoreService action) + { + ArgUtils argUtils = new(action, ShowUsage); + argUtils.TryGetRepo("repo", out var org, out var repo); + argUtils.TryGetLabelPrefix("label_prefix", out var labelPredicate); + argUtils.TryGetPath("issues_model", out var issuesModelPath); + argUtils.TryGetNumberRanges("issues", out var issues); + argUtils.TryGetPath("pulls_model", out var pullsModelPath); + argUtils.TryGetNumberRanges("pulls", out var pulls); + argUtils.TryGetStringArray("excluded_authors", out var excludedAuthors); + argUtils.TryGetFloat("threshold", out var threshold); + argUtils.TryGetIntArray("retries", out var retries); + argUtils.TryGetString("default_label", out var defaultLabel); + argUtils.TryGetFlag("test", out var test); + argUtils.TryGetFlag("verbose", out var verbose); + + if (org is null || repo is null || threshold is null || labelPredicate is null || + (issues is null && pulls is null)) + { + ShowUsage(null, action); + return null; + } + + Args argsData = new() + { + Org = org, + Repo = repo, + LabelPredicate = labelPredicate, + DefaultLabel = defaultLabel, + IssuesModelPath = issuesModelPath, + Issues = issues, + PullsModelPath = pullsModelPath, + Pulls = pulls, + ExcludedAuthors = excludedAuthors, + Threshold = threshold ?? 0.4f, + Retries = retries ?? [30, 30, 300, 300, 3000, 3000], + Test = test ?? false, + Verbose = verbose ?? 
false + }; + + if (string.IsNullOrEmpty(argsData.GitHubToken)) + { + ShowUsage("Environment variable GITHUB_TOKEN is empty.", action); + return null; + } + + return argsData; + } +} diff --git a/src/Predictor/Models.cs b/IssueLabeler/src/Predictor/Models.cs similarity index 100% rename from src/Predictor/Models.cs rename to IssueLabeler/src/Predictor/Models.cs diff --git a/IssueLabeler/src/Predictor/Predictor.cs b/IssueLabeler/src/Predictor/Predictor.cs new file mode 100644 index 0000000..f032487 --- /dev/null +++ b/IssueLabeler/src/Predictor/Predictor.cs @@ -0,0 +1,291 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Extensions; +using Actions.Core.Services; +using Actions.Core.Summaries; +using GitHubClient; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.ML; +using Microsoft.ML.Data; + +using var provider = new ServiceCollection() + .AddGitHubActionsCore() + .BuildServiceProvider(); + +var action = provider.GetRequiredService(); +if (Args.Parse(args, action) is not Args argsData) return 1; + +List> tasks = new(); + +if (argsData.IssuesModelPath is not null && argsData.Issues is not null) +{ + await action.WriteStatusAsync($"Loading prediction engine for issues model..."); + var issueContext = new MLContext(); + var issueModel = issueContext.Model.Load(argsData.IssuesModelPath, out _); + var issuePredictor = issueContext.Model.CreatePredictionEngine(issueModel); + await action.WriteStatusAsync($"Issues prediction engine ready."); + + foreach (ulong issueNumber in argsData.Issues) + { + var result = await GitHubApi.GetIssue(argsData.GitHubToken, argsData.Org, argsData.Repo, issueNumber, argsData.Retries, action, argsData.Verbose); + + if (result is null) + { + action.WriteNotice($"[Issue {argsData.Org}/{argsData.Repo}#{issueNumber}] could not be found or downloaded. 
Skipped."); + continue; + } + + if (argsData.ExcludedAuthors is not null && result.Author?.Login is not null && argsData.ExcludedAuthors.Contains(result.Author.Login, StringComparer.InvariantCultureIgnoreCase)) + { + action.WriteNotice($"[Issue {argsData.Org}/{argsData.Repo}#{issueNumber}] Author '{result.Author.Login}' is in excluded list. Skipped."); + continue; + } + + tasks.Add(Task.Run(() => ProcessPrediction( + issuePredictor, + issueNumber, + new Issue(result), + argsData.LabelPredicate, + argsData.DefaultLabel, + ModelType.Issue, + argsData.Retries, + argsData.Test + ))); + + action.WriteInfo($"[Issue {argsData.Org}/{argsData.Repo}#{issueNumber}] Queued for prediction."); + } +} + +if (argsData.PullsModelPath is not null && argsData.Pulls is not null) +{ + await action.WriteStatusAsync($"Loading prediction engine for pulls model..."); + var pullContext = new MLContext(); + var pullModel = pullContext.Model.Load(argsData.PullsModelPath, out _); + var pullPredictor = pullContext.Model.CreatePredictionEngine(pullModel); + await action.WriteStatusAsync($"Pulls prediction engine ready."); + + foreach (ulong pullNumber in argsData.Pulls) + { + var result = await GitHubApi.GetPullRequest(argsData.GitHubToken, argsData.Org, argsData.Repo, pullNumber, argsData.Retries, action, argsData.Verbose); + + if (result is null) + { + action.WriteNotice($"[Pull Request {argsData.Org}/{argsData.Repo}#{pullNumber}] could not be found or downloaded. Skipped."); + continue; + } + + if (argsData.ExcludedAuthors is not null && result.Author?.Login is not null && argsData.ExcludedAuthors.Contains(result.Author.Login)) + { + action.WriteNotice($"[Pull Request {argsData.Org}/{argsData.Repo}#{pullNumber}] Author '{result.Author.Login}' is in excluded list. 
Skipped."); + continue; + } + + tasks.Add(Task.Run(() => ProcessPrediction( + pullPredictor, + pullNumber, + new PullRequest(result), + argsData.LabelPredicate, + argsData.DefaultLabel, + ModelType.PullRequest, + argsData.Retries, + argsData.Test + ))); + + action.WriteInfo($"[Pull Request {argsData.Org}/{argsData.Repo}#{pullNumber}] Queued for prediction."); + } +} + +var (predictionResults, success) = await App.RunTasks(tasks, action); + +foreach (var prediction in predictionResults.OrderBy(p => p.Number)) +{ + action.WriteInfo(prediction.ResultMessage); +} + +await action.Summary.WritePersistentAsync(); +return success ? 0 : 1; + +async Task<(ulong Number, string ResultMessage, bool Success)> ProcessPrediction(PredictionEngine predictor, ulong number, T issueOrPull, Func labelPredicate, string? defaultLabel, ModelType type, int[] retries, bool test) where T : Issue +{ + List> predictionResults = []; + string typeName = type == ModelType.PullRequest ? "Pull Request" : "Issue"; + List resultMessageParts = []; + string? error = null; + + (ulong, string, bool) GetResult(bool success) + { + foreach (var summaryWrite in predictionResults) + { + action.Summary.AddPersistent(summaryWrite); + } + + return (number, $"[{typeName} {argsData.Org}/{argsData.Repo}#{number}] {string.Join(' ', resultMessageParts)}", success); + } + + (ulong, string, bool) Success() => GetResult(true); + (ulong, string, bool) Failure() => GetResult(false); + + predictionResults.Add(summary => summary.AddRawMarkdown($"- **{argsData.Org}/{argsData.Repo}#{number}**", true)); + + if (issueOrPull.HasMoreLabels) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - Skipping prediction. 
Too many labels applied already; cannot be sure no applicable label is already applied.", true)); + resultMessageParts.Add("Too many labels applied already."); + + return Success(); + } + + var applicableLabel = issueOrPull.Labels?.FirstOrDefault(labelPredicate); + + bool hasDefaultLabel = + (defaultLabel is not null) && + (issueOrPull.Labels?.Any(l => l.Equals(defaultLabel, StringComparison.OrdinalIgnoreCase)) ?? false); + + if (applicableLabel is not null) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - No prediction needed. Applicable label `{applicableLabel}` already exists.", true)); + + if (hasDefaultLabel && defaultLabel is not null) + { + if (!test) + { + error = await GitHubApi.RemoveLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, defaultLabel, argsData.Retries, action); + } + + if (error is null) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - Removed default label `{defaultLabel}`.", true)); + resultMessageParts.Add($"Default label '{defaultLabel}' removed."); + return Success(); + } + else + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error removing default label `{defaultLabel}`**: {error}", true)); + resultMessageParts.Add($"Error occurred removing default label '{defaultLabel}'"); + return Failure(); + } + } + + resultMessageParts.Add($"No prediction needed. Applicable label '{applicableLabel}' already exists."); + return Success(); + } + + var prediction = predictor.Predict(issueOrPull); + + if (prediction.Score is null || prediction.Score.Length == 0) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - No prediction was made. The prediction engine did not return any possible predictions.", true)); + resultMessageParts.Add("No prediction was made. 
The prediction engine did not return any possible predictions."); + return Success(); + } + + VBuffer> labels = default; + predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels); + + var predictions = prediction.Score + .Select((score, index) => new + { + Score = score, + Label = labels.GetItemOrDefault(index).ToString() + }) + // Ensure predicted labels match the expected predicate + .Where(prediction => labelPredicate(prediction.Label)) + // Capture the top 3 for including in the output + .OrderByDescending(p => p.Score) + .Take(3); + + var bestScore = predictions.FirstOrDefault(p => p.Score >= argsData.Threshold); + + if (bestScore is not null) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - Predicted label: `{bestScore.Label}` meets the threshold of {argsData.Threshold}.", true)); + } + else + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - No label prediction met the threshold of {argsData.Threshold}.", true)); + } + + foreach (var labelPrediction in predictions) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - `{labelPrediction.Label}` - Score: {labelPrediction.Score}", true)); + } + + if (bestScore is not null) + { + if (!test) + { + error = await GitHubApi.AddLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, bestScore.Label, retries, action); + } + + if (error is null) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **`{bestScore.Label}` applied**", true)); + resultMessageParts.Add($"Label '{bestScore.Label}' applied."); + + if (hasDefaultLabel && defaultLabel is not null) + { + if (!test) + { + error = await GitHubApi.RemoveLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, defaultLabel, retries, action); + } + + if (error is null) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **Removed default label `{defaultLabel}`**", true)); + resultMessageParts.Add($"Default label '{defaultLabel}' 
removed."); + return Success(); + } + else + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error removing default label `{defaultLabel}`**: {error}", true)); + resultMessageParts.Add($"Error occurred removing default label '{defaultLabel}'"); + return Failure(); + } + } + else + { + return Success(); + } + } + else + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error applying label `{bestScore.Label}`**: {error}", true)); + resultMessageParts.Add($"Error occurred applying label '{bestScore.Label}'"); + return Failure(); + } + } + + if (defaultLabel is not null) + { + if (hasDefaultLabel) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - Default label `{defaultLabel}` is already applied.", true)); + resultMessageParts.Add($"No prediction made. Default label '{defaultLabel}' is already applied."); + return Success(); + } + else + { + if (!test) + { + error = await GitHubApi.AddLabel(argsData.GitHubToken, argsData.Org, argsData.Repo, typeName, number, defaultLabel, argsData.Retries, action); + } + + if (error is null) + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **Default label `{defaultLabel}` applied.**", true)); + resultMessageParts.Add($"No prediction made. Default label '{defaultLabel}' applied."); + return Success(); + } + else + { + predictionResults.Add(summary => summary.AddRawMarkdown($" - **Error applying default label `{defaultLabel}`**: {error}", true)); + resultMessageParts.Add($"Error occurred applying default label '{defaultLabel}'"); + return Failure(); + } + } + } + + resultMessageParts.Add("No prediction made. No applicable label found. 
No action taken."); + return GetResult(error is null); +} diff --git a/IssueLabeler/src/Predictor/Predictor.csproj b/IssueLabeler/src/Predictor/Predictor.csproj new file mode 100644 index 0000000..c15109e --- /dev/null +++ b/IssueLabeler/src/Predictor/Predictor.csproj @@ -0,0 +1,31 @@ + + + + + Exe + enable + enable + + + + + true + true + Predict labels for GitHub issues and pull requests using a machine learning model. + + + + + + + + + + + + + + + + + diff --git a/IssueLabeler/src/Tester/Args.cs b/IssueLabeler/src/Tester/Args.cs new file mode 100644 index 0000000..722d901 --- /dev/null +++ b/IssueLabeler/src/Tester/Args.cs @@ -0,0 +1,190 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Services; + +public struct Args +{ + public readonly string GitHubToken => Environment.GetEnvironmentVariable("GITHUB_TOKEN")!; + public string Org { get; set; } + public List Repos { get; set; } + public float Threshold { get; set; } + public Predicate LabelPredicate { get; set; } + public string[]? ExcludedAuthors { get; set; } + public string? IssuesModelPath { get; set; } + public int? IssuesLimit { get; set; } + public string? PullsModelPath { get; set; } + public int? PullsLimit { get; set; } + public int? PageSize { get; set; } + public int? PageLimit { get; set; } + public int[] Retries { get; set; } + public bool Verbose { get; set; } + + static void ShowUsage(string? message, ICoreService action) + { + action.WriteNotice($$""" + ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}} + + Required environment variables: + GITHUB_TOKEN GitHub token to be used for API calls. + + Required arguments: + --repo The GitHub repositories in format org/repo (comma separated for multiple). + --label-prefix Prefix for label predictions. Must end with a character other than a letter or number. 
+ + Required for testing the issues model: + --issues-model Path to existing issue prediction model file (ZIP file). + + Required for testing the pull requests model: + --pulls-model Path to existing pull request prediction model file (ZIP file). + + Optional arguments: + --excluded-authors Comma-separated list of authors to exclude. + --threshold Minimum prediction confidence threshold. Range (0,1]. + Defaults to: 0.4. + --issues-limit Maximum number of issues to download. Defaults to: No limit. + --pulls-limit Maximum number of pull requests to download. Defaults to: No limit. + --page-size Number of items per page in GitHub API requests. + Defaults to: 100 for issues, 25 for pull requests. + --page-limit Maximum number of pages to retrieve. + Defaults to: 1000 for issues, 4000 for pull requests. + --retries Comma-separated retry delays in seconds. + Defaults to: 30,30,300,300,3000,3000. + --verbose Enable verbose output. + """); + + Environment.Exit(1); + } + + public static Args? Parse(string[] args, ICoreService action) + { + Queue arguments = new(args); + ArgUtils argUtils = new(action, ShowUsage, arguments); + + Args argsData = new() + { + Threshold = 0.4f, + Retries = [30, 30, 300, 300, 3000, 3000] + }; + + if (string.IsNullOrEmpty(argsData.GitHubToken)) + { + ShowUsage("Environment variable GITHUB_TOKEN is empty.", action); + return null; + } + + while (arguments.Count > 0) + { + string argument = arguments.Dequeue(); + + switch (argument) + { + case "--repo": + if (!argUtils.TryGetRepoList("--repo", out string? org, out List? repos)) + { + return null; + } + argsData.Org = org; + argsData.Repos = repos; + break; + + case "--label-prefix": + if (!argUtils.TryGetLabelPrefix("--label-prefix", out Func? labelPredicate)) + { + return null; + } + argsData.LabelPredicate = new(labelPredicate); + break; + + case "--excluded-authors": + if (!argUtils.TryGetStringArray("--excluded-authors", out string[]? 
excludedAuthors)) + { + return null; + } + argsData.ExcludedAuthors = excludedAuthors; + break; + + case "--threshold": + if (!argUtils.TryGetFloat("--threshold", out float? threshold)) + { + return null; + } + argsData.Threshold = threshold.Value; + break; + + case "--issues-model": + if (!argUtils.TryGetPath("--issues-model", out string? IssuesModelPath)) + { + return null; + } + argsData.IssuesModelPath = IssuesModelPath; + break; + + case "--issues-limit": + if (!argUtils.TryGetInt("--issues-limit", out int? IssuesLimit)) + { + return null; + } + argsData.IssuesLimit = IssuesLimit; + break; + + case "--pulls-model": + if (!argUtils.TryGetPath("--pulls-model", out string? PullsModelPath)) + { + return null; + } + argsData.PullsModelPath = PullsModelPath; + break; + + case "--pulls-limit": + if (!argUtils.TryGetInt("--pulls-limit", out int? PullsLimit)) + { + return null; + } + argsData.PullsLimit = PullsLimit; + break; + + case "--page-size": + if (!argUtils.TryGetInt("--page-size", out int? pageSize)) + { + return null; + } + argsData.PageSize = pageSize; + break; + + case "--page-limit": + if (!argUtils.TryGetInt("--page-limit", out int? pageLimit)) + { + return null; + } + argsData.PageLimit = pageLimit; + break; + + case "--retries": + if (!argUtils.TryGetIntArray("--retries", out int[]? 
retries)) + { + return null; + } + argsData.Retries = retries; + break; + + case "--verbose": + argsData.Verbose = true; + break; + + default: + ShowUsage($"Unrecognized argument: {argument}", action); + return null; + } + } + + if (argsData.Org is null || argsData.Repos.Count == 0 || argsData.LabelPredicate is null || + (argsData.IssuesModelPath is null && argsData.PullsModelPath is null)) + { + ShowUsage(null, action); + return null; + } + + return argsData; + } +} diff --git a/src/Tester/Models.cs b/IssueLabeler/src/Tester/Models.cs similarity index 79% rename from src/Tester/Models.cs rename to IssueLabeler/src/Tester/Models.cs index c8f7b07..a616bdb 100644 --- a/src/Tester/Models.cs +++ b/IssueLabeler/src/Tester/Models.cs @@ -3,6 +3,7 @@ public class Issue { + public string Repo { get; set; } public ulong Number { get; set; } public string? Label { get; set; } public string? Title { get; set; } @@ -13,10 +14,9 @@ public class Issue public string? Area { get => Label; } public string? Description { get => Body; } - public Issue() { } - - public Issue(GitHubClient.Issue issue, Predicate labelPredicate) + public Issue(string repo, GitHubClient.Issue issue, Predicate labelPredicate) { + Repo = repo; Number = issue.Number; Title = issue.Title; Body = issue.Body; @@ -31,9 +31,7 @@ public class PullRequest : Issue public string? FileNames { get; set; } public string? 
FolderNames { get; set; } - public PullRequest() { } - - public PullRequest(GitHubClient.PullRequest pull, Predicate labelPredicate) : base(pull, labelPredicate) + public PullRequest(string repo, GitHubClient.PullRequest pull, Predicate labelPredicate) : base(repo, pull, labelPredicate) { FileNames = string.Join(' ', pull.FileNames); FolderNames = string.Join(' ', pull.FolderNames); diff --git a/IssueLabeler/src/Tester/Tester.cs b/IssueLabeler/src/Tester/Tester.cs new file mode 100644 index 0000000..65c9f7d --- /dev/null +++ b/IssueLabeler/src/Tester/Tester.cs @@ -0,0 +1,259 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Extensions; +using Actions.Core.Markdown; +using Actions.Core.Services; +using Actions.Core.Summaries; +using GitHubClient; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.ML; +using Microsoft.ML.Data; + +using var provider = new ServiceCollection() + .AddGitHubActionsCore() + .BuildServiceProvider(); + +var action = provider.GetRequiredService(); +var config = Args.Parse(args, action); +if (config is not Args argsData) return 1; + +List> tasks = []; + +if (argsData.IssuesModelPath is not null) +{ + tasks.Add(Task.Run(() => TestIssues())); +} + +if (argsData.PullsModelPath is not null) +{ + tasks.Add(Task.Run(() => TestPullRequests())); +} + +var (results, success) = await App.RunTasks(tasks, action); + +foreach (var (itemType, stats) in results) +{ + AlertType resultAlert = (stats.MatchesPercentage >= 0.65f && stats.MismatchesPercentage < 0.15f) ? AlertType.Note : AlertType.Warning; + + action.Summary.AddPersistent(summary => + { + summary.AddMarkdownHeading($"Finished Testing {(itemType == typeof(PullRequest) ? 
"Pull Requests" : "Issues")}", 2); + summary.AddAlert($"**{stats.Total}** items were tested with **{stats.MatchesPercentage:P2} matches** and **{stats.MismatchesPercentage:P2} mismatches**.", resultAlert); + summary.AddRawMarkdown($"Testing complete. **{stats.Total}** items tested, with the following results.", true); + summary.AddNewLine(); + + SummaryTableRow headerRow = new([ + new("", Header: true), + new("Total", Header: true, Alignment: TableColumnAlignment.Right), + new("Matches", Header: true, Alignment: TableColumnAlignment.Right), + new("Mismatches", Header: true, Alignment: TableColumnAlignment.Right), + new("No Prediction", Header: true, Alignment: TableColumnAlignment.Right), + new("No Existing Label", Header: true, Alignment: TableColumnAlignment.Right) + ]); + + SummaryTableRow countsRow = new([ + new("Count"), + new($"{stats.Total:N0}"), + new($"{stats.Matches:N0}"), + new($"{stats.Mismatches:N0}"), + new($"{stats.NoPrediction:N0}"), + new($"{stats.NoExisting:N0}") + ]); + + SummaryTableRow percentageRow = new([ + new("Percentage", Header: true), + new($""), + new($"{stats.MatchesPercentage:P2}"), + new($"{stats.MismatchesPercentage:P2}"), + new($"{stats.NoPredictionPercentage:P2}"), + new($"{stats.NoExistingPercentage:P2}") + ]); + + summary.AddMarkdownTable(new(headerRow, [countsRow, percentageRow])); + summary.AddNewLine(); + summary.AddMarkdownList([ + "**Matches**: The predicted label matches the existing label, including when no prediction is made and there is no existing label. Correct prediction.", + "**Mismatches**: The predicted label _does not match_ the existing label. Incorrect prediction.", + "**No Prediction**: No prediction was made, but the existing item had a label. Incorrect prediction.", + "**No Existing Label**: A prediction was made, but there was no existing label. Incorrect prediction." 
+ ]); + summary.AddNewLine(); + summary.AddAlert($"If the **Matches** percentage is **at least 65%** and the **Mismatches** percentage is **less than 15%**, the model testing is considered favorable.", AlertType.Tip); + }); +} + +await action.Summary.WritePersistentAsync(); +return success ? 0 : 1; + +async Task<(Type, TestStats)> TestIssues() +{ + var predictor = GetPredictionEngine(argsData.IssuesModelPath); + var stats = new TestStats(); + + async IAsyncEnumerable DownloadIssues(string githubToken, string repo) + { + await foreach (var result in GitHubApi.DownloadIssues(githubToken, argsData.Org, repo, argsData.LabelPredicate, argsData.IssuesLimit, argsData.PageSize, argsData.PageLimit, argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose)) + { + yield return new(repo, result.Issue, argsData.LabelPredicate); + } + } + + action.WriteInfo($"Testing issues from {argsData.Repos.Count} repositories."); + + foreach (var repo in argsData.Repos) + { + await action.WriteStatusAsync($"Downloading and testing issues from {argsData.Org}/{repo}."); + + await foreach (var issue in DownloadIssues(argsData.GitHubToken, repo)) + { + TestPrediction(issue, predictor, stats); + } + + await action.WriteStatusAsync($"Finished Testing Issues from {argsData.Org}/{repo}."); + } + + return (typeof(Issue), stats); +} + +async Task<(Type, TestStats)> TestPullRequests() +{ + var predictor = GetPredictionEngine(argsData.PullsModelPath); + var stats = new TestStats(); + + async IAsyncEnumerable DownloadPullRequests(string githubToken, string repo) + { + await foreach (var result in GitHubApi.DownloadPullRequests(githubToken, argsData.Org, repo, argsData.LabelPredicate, argsData.PullsLimit, argsData.PageSize, argsData.PageLimit, argsData.Retries, argsData.ExcludedAuthors, action, argsData.Verbose)) + { + yield return new(repo, result.PullRequest, argsData.LabelPredicate); + } + } + + foreach (var repo in argsData.Repos) + { + await action.WriteStatusAsync($"Downloading and 
testing pull requests from {argsData.Org}/{repo}."); + + await foreach (var pull in DownloadPullRequests(argsData.GitHubToken, repo)) + { + TestPrediction(pull, predictor, stats); + } + + await action.WriteStatusAsync($"Finished Testing Pull Requests from {argsData.Org}/{repo}."); + } + + return (typeof(PullRequest), stats); +} + +static string GetStats(List values) +{ + if (values.Count == 0) + { + return "N/A"; + } + + float min = values.Min(); + float average = values.Average(); + float max = values.Max(); + double deviation = Math.Sqrt(values.Average(v => Math.Pow(v - average, 2))); + + return $"{min} | {average} | {max} | {deviation}"; +} + +PredictionEngine GetPredictionEngine(string modelPath) where T : Issue +{ + var context = new MLContext(); + var model = context.Model.Load(modelPath, out _); + + return context.Model.CreatePredictionEngine(model); +} + +void TestPrediction(T result, PredictionEngine predictor, TestStats stats) where T : Issue +{ + var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue"; + + (string? predictedLabel, float? score) = GetPrediction( + predictor, + result, + argsData.Threshold); + + if (predictedLabel is null && result.Label is not null) + { + stats.NoPrediction++; + } + else if (predictedLabel is not null && result.Label is null) + { + stats.NoExisting++; + } + else if (predictedLabel?.ToLower() == result.Label?.ToLower()) + { + stats.Matches++; + + if (score.HasValue) + { + stats.MatchScores.Add(score.Value); + } + } + else + { + stats.Mismatches++; + + if (score.HasValue) + { + stats.MismatchScores.Add(score.Value); + } + } + + action.StartGroup($"{itemType} {argsData.Org}/{result.Repo}#{result.Number} - Predicted: {(predictedLabel ?? "")} - Existing: {(result.Label ?? 
"")}"); + action.WriteInfo($"Total : {stats.Total}"); + action.WriteInfo($"Matches : {stats.Matches} ({stats.MatchesPercentage:P2}) - Min | Avg | Max | StdDev: {GetStats(stats.MatchScores)}"); + action.WriteInfo($"Mismatches : {stats.Mismatches} ({stats.MismatchesPercentage:P2}) - Min | Avg | Max | StdDev: {GetStats(stats.MismatchScores)}"); + action.WriteInfo($"No Prediction: {stats.NoPrediction} ({stats.NoPredictionPercentage:P2})"); + action.WriteInfo($"No Existing : {stats.NoExisting} ({stats.NoExistingPercentage:P2})"); + action.EndGroup(); +} + +(string? PredictedLabel, float? PredictionScore) GetPrediction(PredictionEngine predictor, T issueOrPull, float? threshold) where T : Issue +{ + var prediction = predictor.Predict(issueOrPull); + var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue"; + + if (prediction.Score is null || prediction.Score.Length == 0) + { + action.WriteInfo($"No prediction was made for {itemType} {argsData.Org}/{issueOrPull.Repo}#{issueOrPull.Number}."); + return (null, null); + } + + VBuffer> labels = default; + predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels); + + var bestScore = prediction.Score + .Select((score, index) => new + { + Score = score, + Label = labels.GetItemOrDefault(index).ToString() + }) + .OrderByDescending(p => p.Score) + .FirstOrDefault(p => threshold is null || p.Score >= threshold); + + return bestScore is not null ? 
(bestScore.Label, bestScore.Score) : ((string?)null, (float?)null); +} + +class TestStats +{ + public TestStats() { } + + public int Matches { get; set; } = 0; + public int Mismatches { get; set; } = 0; + public int NoPrediction { get; set; } = 0; + public int NoExisting { get; set; } = 0; + + public float Total => Matches + Mismatches + NoPrediction + NoExisting; + + public float MatchesPercentage => (float)Matches / Total; + public float MismatchesPercentage => (float)Mismatches / Total; + public float NoPredictionPercentage => (float)NoPrediction / Total; + public float NoExistingPercentage => (float)NoExisting / Total; + + public List MatchScores { get; } = []; + public List MismatchScores { get; } = []; +} diff --git a/src/Tester/Tester.csproj b/IssueLabeler/src/Tester/Tester.csproj similarity index 61% rename from src/Tester/Tester.csproj rename to IssueLabeler/src/Tester/Tester.csproj index 497184a..aed9a8b 100644 --- a/src/Tester/Tester.csproj +++ b/IssueLabeler/src/Tester/Tester.csproj @@ -7,15 +7,18 @@ - + - + + + - + + diff --git a/IssueLabeler/src/Trainer/Args.cs b/IssueLabeler/src/Trainer/Args.cs new file mode 100644 index 0000000..f9a3a49 --- /dev/null +++ b/IssueLabeler/src/Trainer/Args.cs @@ -0,0 +1,93 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Services; + +public struct Args +{ + public string? IssuesDataPath { get; set; } + public string? IssuesModelPath { get; set; } + public string? PullsDataPath { get; set; } + public string? PullsModelPath { get; set; } + + static void ShowUsage(string? message, ICoreService action) + { + // If you provide a path for issue data, you must also provide a path for the issue model, and vice versa. + // If you provide a path for pull data, you must also provide a path for the pull model, and vice versa. + // At least one pair of paths(either issue or pull) must be provided. 
+ action.WriteNotice($$""" + ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}} + + Required for training the issues model: + --issues-data Path to existing issue data file (TSV file). + --issues-model Path to existing issue prediction model file (ZIP file). + + Required for training the pull requests model: + --pulls-data Path to existing pull request data file (TSV file). + --pulls-model Path to existing pull request prediction model file (ZIP file). + """); + + Environment.Exit(1); + } + + public static Args? Parse(string[] args, ICoreService action) + { + Queue arguments = new(args); + ArgUtils argUtils = new(action, ShowUsage, arguments); + Args argsData = new(); + + while (arguments.Count > 0) + { + string argument = arguments.Dequeue(); + + switch (argument) + { + case "--issues-data": + if (!argUtils.TryGetPath("--issues-data", out string? IssuesDataPath)) + { + return null; + } + argsData.IssuesDataPath = IssuesDataPath; + break; + + case "--issues-model": + if (!argUtils.TryGetPath("--issues-model", out string? IssuesModelPath)) + { + return null; + } + argsData.IssuesModelPath = IssuesModelPath; + break; + + case "--pulls-data": + if (!argUtils.TryGetPath("--pulls-data", out string? PullsDataPath)) + { + return null; + } + argsData.PullsDataPath = PullsDataPath; + break; + + case "--pulls-model": + if (!argUtils.TryGetPath("--pulls-model", out string? 
PullsModelPath)) + { + return null; + } + argsData.PullsModelPath = PullsModelPath; + break; + + default: + ShowUsage($"Unrecognized argument: {argument}", action); + return null; + } + } + + if ((argsData.IssuesDataPath is null != argsData.IssuesModelPath is null) || + (argsData.PullsDataPath is null != argsData.PullsModelPath is null) || + (argsData.IssuesModelPath is null && argsData.PullsModelPath is null)) + { + ShowUsage(null, action); + return null; + } + + return argsData; + } +} diff --git a/IssueLabeler/src/Trainer/Trainer.cs b/IssueLabeler/src/Trainer/Trainer.cs new file mode 100644 index 0000000..8efc117 --- /dev/null +++ b/IssueLabeler/src/Trainer/Trainer.cs @@ -0,0 +1,144 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Actions.Core.Extensions; +using Actions.Core.Markdown; +using Actions.Core.Services; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Text; +using static DataFileUtils; + +using var provider = new ServiceCollection() + .AddGitHubActionsCore() + .BuildServiceProvider(); + +var action = provider.GetRequiredService(); + +var config = Args.Parse(args, action); +if (config is not Args argsData) return 1; + +List tasks = new(); + +if (argsData.IssuesDataPath is not null && argsData.IssuesModelPath is not null) +{ + tasks.Add(Task.Run(() => CreateModel(argsData.IssuesDataPath, argsData.IssuesModelPath, ModelType.Issue, action))); +} + +if (argsData.PullsDataPath is not null && argsData.PullsModelPath is not null) +{ + tasks.Add(Task.Run(() => CreateModel(argsData.PullsDataPath, argsData.PullsModelPath, ModelType.PullRequest, action))); +} + +var success = await App.RunTasks(tasks, action); +return success ? 
0 : 1; + +static async Task CreateModel(string dataPath, string modelPath, ModelType type, ICoreService action) +{ + if (!File.Exists(dataPath)) + { + action.WriteNotice($"The data file '{dataPath}' does not exist."); + action.Summary.AddPersistent(summary => summary.AddAlert("The data file does not exist. Training cannot proceed.", AlertType.Caution)); + await action.Summary.WriteAsync(); + + throw new InvalidOperationException($"The data file '{dataPath}' does not exist."); + } + + int recordsCounted = File.ReadLines(dataPath).Take(10).Count(); + if (recordsCounted < 10) + { + action.WriteNotice($"The data file '{dataPath}' does not contain enough data for training. A minimum of 10 records is required, but only {recordsCounted} exist."); + action.Summary.AddPersistent(summary => summary.AddAlert($"Only {recordsCounted} items were found to be used for training. A minimum of 10 records is required. Cannot proceed with training.", AlertType.Caution)); + await action.Summary.WriteAsync(); + + throw new InvalidOperationException($"The data file '{dataPath}' does not contain enough data for training. A minimum of 10 records is required, but only {recordsCounted} exist."); + } + + await action.WriteStatusAsync("Loading data into train/test sets..."); + MLContext mlContext = new(); + + TextLoader.Column[] columns = type == ModelType.Issue ? 
[ + new("Label", DataKind.String, 0), + new("Title", DataKind.String, 1), + new("Body", DataKind.String, 2), + ] : [ + new("Label", DataKind.String, 0), + new("Title", DataKind.String, 1), + new("Body", DataKind.String, 2), + new("FileNames", DataKind.String, 3), + new("FolderNames", DataKind.String, 4) + ]; + + TextLoader.Options textLoaderOptions = new() + { + AllowQuoting = false, + AllowSparse = false, + EscapeChar = '"', + HasHeader = true, + ReadMultilines = false, + Separators = ['\t'], + TrimWhitespace = true, + UseThreads = true, + Columns = columns + }; + + var loader = mlContext.Data.CreateTextLoader(textLoaderOptions); + var data = loader.Load(dataPath); + var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2); + + await action.WriteStatusAsync("Building pipeline..."); + + var xf = mlContext.Transforms; + var pipeline = xf.Conversion.MapValueToKey(inputColumnName: "Label", outputColumnName: "LabelKey") + .Append(xf.Text.FeaturizeText( + "Features", + new TextFeaturizingEstimator.Options(), + columns.Select(c => c.Name).ToArray())) + .AppendCacheCheckpoint(mlContext) + .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("LabelKey")) + .Append(xf.Conversion.MapKeyToValue("PredictedLabel")); + + await action.WriteStatusAsync("Fitting the model with the training data set..."); + var trainedModel = pipeline.Fit(split.TrainSet); + var testModel = trainedModel.Transform(split.TestSet); + + await action.WriteStatusAsync("Evaluating against the test set..."); + var metrics = mlContext.MulticlassClassification.Evaluate(testModel, labelColumnName: "LabelKey"); + + action.Summary.AddPersistent(summary => + { + summary.AddMarkdownHeading($"Finished Training {(type == ModelType.Issue ? 
"Issues" : "Pull Requests")} Model", 2); + + summary.AddRawMarkdown($""" + * MacroAccuracy: {metrics.MacroAccuracy:0.####} (a value between 0 and 1; the closer to 1, the better) + * MicroAccuracy: {metrics.MicroAccuracy:0.####} (a value between 0 and 1; the closer to 1, the better) + * LogLoss: {metrics.LogLoss:0.####} (the closer to 0, the better) + {(metrics.PerClassLogLoss.Count() > 0 ? $" * Class 1: {metrics.PerClassLogLoss[0]:0.####}" : "")} + {(metrics.PerClassLogLoss.Count() > 1 ? $" * Class 2: {metrics.PerClassLogLoss[1]:0.####}" : "")} + {(metrics.PerClassLogLoss.Count() > 2 ? $" * Class 3: {metrics.PerClassLogLoss[2]:0.####}" : "")} + """); + }); + + await action.Summary.WriteAsync(); + + action.WriteInfo($"************************************************************"); + action.WriteInfo($"MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better"); + action.WriteInfo($"MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better"); + action.WriteInfo($"LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better"); + + if (metrics.PerClassLogLoss.Count() > 0) + action.WriteInfo($"LogLoss for class 1 = {metrics.PerClassLogLoss[0]:0.####}, the closer to 0, the better"); + + if (metrics.PerClassLogLoss.Count() > 1) + action.WriteInfo($"LogLoss for class 2 = {metrics.PerClassLogLoss[1]:0.####}, the closer to 0, the better"); + + if (metrics.PerClassLogLoss.Count() > 2) + action.WriteInfo($"LogLoss for class 3 = {metrics.PerClassLogLoss[2]:0.####}, the closer to 0, the better"); + + action.WriteInfo($"************************************************************"); + + action.WriteInfo($"Saving model to '{modelPath}'..."); + EnsureOutputDirectory(modelPath); + mlContext.Model.Save(trainedModel, split.TrainSet.Schema, modelPath); +} diff --git a/src/Predictor/Predictor.csproj b/IssueLabeler/src/Trainer/Trainer.csproj similarity index 56% rename from 
src/Predictor/Predictor.csproj rename to IssueLabeler/src/Trainer/Trainer.csproj index 497184a..9fe4e21 100644 --- a/src/Predictor/Predictor.csproj +++ b/IssueLabeler/src/Trainer/Trainer.csproj @@ -7,15 +7,17 @@ - + - + + + - + diff --git a/IssueLabeler/tests/Common.Tests/ArgUtils.Tests.cs b/IssueLabeler/tests/Common.Tests/ArgUtils.Tests.cs new file mode 100644 index 0000000..16e488b --- /dev/null +++ b/IssueLabeler/tests/Common.Tests/ArgUtils.Tests.cs @@ -0,0 +1,166 @@ +using Actions.Core; +using Actions.Core.Services; +using Actions.Core.Summaries; + +namespace Common.Tests +{ + public class ArgUtilsTests + { + private class TestCoreService : ICoreService + { + private readonly Dictionary _inputs = new(); + + public void SetInput(string name, string? value) + { + _inputs[name] = value; + } + + public string? GetInput(string name) + { + return _inputs.TryGetValue(name, out var value) ? value : null; + } + + string ICoreService.GetInput(string name, InputOptions? options) => GetInput(name)!; + + Summary ICoreService.Summary => throw new NotImplementedException(); + bool ICoreService.IsDebug => throw new NotImplementedException(); + public void WriteNotice(string message) { } + ValueTask ICoreService.ExportVariableAsync(string name, string value) { throw new NotImplementedException(); } + void ICoreService.SetSecret(string secret) { throw new NotImplementedException(); } + ValueTask ICoreService.AddPathAsync(string inputPath) { throw new NotImplementedException(); } + string[] ICoreService.GetMultilineInput(string name, InputOptions? options) { throw new NotImplementedException(); } + bool ICoreService.GetBoolInput(string name, InputOptions? options) { throw new NotImplementedException(); } + ValueTask ICoreService.SetOutputAsync(string name, T value, System.Text.Json.Serialization.Metadata.JsonTypeInfo? 
typeInfo) { throw new NotImplementedException(); } + void ICoreService.SetCommandEcho(bool enabled) { throw new NotImplementedException(); } + void ICoreService.SetFailed(string message) { throw new NotImplementedException(); } + void ICoreService.WriteDebug(string message) { throw new NotImplementedException(); } + void ICoreService.WriteError(string message, AnnotationProperties? properties) { throw new NotImplementedException(); } + void ICoreService.WriteWarning(string message, AnnotationProperties? properties) { throw new NotImplementedException(); } + void ICoreService.WriteNotice(string message, AnnotationProperties? properties) { throw new NotImplementedException(); } + void ICoreService.WriteInfo(string message) { throw new NotImplementedException(); } + void ICoreService.StartGroup(string name) { throw new NotImplementedException(); } + void ICoreService.EndGroup() { throw new NotImplementedException(); } + ValueTask ICoreService.GroupAsync(string name, Func> action) { throw new NotImplementedException(); } + ValueTask ICoreService.SaveStateAsync(string name, T value, System.Text.Json.Serialization.Metadata.JsonTypeInfo? 
typeInfo) { throw new NotImplementedException(); } + string ICoreService.GetState(string name) { throw new NotImplementedException(); } + } + + private readonly TestCoreService _testCoreService; + private readonly Action _showUsage; + + public ArgUtilsTests() + { + _testCoreService = new TestCoreService(); + _showUsage = (message, action) => { }; + } + + [Fact] + public void TryGetString_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testInput", "testValue"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetString("testInput", out var value); + + Assert.True(result); + Assert.Equal("testValue", value); + } + + [Fact] + public void TryGetFlag_ShouldReturnTrue_WhenInputIsTrue() + { + _testCoreService.SetInput("testFlag", "true"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetFlag("testFlag", out var value); + + Assert.True(result); + Assert.True(value); + } + + [Fact] + public void TryGetRepo_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("TEST_REPO", "TEST_ORG/TEST_REPO"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetRepo("TEST_REPO", out var org, out var repo); + + Assert.True(result); + Assert.Equal("TEST_ORG", org); + Assert.Equal("TEST_REPO", repo); + } + + [Fact] + public void TryGetPath_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testPath", "C:\\test\\path"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetPath("testPath", out var path); + + Assert.True(result); + Assert.Equal(Path.GetFullPath("C:\\test\\path"), path); + } + + [Fact] + public void TryGetStringArray_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testArray", "value1,value2,value3"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetStringArray("testArray", out var values); + + 
Assert.True(result); + Assert.Equal(new[] { "value1", "value2", "value3" }, values); + } + + [Fact] + public void TryGetInt_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testInt", "42"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetInt("testInt", out var value); + + Assert.True(result); + Assert.Equal(42, value); + } + + [Fact] + public void TryGetIntArray_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testIntArray", "1,2,3"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetIntArray("testIntArray", out var values); + + Assert.True(result); + Assert.Equal(new[] { 1, 2, 3 }, values); + } + + [Fact] + public void TryGetFloat_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testFloat", "3.14"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetFloat("testFloat", out var value); + + Assert.True(result); + Assert.Equal(3.14f, value); + } + + [Fact] + public void TryGetNumberRanges_ShouldReturnTrue_WhenInputIsValid() + { + _testCoreService.SetInput("testRanges", "1-3,5,7-9"); + var argUtils = new ArgUtils(_testCoreService, _showUsage); + + var result = argUtils.TryGetNumberRanges("testRanges", out var values); + + Assert.True(result); + Assert.Equal(new List { 1, 2, 3, 5, 7, 8, 9 }, values); + } + } +} diff --git a/IssueLabeler/tests/Common.Tests/Common.Tests.csproj b/IssueLabeler/tests/Common.Tests/Common.Tests.csproj new file mode 100644 index 0000000..cee86d4 --- /dev/null +++ b/IssueLabeler/tests/Common.Tests/Common.Tests.csproj @@ -0,0 +1,25 @@ + + + + net9.0 + enable + enable + false + + + + + + + + + + + + + + + + + + diff --git a/IssueLabeler/tests/Common.Tests/DataFileUtilsTests.cs b/IssueLabeler/tests/Common.Tests/DataFileUtilsTests.cs new file mode 100644 index 0000000..3ca5fd9 --- /dev/null +++ b/IssueLabeler/tests/Common.Tests/DataFileUtilsTests.cs @@ -0,0 +1,63 
@@ +using System; +using System.IO; + +namespace Common.Tests +{ + public class DataFileUtilsTests + { + [Fact] + public void EnsureOutputDirectory_ShouldCreateDirectory_WhenDirectoryDoesNotExist() + { + string tempFilePath = Path.Combine(Path.GetTempPath(), "testDir", "testFile.txt"); + string tempDirPath = Path.GetDirectoryName(tempFilePath)!; + + try + { + DataFileUtils.EnsureOutputDirectory(tempFilePath); + Assert.True(Directory.Exists(tempDirPath)); + } + finally + { + if (Directory.Exists(tempDirPath)) + { + Directory.Delete(tempDirPath, recursive: true); + } + } + } + + [Fact] + public void SanitizeText_ShouldReplaceSpecialCharacters() + { + string input = "Line1\r\nLine2\t\"Quoted\""; + string expected = "Line1 Line2 `Quoted`"; + + string result = DataFileUtils.SanitizeText(input); + + Assert.Equal(expected, result); + } + + [Fact] + public void SanitizeTextArray_ShouldJoinAndSanitizeStrings() + { + string[] input = ["\tLine1\r\n", "Line2\t", "\" Quo\ted\""]; + string expected = "Line1 Line2 ` Quo ed`"; + + string result = DataFileUtils.SanitizeTextArray(input); + + Assert.Equal(expected, result); + } + + [Fact] + public void FormatIssueRecord_ShouldReturnTabSeparatedString() + { + string label = "area-testing"; + string title = "Issue title"; + string body = "Issue body\r\nwith new line"; + string[] expected = ["area-testing","Issue title","Issue body with new line"]; + + string[] result = DataFileUtils.FormatIssueRecord(label, title, body).Split('\t'); + + Assert.Equal(expected, result); + } + } +} diff --git a/download/action.yml b/download/action.yml new file mode 100644 index 0000000..c1bfb5e --- /dev/null +++ b/download/action.yml @@ -0,0 +1,107 @@ +name: "Download Data" +description: "Download GitHub issues or pull requests and cache the data." + +branding: + color: "purple" + icon: "tag" + +inputs: + type: + description: "The type of data to download. Must be either 'issues' or 'pulls'." 
+ required: true + label_prefix: + description: "The label prefix to be used for model training. Must end in a non-alphanumeric character." + required: true + excluded_authors: + description: "Comma-separated list of authors to exclude." + limit: + description: "Max number of items to download (newest items are used). Defaults to the max number of pages times the page size." + page_size: + description: "Number of items per page in GitHub API requests. Defaults to 100 for issues, 25 for pull requests." + page_limit: + description: "Maximum number of pages to retrieve. Defaults to 1000 for issues, 4000 for pull requests." + retries: + description: "Comma-separated list of retry delays in seconds. Defaults to '30,30,300,300,3000,3000'." + repository: + description: "The org/repo to download data from. Defaults to current repository." + cache_key: + description: "The cache key suffix to use for saving data." + default: "staged" + +runs: + using: "composite" + steps: + - name: "Validate inputs and set cache variables" + shell: bash + run: | + if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then + echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'" + echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY + echo "\`type\` must be either 'issues' or 'pulls'." 
>> $GITHUB_STEP_SUMMARY + exit 1 + fi + + echo "DATA_PATH=${{ format('labeler-cache/{0}-data.tsv', inputs.type) }}" >> $GITHUB_ENV + echo "CACHE_KEY=${{ format('issue-labeler/data/{0}/{1}', inputs.type, inputs.cache_key || 'staged') }}" >> $GITHUB_ENV + + - name: "Check for Existing Cache Entry" + id: check-cache + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.DATA_PATH }} + key: ${{ env.CACHE_KEY }} + lookup-only: true + fail-on-cache-miss: false + + - name: "Abort if Existing Cache Exists" + shell: bash + run: | + if [[ "${{ steps.check-cache.outputs.cache-hit }}" == "true" ]]; then + echo "::error::Cache key '${{ env.CACHE_KEY }}' already exists. Cannot proceed with downloading." + echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY + echo "> Cache key '${{ env.CACHE_KEY }}' already exists. Cannot proceed with downloading." >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "> [!TIP]" >> $GITHUB_STEP_SUMMARY + echo "> Either use a different \`cache_key\` value or delete the existing cache entry from the [Action Caches](/${{ github.repository }}/actions/caches) page and run the workflow again." 
>> $GITHUB_STEP_SUMMARY + exit 1 + fi + + - name: "Clone the ${{ github.action_repository }} repository with ref '${{ github.action_ref }}'" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + env: + ISSUE_LABELER_REPO: ${{ github.action_repository }} + ISSUE_LABELER_REF: ${{ github.action_ref }} + with: + repository: ${{ env.ISSUE_LABELER_REPO }} + ref: ${{ env.ISSUE_LABELER_REF }} + + - name: "Set up the .NET SDK" + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1 + with: + dotnet-version: "9.0.x" + + - name: "Run Downloader" + shell: bash + run: | + dotnet run -c Release --project IssueLabeler/src/Downloader -- \ + ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \ + ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \ + ${{ format('--{0}-data "{1}"', inputs.type, env.DATA_PATH) }} \ + ${{ (inputs.excluded_authors != null && format('--excluded-authors "{0}"', inputs.excluded_authors)) || '' }} \ + ${{ (inputs.limit && format('--{0}-limit {1}', inputs.type, inputs.limit)) || '' }} \ + ${{ (inputs.page_size && format('--page-size {0}', inputs.page_size)) || '' }} \ + ${{ (inputs.page_limit && format('--page-limit {0}', inputs.page_limit)) || '' }} \ + ${{ (inputs.retries && format('--retries "{0}"', inputs.retries)) || '' }} + + - name: "Save the Downloaded Data to Cache" + uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.DATA_PATH }} + key: ${{ env.CACHE_KEY }} + + - name: "Write Final Summary" + shell: bash + run: | + echo "" >> $GITHUB_STEP_SUMMARY + echo "## ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} Data Available as '${{ inputs.cache_key || 'staged' }}'" >> $GITHUB_STEP_SUMMARY + echo "The '${{ inputs.cache_key || 'staged' }}' data is saved to cache and available for training a model." 
>> $GITHUB_STEP_SUMMARY diff --git a/predict/action.yml b/predict/action.yml new file mode 100644 index 0000000..ef33b1b --- /dev/null +++ b/predict/action.yml @@ -0,0 +1,53 @@ +name: "Predict Labels" +description: "Predict labels for Issues and Pull Requests using models already restored from cache." + +inputs: + issues: + description: "Issue Numbers (comma-separated list of ranges)." + required: true + + pulls: + description: "Pull Request Numbers (comma-separated list of ranges)." + required: true + + label_prefix: + description: "The label prefix used for prediction. Must end with a non-alphanumeric character. Defaults to 'area-'." + required: false + default: "area-" + + threshold: + description: "The minimum confidence score for a label prediction, as a decimal between 0.00 and 1.00. Defaults to 0.40." + required: false + default: "0.40" + + default_label: + description: "The default label to apply if no prediction meets the threshold. Leave blank for no default label." + + excluded_authors: + description: "Comma-separated list of authors to exclude. Defaults to none." + + retries: + description: "Comma-separated list of retry delays in seconds. Defaults to '30,30,300,300,3000,3000'." + required: false + default: "30,30,300,300,3000,3000" + + test: + description: "Run in test mode, outputting predictions without applying labels." + required: false + + verbose: + description: "Enable verbose output." + required: false + +branding: + color: "purple" + icon: "tag" + +runs: + using: docker + # Reference the docker container image using a published sha256 digest + # to ensure an immutable version is always used. 
+ image: docker://ghcr.io/dotnet/issue-labeler/predictor@sha256: + env: + INPUT_ISSUES_MODEL: "labeler-cache/issues-model.zip" + INPUT_PULLS_MODEL: "labeler-cache/pulls-model.zip" diff --git a/promote/action.yml b/promote/action.yml new file mode 100644 index 0000000..0eb2139 --- /dev/null +++ b/promote/action.yml @@ -0,0 +1,130 @@ +name: "Promote Model" +description: "Promote a model from staging to 'ACTIVE', backing up the currently 'ACTIVE' model." + +inputs: + type: + description: "The model to promote. Must be 'issues' or 'pulls'." + required: true + + staged_key: + description: "The suffix for the staged cache entry to promote. Defaults to 'staged'." + required: false + default: "staged" + + backup_key: + description: "The suffix for the backup cache entry. Defaults to 'backup'." + required: false + default: "backup" + +branding: + color: "purple" + icon: "arrow-up" + +runs: + using: "composite" + steps: + - name: "Validate Inputs" + shell: bash + run: | + if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then + echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'." + echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY + echo "\`type\` must be either 'issues' or 'pulls'." 
>> $GITHUB_STEP_SUMMARY + exit 1 + fi + + - name: "Set Environment Variables" + shell: bash + run: | + echo "CACHE_PATH=labeler-cache/${{ inputs.type }}-model.zip" >> $GITHUB_ENV + echo "STAGED_KEY=issue-labeler/model/${{ inputs.type }}/${{ inputs.staged_key || 'staged' }}" >> $GITHUB_ENV + echo "ACTIVE_KEY=issue-labeler/model/${{ inputs.type }}/ACTIVE" >> $GITHUB_ENV + echo "BACKUP_KEY=issue-labeler/model/${{ inputs.type }}/${{ inputs.backup_key || 'backup' }}" >> $GITHUB_ENV + + - name: "Check for Existing Staged Cache Entry" + id: check-staged + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ env.STAGED_KEY }} + lookup-only: true + fail-on-cache-miss: true + + - name: "Check for Existing Backup Cache Entry" + if: ${{ steps.check-staged.outputs.cache-hit == 'true' }} + id: check-backup + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ env.BACKUP_KEY }} + lookup-only: true + fail-on-cache-miss: false + + - name: "Restore Existing Active Cache Entry" + if: ${{ steps.check-staged.outputs.cache-hit == 'true' }} + id: check-active + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ env.ACTIVE_KEY }} + fail-on-cache-miss: false + + - name: "Abort if Backup Cache Entry Already Exists" + if: ${{ steps.check-active.outputs.cache-hit == 'true' && steps.check-backup.outputs.cache-hit == 'true' }} + shell: bash + run: | + echo "::error::Backup cache key '${{ env.BACKUP_KEY }}' already exists. Cannot proceed with promotion." + echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY + echo "> Backup cache key '${{ env.BACKUP_KEY }}' already exists. Cannot proceed with promotion." 
>> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "> [!TIP]" >> $GITHUB_STEP_SUMMARY + echo "> Either use a different \`backup_key\` value or delete the existing cache entry from the [Action Caches](/${{ github.repository }}/actions/caches) page and run the workflow again." >> $GITHUB_STEP_SUMMARY + exit 1 + + - name: "Cache Backup of Current Active Cache Entry" + if: ${{ steps.check-active.outputs.cache-hit == 'true' }} + id: backup-file + uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ env.BACKUP_KEY }} + + - name: "Remove Local Copy of Current Active Cache Entry" + if: ${{ steps.check-active.outputs.cache-hit == 'true' }} + shell: bash + run: | + rm ${{ env.CACHE_PATH }} + + - name: "Restore the Staged Cache Entry to Promote" + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ env.STAGED_KEY }} + fail-on-cache-miss: true + + - name: "Delete Existing Active Cache Entry" + if: ${{ steps.check-active.outputs.cache-hit == 'true' }} + shell: bash + run: | + gh cache delete "${{ env.ACTIVE_KEY }}" + env: + GH_TOKEN: ${{ github.token }} + + - name: "Save the Staged Cache Entry as the ACTIVE Cache Entry" + uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: ${{ env.CACHE_PATH }} + key: ${{ env.ACTIVE_KEY }} + + - name: "Write Summary" + shell: bash + run: | + echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY + echo "> The ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was promoted from '${{ env.STAGED_KEY }}' to 'ACTIVE'." >> $GITHUB_STEP_SUMMARY + + if [[ "${{ steps.check-active.outputs.cache-hit }}" == "true" ]]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY + echo "> The previous 'ACTIVE' ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was backed up as '${{ env.BACKUP_KEY }}'." 
>> $GITHUB_STEP_SUMMARY + echo "> If the previous model needs to be restored, promote '${{ env.BACKUP_KEY }}' and supply a different \`backup_key\`." >> $GITHUB_STEP_SUMMARY + fi diff --git a/restore/action.yml b/restore/action.yml new file mode 100644 index 0000000..3d05a5e --- /dev/null +++ b/restore/action.yml @@ -0,0 +1,72 @@ +name: "Restore Model from Cache" +description: "Restore a model from cache for label prediction or cache retention." + +inputs: + type: + description: "The model to restore. Must be 'issues' or 'pulls'." + required: true + + cache_key: + description: "The cache key suffix to use for loading the model. Defaults to 'ACTIVE'." + required: false + default: "ACTIVE" + + fail-on-cache-miss: + description: "Set to 'true' to fail the job if the model cannot be restored from cache. Defaults to 'false'." + + quiet: + description: "Set to 'true' to suppress output into the GitHub action summary. Defaults to 'false'." + + +outputs: + cache-hit: + description: "A string value ('true' or 'false') indicating whether the model was successfully restored from cache." + value: ${{ steps.restore-cache.outputs.cache-hit }} + +branding: + color: "purple" + icon: "arrow-down" + +runs: + using: "composite" + steps: + - name: "Validate Inputs" + shell: bash + run: | + if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then + echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'." + echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY + echo "> \`type\` must be either 'issues' or 'pulls'." 
>> $GITHUB_STEP_SUMMARY + exit 1 + fi + + - name: "Restore Model from Cache" + id: restore-cache + uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + with: + path: "labeler-cache/${{ inputs.type }}-model.zip" + key: "issue-labeler/model/${{ inputs.type }}/${{ inputs.cache_key || 'ACTIVE' }}" + fail-on-cache-miss: ${{ inputs.quiet == 'true' && inputs.fail-on-cache-miss == 'true' }} # when quiet, the summary step below (and its 'exit 1') is skipped, so the cache action must fail the job itself + + - name: "Produce Success Output" + if: ${{ inputs.quiet != 'true' }} + shell: bash + run: | + if [[ "${{ steps.restore-cache.outputs.cache-hit }}" == "true" ]]; then + echo "> [!NOTE]" >> $GITHUB_STEP_SUMMARY + echo "> The ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was successfully restored from cache." >> $GITHUB_STEP_SUMMARY + else + echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY + echo "> The ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} model was not restored from cache. Label prediction cannot proceed." >> $GITHUB_STEP_SUMMARY + + if [[ "${{ inputs.fail-on-cache-miss }}" != "true" ]]; then + echo "> The workflow is gracefully exiting without failure." >> $GITHUB_STEP_SUMMARY + fi + fi + echo "" >> $GITHUB_STEP_SUMMARY + echo "> [!TIP]" >> $GITHUB_STEP_SUMMARY + echo "> Refer to the [GitHub documentation](https://docs.github.com/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction) for details about cache retention policies." >> $GITHUB_STEP_SUMMARY + + if [[ "${{ steps.restore-cache.outputs.cache-hit }}" != "true" && "${{ inputs.fail-on-cache-miss }}" == "true" ]]; then + exit 1 + fi diff --git a/src/Common/ArgUtils.cs b/src/Common/ArgUtils.cs deleted file mode 100644 index e2ca961..0000000 --- a/src/Common/ArgUtils.cs +++ /dev/null @@ -1,213 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Diagnostics.CodeAnalysis; -using System.Text.RegularExpressions; - -public static class ArgUtils -{ - public static bool TryDequeueString(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out string? argValue) - { - argValue = Dequeue(args); - if (argValue is null) - { - showUsage($"Argument '{argName}' has an empty value."); - return false; - } - - return true; - } - - public static bool TryDequeueRepo(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out string? repo) - { - string? orgRepo = Dequeue(args); - if (orgRepo is null || !orgRepo.Contains('/')) - { - showUsage($$"""Argument '{{argName}}' has an empty value or is not in the format of '{org}/{repo}'."""); - org = null; - repo = null; - return false; - } - - string[] parts = orgRepo.Split('/'); - org = parts[0]; - repo = parts[1]; - return true; - } - - public static bool TryDequeueRepoList(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out string? org, [NotNullWhen(true)] out List? repos) - { - string? orgRepos = ArgUtils.Dequeue(args); - org = null; - repos = null; - - if (orgRepos is null) - { - showUsage($$"""Argument '{argName}' has an empty value or is not in the format of '{org}/{repo}'."""); - return false; - } - - foreach (var orgRepo in orgRepos.Split(',').Select(r => r.Trim())) - { - if (!orgRepo.Contains('/')) - { - showUsage($"Argument '--repo' is not in the format of '{{org}}/{{repo}}': {orgRepo}"); - return false; - } - - string[] parts = orgRepo.Split('/'); - - if (org is not null && org != parts[0]) - { - showUsage("All '--repo' values must be from the same org."); - return false; - } - - org ??= parts[0]; - repos ??= []; - repos.Add(parts[1]); - } - - return (org is not null && repos is not null); - } - - public static bool TryDequeueLabelPrefix(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out Func? 
labelPredicate) - { - if (!TryDequeueString(args, showUsage, argName, out string? labelPrefix)) - { - labelPredicate = null; - return false; - } - - // Require that the label prefix end in something other than a letter or number - // This promotes the pattern of prefixes that are clear, rather than a prefix that - // could be matched as the beginning of another word in the label - if (Regex.IsMatch(labelPrefix.AsSpan(^1),"[a-zA-Z0-9]")) - { - showUsage($""" - Argument '{argName}' must end in something other than a letter or number. - - The recommended label prefix terminating character is '-'. - The recommended label prefix for applying area labels is 'area-'. - """); - labelPredicate = null; - return false; - } - - labelPredicate = (label) => label.StartsWith(labelPrefix, StringComparison.OrdinalIgnoreCase); - return true; - } - - public static bool TryDequeuePath(Queue args, Action showUsage, string argName, out string? path) - { - if (!TryDequeueString(args, showUsage, argName, out path)) - { - return false; - } - - if (!Path.IsPathRooted(path)) - { - path = Path.GetFullPath(path); - } - - return true; - } - - public static bool TryDequeueStringArray(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out string[]? argValues) - { - if (TryDequeueString(args, showUsage, argName, out string? argString)) - { - argValues = argString.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries); - return true; - } - - argValues = null; - return false; - } - - public static bool TryDequeueInt(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out int? argValue) - { - if (TryDequeueString(args, showUsage, argName, out string? argString) && int.TryParse(argString, out int parsedValue)) - { - argValue = parsedValue; - return true; - } - - argValue = null; - return false; - } - - public static bool TryDequeueIntArray(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out int[]? 
argValues) - { - if (TryDequeueString(args, showUsage, argName, out string? argString)) - { - argValues = argString.Split(',').Select(r => int.Parse(r)).ToArray(); - return true; - } - - argValues = null; - return false; - } - - public static bool TryDequeueFloat(Queue args, Action showUsage, string argName, [NotNullWhen(true)] out float? argValue) - { - if (TryDequeueString(args, showUsage, argName, out string? argString) && float.TryParse(argString, out float parsedValue)) - { - argValue = parsedValue; - return true; - } - - argValue = null; - return false; - } - - public static bool TryDequeueNumberRanges(Queue args, Action showUsage, string argName, out List? argValues) - { - if (!TryDequeueString(args, showUsage, argName, out string? argString)) - { - argValues = null; - return false; - } - - List numbers = new(); - - foreach (var range in argString.Split(',')) - { - var beginEnd = range.Split('-'); - - if (beginEnd.Length == 1) - { - numbers.Add(ulong.Parse(beginEnd[0])); - } - else if (beginEnd.Length == 2) - { - var begin = ulong.Parse(beginEnd[0]); - var end = ulong.Parse(beginEnd[1]); - - for (var number = begin; number <= end; number++) - { - numbers.Add(number); - } - } - else - { - showUsage($"Argument '{argName}' must be comma-separated list of numbers and/or dash-separated ranges. Example: 1-3,5,7-9."); - argValues = null; - return false; - } - } - - argValues = numbers; - return true; - } - - public static string? Dequeue(Queue args) - { - if (args.TryDequeue(out string? argValue)) - { - return string.IsNullOrWhiteSpace(argValue) ? null : argValue; - } - - return null; - } -} diff --git a/src/Downloader/Args.cs b/src/Downloader/Args.cs deleted file mode 100644 index 7483dc4..0000000 --- a/src/Downloader/Args.cs +++ /dev/null @@ -1,190 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Diagnostics; - -public struct Args -{ - public string Org { get; set; } - public List Repos { get; set; } - public string GithubToken { get; set; } - public string? IssueDataPath { get; set; } - public int? IssueLimit { get; set; } - public string? PullDataPath { get; set; } - public int? PullLimit { get; set; } - public int? PageSize { get; set; } - public int? PageLimit { get; set; } - public int[] Retries { get; set; } - public string[]? ExcludedAuthors { get; set; } - public Predicate LabelPredicate { get; set; } - public bool Verbose { get; set; } - - static void ShowUsage(string? message = null) - { - string executableName = Process.GetCurrentProcess().ProcessName; - - Console.WriteLine($$""" - ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}} - - Usage: - {{executableName}} --repo {org/repo1}[,{org/repo2},...] --label-prefix {label-prefix} [options] - - Required arguments: - --repo The GitHub repositories in format org/repo (comma separated for multiple). - --label-prefix Prefix for label predictions. Must end with a character other than a letter or number. - - Required for downloading issue data: - --issue-data Path for issue data file to create (TSV file). - - Required for downloading pull request data: - --pull-data Path for pull request data file to create (TSV file). - - Optional arguments: - --issue-limit Maximum number of issues to download. - --pull-limit Maximum number of pull requests to download. - --page-size Number of items per page in GitHub API requests. - --page-limit Maximum number of pages to retrieve. - --excluded-authors Comma-separated list of authors to exclude. - --retries Comma-separated retry delays in seconds. Default: 30,30,300,300,3000,3000. - --token GitHub access token. Default: Read from GITHUB_TOKEN env var. - --verbose Enable verbose output. - """); - - Environment.Exit(1); - } - - public static Args? 
Parse(string[] args) - { - Args argsData = new() - { - Retries = [30, 30, 300, 300, 3000, 3000] - }; - - Queue arguments = new(args); - while (arguments.Count > 0) - { - string argument = arguments.Dequeue(); - - switch (argument) - { - case "--token": - if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--token", out string? token)) - { - return null; - } - argsData.GithubToken = token; - break; - - case "--repo": - if (!ArgUtils.TryDequeueRepoList(arguments, ShowUsage, "--repo", out string? org, out List? repos)) - { - return null; - } - argsData.Org = org; - argsData.Repos = repos; - break; - - case "--issue-data": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-data", out string? issueDataPath)) - { - return null; - } - argsData.IssueDataPath = issueDataPath; - break; - - case "--issue-limit": - if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--issue-limit", out int? issueLimit)) - { - return null; - } - argsData.IssueLimit = issueLimit; - break; - - case "--pull-data": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-data", out string? pullDataPath)) - { - return null; - } - argsData.PullDataPath = pullDataPath; - break; - - case "--pull-limit": - if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--pull-limit", out int? pullLimit)) - { - return null; - } - argsData.PullLimit = pullLimit; - break; - - case "--page-size": - if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--page-size", out int? pageSize)) - { - return null; - } - argsData.PageSize = pageSize; - break; - - case "--page-limit": - if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--page-limit", out int? pageLimit)) - { - return null; - } - argsData.PageLimit = pageLimit; - break; - - case "--excluded-authors": - if (!ArgUtils.TryDequeueStringArray(arguments, ShowUsage, "--excluded-authors", out string[]? 
excludedAuthors)) - { - return null; - } - argsData.ExcludedAuthors = excludedAuthors; - break; - - case "--retries": - if (!ArgUtils.TryDequeueIntArray(arguments, ShowUsage, "--retries", out int[]? retries)) - { - return null; - } - argsData.Retries = retries; - break; - - case "--label-prefix": - if (!ArgUtils.TryDequeueLabelPrefix(arguments, ShowUsage, "--label-prefix", out Func? labelPredicate)) - { - return null; - } - argsData.LabelPredicate = new(labelPredicate); - break; - - case "--verbose": - argsData.Verbose = true; - break; - default: - ShowUsage($"Unrecognized argument: {argument}"); - return null; - } - } - - if (argsData.Org is null || argsData.Repos is null || argsData.LabelPredicate is null || - (argsData.IssueDataPath is null && argsData.PullDataPath is null)) - { - ShowUsage(); - return null; - } - - if (argsData.GithubToken is null) - { - string? token = Environment.GetEnvironmentVariable("GITHUB_TOKEN"); - - if (string.IsNullOrEmpty(token)) - { - ShowUsage("Argument '--token' not specified and environment variable GITHUB_TOKEN is empty."); - return null; - } - - argsData.GithubToken = token; - } - - return argsData; - } -} diff --git a/src/GitHubClient/GitHubApi.cs b/src/GitHubClient/GitHubApi.cs deleted file mode 100644 index fc70bd3..0000000 --- a/src/GitHubClient/GitHubApi.cs +++ /dev/null @@ -1,423 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Collections.Concurrent; -using System.Net.Http.Json; -using GraphQL; -using GraphQL.Client.Http; -using GraphQL.Client.Serializer.SystemTextJson; - -namespace GitHubClient; - -public class GitHubApi -{ - private static ConcurrentDictionary _graphQLClients = new(); - private static ConcurrentDictionary _restClients = new(); - - private static GraphQLHttpClient GetGraphQLClient(string githubToken) => - _graphQLClients.GetOrAdd(githubToken, token => - { - GraphQLHttpClient client = new("https://api.github.com/graphql", new SystemTextJsonSerializer()); - client.HttpClient.DefaultRequestHeaders.Authorization = - new System.Net.Http.Headers.AuthenticationHeaderValue( - scheme: "bearer", - parameter: token); - - client.HttpClient.Timeout = TimeSpan.FromMinutes(2); - - return client; - }); - - private static HttpClient GetRestClient(string githubToken) => - _restClients.GetOrAdd(githubToken, token => - { - HttpClient client = new(); - client.DefaultRequestHeaders.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue( - scheme: "bearer", - parameter: token); - client.DefaultRequestHeaders.Accept.Add(new("application/vnd.github+json")); - client.DefaultRequestHeaders.Add("X-GitHub-Api-Version", "2022-11-28"); - client.DefaultRequestHeaders.Add("User-Agent", "Issue-Labeler"); - - return client; - }); - - public static async IAsyncEnumerable<(Issue Issue, string Label)> DownloadIssues( - string githubToken, - string org, string repo, - Predicate labelPredicate, - int? 
issueLimit, - int pageSize, - int pageLimit, - int[] retries, - string[] excludedAuthors, - bool verbose = false) - { - await foreach (var item in DownloadItems("issues", githubToken, org, repo, labelPredicate, issueLimit, pageSize, pageLimit, retries, excludedAuthors, verbose)) - { - yield return (item.Item, item.Label); - } - } - - public static async IAsyncEnumerable<(PullRequest PullRequest, string Label)> DownloadPullRequests( - string githubToken, - string org, - string repo, - Predicate labelPredicate, - int? pullLimit, - int pageSize, - int pageLimit, - int[] retries, - string[] excludedAuthors, - bool verbose = false) - { - var items = DownloadItems("pullRequests", githubToken, org, repo, labelPredicate, pullLimit, pageSize, pageLimit, retries, excludedAuthors, verbose); - - await foreach (var item in items) - { - yield return (item.Item, item.Label); - } - } - - private static async IAsyncEnumerable<(T Item, string Label)> DownloadItems( - string itemQueryName, - string githubToken, - string org, - string repo, - Predicate labelPredicate, - int? itemLimit, - int pageSize, - int pageLimit, - int[] retries, - string[] excludedAuthors, - bool verbose) where T : Issue - { - pageSize = Math.Min(pageSize, 100); - - int pageNumber = 0; - string? after = null; - bool hasNextPage = true; - int loadedCount = 0; - int includedCount = 0; - int? totalCount = null; - byte retry = 0; - bool finished = false; - - do - { - Console.WriteLine($"Downloading {itemQueryName} page {pageNumber + 1} from {org}/{repo}...{(retry > 0 ? $" (retry {retry} of {retries.Length}) " : "")}{(after is not null ? 
$" (cursor: '{after}')" : "")}"); - - Page page; - - try - { - page = await GetItemsPage(githubToken, org, repo, pageSize, after, itemQueryName, excludedAuthors); - } - catch (Exception ex) when ( - ex is HttpIOException || - ex is HttpRequestException || - ex is GraphQLHttpRequestException || - ex is TaskCanceledException - ) - { - Console.WriteLine($"Exception caught during query.\n {ex.Message}"); - - if (retry >= retries.Length - 1) - { - Console.WriteLine($"Retry limit of {retries.Length} reached. Aborting."); - break; - } - else - { - Console.WriteLine($"Waiting {retries[retry]} seconds before retry {retry + 1} of {retries.Length}..."); - await Task.Delay(retries[retry] * 1000); - retry++; - - continue; - } - } - - if (after == page.EndCursor) - { - Console.WriteLine($"Paging did not progress. Cursor: '{after}'. Aborting."); - break; - } - - pageNumber++; - after = page.EndCursor; - hasNextPage = page.HasNextPage; - loadedCount += page.Nodes.Length; - totalCount ??= page.TotalCount; - retry = 0; - - foreach (T item in page.Nodes) - { - if (excludedAuthors.Contains(item.Author.Login, StringComparer.InvariantCultureIgnoreCase)) - { - if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Excluded from output. Author '{item.Author.Login}' is in excluded list."); - continue; - } - - // If there are more labels, there might be other applicable - // labels that were not loaded and the model is incomplete. - if (item.Labels.HasNextPage) - { - if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Excluded from output. Not all labels were loaded."); - continue; - } - - // Only items with exactly one applicable label are used for the model. - string[] labels = Array.FindAll(item.LabelNames, labelPredicate); - if (labels.Length != 1) - { - if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Excluded from output. 
{labels.Length} applicable labels found."); - continue; - } - - // Exactly one applicable label was found on the item. Include it in the model. - if (verbose) Console.WriteLine($"{itemQueryName} {org}/{repo}#{item.Number} - Included in output. Applicable label: '{labels[0]}'."); - - yield return (item, labels[0]); - - includedCount++; - - if (itemLimit.HasValue && includedCount >= itemLimit) - { - break; - } - } - - finished = (!hasNextPage || pageNumber >= pageLimit || (itemLimit.HasValue && includedCount >= itemLimit)); - - Console.WriteLine( - $"Included: {includedCount} (limit: {(itemLimit.HasValue ? itemLimit : "none")}) | " + - $"Downloaded: {loadedCount} (total: {totalCount}) | " + - $"Pages: {pageNumber} (limit: {pageLimit})"); - } - while (!finished); - } - - private static async Task> GetItemsPage(string githubToken, string org, string repo, int pageSize, string? after, string itemQueryName, string[] excludedAuthors) where T : Issue - { - GraphQLHttpClient client = GetGraphQLClient(githubToken); - - string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : ""; - - GraphQLRequest query = new GraphQLRequest - { - Query = $$""" - query ($owner: String!, $repo: String!, $after: String) { - repository (owner: $owner, name: $repo) { - result:{{itemQueryName}} (after: $after, first: {{pageSize}}, orderBy: {field: CREATED_AT, direction: DESC}) { - nodes { - number - title - author { login } - body: bodyText - labels (first: 25) { - nodes { name }, - pageInfo { hasNextPage } - } - {{files}} - } - pageInfo { - hasNextPage - endCursor - } - totalCount - } - } - } - """, - Variables = new - { - Owner = org, - Repo = repo, - After = after - } - }; - - var response = await client.SendQueryAsync>>(query); - - if (response.Errors?.Any() ?? false) - { - string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. 
{e.Message}").ToArray()); - throw new ApplicationException($"GraphQL request returned errors.\n\n{errors}"); - } - else if (response.Data is null || response.Data.Repository is null || response.Data.Repository.Result is null) - { - throw new ApplicationException("GraphQL response did not include the repository result data"); - } - - return response.Data.Repository.Result; - } - - public static async Task GetIssue(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose) => - await GetItem(githubToken, org, repo, number, retries, verbose, "issue"); - - public static async Task GetPullRequest(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose) => - await GetItem(githubToken, org, repo, number, retries, verbose, "pullRequest"); - - private static async Task GetItem(string githubToken, string org, string repo, ulong number, int[] retries, bool verbose, string itemQueryName) where T : Issue - { - GraphQLHttpClient client = GetGraphQLClient(githubToken); - string files = typeof(T) == typeof(PullRequest) ? "files (first: 100) { nodes { path } }" : ""; - - GraphQLRequest query = new GraphQLRequest - { - Query = $$""" - query ($owner: String!, $repo: String!, $number: Int!) { - repository (owner: $owner, name: $repo) { - result:{{itemQueryName}} (number: $number) { - number - title - author { login } - body: bodyText - labels (first: 25) { - nodes { name }, - pageInfo { hasNextPage } - } - {{files}} - } - } - } - """, - Variables = new - { - Owner = org, - Repo = repo, - Number = number - } - }; - - byte retry = 0; - - while (retry < retries.Length) - { - try - { - var response = await client.SendQueryAsync>(query); - - if (!(response.Errors?.Any() ?? false) && response.Data?.Repository?.Result is not null) - { - return response.Data.Repository.Result; - } - - if (response.Errors?.Any() ?? 
false) - { - // These errors occur when an issue/pull does not exist or when the API rate limit has been exceeded - if (response.Errors.Any(e => e.Message.StartsWith("API rate limit exceeded"))) - { - Console.WriteLine($""" - [{itemQueryName} #{number}] Failed to retrieve data. - Rate limit has been reached. - {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")} - """); - } - else - { - // Could not detect this as a rate limit issue. Do not retry. - - string errors = string.Join("\n\n", response.Errors.Select((e, i) => $"{i + 1}. {e.Message}").ToArray()); - - Console.WriteLine($""" - [{itemQueryName} #{number}] Failed to retrieve data. - GraphQL request returned errors: - - {errors} - """); - - return null; - } - } - else - { - // Do not retry as these errors are not recoverable - // This is usually a bug during development when the query/response model is incorrect - Console.WriteLine($""" - [{itemQueryName} #{number}] Failed to retrieve data. - GraphQL response did not include the repository result data. - """); - - return null; - } - } - catch (Exception ex) when ( - ex is HttpIOException || - ex is HttpRequestException || - ex is GraphQLHttpRequestException || - ex is TaskCanceledException - ) - { - // Retry on exceptions as they can be temporary network issues - Console.WriteLine($""" - [{itemQueryName} #{number}] Failed to retrieve data. - Exception caught during query. - - {ex.Message} - - {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." 
: $"Retry limit of {retries.Length} reached.")} - """); - } - - await Task.Delay(retries[retry++] * 1000); - } - - return null; - } - - public static async Task AddLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries) - { - var client = GetRestClient(githubToken); - byte retry = 0; - - while (retry < retries.Length) - { - var response = await client.PostAsJsonAsync( - $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels", - new string[] { label }, - CancellationToken.None); - - if (response.IsSuccessStatusCode) - { - return null; - } - - Console.WriteLine($""" - [{type} #{number}] Failed to add label '{label}'. {response.ReasonPhrase} ({response.StatusCode}) - {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." : $"Retry limit of {retries.Length} reached.")} - """); - - await Task.Delay(retries[retry++] * 1000); - } - - return $"Failed to add label '{label}' after {retries.Length} retries."; - } - - public static async Task RemoveLabel(string githubToken, string org, string repo, string type, ulong number, string label, int[] retries) - { - var client = GetRestClient(githubToken); - byte retry = 0; - - while (retry < retries.Length) - { - var response = await client.DeleteAsync( - $"https://api.github.com/repos/{org}/{repo}/issues/{number}/labels/{label}", - CancellationToken.None); - - if (response.IsSuccessStatusCode) - { - return null; - } - - Console.WriteLine($""" - [{type} #{number}] Failed to remove label '{label}'. {response.ReasonPhrase} ({response.StatusCode}) - {(retry < retries.Length ? $"Will proceed with retry {retry + 1} of {retries.Length} after {retries[retry]} seconds..." 
: $"Retry limit of {retries.Length} reached.")} - """); - - await Task.Delay(retries[retry++] * 1000); - } - - return $"Failed to remove label '{label}' after {retries.Length} retries."; - } -} diff --git a/src/Predictor/Args.cs b/src/Predictor/Args.cs deleted file mode 100644 index 7117fda..0000000 --- a/src/Predictor/Args.cs +++ /dev/null @@ -1,210 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; - -public struct Args -{ - public string Org { get; set; } - public string Repo { get; set; } - public string GithubToken { get; set; } - public string? IssueModelPath { get; set; } - public List? IssueNumbers { get; set; } - public string? PullModelPath { get; set; } - public List? PullNumbers { get; set; } - public float Threshold { get; set; } - public Func LabelPredicate { get; set; } - public string? DefaultLabel { get; set; } - public int[] Retries { get; set; } - public bool Verbose { get; set; } - public string[]? ExcludedAuthors { get; set; } - public bool Test { get; set; } - - static void ShowUsage(string? message = null) - { - string executableName = Process.GetCurrentProcess().ProcessName; - - Console.WriteLine($$""" - ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}} - - Usage: - {{executableName}} --repo {org/repo} --label-prefix {label-prefix} [options] - - Required arguments: - --repo GitHub repository in the format {org}/{repo}. - --label-prefix Prefix for label predictions. Must end with a character other than a letter or number. - - Required for predicting issue labels: - --issue-model Path to existing issue prediction model file (ZIP file). - --issue-numbers Comma-separated list of issue number ranges. Example: 1-3,7,5-9. - - Required for predicting pull request labels: - --pull-model Path to existing pull request prediction model file (ZIP file). 
- --pull-numbers Comma-separated list of pull request number ranges. Example: 1-3,7,5-9. - - Optional arguments: - --default-label Default label to use if no label is predicted. - --threshold Minimum prediction confidence threshold. Range (0,1]. Default 0.4. - --retries Comma-separated retry delays in seconds. Default: 30,30,300,300,3000,3000. - --excluded-authors Comma-separated list of authors to exclude. - --token GitHub token. Default: read from GITHUB_TOKEN env var. - --test Run in test mode, outputting predictions without applying labels. - --verbose Enable verbose output. - """); - - Environment.Exit(1); - } - - public static Args? Parse(string[] args) - { - Args argsData = new() - { - Threshold = 0.4f, - Retries = [30, 30, 300, 300, 3000, 3000] - }; - - Queue arguments = new(args); - while (arguments.Count > 0) - { - string argument = arguments.Dequeue(); - - switch (argument) - { - case "--token": - if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--token", out string? token)) - { - return null; - } - argsData.GithubToken = token; - break; - - case "--repo": - if (!ArgUtils.TryDequeueRepo(arguments, ShowUsage, "--repo", out string? org, out string? repo)) - { - return null; - } - argsData.Org = org; - argsData.Repo = repo; - break; - - case "--issue-model": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-model", out string? issueModelPath)) - { - return null; - } - argsData.IssueModelPath = issueModelPath; - break; - - case "--issue-numbers": - if (!ArgUtils.TryDequeueNumberRanges(arguments, ShowUsage, "--issue-numbers", out List? issueNumbers)) - { - return null; - } - argsData.IssueNumbers = issueNumbers; - break; - - case "--pull-model": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-model", out string? pullModelPath)) - { - return null; - } - argsData.PullModelPath = pullModelPath; - break; - - case "--pull-numbers": - if (!ArgUtils.TryDequeueNumberRanges(arguments, ShowUsage, "--pull-numbers", out List? 
pullNumbers)) - { - return null; - } - argsData.PullNumbers = pullNumbers; - break; - - case "--label-prefix": - if (!ArgUtils.TryDequeueLabelPrefix(arguments, ShowUsage, "--label-prefix", out Func? labelPredicate)) - { - return null; - } - argsData.LabelPredicate = labelPredicate; - break; - - case "--threshold": - if (!ArgUtils.TryDequeueFloat(arguments, ShowUsage, "--threshold", out float? threshold)) - { - return null; - } - argsData.Threshold = threshold.Value; - break; - - case "--default-label": - if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--default-label", out string? defaultLabel)) - { - return null; - } - argsData.DefaultLabel = defaultLabel; - break; - - case "--retries": - if (!ArgUtils.TryDequeueIntArray(arguments, ShowUsage, "--retries", out int[]? retries)) - { - return null; - } - argsData.Retries = retries; - break; - - case "--excluded-authors": - if (!ArgUtils.TryDequeueStringArray(arguments, ShowUsage, "--excluded-authors", out string[]? excludedAuthors)) - { - return null; - } - argsData.ExcludedAuthors = excludedAuthors; - break; - - case "--test": - argsData.Test = true; - break; - - case "--verbose": - argsData.Verbose = true; - break; - - default: - ShowUsage($"Unrecognized argument: {argument}"); - return null; - } - } - - // Check if any required argsDatauration properties are missing or invalid. 
- // The conditions are: - // - Org is null - // - Repo is null - // - gitHubToken is null and the environment variable was not set - // - Threshold is 0 - // - LabelPredicate is null - // - IssueModelPath is null while IssueNumbers is not null, or vice versa - // - PullModelPath is null while PullNumbers is not null, or vice versa - // - Both IssueModelPath and PullModelPath are null - if (argsData.Org is null || argsData.Repo is null || argsData.Threshold == 0 || argsData.LabelPredicate is null || - (argsData.IssueModelPath is null != argsData.IssueNumbers is null) || - (argsData.PullModelPath is null != argsData.PullNumbers is null) || - (argsData.IssueModelPath is null && argsData.PullModelPath is null)) - { - ShowUsage(); - return null; - } - - if (argsData.GithubToken is null) - { - string? token = Environment.GetEnvironmentVariable("GITHUB_TOKEN"); - - if (string.IsNullOrEmpty(token)) - { - ShowUsage("Argument '--token' not specified and environment variable GITHUB_TOKEN is empty."); - return null; - } - - argsData.GithubToken = token; - } - - return argsData; - } -} diff --git a/src/Predictor/Predictor.cs b/src/Predictor/Predictor.cs deleted file mode 100644 index 2ac2557..0000000 --- a/src/Predictor/Predictor.cs +++ /dev/null @@ -1,218 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using Microsoft.ML; -using Microsoft.ML.Data; -using GitHubClient; - -var config = Args.Parse(args); -if (config is not Args argsData) return; - -List> tasks = new(); - -if (argsData.IssueModelPath is not null && argsData.IssueNumbers is not null) -{ - Console.WriteLine("Loading issues model..."); - var issueContext = new MLContext(); - var issueModel = issueContext.Model.Load(argsData.IssueModelPath, out _); - var issuePredictor = issueContext.Model.CreatePredictionEngine(issueModel); - Console.WriteLine("Issues prediction engine ready."); - - foreach (ulong issueNumber in argsData.IssueNumbers) - { - var result = await GitHubApi.GetIssue(argsData.GithubToken, argsData.Org, argsData.Repo, issueNumber, argsData.Retries, argsData.Verbose); - - if (result is null) - { - Console.WriteLine($"[Issue #{issueNumber}] could not be found or downloaded. Skipped."); - continue; - } - - if (argsData.ExcludedAuthors is not null && argsData.ExcludedAuthors.Contains(result.Author.Login, StringComparer.InvariantCultureIgnoreCase)) - { - Console.WriteLine($"[Issue #{issueNumber}] Author '{result.Author.Login}' is in excluded list. 
Skipped."); - continue; - } - - tasks.Add(Task.Run(() => ProcessPrediction( - issuePredictor, - issueNumber, - new Issue(result), - argsData.LabelPredicate, - argsData.DefaultLabel, - ModelType.Issue, - argsData.Retries, - argsData.Test - ))); - - Console.WriteLine($"[Issue #{issueNumber}] Queued for prediction."); - } -} - -if (argsData.PullModelPath is not null && argsData.PullNumbers is not null) -{ - Console.WriteLine("Loading pulls model..."); - var pullContext = new MLContext(); - var pullModel = pullContext.Model.Load(argsData.PullModelPath, out _); - var pullPredictor = pullContext.Model.CreatePredictionEngine(pullModel); - Console.WriteLine("Pulls prediction engine ready."); - - foreach (ulong pullNumber in argsData.PullNumbers) - { - var result = await GitHubApi.GetPullRequest(argsData.GithubToken, argsData.Org, argsData.Repo, pullNumber, argsData.Retries, argsData.Verbose); - - if (result is null) - { - Console.WriteLine($"[Pull Request #{pullNumber}] could not be found or downloaded. Skipped."); - continue; - } - - if (argsData.ExcludedAuthors is not null && argsData.ExcludedAuthors.Contains(result.Author.Login)) - { - Console.WriteLine($"[Pull Request #{pullNumber}] Author '{result.Author.Login}' is in excluded list. Skipped."); - continue; - } - - tasks.Add(Task.Run(() => ProcessPrediction( - pullPredictor, - pullNumber, - new PullRequest(result), - argsData.LabelPredicate, - argsData.DefaultLabel, - ModelType.PullRequest, - argsData.Retries, - argsData.Test - ))); - - Console.WriteLine($"[Pull Request #{pullNumber}] Queued for prediction."); - } -} - -var allTasks = Task.WhenAll(tasks); - -try -{ - allTasks.Wait(); -} -catch (AggregateException) { } - -foreach (var prediction in allTasks.Result) -{ - Console.WriteLine($""" - [{prediction.Type} #{prediction.Number}{(prediction.Success ? 
"" : " FAILURE")}] - {string.Join("\n ", prediction.Output)} - - """); -} - -async Task<(ModelType, ulong, bool, string[])> ProcessPrediction(PredictionEngine predictor, ulong number, T issueOrPull, Func labelPredicate, string? defaultLabel, ModelType type, int[] retries, bool test) where T : Issue -{ - List output = new(); - string? error = null; - - if (issueOrPull.HasMoreLabels) - { - output.Add($"[{type} #{number}] No action taken. Too many labels applied already; cannot be sure no applicable label is already applied."); - return (type, number, true, output.ToArray()); - } - - var applicableLabel = issueOrPull.Labels?.FirstOrDefault(labelPredicate); - - bool hasDefaultLabel = - (defaultLabel is not null) && - (issueOrPull.Labels?.Any(l => l.Equals(defaultLabel, StringComparison.OrdinalIgnoreCase)) ?? false); - - if (applicableLabel is not null) - { - output.Add($"Applicable label '{applicableLabel}' already exists."); - - if (hasDefaultLabel && defaultLabel is not null) - { - if (!test) - { - error = await GitHubApi.RemoveLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, defaultLabel, argsData.Retries); - } - - output.Add(error ?? 
$"Removed default label '{defaultLabel}'."); - } - - return (type, number, error is null, output.ToArray()); - } - - var prediction = predictor.Predict(issueOrPull); - - if (prediction.Score is null || prediction.Score.Length == 0) - { - output.Add("No prediction was made."); - return (type, number, true, output.ToArray()); - } - - VBuffer> labels = default; - predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels); - - var predictions = prediction.Score - .Select((score, index) => new - { - Score = score, - Label = labels.GetItemOrDefault(index).ToString() - }) - // Ensure predicted labels match the expected predicate - .Where(prediction => labelPredicate(prediction.Label)) - // Capture the top 3 for including in the output - .OrderByDescending(p => p.Score) - .Take(3); - - output.Add("Label predictions:"); - output.AddRange(predictions.Select(p => $" '{p.Label}' - Score: {p.Score}")); - - var bestScore = predictions.FirstOrDefault(p => p.Score >= argsData.Threshold); - output.Add(bestScore is not null ? - $"Label '{bestScore.Label}' meets threshold of {argsData.Threshold}." : - $"No label meets the threshold of {argsData.Threshold}."); - - if (bestScore is not null) - { - if (!test) - { - error = await GitHubApi.AddLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, bestScore.Label, retries); - } - - output.Add(error ?? $"Added label '{bestScore.Label}'"); - - if (error is not null) - { - return (type, number, false, output.ToArray()); - } - - if (hasDefaultLabel && defaultLabel is not null) - { - if (!test) - { - error = await GitHubApi.RemoveLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, defaultLabel, retries); - } - - output.Add(error ?? 
$"Removed default label '{defaultLabel}'"); - } - - return (type, number, error is null, output.ToArray()); - } - - if (defaultLabel is not null) - { - if (hasDefaultLabel) - { - output.Add($"Default label '{defaultLabel}' is already applied."); - } - else - { - if (!test) - { - error = await GitHubApi.AddLabel(argsData.GithubToken, argsData.Org, argsData.Repo, type.ToString(), number, defaultLabel, argsData.Retries); - } - - output.Add(error ?? $"Applied default label '{defaultLabel}'."); - } - } - - return (type, number, error is null, output.ToArray()); -} diff --git a/src/Tester/Args.cs b/src/Tester/Args.cs deleted file mode 100644 index c402b55..0000000 --- a/src/Tester/Args.cs +++ /dev/null @@ -1,184 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; - -public struct Args -{ - public string? Org { get; set; } - public List Repos { get; set; } - public string? GithubToken { get; set; } - public string? IssueDataPath { get; set; } - public string? IssueModelPath { get; set; } - public int? IssueLimit { get; set; } - public string? PullDataPath { get; set; } - public string? PullModelPath { get; set; } - public int? PullLimit { get; set; } - public float? Threshold { get; set; } - public Predicate LabelPredicate { get; set; } - public string[]? ExcludedAuthors { get; set; } - - static void ShowUsage(string? message = null) - { - // The entire condition is used to determine if the configuration is invalid. - // If any of the following are true, the configuration is considered invalid: - // • The LabelPredicate is null. - // • Both IssueDataPath and PullDataPath are null, and either Org, Repos, or GithubToken is null. - // • Both IssueModelPath and PullModelPath are null. - - string executableName = Process.GetCurrentProcess().ProcessName; - - Console.WriteLine($$""" - ERROR: Invalid or missing arguments.{{(message is null ? 
"" : " " + message)}} - - Usage: - {{executableName}} --repo {org/repo1}[,{org/repo2},...] --label-prefix {label-prefix} [options] - - Required arguments: - --repo The GitHub repositories in format org/repo (comma separated for multiple). - --label-prefix Prefix for label predictions. Must end with a character other than a letter or number. - - Required for testing the issue model: - --issue-data Path to existing issue data file (TSV file). - --issue-model Path to existing issue prediction model file (ZIP file). - - Required for testing the pull request model: - --pull-data Path to existing pull request data file (TSV file). - --pull-model Path to existing pull request prediction model file (ZIP file). - - Optional arguments: - --threshold Minimum prediction confidence threshold. Range (0,1]. Default 0.4. - --issue-limit Maximum number of issues to download. Default: No limit. - --pull-limit Maximum number of pull requests to download. Default: No limit. - --excluded-authors Comma-separated list of authors to exclude. - --token GitHub access token. Default: read from GITHUB_TOKEN env var. - """); - - - Environment.Exit(1); - } - - public static Args? Parse(string[] args) - { - Args argsData = new() - { - Threshold = 0.4f - }; - - Queue arguments = new(args); - while (arguments.Count > 0) - { - string argument = arguments.Dequeue(); - - switch (argument) - { - case "--token": - if (!ArgUtils.TryDequeueString(arguments, ShowUsage, "--token", out string? token)) - { - return null; - } - argsData.GithubToken = token; - break; - - case "--repo": - if (!ArgUtils.TryDequeueRepoList(arguments, ShowUsage, "--repo", out string? org, out List? repos)) - { - return null; - } - argsData.Org = org; - argsData.Repos = repos; - break; - - case "--issue-data": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-data", out string? 
issueDataPath)) - { - return null; - } - argsData.IssueDataPath = issueDataPath; - break; - - case "--issue-model": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-model", out string? issueModelPath)) - { - return null; - } - argsData.IssueModelPath = issueModelPath; - break; - - case "--issue-limit": - if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--issue-limit", out int? issueLimit)) - { - return null; - } - argsData.IssueLimit = issueLimit; - break; - - case "--pull-data": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-data", out string? pullDataPath)) - { - return null; - } - argsData.PullDataPath = pullDataPath; - break; - - case "--pull-model": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-model", out string? pullModelPath)) - { - return null; - } - argsData.PullModelPath = pullModelPath; - break; - - case "--pull-limit": - if (!ArgUtils.TryDequeueInt(arguments, ShowUsage, "--pull-limit", out int? pullLimit)) - { - return null; - } - argsData.PullLimit = pullLimit; - break; - - case "--label-prefix": - if (!ArgUtils.TryDequeueLabelPrefix(arguments, ShowUsage, "--label-prefix", out Func? labelPredicate)) - { - return null; - } - argsData.LabelPredicate = new(labelPredicate); - break; - - case "--threshold": - if (!ArgUtils.TryDequeueFloat(arguments, ShowUsage, "--threshold", out float? threshold)) - { - return null; - } - argsData.Threshold = threshold.Value; - break; - - case "--excluded-authors": - if (!ArgUtils.TryDequeueStringArray(arguments, ShowUsage, "--excluded-authors", out string[]? 
excludedAuthors)) - { - return null; - } - argsData.ExcludedAuthors = excludedAuthors; - break; - - default: - ShowUsage($"Unrecognized argument: {argument}"); - return null; - } - } - - if (argsData.LabelPredicate is null || - ( - argsData.IssueDataPath is null && argsData.PullDataPath is null && - (argsData.Org is null || argsData.Repos.Count == 0 || argsData.GithubToken is null) - ) || - (argsData.IssueModelPath is null && argsData.PullModelPath is null) - ) - { - ShowUsage(); - return null; - } - - return argsData; - } -} diff --git a/src/Tester/Tester.cs b/src/Tester/Tester.cs deleted file mode 100644 index 71717b1..0000000 --- a/src/Tester/Tester.cs +++ /dev/null @@ -1,223 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using Microsoft.ML; -using Microsoft.ML.Data; -using GitHubClient; - -var config = Args.Parse(args); -if (config is not Args argsData) return; - -List tasks = []; - -if (argsData.IssueModelPath is not null) -{ - tasks.Add(Task.Run(() => TestIssues())); -} - -if (argsData.PullModelPath is not null) -{ - tasks.Add(Task.Run(() => TestPullRequests())); -} - -await Task.WhenAll(tasks); - -async IAsyncEnumerable ReadData(string dataPath, Func readLine, int? rowLimit) -{ - var allLines = File.ReadLinesAsync(dataPath); - ulong rowNum = 0; - rowLimit ??= 50000; - - await foreach (var line in allLines) - { - // Skip the header row - if (rowNum == 0) - { - rowNum++; - continue; - } - - string[] columns = line.Split('\t'); - yield return readLine(rowNum, columns); - - if ((int)rowNum++ >= rowLimit) - { - break; - } - } -} - -async IAsyncEnumerable DownloadIssues(string githubToken, string org, string repo) -{ - await foreach (var result in GitHubApi.DownloadIssues(githubToken, org, repo, argsData.LabelPredicate, argsData.IssueLimit, 100, 1000, [30, 30, 30], argsData.ExcludedAuthors ?? 
[])) - { - yield return new(result.Issue, argsData.LabelPredicate); - } -} - -async Task TestIssues() -{ - if (argsData.IssueDataPath is not null) - { - var issueList = ReadData(argsData.IssueDataPath, (num, columns) => new Issue() - { - Number = num, - Label = columns[0], - Title = columns[1], - Body = columns[2] - }, argsData.IssueLimit); - - await TestPredictions(issueList, argsData.IssueModelPath); - return; - } - - if (argsData.GithubToken is not null && argsData.Org is not null && argsData.Repos is not null) - { - foreach (var repo in argsData.Repos) - { - Console.WriteLine($"Downloading and testing issues from {argsData.Org}/{repo}."); - - var issueList = DownloadIssues(argsData.GithubToken, argsData.Org, repo); - await TestPredictions(issueList, argsData.IssueModelPath); - } - } -} - -async IAsyncEnumerable DownloadPullRequests(string githubToken, string org, string repo) -{ - await foreach (var result in GitHubApi.DownloadPullRequests(githubToken, org, repo, argsData.LabelPredicate, argsData.PullLimit, 25, 4000, [30, 30, 30], argsData.ExcludedAuthors ?? 
[])) - { - yield return new(result.PullRequest, argsData.LabelPredicate); - } -} - -async Task TestPullRequests() -{ - if (argsData.PullDataPath is not null) - { - var pullList = ReadData(argsData.PullDataPath, (num, columns) => new PullRequest() - { - Number = num, - Label = columns[0], - Title = columns[1], - Body = columns[2], - FileNames = columns[3], - FolderNames = columns[4] - }, argsData.PullLimit); - - await TestPredictions(pullList, argsData.PullModelPath); - return; - } - - if (argsData.GithubToken is not null && argsData.Org is not null && argsData.Repos is not null) - { - foreach (var repo in argsData.Repos) - { - Console.WriteLine($"Downloading and testing pull requests from {argsData.Org}/{repo}."); - - var pullList = DownloadPullRequests(argsData.GithubToken, argsData.Org, repo); - await TestPredictions(pullList, argsData.PullModelPath); - } - } -} - -static string GetStats(List values) -{ - if (values.Count == 0) - { - return "N/A"; - } - - float min = values.Min(); - float average = values.Average(); - float max = values.Max(); - double deviation = Math.Sqrt(values.Average(v => Math.Pow(v - average, 2))); - - return $"{min} | {average} | {max} | {deviation}"; -} - -async Task TestPredictions(IAsyncEnumerable results, string modelPath) where T : Issue -{ - var context = new MLContext(); - var model = context.Model.Load(modelPath, out _); - var predictor = context.Model.CreatePredictionEngine(model); - var itemType = typeof(T) == typeof(PullRequest) ? "Pull Request" : "Issue"; - - int matches = 0; - int mismatches = 0; - int noPrediction = 0; - int noExisting = 0; - - List matchScores = []; - List mismatchScores = []; - - await foreach (var result in results) - { - (string? predictedLabel, float? 
score) = GetPrediction( - predictor, - result, - argsData.Threshold); - - if (predictedLabel is null && result.Label is not null) - { - noPrediction++; - } - else if (predictedLabel is not null && result.Label is null) - { - noExisting++; - } - else if (predictedLabel?.ToLower() == result.Label?.ToLower()) - { - matches++; - - if (score.HasValue) - { - matchScores.Add(score.Value); - } - } - else - { - mismatches++; - - if (score.HasValue) - { - mismatchScores.Add(score.Value); - } - } - - float total = matches + mismatches + noPrediction + noExisting; - Console.WriteLine($"{itemType} #{result.Number} - Predicted: {(predictedLabel ?? "")} - Existing: {(result.Label ?? "")}"); - Console.WriteLine($" Matches : {matches} ({(float)matches / total:P2}) - Min | Avg | Max | StdDev: {GetStats(matchScores)}"); - Console.WriteLine($" Mismatches : {mismatches} ({(float)mismatches / total:P2}) - Min | Avg | Max | StdDev: {GetStats(mismatchScores)}"); - Console.WriteLine($" No Prediction: {noPrediction} ({(float)noPrediction / total:P2})"); - Console.WriteLine($" No Existing : {noExisting} ({(float)noExisting / total:P2})"); - } - - Console.WriteLine("Test Complete"); -} - -(string? PredictedLabel, float? PredictionScore) GetPrediction(PredictionEngine predictor, T issueOrPull, float? threshold) where T : Issue -{ - var prediction = predictor.Predict(issueOrPull); - var itemType = typeof(T) == typeof(PullRequest) ? 
"Pull Request" : "Issue"; - - if (prediction.Score is null || prediction.Score.Length == 0) - { - Console.WriteLine($"No prediction was made for {itemType} #{issueOrPull.Number}."); - return (null, null); - } - - VBuffer> labels = default; - predictor.OutputSchema[nameof(LabelPrediction.Score)].GetSlotNames(ref labels); - - var bestScore = prediction.Score - .Select((score, index) => new - { - Score = score, - Label = labels.GetItemOrDefault(index).ToString() - }) - .OrderByDescending(p => p.Score) - .FirstOrDefault(p => threshold is null || p.Score >= threshold); - - return bestScore is not null ? (bestScore.Label, bestScore.Score) : ((string?)null, (float?)null); -} diff --git a/src/Trainer/Args.cs b/src/Trainer/Args.cs deleted file mode 100644 index 90d1d53..0000000 --- a/src/Trainer/Args.cs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; - -public struct Args -{ - public string? IssueDataPath { get; set; } - public string? IssueModelPath { get; set; } - public string? PullDataPath { get; set; } - public string? PullModelPath { get; set; } - - static void ShowUsage(string? message = null) - { - // If you provide a path for issue data, you must also provide a path for the issue model, and vice versa. - // If you provide a path for pull data, you must also provide a path for the pull model, and vice versa. - // At least one pair of paths(either issue or pull) must be provided. - string executableName = Process.GetCurrentProcess().ProcessName; - - Console.WriteLine($$""" - ERROR: Invalid or missing arguments.{{(message is null ? "" : " " + message)}} - - Usage: - {{executableName}} [options] - - Required for training the issue model: - --issue-data Path to existing issue data file (TSV file). - --issue-model Path for issue prediction model file to create (ZIP file). 
- - Required for training the pull request model: - --pull-data Path to existing pull request data file (TSV file). - --pull-model Path for pull request prediction model file to create (ZIP file). - """); - - Environment.Exit(1); - } - - public static Args? Parse(string[] args) - { - Args argsData = new(); - - Queue arguments = new(args); - while (arguments.Count > 0) - { - string argument = arguments.Dequeue(); - - switch (argument) - { - case "--issue-data": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-data", out string? issueDataPath)) - { - return null; - } - argsData.IssueDataPath = issueDataPath; - break; - - case "--issue-model": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--issue-model", out string? issueModelPath)) - { - return null; - } - argsData.IssueModelPath = issueModelPath; - break; - - case "--pull-data": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-data", out string? pullDataPath)) - { - return null; - } - argsData.PullDataPath = pullDataPath; - break; - - case "--pull-model": - if (!ArgUtils.TryDequeuePath(arguments, ShowUsage, "--pull-model", out string? pullModelPath)) - { - return null; - } - argsData.PullModelPath = pullModelPath; - break; - - default: - ShowUsage($"Unrecognized argument: {argument}"); - return null; - } - } - - if ((argsData.IssueDataPath is null != argsData.IssueModelPath is null) || - (argsData.PullDataPath is null != argsData.PullModelPath is null) || - (argsData.IssueModelPath is null && argsData.PullModelPath is null)) - { - ShowUsage(); - return null; - } - - return argsData; - } -} diff --git a/src/Trainer/Trainer.cs b/src/Trainer/Trainer.cs deleted file mode 100644 index f6b780f..0000000 --- a/src/Trainer/Trainer.cs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using static DataFileUtils; -using Microsoft.ML; -using Microsoft.ML.Data; -using Microsoft.ML.Transforms.Text; - -var config = Args.Parse(args); -if (config is not Args argsData) -{ - return; -} - -if (argsData.IssueDataPath is not null && argsData.IssueModelPath is not null) -{ - CreateModel(argsData.IssueDataPath, argsData.IssueModelPath, ModelType.Issue); -} - -if (argsData.PullDataPath is not null && argsData.PullModelPath is not null) -{ - CreateModel(argsData.PullDataPath, argsData.PullModelPath, ModelType.PullRequest); -} - -static void CreateModel(string dataPath, string modelPath, ModelType type) -{ - Console.WriteLine("Loading data into train/test sets..."); - MLContext mlContext = new(); - - TextLoader.Column[] columns = type == ModelType.Issue ? [ - new("Label", DataKind.String, 0), - new("Title", DataKind.String, 1), - new("Body", DataKind.String, 2), - ] : [ - new("Label", DataKind.String, 0), - new("Title", DataKind.String, 1), - new("Body", DataKind.String, 2), - new("FileNames", DataKind.String, 3), - new("FolderNames", DataKind.String, 4) - ]; - - TextLoader.Options textLoaderOptions = new() - { - AllowQuoting = false, - AllowSparse = false, - EscapeChar = '"', - HasHeader = true, - ReadMultilines = false, - Separators = ['\t'], - TrimWhitespace = true, - UseThreads = true, - Columns = columns - }; - - var loader = mlContext.Data.CreateTextLoader(textLoaderOptions); - var data = loader.Load(dataPath); - var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2); - - Console.WriteLine("Building pipeline..."); - - var xf = mlContext.Transforms; - var pipeline = xf.Conversion.MapValueToKey(inputColumnName: "Label", outputColumnName: "LabelKey") - .Append(xf.Text.FeaturizeText( - "Features", - new TextFeaturizingEstimator.Options(), - columns.Select(c => c.Name).ToArray())) - .AppendCacheCheckpoint(mlContext) - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy("LabelKey")) - 
.Append(xf.Conversion.MapKeyToValue("PredictedLabel")); - - Console.WriteLine("Fitting the model with the training data set..."); - var trainedModel = pipeline.Fit(split.TrainSet); - var testModel = trainedModel.Transform(split.TestSet); - - Console.WriteLine("Evaluating against the test set..."); - var metrics = mlContext.MulticlassClassification.Evaluate(testModel, labelColumnName: "LabelKey"); - - Console.WriteLine($"************************************************************"); - Console.WriteLine($"MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better"); - Console.WriteLine($"MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better"); - Console.WriteLine($"LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better"); - - if (metrics.PerClassLogLoss.Count() > 0) - Console.WriteLine($"LogLoss for class 1 = {metrics.PerClassLogLoss[0]:0.####}, the closer to 0, the better"); - - if (metrics.PerClassLogLoss.Count() > 1) - Console.WriteLine($"LogLoss for class 2 = {metrics.PerClassLogLoss[1]:0.####}, the closer to 0, the better"); - - if (metrics.PerClassLogLoss.Count() > 2) - Console.WriteLine($"LogLoss for class 3 = {metrics.PerClassLogLoss[2]:0.####}, the closer to 0, the better"); - - Console.WriteLine($"************************************************************"); - - Console.WriteLine($"Saving model to '{modelPath}'..."); - EnsureOutputDirectory(modelPath); - mlContext.Model.Save(trainedModel, split.TrainSet.Schema, modelPath); -} diff --git a/src/Trainer/Trainer.csproj b/src/Trainer/Trainer.csproj deleted file mode 100644 index 5739801..0000000 --- a/src/Trainer/Trainer.csproj +++ /dev/null @@ -1,17 +0,0 @@ - - - - Exe - enable - enable - - - - - - - - - - -
diff --git a/test/action.yml b/test/action.yml
new file mode 100644
index 0000000..ca32c69
--- /dev/null
+++ b/test/action.yml
@@ -0,0 +1,93 @@
+# Composite action: restores a cached model and runs the Tester project against it.
+name: "Test Model"
+description: "Test predictions against the Issues and/or Pull Requests model by downloading data and comparing predictions against existing labels."
+
+branding:
+  color: "purple"
+  icon: "tag"
+
+inputs:
+  type:
+    description: "The model to test. Must be either 'issues' or 'pulls'."
+    required: true
+  label_prefix:
+    description: "The label prefix to use for model training. Must end with a non-alphanumeric character."
+    required: true
+  threshold:
+    description: "The minimum confidence score for a label prediction. Must be a number between 0.00 and 1.00. The recommended value is 0.40."
+    required: true
+  excluded_authors:
+    description: "A comma-separated list of authors to exclude."
+  limit:
+    description: "The maximum number of items to download. The newest items are downloaded."
+  page_size:
+    description: "The number of items per page in GitHub API requests. Defaults to 100 for Issues and 25 for Pull Requests."
+  page_limit:
+    description: "The maximum number of pages to retrieve. Defaults to 1000 for Issues and 4000 for Pull Requests."
+  retries:
+    description: "A comma-separated list of retry delays in seconds. Defaults to '30,30,300,300,3000,3000'."
+  cache_key:
+    description: "The cache key suffix of the model to restore. Defaults to 'staged'."
+    default: staged
+  repository:
+    description: "The org/repo to download data from. Defaults to the current repository."
+  verbose:
+    description: "Enable verbose output."
+
+runs:
+  using: "composite"
+  steps:
+    # Stop early unless 'type' is 'issues' or 'pulls'; later steps build paths and flags from it.
+    - name: "Validate Inputs"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'."
+          echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY
+          echo "\`type\` must be either 'issues' or 'pulls'." >> $GITHUB_STEP_SUMMARY
+          exit 1
+        fi
+
+    # Clones the repository/ref named by the github context; both values are
+    # captured in 'env' and read back through the env context in 'with'.
+    - name: "Clone the ${{ github.action_repository }} repository with ref '${{ github.action_ref }}'"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      env:
+        ISSUE_LABELER_REPO: ${{ github.action_repository }}
+        ISSUE_LABELER_REF: ${{ github.action_ref }}
+      with:
+        repository: ${{ env.ISSUE_LABELER_REPO }}
+        ref: ${{ env.ISSUE_LABELER_REF }}
+
+    # fail-on-cache-miss: the run stops here when no model exists under this key.
+    - name: "Restore model from cache"
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: "labeler-cache/${{ inputs.type }}-model.zip"
+        key: "issue-labeler/model/${{ inputs.type }}/${{ inputs.cache_key || 'staged' }}"
+        fail-on-cache-miss: true
+
+    - name: "Set up the .NET SDK"
+      uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+      with:
+        dotnet-version: "9.0.x"
+
+    # Each optional input expands to '' when unset, so its flag is left off the command line.
+    # Composite action inputs arrive as strings, so 'verbose' is also compared against 'false';
+    # a bare truthiness test would pass --verbose for the non-empty string 'false'.
+    # NOTE(review): input values are pasted into the script text before bash runs it; if callers
+    # can forward untrusted values, pass them through 'env' instead.
+    - name: "Run Tester"
+      shell: bash
+      run: |
+        dotnet run -c Release --project IssueLabeler/src/Tester -- \
+          ${{ format('--repo "{0}"', inputs.repository || github.repository) }} \
+          ${{ format('--label-prefix "{0}"', inputs.label_prefix) }} \
+          ${{ format('--threshold {0}', inputs.threshold) }} \
+          ${{ format('--{0}-model "labeler-cache/{0}-model.zip"', inputs.type) || '' }} \
+          ${{ inputs.excluded_authors && format('--excluded-authors "{0}"', inputs.excluded_authors) || '' }} \
+          ${{ inputs.limit && format('--{0}-limit {1}', inputs.type, inputs.limit) || '' }} \
+          ${{ inputs.page_size && format('--page-size {0}', inputs.page_size) || '' }} \
+          ${{ inputs.page_limit && format('--page-limit {0}', inputs.page_limit) || '' }} \
+          ${{ inputs.retries && format('--retries {0}', inputs.retries) || '' }} \
+          ${{ inputs.verbose && inputs.verbose != 'false' && '--verbose' || '' }}
diff --git a/tests/Shared.Tests/ArgUtilsTests.cs b/tests/Shared.Tests/ArgUtilsTests.cs deleted file mode 100644 index 91a3299..0000000 --- a/tests/Shared.Tests/ArgUtilsTests.cs +++ /dev/null @@ -1,239 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using 
System.Linq; -using System.Text.RegularExpressions; -using NSubstitute; - -namespace Shared.Tests; - -[TestClass] -public class ArgUtilsTests -{ - [TestMethod] - public void TryDequeueString_ShouldReturnTrue_WhenValueIsPresent() - { - var args = new Queue(["value"]); - var showUsage = Substitute.For>(); - string? argValue; - - var result = ArgUtils.TryDequeueString(args, showUsage, "test-arg-name", out argValue); - - Assert.IsTrue(result); - Assert.AreEqual("value", argValue); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueString_ShouldReturnFalse_WhenValueIsNull() - { - var args = new Queue([""]); - var showUsage = Substitute.For>(); - string? argValue; - - var result = ArgUtils.TryDequeueString(args, showUsage, "test-arg-name", out argValue); - - Assert.IsFalse(result); - Assert.IsNull(argValue); - showUsage.Received(1).Invoke("Argument 'test-arg-name' has an empty value."); - } - - [TestMethod] - public void TryDequeueRepo_ShouldReturnTrue_WhenValueIsValid() - { - var args = new Queue(["org/repo"]); - var showUsage = Substitute.For>(); - string? org; - string? repo; - - var result = ArgUtils.TryDequeueRepo(args, showUsage, "test-arg-name", out org, out repo); - - Assert.IsTrue(result); - Assert.AreEqual("org", org); - Assert.AreEqual("repo", repo); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueRepo_ShouldReturnFalse_WhenValueIsInvalid() - { - var args = new Queue(["invalid"]); - var showUsage = Substitute.For>(); - string? org; - string? 
repo; - - var result = ArgUtils.TryDequeueRepo(args, showUsage, "test-arg-name", out org, out repo); - - Assert.IsFalse(result); - Assert.IsNull(org); - Assert.IsNull(repo); - showUsage.Received(1).Invoke("Argument 'test-arg-name' has an empty value or is not in the format of '{org}/{repo}'."); - } - - [TestMethod] - public void TryDequeueRepoList_ShouldReturnTrue_WhenValuesAreValid() - { - var args = new Queue(["org/repo1,org/repo2"]); - var showUsage = Substitute.For>(); - string? org; - List? repos; - - var result = ArgUtils.TryDequeueRepoList(args, showUsage, "test-arg-name", out org, out repos); - - Assert.IsTrue(result); - Assert.AreEqual("org", org); - CollectionAssert.AreEqual(new List { "repo1", "repo2" }, repos); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueRepoList_ShouldReturnFalse_WhenValuesAreInvalid() - { - var args = new Queue(["invalid"]); - var showUsage = Substitute.For>(); - string? org; - List? repos; - - var result = ArgUtils.TryDequeueRepoList(args, showUsage, "test-arg-name", out org, out repos); - - Assert.IsFalse(result); - Assert.IsNull(org); - Assert.IsNull(repos); - showUsage.Received(1).Invoke("Argument '--repo' is not in the format of '{org}/{repo}': invalid"); - } - - [TestMethod] - public void TryDequeueLabelPrefix_ShouldReturnTrue_WhenValueIsValid() - { - var args = new Queue(["area-"]); - var showUsage = Substitute.For>(); - Func? labelPredicate; - - var result = ArgUtils.TryDequeueLabelPrefix(args, showUsage, "test-arg-name", out labelPredicate); - - Assert.IsTrue(result); - Assert.IsNotNull(labelPredicate); - Assert.IsTrue(labelPredicate("area-label")); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueLabelPrefix_ShouldReturnFalse_WhenValueIsInvalid() - { - var args = new Queue(["area"]); - var showUsage = Substitute.For>(); - Func? 
labelPredicate; - - var result = ArgUtils.TryDequeueLabelPrefix(args, showUsage, "test-arg-name", out labelPredicate); - - Assert.IsFalse(result); - Assert.IsNull(labelPredicate); - showUsage.Received(1).Invoke(Arg.Is(s => s.Contains("Argument 'test-arg-name' must end in something other than a letter or number."))); - } - - [TestMethod] - public void TryDequeuePath_ShouldReturnTrue_WhenValueIsValid() - { - var args = new Queue(new[] { "/mnt/c/path/to/file" }); - var showUsage = Substitute.For>(); - string? path; - - var result = ArgUtils.TryDequeuePath(args, showUsage, "test-arg-name", out path); - - Assert.IsTrue(result); - Assert.AreEqual("/mnt/c/path/to/file", path); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeuePath_ShouldReturnFalse_WhenValueIsInvalid() - { - var args = new Queue([""]); - var showUsage = Substitute.For>(); - string? path; - - var result = ArgUtils.TryDequeuePath(args, showUsage, "test-arg-name", out path); - - Assert.IsFalse(result); - Assert.IsNull(path); - showUsage.Received(1).Invoke("Argument 'test-arg-name' has an empty value."); - } - - [TestMethod] - public void TryDequeueStringArray_ValidInput_ReturnsTrue() - { - var args = new Queue(["value1,value2,value3"]); - var showUsage = Substitute.For>(); - bool result = ArgUtils.TryDequeueStringArray(args, showUsage, "test-arg-name", out string[]? argValues); - - Assert.IsTrue(result); - Assert.IsNotNull(argValues); - Assert.AreEqual(3, argValues.Length); - CollectionAssert.Contains(argValues, "value1"); - CollectionAssert.Contains(argValues, "value2"); - CollectionAssert.Contains(argValues, "value3"); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueInt_ValidInput_ReturnsTrue() - { - var args = new Queue(["123"]); - var showUsage = Substitute.For>(); - bool result = ArgUtils.TryDequeueInt(args, showUsage, "test-arg-name", out int? 
argValue); - - Assert.IsTrue(result); - Assert.IsNotNull(argValue); - Assert.AreEqual(123, argValue); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueIntArray_ValidInput_ReturnsTrue() - { - var args = new Queue(["1,2,3"]); - var showUsage = Substitute.For>(); - bool result = ArgUtils.TryDequeueIntArray(args, showUsage, "test-arg-name", out int[]? argValues); - - Assert.IsTrue(result); - Assert.IsNotNull(argValues); - Assert.AreEqual(3, argValues.Length); - CollectionAssert.Contains(argValues, 1); - CollectionAssert.Contains(argValues, 2); - CollectionAssert.Contains(argValues, 3); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueFloat_ValidInput_ReturnsTrue() - { - var args = new Queue(["123.45"]); - var showUsage = Substitute.For>(); - bool result = ArgUtils.TryDequeueFloat(args, showUsage, "test-arg-name", out float? argValue); - - Assert.IsTrue(result); - Assert.IsNotNull(argValue); - Assert.AreEqual(123.45f, argValue); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } - - [TestMethod] - public void TryDequeueNumberRanges_ValidInput_ReturnsTrue() - { - var args = new Queue(["1-3,5,7-9"]); - var showUsage = Substitute.For>(); - bool result = ArgUtils.TryDequeueNumberRanges(args, showUsage, "test-arg-name", out List? 
argValues); - - Assert.IsTrue(result); - Assert.IsNotNull(argValues); - Assert.AreEqual(7, argValues.Count); - CollectionAssert.Contains(argValues, (ulong)1); - CollectionAssert.Contains(argValues, (ulong)2); - CollectionAssert.Contains(argValues, (ulong)3); - CollectionAssert.Contains(argValues, (ulong)5); - CollectionAssert.Contains(argValues, (ulong)7); - CollectionAssert.Contains(argValues, (ulong)8); - CollectionAssert.Contains(argValues, (ulong)9); - showUsage.DidNotReceive().Invoke(Arg.Any()); - } -} diff --git a/tests/Shared.Tests/DataFileUtilsTests.cs b/tests/Shared.Tests/DataFileUtilsTests.cs deleted file mode 100644 index e07a777..0000000 --- a/tests/Shared.Tests/DataFileUtilsTests.cs +++ /dev/null @@ -1,73 +0,0 @@ -using System; -using System.IO; -using Microsoft.VisualStudio.TestTools.UnitTesting; -using NSubstitute; - -namespace Shared.Tests; - -[TestClass] -public class DataFileUtilsTests -{ - [TestMethod] - public void EnsureOutputDirectory_ShouldCreateDirectory_WhenDirectoryDoesNotExist() - { - var tempPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()); - var outputFile = Path.Combine(tempPath, "file.txt"); - - DataFileUtils.EnsureOutputDirectory(outputFile); - - Assert.IsTrue(Directory.Exists(tempPath)); - - Directory.Delete(tempPath, true); - } - - [TestMethod] - public void SanitizeText_ShouldReplaceSpecialCharacters() - { - var input = "text\rwith\nspecial\tcharacters\""; - var expected = "text with special characters`"; - - var result = DataFileUtils.SanitizeText(input); - - Assert.AreEqual(expected, result); - } - - [TestMethod] - public void SanitizeTextArray_ShouldJoinSanitizedTexts() - { - var input = new[] { "text1", "text2\r\n", "text3\t" }; - var expected = "text1 text2 text3"; - - var result = DataFileUtils.SanitizeTextArray(input); - - Assert.AreEqual(expected, result); - } - - [TestMethod] - public void FormatIssueRecord_ShouldFormatCorrectly() - { - var label = "bug"; - var title = "Issue title"; - var body = "Issue 
body"; - var expected = "bug\tIssue title\tIssue body"; - - var result = DataFileUtils.FormatIssueRecord(label, title, body); - - Assert.AreEqual(expected, result); - } - - [TestMethod] - public void FormatPullRequestRecord_ShouldFormatCorrectly() - { - var label = "enhancement"; - var title = "PR title"; - var body = "PR body"; - var fileNames = new[] { "file1.cs", "file2.cs" }; - var folderNames = new[] { "folder1", "folder2" }; - var expected = "enhancement\tPR title\tPR body\tfile1.cs file2.cs\tfolder1 folder2"; - - var result = DataFileUtils.FormatPullRequestRecord(label, title, body, fileNames, folderNames); - - Assert.AreEqual(expected, result); - } -} diff --git a/tests/Shared.Tests/Shared.Tests.csproj b/tests/Shared.Tests/Shared.Tests.csproj deleted file mode 100644 index 448119d..0000000 --- a/tests/Shared.Tests/Shared.Tests.csproj +++ /dev/null @@ -1,16 +0,0 @@ - - - - enable - enable - - - - - - - - - - -
diff --git a/train/action.yml b/train/action.yml
new file mode 100644
index 0000000..14f33c7
--- /dev/null
+++ b/train/action.yml
@@ -0,0 +1,112 @@
+# Composite action: trains a model from cached data and saves the result to the cache.
+name: "Train Model"
+description: "Train the Issues or Pull Requests model for label prediction."
+
+inputs:
+  type:
+    description: "The model to train. Must be either 'issues' or 'pulls'."
+    required: true
+  data_cache_key:
+    description: "The cache key suffix to use for the downloaded data. Defaults to 'staged'."
+    default: staged
+  model_cache_key:
+    description: "The cache key suffix to use for the trained model. Defaults to 'staged'."
+    default: staged
+
+branding:
+  color: "purple"
+  icon: "tag"
+
+runs:
+  using: "composite"
+  steps:
+    # Stop early unless 'type' is 'issues' or 'pulls'; later steps build paths and keys from it.
+    - name: "Validate Inputs"
+      shell: bash
+      run: |
+        if [[ "${{ inputs.type }}" != "issues" && "${{ inputs.type }}" != "pulls" ]]; then
+          echo "::error::'type' must be either 'issues' or 'pulls'. Value provided: '${{ inputs.type }}'."
+          echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY
+          echo "\`type\` must be either 'issues' or 'pulls'." >> $GITHUB_STEP_SUMMARY
+          exit 1
+        fi
+
+    # Publish the data/model paths and cache keys through GITHUB_ENV for the steps below.
+    - name: "Set Cache Variables"
+      shell: bash
+      run: |
+        echo "DATA_PATH=labeler-cache/${{ inputs.type }}-data.tsv" >> $GITHUB_ENV
+        echo "DATA_CACHE_KEY=${{ format('issue-labeler/data/{0}/{1}', inputs.type, inputs.data_cache_key) }}" >> $GITHUB_ENV
+        echo "MODEL_PATH=labeler-cache/${{ inputs.type }}-model.zip" >> $GITHUB_ENV
+        echo "MODEL_CACHE_KEY=${{ format('issue-labeler/model/{0}/{1}', inputs.type, inputs.model_cache_key) }}" >> $GITHUB_ENV
+
+    # lookup-only: reports whether the key exists without restoring the file.
+    - name: "Check for an existing model"
+      id: check-cache
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.MODEL_PATH }}
+        key: ${{ env.MODEL_CACHE_KEY }}
+        lookup-only: true
+
+    - name: "Abort if there is an existing model with the specified cache key"
+      shell: bash
+      run: |
+        if [[ "${{ steps.check-cache.outputs.cache-hit }}" == "true" ]]; then
+          echo "::error::Cache key '${{ env.MODEL_CACHE_KEY }}' already exists. Cannot proceed with training."
+          echo "> [!CAUTION]" >> $GITHUB_STEP_SUMMARY
+          echo "Cache key '${{ env.MODEL_CACHE_KEY }}' already exists. Cannot proceed with training." >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "> [!TIP]" >> $GITHUB_STEP_SUMMARY
+          echo "Use a different \`model_cache_key\` value or delete the existing cache entry from the [Action Caches](/${{ github.repository }}/actions/caches) page and run the workflow again." >> $GITHUB_STEP_SUMMARY
+          exit 1
+        fi
+
+    # Clones the repository/ref named by the github context; both values are
+    # captured in 'env' and read back through the env context in 'with'.
+    - name: "Clone the ${{ github.action_repository }} repository with ref '${{ github.action_ref }}'"
+      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      env:
+        ISSUE_LABELER_REPO: ${{ github.action_repository }}
+        ISSUE_LABELER_REF: ${{ github.action_ref }}
+      with:
+        repository: ${{ env.ISSUE_LABELER_REPO }}
+        ref: ${{ env.ISSUE_LABELER_REF }}
+
+    # fail-on-cache-miss: the run stops here when no data exists under DATA_CACHE_KEY.
+    - name: "Restore Data from Cache"
+      uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.DATA_PATH }}
+        key: ${{ env.DATA_CACHE_KEY }}
+        fail-on-cache-miss: true
+
+    - name: "Set up the .NET SDK"
+      uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4.3.1
+      with:
+        dotnet-version: "9.0.x"
+
+    - name: "Run Trainer"
+      shell: bash
+      run: |
+        dotnet run -c Release --project IssueLabeler/src/Trainer -- \
+          ${{ format('--{0}-data "{1}"', inputs.type, env.DATA_PATH) }} \
+          ${{ format('--{0}-model "{1}"', inputs.type, env.MODEL_PATH) }}
+
+    - name: "Save Model to Cache"
+      uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      with:
+        path: ${{ env.MODEL_PATH }}
+        key: ${{ env.MODEL_CACHE_KEY }}
+
+    - name: "Write Final Summary"
+      shell: bash
+      run: |
+        echo "" >> $GITHUB_STEP_SUMMARY
+        echo "## ${{ inputs.type == 'issues' && 'Issues' || 'Pull Requests' }} Model Available as '${{ inputs.model_cache_key }}'." >> $GITHUB_STEP_SUMMARY
+
+        if [[ "${{ inputs.model_cache_key }}" == "ACTIVE" ]]; then
+          echo "Label predictions will now use this model." >> $GITHUB_STEP_SUMMARY
+        else
+          echo "The '${{ inputs.model_cache_key }}' model is saved to cache and available to test or promote." >> $GITHUB_STEP_SUMMARY
+        fi