diff --git a/tools/issue-labeler/Directory.Build.props b/tools/issue-labeler/Directory.Build.props
index 6aab64a60a5..b7220d6e29a 100644
--- a/tools/issue-labeler/Directory.Build.props
+++ b/tools/issue-labeler/Directory.Build.props
@@ -27,6 +27,12 @@
16.11.0
+
+
+ $(MSBuildProjectDirectory)\bin\
+ $(MSBuildProjectDirectory)\obj\
+
+
diff --git a/tools/issue-labeler/IssueLabeler.sln b/tools/issue-labeler/IssueLabeler.sln
index 762fe481d52..f568f876ef4 100644
--- a/tools/issue-labeler/IssueLabeler.sln
+++ b/tools/issue-labeler/IssueLabeler.sln
@@ -15,17 +15,22 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SearchIndexCreator", "src\S
{5B655051-531D-4968-8AF3-1DBA9A9F568C} = {5B655051-531D-4968-8AF3-1DBA9A9F568C}
EndProjectSection
EndProject
+EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Sdk.Tools.GitHubEventProcessor", "..\github-event-processor\Azure.Sdk.Tools.GitHubEventProcessor\Azure.Sdk.Tools.GitHubEventProcessor.csproj", "{5B655051-531D-4968-8AF3-1DBA9A9F568C}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CreateMikLabelModel", "src\CreateMikLabelModel\CreateMikLabelModel.csproj", "{5966A77B-5114-4608-92AD-524F181FA0FC}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "IssueLabeler.Shared", "src\IssueLabeler.Shared\IssueLabeler.Shared.csproj", "{9E6BA2D8-3BBE-40D6-9DAF-0FC0CD362BD4}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Hubbup.MikLabelModel", "src\Hubbup.MikLabelModel\Hubbup.MikLabelModel.csproj", "{CA47F6FC-382F-4034-9F12-517CC14E5CB0}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Common", "src\IssueLabelerMLPipeline\src\Common\Common.csproj", "{3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Sdk.LabelTrainer", "src\Azure.Sdk.Labels\Azure.Sdk.LabelTrainer.csproj", "{DB80D7FD-262D-429D-9700-72EF4D93F317}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Downloader", "src\IssueLabelerMLPipeline\src\Downloader\Downloader.csproj", "{AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "IssueLabeler.Shared", "src\IssueLabeler.Shared\IssueLabeler.Shared.csproj", "{9E6BA2D8-3BBE-40D6-9DAF-0FC0CD362BD4}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "GitHubClient", "src\IssueLabelerMLPipeline\src\GitHubClient\GitHubClient.csproj", "{57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Predictor", "src\IssueLabelerMLPipeline\src\Predictor\Predictor.csproj", "{2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tester", "src\IssueLabelerMLPipeline\src\Tester\Tester.csproj", "{BEA133F4-5686-49DF-83E4-641C26B3CC25}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Hubbup.MikLabelModel.Tests", "tests\Hubbup.MikLabelModel.Tests\Hubbup.MikLabelModel.Tests.csproj", "{CD3F13F1-8890-490A-BB47-9382E2131F5D}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Trainer", "src\IssueLabelerMLPipeline\src\Trainer\Trainer.csproj", "{F1FE4054-C44E-487F-90F9-2F111AB7BD9C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Sdk.Tools.CodeownersUtils", "..\codeowners-utils\Azure.Sdk.Tools.CodeownersUtils\Azure.Sdk.Tools.CodeownersUtils.csproj", "{D27C2C44-3AC4-0732-FF87-DD1697A9DF37}"
EndProject
@@ -39,38 +44,42 @@ Global
{4C9E75AF-468F-4DF7-BACD-EC0C2C66A96F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4C9E75AF-468F-4DF7-BACD-EC0C2C66A96F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4C9E75AF-468F-4DF7-BACD-EC0C2C66A96F}.Release|Any CPU.Build.0 = Release|Any CPU
- {0AEAF8DD-C370-4090-B439-9CF364D29869}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {0AEAF8DD-C370-4090-B439-9CF364D29869}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {0AEAF8DD-C370-4090-B439-9CF364D29869}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {0AEAF8DD-C370-4090-B439-9CF364D29869}.Release|Any CPU.Build.0 = Release|Any CPU
{5B655051-531D-4968-8AF3-1DBA9A9F568C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5B655051-531D-4968-8AF3-1DBA9A9F568C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5B655051-531D-4968-8AF3-1DBA9A9F568C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5B655051-531D-4968-8AF3-1DBA9A9F568C}.Release|Any CPU.Build.0 = Release|Any CPU
- {5966A77B-5114-4608-92AD-524F181FA0FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {5966A77B-5114-4608-92AD-524F181FA0FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {5966A77B-5114-4608-92AD-524F181FA0FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {5966A77B-5114-4608-92AD-524F181FA0FC}.Release|Any CPU.Build.0 = Release|Any CPU
- {CA47F6FC-382F-4034-9F12-517CC14E5CB0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {CA47F6FC-382F-4034-9F12-517CC14E5CB0}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {CA47F6FC-382F-4034-9F12-517CC14E5CB0}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {CA47F6FC-382F-4034-9F12-517CC14E5CB0}.Release|Any CPU.Build.0 = Release|Any CPU
- {DB80D7FD-262D-429D-9700-72EF4D93F317}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {DB80D7FD-262D-429D-9700-72EF4D93F317}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {DB80D7FD-262D-429D-9700-72EF4D93F317}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {DB80D7FD-262D-429D-9700-72EF4D93F317}.Release|Any CPU.Build.0 = Release|Any CPU
{9E6BA2D8-3BBE-40D6-9DAF-0FC0CD362BD4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{9E6BA2D8-3BBE-40D6-9DAF-0FC0CD362BD4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{9E6BA2D8-3BBE-40D6-9DAF-0FC0CD362BD4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{9E6BA2D8-3BBE-40D6-9DAF-0FC0CD362BD4}.Release|Any CPU.Build.0 = Release|Any CPU
- {CD3F13F1-8890-490A-BB47-9382E2131F5D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {CD3F13F1-8890-490A-BB47-9382E2131F5D}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {CD3F13F1-8890-490A-BB47-9382E2131F5D}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {CD3F13F1-8890-490A-BB47-9382E2131F5D}.Release|Any CPU.Build.0 = Release|Any CPU
{D27C2C44-3AC4-0732-FF87-DD1697A9DF37}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D27C2C44-3AC4-0732-FF87-DD1697A9DF37}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D27C2C44-3AC4-0732-FF87-DD1697A9DF37}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D27C2C44-3AC4-0732-FF87-DD1697A9DF37}.Release|Any CPU.Build.0 = Release|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3F3044DC-A9F8-DE16-79DD-4A0C1649CD06}.Release|Any CPU.Build.0 = Release|Any CPU
+ {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {AB75FE13-DB1A-4B6F-8B27-1486F98EA75C}.Release|Any CPU.Build.0 = Release|Any CPU
+ {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {57F2D1DC-DA30-40CA-AE1A-2EFD8139AF25}.Release|Any CPU.Build.0 = Release|Any CPU
+ {2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {2E39B0A5-2F4A-4D6E-8A0D-0366238CB21E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {BEA133F4-5686-49DF-83E4-641C26B3CC25}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {BEA133F4-5686-49DF-83E4-641C26B3CC25}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {BEA133F4-5686-49DF-83E4-641C26B3CC25}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {BEA133F4-5686-49DF-83E4-641C26B3CC25}.Release|Any CPU.Build.0 = Release|Any CPU
+ {F1FE4054-C44E-487F-90F9-2F111AB7BD9C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {F1FE4054-C44E-487F-90F9-2F111AB7BD9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {F1FE4054-C44E-487F-90F9-2F111AB7BD9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {F1FE4054-C44E-487F-90F9-2F111AB7BD9C}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/tools/issue-labeler/src/Azure.Sdk.Labels/Azure.Sdk.LabelTrainer.csproj b/tools/issue-labeler/src/Azure.Sdk.Labels/Azure.Sdk.LabelTrainer.csproj
deleted file mode 100644
index 9d52a6772dc..00000000000
--- a/tools/issue-labeler/src/Azure.Sdk.Labels/Azure.Sdk.LabelTrainer.csproj
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
- Exe
- latest
- Azure.Sdk.LabelTrainer
-
-
-
-
-
-
-
-
-
-
diff --git a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkCombinedLabelModelTrainer.cs b/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkCombinedLabelModelTrainer.cs
deleted file mode 100644
index 53ee623cd80..00000000000
--- a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkCombinedLabelModelTrainer.cs
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System.Collections.Generic;
-using System.Threading.Tasks;
-using CreateMikLabelModel;
-using CreateMikLabelModel.ML;
-
-namespace Azure.Sdk.LabelTrainer
-{
- ///
- /// Provides functionality related to training label models, including building and curating the
- /// sets of data needed to do so.
- ///
- ///
- public class AzureSdkCombinedLabelModelTrainer : LabelModelTrainer
- {
- /// The set of core Azure SDK language repositories which should be used for training the combined model.
- private static readonly string[] AzureSdkLanguageRepositories = new[]
- {
- "Azure/azure-sdk-for-net",
- "Azure/azure-sdk-for-java",
- "Azure/azure-sdk-for-python",
- "Azure/azure-sdk-for-js",
- "Azure/azure-sdk-for-go",
- "Azure/azure-sdk-for-cpp",
- "Azure/azure-sdk-for-rust",
- };
-
- ///
- /// Initializes a new instance of the class.
- ///
- ///
- /// The logging implementation to use for emitting messages.
- ///
- public AzureSdkCombinedLabelModelTrainer(ILogger logger) : base("Azure/azure-sdk", logger)
- {
- }
-
- ///
- /// Queries repository items for data to use for training the models.
- ///
- ///
- /// The access token to use for the GitHub API.
- /// The base path to use for storing and querying training data.
- /// The processor for preparing training set items from repository issues and pull requests.
- /// The set of filters to apply to data when building the training set. If not provided, training items will not be filtered.
- ///
- /// The set of that were produced.
- ///
- public override Task> QueryTrainingData(
- string gitHubAccessToken,
- string trainingDataBasePath,
- TrainingDataProcessor processor = default,
- TrainingDataFilters filters = default) => QueryTrainingData(gitHubAccessToken, trainingDataBasePath, AzureSdkLanguageRepositories, processor, filters);
- }
-}
diff --git a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkTrainingDataFilters.cs b/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkTrainingDataFilters.cs
deleted file mode 100644
index 8a3f8e4f71d..00000000000
--- a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkTrainingDataFilters.cs
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Linq;
-using CreateMikLabelModel;
-using CreateMikLabelModel.Models;
-using Octokit;
-
-namespace Azure.Sdk.LabelTrainer
-{
- internal class AzureSdkTrainingDataFilters : TrainingDataFilters
- {
- private static readonly string[] AzureSdkRequiredIssueLabelNames = new[] { "customer-reported" };
- private static readonly string[] AzureSdkRequiredPullRequestLabelNames = Array.Empty();
-
- public AzureSdkTrainingDataFilters() : base(
- includeIssues: true,
- includePullRequests: false,
- requiredIssueLabelNames: AzureSdkRequiredIssueLabelNames,
- requiredPullRequestLabelNames: AzureSdkRequiredPullRequestLabelNames)
- {
- }
-
- public override bool PullRequestFilter(PullRequestWithFiles pullRequest) => false;
-
- public override bool IssueFilter(Issue issue)
- {
- var categoryCount = 0;
- var serviceCount = 0;
-
- if (RequiredIssueLabelNames.All(required => issue.Labels.Any(label => label.Name == required)))
- {
- foreach (var label in issue.Labels)
- {
- if (AzureSdkLabel.IsServiceLabel(label))
- {
- ++serviceCount;
- }
-
- if (AzureSdkLabel.IsCategoryLabel(label))
- {
- ++categoryCount;
- }
- }
- }
-
- // To be eligible for the training set, the issue must have all of the required
- // labels and exactly one service label and one category label. Issues that have
- // multiples, even if valid, aren't appropriate for training purposes.
-
- return (categoryCount == 1 && serviceCount == 1);
- }
-
- public override bool LabelFilter(Label label) =>
- AzureSdkLabel.IsServiceLabel(label) || AzureSdkLabel.IsCategoryLabel(label);
- }
-}
diff --git a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkTrainingDataProcessor.cs b/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkTrainingDataProcessor.cs
deleted file mode 100644
index 1aa1396acf0..00000000000
--- a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkTrainingDataProcessor.cs
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System.Collections.Generic;
-using CreateMikLabelModel;
-using CreateMikLabelModel.Models;
-using Octokit;
-
-namespace Azure.Sdk.LabelTrainer
-{
- internal class AzureSdkTrainingDataProcessor : TrainingDataProcessor
- {
- ///
- /// Initializes a new instance of the class.
- ///
- ///
- /// The logger to use for reporting information as items are prepared.
- ///
- public AzureSdkTrainingDataProcessor(ILogger logger) : base(logger)
- {
- }
-
- ///
- /// Prepares training data based on repository issues, transforming it into the
- /// appropriate representation.
- ///
- ///
- /// The raw training data, in the form of repository issues.
- /// The name of the repository that was the source of the training data.
- ///
- /// The set of instances prepared from the .
- ///
- public override async IAsyncEnumerable PrepareData(
- IAsyncEnumerable trainingData,
- string repositoryName)
- {
- var itemCount = 0;
-
- await foreach (var issue in trainingData)
- {
- if (issue.Labels.Count > 0)
- {
- foreach (var label in issue.Labels)
- {
- var segment = GetSegment(label);
-
- if (segment != null)
- {
- ++itemCount;
- yield return new TrainingDataItem(label.Name, segment, repositoryName, issue);
- }
- }
- }
- else
- {
- Logger.LogWarning($"Issue: { issue.Id } has no labels and should have been filtered.");
- }
- }
-
- Logger.LogInformation($"Prepared { itemCount } training set items from issue training data.");
- }
-
- ///
- /// Prepares training data based on repository pull requests, transforming it into the
- /// appropriate representation.
- ///
- ///
- /// The raw training data, in the form of repository pull requests.
- /// The name of the repository that was the source of the training data.
- ///
- /// The set of instances prepared from the .
- ///
- public override async IAsyncEnumerable PrepareData(
- IAsyncEnumerable trainingData,
- string repositoryName)
- {
- var itemCount = 0;
-
- await foreach (var pullRequest in trainingData)
- {
- if (pullRequest.PullRequest.Labels.Count > 0)
- {
- foreach (var label in pullRequest.PullRequest.Labels)
- {
- var segment = GetSegment(label);
-
- if (segment != null)
- {
- ++itemCount;
- yield return new TrainingDataItem(label.Name, DefaultSegmentName, repositoryName, pullRequest);
- }
- }
- }
- else
- {
- Logger.LogWarning($"Pull Request: { pullRequest.PullRequest.Id } has no labels and should have been filtered.");
- }
- }
-
- Logger.LogInformation($"Prepared { itemCount } training set items from pull request training data.");
- }
-
- ///
- /// Gets the segment that the training data should be associated with.
- ///
- ///
- /// The label to consider.
- ///
- /// The segment name.
- ///
- private string GetSegment(Label label) => label switch
- {
- null => null,
- _ when AzureSdkLabel.IsCategoryLabel(label) => "Category",
- _ when AzureSdkLabel.IsServiceLabel(label) => "Service",
- _ => null
- };
- }
-}
diff --git a/tools/issue-labeler/src/Azure.Sdk.Labels/Program.cs b/tools/issue-labeler/src/Azure.Sdk.Labels/Program.cs
deleted file mode 100644
index d463bd82434..00000000000
--- a/tools/issue-labeler/src/Azure.Sdk.Labels/Program.cs
+++ /dev/null
@@ -1,152 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Diagnostics;
-using System.IO;
-using System.Threading.Tasks;
-using CreateMikLabelModel;
-using IssueLabeler.Shared;
-
-namespace Azure.Sdk.LabelTrainer
-{
- ///
- /// Serves as the main entry point for the application.
- ///
- ///
- public static class Program
- {
- /// The file to write output to; the current directory is assumed.
- private const string TraceLogFilename = "trace.log";
-
- ///
- /// This utility will train a set of machine learning models intended to help with prediction of the
- /// labels that should be added to GitHub items for basic categorization and routing.
- ///
- ///
- /// The full path for the repository to train.
- /// The access token to use for interacting with GitHub.
- /// [OPTIONAL] The directory in which to keep the data files; if not specified, the current directory will be assumed. If specified, the directory will be created if it does not exist.
- ///
- ///
- ///
- /// dotnet run -- --repository "Azure/azure-sdk-for-net" --git-hub-token "[[ TOKEN ]]"
- ///
- ///
- ///
- ///
- ///
- /// dotnet run -- --repository "Azure/azure-sdk-for-net" --git-hub-token "[[ TOKEN ]]" --data-file-directory "c:\data\training"
- ///
- ///
- ///
- public static async Task Main(string repository, string gitHubToken, string dataFileDirectory = default)
- {
- if ((string.IsNullOrEmpty(repository)) || (string.IsNullOrEmpty(gitHubToken)))
- {
- Console.WriteLine("");
- Console.WriteLine("The repository path and GitHub access token must be specified.");
- Console.WriteLine("");
- Console.WriteLine("Usage:");
- Console.WriteLine("\tdotnet run -- --repository \"all\" --git-hub-token \"[[ TOKEN ]]\"");
- Console.WriteLine("\tdotnet run -- --repository \"Azure/azure-sdk-for-net\" --git-hub-token \"[[ TOKEN ]]\"");
- Console.WriteLine("\tdotnet run -- --repository \"Azure/azure-sdk-for-js\" --git-hub-token \"[[ TOKEN ]]\" --data-file-directory \"c:\\data\\training\"");
- Console.WriteLine("");
-
- return -1;
- }
-
- // Ensure the path for training data.
-
- dataFileDirectory = string.IsNullOrEmpty(dataFileDirectory)
- ? Environment.CurrentDirectory
- : dataFileDirectory;
-
- if (!Directory.Exists(dataFileDirectory))
- {
- Directory.CreateDirectory(dataFileDirectory);
- }
-
- // Build the set of training data.
-
- var logger = new ConsoleLogger();
-
- var trainer = repository switch
- {
- "all" => new AzureSdkCombinedLabelModelTrainer(logger),
- _ => new LabelModelTrainer(repository, logger)
- };
-
- // Step 1: Download the common set of training items and use them to prepare a training data set. This will include
- // all segments for the different label types needed.
-
- Console.ForegroundColor = ConsoleColor.Green;
- Console.WriteLine(new String('=', 80));
- Console.WriteLine(" Preparing training data");
- Console.WriteLine(new String('=', 80));
- Console.ResetColor();
-
- var filters = new AzureSdkTrainingDataFilters();
- var processor = new AzureSdkTrainingDataProcessor(logger);
- var trainingDataFiles = await trainer.QueryTrainingData(gitHubToken, dataFileDirectory, processor, filters).ConfigureAwait(false);
-
- Console.ForegroundColor = ConsoleColor.Green;
- Console.WriteLine(new String('=', 80));
- Console.WriteLine(" Training data preparation complete.");
- Console.WriteLine(new String('=', 80));
- Console.ResetColor();
-
- // Each segment will produce an dedicated set of models for that specific label type; process each separately.
-
- foreach (var trainingSegment in trainingDataFiles)
- {
- Console.WriteLine();
- Console.ForegroundColor = ConsoleColor.Green;
- Console.WriteLine(new String('=', 80));
- Console.WriteLine($" Processing segment: { trainingSegment.Key }");
- Console.WriteLine(new String('=', 80));
- Console.ResetColor();
-
- // Step 2: Translate the training data into
-
- trainer.GenerateTrainingDatasets(trainingSegment.Value);
- Console.WriteLine();
-
- // Step 3: Train the model.
-
- trainer.TrainModels(trainingSegment.Value);
- Console.WriteLine();
-
- // Step 4: Test the model.
-
- trainer.TestModels(trainingSegment.Value);
-
- // Provide information on where the model files are.
-
- Console.WriteLine();
- Console.WriteLine();
-
- if (!trainingSegment.Value.Issues.SkipProcessing)
- {
- Console.WriteLine($"Final issue model: '{ trainingSegment.Value.Issues.FinalModelPath }'");
- }
-
- if (!trainingSegment.Value.PullRequests.SkipProcessing)
- {
- Console.WriteLine($"Final pull request model: '{ trainingSegment.Value.PullRequests.FinalModelPath }'");
- }
-
- Console.ForegroundColor = ConsoleColor.Green;
- Console.WriteLine(new String('=', 80));
- Console.WriteLine($" Segment: { trainingSegment.Key } complete.");
- Console.WriteLine(new String('=', 80));
- Console.ResetColor();
- }
-
- Console.WriteLine();
- Console.WriteLine();
- Console.WriteLine("==== Training complete ====");
- return 0;
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/CreateMikLabelModel.csproj b/tools/issue-labeler/src/CreateMikLabelModel/CreateMikLabelModel.csproj
deleted file mode 100644
index 8e894d7cccb..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/CreateMikLabelModel.csproj
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
- latest
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingData.cs b/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingData.cs
deleted file mode 100644
index af9cf811466..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingData.cs
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using CreateMikLabelModel.Models;
-using Octokit;
-
-namespace CreateMikLabelModel
-{
- internal static class TrainingData
- {
- public static void WriteTrainingItems(
- IEnumerable trainingItems,
- StreamWriter outputWriter)
- {
- var ordered = trainingItems
- .OrderBy(x => x.CreatedAt.UtcDateTime.ToFileTimeUtc()) //-> first by created date
- .ThenBy(x => x.RepositoryName) //-> then by repo name
- .ThenBy(x => x.Identifier) //-> then by issue number
- .Select(x => x.Data);
-
- foreach (var item in ordered)
- {
- outputWriter.WriteLine(item);
- }
- }
- public static void WriteHeader(StreamWriter outputWriter)
- {
- outputWriter.WriteLine("CombinedID\tID\tLabel\tTitle\tDescription\tAuthor\tIsPR\tFilePaths");
- }
-
- public static string CreateTrainingData(
- string labelName,
- string repositoryName,
- Issue source) => GetCompressedLine(null, labelName, source.User.Login, source.Body, source.Title, source.CreatedAt, source.Id, repositoryName, false);
-
- public static string CreateTrainingData(
- string labelName,
- string repositoryName,
- PullRequestWithFiles source) => GetCompressedLine(source.FilePaths, labelName, source.PullRequest.User.Login, source.PullRequest.Body, source.PullRequest.Title, source.PullRequest.CreatedAt, source.PullRequest.Id, repositoryName, true);
-
- public static string[] SplitFilePaths(string joinedFilePaths) => joinedFilePaths.Split(';');
-
- private static string GetCompressedLine(
- IEnumerable filePaths,
- string label,
- string author,
- string body,
- string title,
- DateTimeOffset createdAt,
- long identifier,
- string repositoryName,
- bool isPullRequest)
- {
- var createdAtTicks = createdAt.UtcDateTime.ToFileTimeUtc();
-
- author ??= "ghost";
- body = (body?? string.Empty).Replace('\r', ' ').Replace('\n', ' ').Replace('\t', ' ').Replace('"', '`');
- title = title.Replace('\r', ' ').Replace('\n', ' ').Replace('\t', ' ').Replace('"', '`');
-
- if (isPullRequest)
- {
- var filePathsJoined = string.Join(";", filePaths);
- return $"{createdAtTicks},{repositoryName},{identifier}\t{identifier}\t{label}\t{title}\t{body}\t{author}\t1\t{filePathsJoined}";
- }
- else
- {
- return $"{createdAtTicks},{repositoryName},{identifier}\t{identifier}\t{label}\t{title}\t{body}\t{author}\t0\t";
- }
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataClient.cs b/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataClient.cs
deleted file mode 100644
index e34753bc08d..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataClient.cs
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Net;
-using System.Net.Http;
-using System.Net.Sockets;
-using System.Threading;
-using System.Threading.Tasks;
-using CreateMikLabelModel.Models;
-using Octokit;
-using Polly;
-
-namespace CreateMikLabelModel
-{
- internal class TrainingDataClient
- {
- private static int s_randomSeed = Environment.TickCount;
- private static readonly ThreadLocal RandomNumberGenerator = new ThreadLocal(() => new Random(Interlocked.Increment(ref s_randomSeed)), false);
-
- private GitHubClient _client;
- private ILogger _logger;
-
- public TrainingDataClient(string githubAccessToken, ILogger logger)
- {
- _client = new GitHubClient(new ProductHeaderValue("Microsoft-ML-IssueBot", "1.0.0.0"))
- {
- Credentials = new Credentials(githubAccessToken)
- };
-
- _logger = logger;
- }
-
- public async IAsyncEnumerable GetIssuesAsync(
- IEnumerable repositories,
- TrainingDataFilters filters,
- DateTimeOffset? startingDate = null)
- {
- var retryPolicy = CreateRetryPolicy>();
-
- var request = new RepositoryIssueRequest
- {
- Since = startingDate,
- Filter = IssueFilter.All,
- State = ItemStateFilter.All
- };
-
- if (filters.RequiredIssueLabelNames != null)
- {
- foreach (var requiredLabel in filters.RequiredIssueLabelNames)
- {
- request.Labels.Add(requiredLabel);
- }
- }
-
- var options = new ApiOptions
- {
- PageSize = 100
- };
-
- foreach (var repository in repositories)
- {
- _logger.LogInformation($"Querying issues for '{ repository }'.");
-
- var repositoryInfo = RepositoryInformation.Parse(repository);
- var issues = await retryPolicy.ExecuteAsync(() => _client.Issue.GetAllForRepository(repositoryInfo.Owner, repositoryInfo.Name, request, options)).ConfigureAwait(false);
-
- _logger.LogInformation($"{ issues.Count } filtered issues were found for '{ repository }' before filtering was applied.");
-
- foreach (var issue in issues)
- {
- if (filters.IssueFilter(issue))
- {
- yield return issue;
- }
- }
- }
- }
-
- public async IAsyncEnumerable GetPullRequestsAsync(
- IEnumerable repositories,
- TrainingDataFilters filters,
- DateTimeOffset? startingDate = null)
- {
- var pullRequestRetryPolicy = CreateRetryPolicy>();
- var fileRetryPolicy = CreateRetryPolicy>();
-
- var request = new PullRequestRequest
- {
- State = ItemStateFilter.All,
- SortProperty = PullRequestSort.Created,
- SortDirection = SortDirection.Descending,
- };
-
- var options = new ApiOptions
- {
- PageSize = 100
- };
-
- foreach (var repository in repositories)
- {
- _logger.LogInformation($"Querying pull requests for '{ repository }'.");
-
- var repositoryInfo = RepositoryInformation.Parse(repository);
- var pullRequests = await pullRequestRetryPolicy.ExecuteAsync(() => _client.PullRequest.GetAllForRepository(repositoryInfo.Owner, repositoryInfo.Name, request, options)).ConfigureAwait(false);
-
- _logger.LogInformation($"{ pullRequests.Count } pull requests were found for '{ repository }' before filtering was applied.");
-
- foreach (var pullRequest in pullRequests)
- {
- // Pull requests can't be filtered by date, so manually scrub any earlier than
- // the requested starting date.
-
- if ((startingDate.HasValue) && (pullRequest.CreatedAt < startingDate.Value))
- {
- continue;
- }
-
- // Pull requests can't be filtered by labels, so manually scrub any that do not
- // have the required labels associated.
-
- if ((filters.RequiredPullRequestLabelNames is { Length: > 0 })
- && (!filters.RequiredPullRequestLabelNames.All(requiredLabel => pullRequest.Labels.Any(label => label.Name == requiredLabel))))
- {
- continue;
- }
-
- var files = await fileRetryPolicy.ExecuteAsync(() => _client.PullRequest.Files(repositoryInfo.Owner, repositoryInfo.Name, pullRequest.Number)).ConfigureAwait(false);
- var pullRequestWithFiles = new PullRequestWithFiles(pullRequest, files.Select(file => file.FileName).ToArray());
-
- if (filters.PullRequestFilter(pullRequestWithFiles))
- {
- yield return pullRequestWithFiles;
- }
- }
- }
- }
-
- private static IAsyncPolicy CreateRetryPolicy(int maxRetryAttempts = 10, int defaultAbuseBackoffSeconds = 30, double exponentialBackoffSeconds = 0.8, double baseJitterSeconds = 2) =>
- Policy
- .Handle(ex => ShouldRetry(ex))
- .WaitAndRetryAsync(
- maxRetryAttempts,
- attempt => CalculateRetryDelay(attempt, exponentialBackoffSeconds, baseJitterSeconds),
- async (exception, attempt) =>
- {
- var delay = exception switch
- {
- RateLimitExceededException rateEx => ((rateEx.Reset - DateTimeOffset.Now).Add(TimeSpan.FromSeconds(5))),
- AbuseException abuseEx => TimeSpan.FromSeconds(abuseEx.RetryAfterSeconds.GetValueOrDefault(defaultAbuseBackoffSeconds)),
- _ => default(TimeSpan?)
- };
-
- if (delay.HasValue)
- {
- await Task.Delay(delay.Value).ConfigureAwait(false);
- }
- });
-
- private static TimeSpan CalculateRetryDelay(int attempt, double exponentialBackoffSeconds, double baseJitterSeconds) =>
- TimeSpan.FromSeconds((Math.Pow(2, attempt) * exponentialBackoffSeconds) + (RandomNumberGenerator.Value.NextDouble() * baseJitterSeconds));
-
- private static bool ShouldRetry(Exception ex) => ((IsRetriableException(ex)) || (IsRetriableException(ex?.InnerException)));
-
- private static bool IsRetriableException(Exception ex)
- {
- if (ex == null)
- {
- return false;
- }
-
- switch (ex)
- {
- case AbuseException _:
- case RateLimitExceededException _:
- case TimeoutException _:
- case TaskCanceledException _:
- case OperationCanceledException _:
- case WebException _:
- case SocketException _:
- case IOException _:
- return true;
-
- case HttpRequestException requestEx:
- return IsRetriableStatus(requestEx.StatusCode);
-
- case ApiException apiEx:
- return IsRetriableStatus(apiEx.StatusCode);
-
- default:
- return false;
- };
- }
-
- private static bool IsRetriableStatus(HttpStatusCode? statusCode) =>
- ((statusCode == null)
- || (statusCode == HttpStatusCode.Unauthorized)
- || (statusCode == ((HttpStatusCode)408))
- || (statusCode == HttpStatusCode.Conflict)
- || (statusCode == ((HttpStatusCode)429))
- || (statusCode == HttpStatusCode.InternalServerError)
- || (statusCode == HttpStatusCode.ServiceUnavailable)
- || (statusCode == HttpStatusCode.GatewayTimeout));
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataFilters.cs b/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataFilters.cs
deleted file mode 100644
index eff66c42ce2..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataFilters.cs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using CreateMikLabelModel.Models;
-using Octokit;
-
-namespace CreateMikLabelModel
-{
- ///
- /// The set of filters to apply when creating a training set.
- ///
- ///
- public class TrainingDataFilters
- {
- ///
- /// The set of names identifying labels which must all be present on an issue
- /// for it to be included in the training set.
- ///
- ///
- /// If null or empty, issues will not require any specific labels.
- ///
- public string[] RequiredIssueLabelNames { get; init; }
-
- ///
- /// The set of names identifying labels which must all be present on a pull request
- /// for it to be included in the training set.
- ///
- ///
- /// If null or empty, pull requests will not require any specific labels.
- ///
- public string[] RequiredPullRequestLabelNames { get; init; }
-
- ///
- /// Indicates whether or not issues should be included in the
- /// training set. If included, the will be applied
- /// to each issue for individual consideration.
- ///
- ///
- /// true to include issues; otherwise, false.
- ///
- public bool IncludeIssues { get; init; }
-
- ///
- /// Indicates whether or not pull requests should be included in the
- /// training set. If included, the will be applied
- /// to each issue for individual consideration.
- ///
- ///
- /// true to include issues; otherwise, false.
- ///
- public bool IncludePullRequests { get; init; }
-
- ///
- /// Initializes a new instance of the class.
- ///
- ///
- /// A flag indicating whether or not issues should be included in the training set.
- /// A flag indicating whether or not pull requests should be included in the training set.
- /// The set of names identifying labels which all must be present on an issue for it to be included in the training set.
- /// The set of names identifying labels which all must be present on a pull request for it to be included in the training set.
- ///
- public TrainingDataFilters(
- bool includeIssues = true,
- bool includePullRequests = true,
- string[] requiredIssueLabelNames = default,
- string[] requiredPullRequestLabelNames = default)
- {
- IncludeIssues = includeIssues;
- IncludePullRequests = includePullRequests;
- RequiredIssueLabelNames = requiredIssueLabelNames;
- RequiredPullRequestLabelNames = requiredPullRequestLabelNames;
- }
-
- ///
- /// A filter applied to the issues under consideration for use in the training set. The filter
- /// is only considered if is set.
- ///
- ///
- /// The issue to consider.
- ///
- /// true if the should be included in the training set; otherwise, false.
- ///
- public virtual bool IssueFilter(Issue issue) => true;
-
- ///
- /// A filter applied to the pull requests under consideration for use in the training set. The
- /// filter is only considered if is set.
- ///
- ///
- /// The pull request to consider.
- ///
- /// true if the should be included in the training set; otherwise, false.
- ///
- public virtual bool PullRequestFilter(PullRequestWithFiles pullRequest) => true;
-
- ///
- /// A filter applied to the labels under consideration for use in the training set.
- ///
- ///
- /// The label to consider.
- ///
- /// true if the should be included in the training set; otherwise, false.
- ///
- public virtual bool LabelFilter(Label label) => true;
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataProcessor.cs b/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataProcessor.cs
deleted file mode 100644
index cdbd8935061..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/DL/TrainingDataProcessor.cs
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System.Collections.Generic;
-using CreateMikLabelModel.Models;
-using Octokit;
-
-namespace CreateMikLabelModel
-{
- ///
- /// The processor responsible for preparing training set items from the
- /// raw repository data.
- ///
- ///
- public class TrainingDataProcessor
- {
- /// The name of the default training segment.
- public const string DefaultSegmentName = "Default";
-
- ///
- /// The logger to use for reporting information as items are prepared.
- ///
- ///
- protected ILogger Logger { get; init; }
-
- ///
- /// Initializes a new instance of the class.
- ///
- ///
- /// The logger to use for reporting information as items are prepared.
- ///
- public TrainingDataProcessor(ILogger logger) => Logger = logger;
-
- ///
- /// Prepares training data based on repository issues, transforming it into the
- /// appropriate representation.
- ///
- ///
- /// The raw training data, in the form of repository issues.
- /// The name of the repository that was the source of the training data.
- ///
- /// The set of instances prepared from the .
- ///
- public virtual async IAsyncEnumerable PrepareData(
- IAsyncEnumerable trainingData,
- string repositoryName)
- {
- var itemCount = 0;
-
- await foreach (var issue in trainingData)
- {
- if (issue.Labels.Count > 0)
- {
- foreach (var label in issue.Labels)
- {
- ++itemCount;
- yield return new TrainingDataItem(label.Name, DefaultSegmentName, repositoryName, issue);
- }
- }
- else
- {
- ++itemCount;
- yield return new TrainingDataItem(null, DefaultSegmentName, repositoryName, issue);
- }
- }
-
- Logger.LogInformation($"Prepared { itemCount } training set items from issue training data.");
- }
-
- ///
- /// Prepares training data based on repository pull requests, transforming it into the
- /// appropriate representation.
- ///
- ///
- /// The raw training data, in the form of repository pull requests.
- /// The name of the repository that was the source of the training data.
- ///
- /// The set of instances prepared from the .
- ///
- public virtual async IAsyncEnumerable PrepareData(
- IAsyncEnumerable trainingData,
- string repositoryName)
- {
- var itemCount = 0;
-
- await foreach (var pullRequest in trainingData)
- {
- if (pullRequest.PullRequest.Labels.Count > 0)
- {
- foreach (var label in pullRequest.PullRequest.Labels)
- {
- ++itemCount;
- yield return new TrainingDataItem(label.Name, DefaultSegmentName, repositoryName, pullRequest);
- }
- }
- else
- {
- ++itemCount;
- yield return new TrainingDataItem(null, DefaultSegmentName, repositoryName, pullRequest);
- }
- }
-
- Logger.LogInformation($"Prepared { itemCount } training set items from pull request training data.");
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/LabelModelTrainer.cs b/tools/issue-labeler/src/CreateMikLabelModel/LabelModelTrainer.cs
deleted file mode 100644
index d62b7e0b155..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/LabelModelTrainer.cs
+++ /dev/null
@@ -1,360 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.IO;
-using System.Linq;
-using System.Threading.Tasks;
-using CreateMikLabelModel.ML;
-using CreateMikLabelModel.Models;
-
-namespace CreateMikLabelModel
-{
- ///
- /// Provides functionality related to training label models, including building anc curating the
- /// sets of data needed to do so.
- ///
- ///
- public class LabelModelTrainer
- {
- private ILogger _logger;
-
- ///
- /// The repository that the trainer is associated with.
- ///
- ///
- ///
- /// The full path of the repository, including the owner name. For
- /// example, "Azure/azure-sdk-for-net".
- ///
- ///
- public string RepositoryPath { get; init; }
-
- ///
- /// Initializes a new instance of the class.
- ///
- ///
- /// The repository path to associate the training with.
- /// The logging implementation to use for emitting messages.
- ///
- public LabelModelTrainer(string repositoryPath, ILogger logger)
- {
- _logger = logger ?? throw new ArgumentNullException(nameof(logger));
-
- if (string.IsNullOrWhiteSpace(repositoryPath))
- {
- throw new ArgumentNullException(nameof(repositoryPath));
- }
-
- RepositoryPath = repositoryPath;
- }
-
- ///
- /// Queries repository items for data to use for training the models.
- ///
- ///
- /// The access token to use for the GitHub API.
- /// The base path to use for storing and querying training data.
- /// The processor for preparing training set items from repository issues and pull requests.
- /// The set of filters to apply to data when building the training set. If not provided, training items will not be filtered.
- ///
- /// The set of that were produced.
- ///
- public virtual Task> QueryTrainingData(
- string gitHubAccessToken,
- string trainingDataBasePath,
- TrainingDataProcessor processor = default,
- TrainingDataFilters filters = default) => QueryTrainingData(gitHubAccessToken, trainingDataBasePath, new[] { RepositoryPath }, processor, filters);
-
- ///
- /// Queries repository items for data to use for training the models.
- ///
- ///
- /// The access token to use for the GitHub API.
- /// The base path to use for storing and querying training data.
- /// The group of repositories to include in this training set.
- /// The processor for preparing training set items from repository issues and pull requests.
- /// The set of filters to apply to data when building the training set. If not provided, training items will not be filtered.
- ///
- /// The set of that were produced.
- ///
- public virtual async Task> QueryTrainingData(
- string gitHubAccessToken,
- string trainingDataBasePath,
- string[] trainingRepositoryGroup,
- TrainingDataProcessor processor = default,
- TrainingDataFilters filters = default)
- {
- if (gitHubAccessToken is { Length: 0 })
- {
- throw new ArgumentException("GitHub access token is required.", nameof(gitHubAccessToken));
- }
-
- if (trainingDataBasePath is { Length: 0 })
- {
- throw new ArgumentException("The base path for storing training data is required.", nameof(gitHubAccessToken));
- }
-
- if ((!Directory.Exists(trainingDataBasePath)) || (!ValidateWriteAccess(trainingDataBasePath)))
- {
- throw new ArgumentException("Either the directory does not exist or cannot be written to.", nameof(trainingDataBasePath));
- }
-
- if (trainingRepositoryGroup is { Length: 0 })
- {
- throw new ArgumentException("The repository group is required and should contain at least one item.", nameof(trainingRepositoryGroup));
- }
-
- // If no explicit processor or filters were requested, accept all items as valid for the training set.
-
- processor ??= new TrainingDataProcessor(_logger);
- filters ??= new TrainingDataFilters();
-
- _logger.LogInformation($"Preparing the training set for '{ RepositoryPath }'.");
-
- var stopWatch = Stopwatch.StartNew();
- var trainingSetItemCount = 0;
- var repositoryInformation = RepositoryInformation.Parse(RepositoryPath);
- var trainingItemClient = new TrainingDataClient(gitHubAccessToken, _logger);
- var trainingItems = new Dictionary>();
-
- try
- {
- // Process issues, if they are to be included.
-
- if (filters.IncludeIssues)
- {
- await foreach (var trainingItem in processor.PrepareData(trainingItemClient.GetIssuesAsync(trainingRepositoryGroup, filters), repositoryInformation.Name))
- {
- if (!trainingItems.ContainsKey(trainingItem.SegmentName))
- {
- trainingItems.Add(trainingItem.SegmentName, new List());
- }
-
- trainingItems[trainingItem.SegmentName].Add(trainingItem);
- ++trainingSetItemCount;
- }
- }
-
- // Process pull requests, if they are to be included.
-
- if (filters.IncludePullRequests)
- {
- await foreach (var trainingItem in processor.PrepareData(trainingItemClient.GetPullRequestsAsync(trainingRepositoryGroup, filters), repositoryInformation.Name))
- {
- if (!trainingItems.ContainsKey(trainingItem.SegmentName))
- {
- trainingItems.Add(trainingItem.SegmentName, new List());
- }
-
- trainingItems[trainingItem.SegmentName].Add(trainingItem);
- ++trainingSetItemCount;
- }
- }
- }
- catch (Exception ex)
- {
- throw new ApplicationException("The training set was not able to be successfully prepared.", ex);
- }
-
- stopWatch.Stop();
- _logger.LogInformation($"Done downloading data for training items in {stopWatch.Elapsed.TotalSeconds:0.00} seconds.");
-
- // With the data downloaded and prepared, write the training set data for each segment.
-
- _logger.LogInformation($"Writing out training data files for '{ RepositoryPath }'.");
-
- stopWatch.Restart();
- var trainingFiles = new Dictionary(trainingItems.Keys.Count);
-
- try
- {
- foreach (var segment in trainingItems)
- {
- var segmentFiles = CreateTrainingFilesForSegment(repositoryInformation, segment.Key, trainingDataBasePath, filters);
- trainingFiles.Add(segment.Key, segmentFiles);
-
- using var outputWriter = new StreamWriter(segmentFiles.Issues.InputPath);
- TrainingData.WriteHeader(outputWriter);
- TrainingData.WriteTrainingItems(segment.Value, outputWriter);
- }
- }
- catch (Exception ex)
- {
- throw new ApplicationException("The training data files were not able to be successfully written.", ex);
- }
-
- stopWatch.Stop();
- _logger.LogInformation($"Done writing training data files in {stopWatch.Elapsed.TotalSeconds:0.00} seconds.");
-
- // Return the segments and associated files.
-
- return trainingFiles;
- }
-
- ///
- /// Generates the training datasets for issues and pull requests, writing out
- /// the necessary files to the paths specified by the .
- ///
- ///
- /// The locations of the files, both input and output, associated with training datasets.
- ///
- public void GenerateTrainingDatasets(TrainingDataSegment trainingFiles)
- {
- // Generate the dataset for issues.
-
- _logger.LogInformation("Generating the datasets for issues...");
-
- var stopWatch = Stopwatch.StartNew();
-
- if (!trainingFiles.Issues.SkipProcessing)
- {
- var issueData = TrainingDataset.ProcessIssueTrainingData(trainingFiles.Issues.InputPath).ToArray();
-
- // There is always a header line present; if there are no other lines, then there was no
- // issue data.
-
- if (issueData.Length > 1)
- {
- TrainingDataset.WriteDataset(trainingFiles.Issues, issueData);
- _logger.LogInformation($"{ issueData.Length } issues were included in the datasets.");
- }
- else
- {
- _logger.LogInformation("No issue data was available for use in the datasets.");
- }
- }
- else
- {
- _logger.LogInformation("Issues were configured to be excluded from the datasets; no issue data was used.");
- }
-
- stopWatch.Stop();
- _logger.LogInformation($"Issue datasets are complete in {stopWatch.Elapsed.TotalSeconds:0.00} seconds.");
-
- // Generate the dataset for pull requests.
-
- _logger.LogInformation("Generating the datasets for pull requests...");
- stopWatch.Restart();
-
- if (!trainingFiles.PullRequests.SkipProcessing)
- {
- var pullRequestData = TrainingDataset.ProcessPullRequestTrainingData(trainingFiles.PullRequests.InputPath).ToArray();
-
- // There is always a header line present; if there are no other lines, then there was no
- // pull request data.
-
- if (pullRequestData.Length > 1)
- {
- TrainingDataset.WriteDataset(trainingFiles.PullRequests, pullRequestData);
- _logger.LogInformation($"{ pullRequestData.Length } pull requests were included in the datasets.");
- }
- else
- {
- _logger.LogInformation("No pull request data was available for use in the datasets.");
- }
- }
- else
- {
- _logger.LogInformation("Pull requests were configured to be excluded from the datasets; no pull request data was used.");
- }
-
- stopWatch.Stop();
- _logger.LogInformation($"Pull request datasets are complete in {stopWatch.Elapsed.TotalSeconds:0.00} seconds.");
- }
-
- ///
- /// Trains the machine learning models, using the previously prepared training datasets
- /// identified by the paths specified in the specified.
- ///
- ///
- /// The locations of the files for training datasets to be used for training the ML models.
- ///
- public void TrainModels(TrainingDataSegment trainingFiles)
- {
- var mlHelper = new MLHelper(_logger);
- var stopWatch = Stopwatch.StartNew();
-
- if (!trainingFiles.Issues.SkipProcessing)
- {
- _logger.LogInformation("Training the models for issues...");
- mlHelper.Train(trainingFiles.Issues, false);
- }
- else
- {
- _logger.LogInformation("Issues were configured to be excluded from the training; no issue data trained.");
- }
-
- if (!trainingFiles.PullRequests.SkipProcessing)
- {
- _logger.LogInformation("Training the models for pull requests...");
- mlHelper.Train(trainingFiles.PullRequests, true);
- }
- else
- {
- _logger.LogInformation("Pull requests were configured to be excluded from the training; no pull request data was trained.");
- }
-
- stopWatch.Stop();
- _logger.LogInformation($"Model training complete in {stopWatch.Elapsed.TotalSeconds:0.00} seconds.");
- }
-
- ///
- /// Tests the previously trained machine learning models identified by the paths specified in
- /// the specified.
- ///
- ///
- /// The locations of the files for training datasets to be used for training the ML models.
- ///
- public void TestModels(TrainingDataSegment trainingFiles)
- {
- var mlHelper = new MLHelper(_logger);
- var stopWatch = Stopwatch.StartNew();
-
- if (!trainingFiles.Issues.SkipProcessing)
- {
- _logger.LogInformation("Testing the models for issues...");
- mlHelper.Test(trainingFiles.Issues, false);
- }
-
- if (!trainingFiles.PullRequests.SkipProcessing)
- {
- _logger.LogInformation("Testing the models for pull requests...");
- mlHelper.Test(trainingFiles.PullRequests, true);
- }
-
- stopWatch.Stop();
- _logger.LogInformation($"Model testing complete in {stopWatch.Elapsed.TotalSeconds:0.00} seconds.");
- }
-
- private static bool ValidateWriteAccess(string path)
- {
- try
- {
- using var file = File.Create(Path.Combine(path, Path.GetRandomFileName()), 1, FileOptions.DeleteOnClose);
- file.Close();
-
- return true;
- }
- catch (UnauthorizedAccessException)
- {
- return false;
- }
- }
-
- private static TrainingDataSegment CreateTrainingFilesForSegment(
- RepositoryInformation repository,
- string segmentName,
- string trainingDataBasePath,
- TrainingDataFilters filters)
- {
- var prefix = $"{ repository.Owner }-{ repository.Name }-{segmentName }";
-
- return new TrainingDataSegment(
- new TrainingDataFilePaths(trainingDataBasePath, prefix, forPrs: false, skip: !filters.IncludeIssues),
- new TrainingDataFilePaths(trainingDataBasePath, prefix, forPrs: true, skip: !filters.IncludePullRequests));
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/Logging/ConsoleLogger.cs b/tools/issue-labeler/src/CreateMikLabelModel/Logging/ConsoleLogger.cs
deleted file mode 100644
index 05b1b2e2fc3..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/Logging/ConsoleLogger.cs
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-
-namespace CreateMikLabelModel
-{
- ///
- /// Logs information to the using the
- /// standard streams.
- ///
- ///
- public class ConsoleLogger : ILogger
- {
- ///
- /// Logs an informational message.
- ///
- ///
- /// The message to log.
- ///
- public void LogInformation(string message) => Console.WriteLine(message);
-
- ///
- /// Logs a warning message.
- ///
- ///
- /// The message to log.
- ///
- public void LogWarning(string message)
- {
- var color = Console.ForegroundColor;
-
- Console.ForegroundColor = ConsoleColor.Yellow;
- Console.WriteLine(message);
- Console.ForegroundColor = color;
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/Logging/ILogger.cs b/tools/issue-labeler/src/CreateMikLabelModel/Logging/ILogger.cs
deleted file mode 100644
index 2285c24f616..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/Logging/ILogger.cs
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace CreateMikLabelModel
-{
- ///
- /// Allows messages of different categories to be logged.
- ///
- ///
- public interface ILogger
- {
- ///
- /// Logs an informational message.
- ///
- ///
- /// The message to log.
- ///
- void LogInformation(string message);
-
- ///
- /// Logs a warning message.
- ///
- ///
- /// The message to log.
- ///
- void LogWarning(string message);
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/ExperimentModifier.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/ExperimentModifier.cs
deleted file mode 100644
index ced65e65731..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/ExperimentModifier.cs
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.ML.AutoML;
-using System;
-using System.Collections.Generic;
-
-namespace CreateMikLabelModel.ML
-{
- public struct ExperimentModifier
- {
- public ExperimentModifier(TrainingDataFilePaths paths, bool forPrs)
- {
- // set all to defaults:
- ColumnSetup = (columnInformation, forPrs) =>
- {
- // Customize column information returned by InferColumns API
- columnInformation.CategoricalColumnNames.Clear();
- columnInformation.NumericColumnNames.Clear();
- columnInformation.IgnoredColumnNames.Clear();
- columnInformation.TextColumnNames.Clear();
-
- // NOTE: depending on how the data changes over time this might need to get updated too.
- // Only the Title and Description are needed, but since we are PreFeaturizing them we can
- // ignore them here.
- columnInformation.IgnoredColumnNames.Add("Title");
- columnInformation.IgnoredColumnNames.Add("Description");
- columnInformation.IgnoredColumnNames.Add("Author");
- columnInformation.IgnoredColumnNames.Add("IsPR");
- columnInformation.IgnoredColumnNames.Add("NumMentions");
- columnInformation.IgnoredColumnNames.Add("UserMentions");
- columnInformation.IgnoredColumnNames.Add("ID");
- columnInformation.IgnoredColumnNames.Add("CombinedID");
-
- if (forPrs)
- {
- columnInformation.NumericColumnNames.Add("FileCount");
- columnInformation.IgnoredColumnNames.Add("Files");
- columnInformation.TextColumnNames.Add("FolderNames");
- columnInformation.IgnoredColumnNames.Add("Folders");
- columnInformation.IgnoredColumnNames.Add("FileExtensions");
- columnInformation.TextColumnNames.Add("Filenames");
- }
- };
-
- TrainerSetup = (trainers) =>
- {
- trainers.Clear();
- if (forPrs)
- {
- trainers.Add(MulticlassClassificationTrainer.SdcaMaximumEntropy);
- trainers.Add(MulticlassClassificationTrainer.FastTreeOva);
- }
- else
- {
- trainers.Add(MulticlassClassificationTrainer.SdcaMaximumEntropy);
- // trainers.Add(MulticlassClassificationTrainer.LinearSupportVectorMachinesOva);
- //trainers.Add(MulticlassClassificationTrainer.LightGbm);
- }
- };
-
- ExperimentTime = 300;
- LabelColumnName = "Label";
- ForPrs = forPrs;
- Paths = paths;
- }
-
- public ExperimentModifier(
- bool forPrs,
- uint experimentTime,
- string labelColumnName,
- TrainingDataFilePaths paths,
- Action columnSetup,
- Action> trainerSetup)
- {
- ForPrs = forPrs;
- ExperimentTime = experimentTime;
- LabelColumnName = labelColumnName;
- Paths = paths;
- ColumnSetup = columnSetup;
- TrainerSetup = trainerSetup;
- }
-
- public readonly uint ExperimentTime;
- public readonly string LabelColumnName;
- public readonly Action ColumnSetup;
- public readonly Action> TrainerSetup;
- public readonly bool ForPrs;
- public readonly TrainingDataFilePaths Paths;
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/LoggingHelper.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/LoggingHelper.cs
deleted file mode 100644
index ffaa7b9fad1..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/LoggingHelper.cs
+++ /dev/null
@@ -1,177 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.ML.AutoML;
-using Microsoft.ML.Data;
-using System;
-using System.Collections.Generic;
-using System.Diagnostics;
-using System.Linq;
-using System.Text;
-
-namespace CreateMikLabelModel.ML
-{
- internal class LoggingHelper
- {
- private const int Width = 114;
- private readonly ILogger _logger;
-
- public LoggingHelper(ILogger logger) => _logger = logger;
-
- internal void PrintIterationMetrics(int iteration, string trainerName, MulticlassClassificationMetrics metrics, double? runtimeInSeconds)
- {
- PrintRow($"{iteration,-4} {trainerName,-35} {metrics?.MicroAccuracy ?? double.NaN,14:F4} {metrics?.MacroAccuracy ?? double.NaN,14:F4} {runtimeInSeconds.Value,9:F1}", Width);
- }
-
- internal void PrintIterationException(Exception ex)
- {
- _logger.LogInformation($"Exception during AutoML iteration: {ex}");
- }
-
- internal void PrintMulticlassClassificationMetricsHeader()
- {
- PrintRow($"{"",-4} {"Trainer",-35} {"MicroAccuracy",14} {"MacroAccuracy",14} {"Duration",9}", Width);
- }
-
- private void PrintRow(string message, int width)
- {
- _logger.LogInformation("|" + message.PadRight(width - 2) + "|");
- }
-
- public void ConsoleWriteHeader(params string[] lines)
- {
- _logger.LogInformation(" ");
- foreach (var line in lines)
- {
- _logger.LogInformation(line);
- }
- var maxLength = lines.Select(x => x.Length).Max();
- _logger.LogInformation(new string('#', maxLength));
- }
-
- public static string BuildStringTable(IList arrValues)
- {
- var maxColumnsWidth = GetMaxColumnsWidth(arrValues);
- var headerSpliter = new string('-', maxColumnsWidth.Sum(i => i + 3) - 1);
-
- var sb = new StringBuilder();
- for (var rowIndex = 0; rowIndex < arrValues.Count; rowIndex++)
- {
- if (rowIndex == 0)
- {
- sb.AppendFormat(" {0} ", headerSpliter);
- sb.AppendLine();
- }
-
- for (var colIndex = 0; colIndex < arrValues[0].Length; colIndex++)
- {
- // Print cell
- var cell = arrValues[rowIndex][colIndex];
- cell = cell.PadRight(maxColumnsWidth[colIndex]);
- sb.Append(" | ");
- sb.Append(cell);
- }
-
- // Print end of line
- sb.Append(" | ");
- sb.AppendLine();
-
- // Print splitter
- if (rowIndex == 0)
- {
- sb.AppendFormat(" |{0}| ", headerSpliter);
- sb.AppendLine();
- }
-
- if (rowIndex == arrValues.Count - 1)
- {
- sb.AppendFormat(" {0} ", headerSpliter);
- }
- }
-
- return sb.ToString();
- }
-
- private static int[] GetMaxColumnsWidth(IList arrValues)
- {
- var maxColumnsWidth = new int[arrValues[0].Length];
- for (var colIndex = 0; colIndex < arrValues[0].Length; colIndex++)
- {
- for (var rowIndex = 0; rowIndex < arrValues.Count; rowIndex++)
- {
- var newLength = arrValues[rowIndex][colIndex].Length;
- var oldLength = maxColumnsWidth[colIndex];
-
- if (newLength > oldLength)
- {
- maxColumnsWidth[colIndex] = newLength;
- }
- }
- }
-
- return maxColumnsWidth;
- }
-
- private class ColumnInferencePrinter
- {
- private static readonly string[] TableHeaders = new[] { "Name", "Data Type", "Purpose" };
-
- private readonly ColumnInferenceResults _results;
-
- public ColumnInferencePrinter(ColumnInferenceResults results)
- {
- _results = results;
- }
-
- public void Print()
- {
- var tableRows = new List();
-
- // Add headers
- tableRows.Add(TableHeaders);
-
- // Add column data
- var info = _results.ColumnInformation;
- AppendTableRow(tableRows, info.LabelColumnName, "Label");
- AppendTableRow(tableRows, info.ExampleWeightColumnName, "Weight");
- AppendTableRow(tableRows, info.SamplingKeyColumnName, "Sampling Key");
- AppendTableRows(tableRows, info.CategoricalColumnNames, "Categorical");
- AppendTableRows(tableRows, info.NumericColumnNames, "Numeric");
- AppendTableRows(tableRows, info.TextColumnNames, "Text");
- AppendTableRows(tableRows, info.IgnoredColumnNames, "Ignored");
-
- Console.WriteLine(LoggingHelper.BuildStringTable(tableRows));
- }
-
- private void AppendTableRow(ICollection tableRows,
- string columnName, string columnPurpose)
- {
- if (columnName == null)
- {
- return;
- }
-
- tableRows.Add(new[]
- {
- columnName,
- GetColumnDataType(columnName),
- columnPurpose
- });
- }
-
- private void AppendTableRows(ICollection tableRows,
- IEnumerable columnNames, string columnPurpose)
- {
- foreach (var columnName in columnNames)
- {
- AppendTableRow(tableRows, columnName, columnPurpose);
- }
- }
-
- private string GetColumnDataType(string columnName)
- {
- return _results.TextLoaderOptions.Columns.First(c => c.Name == columnName).DataKind.ToString();
- }
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/MLHelper.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/MLHelper.cs
deleted file mode 100644
index af75bf252d6..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/MLHelper.cs
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.ML;
-using Microsoft.ML.Data;
-using System;
-using System.Diagnostics;
-
-namespace CreateMikLabelModel.ML
-{
- public class MLHelper
- {
- private readonly MLContext _mLContext;
- private readonly ILogger _logger;
-
- public MLHelper(ILogger logger)
- {
- _mLContext = new MLContext(seed: 0);
- _logger = logger;
- }
-
- public void Test(TrainingDataFilePaths files, bool forPrs)
- {
- MulticlassExperimentHelper.TestPrediction(_logger, _mLContext, files, forPrs: forPrs);
- }
-
- public void Train(TrainingDataFilePaths files, bool forPrs)
- {
- var stopWatch = Stopwatch.StartNew();
-
- var st = new ExperimentModifier(files, forPrs);
- Train(st);
-
- stopWatch.Stop();
- _logger.LogInformation($"Done creating model in {stopWatch.ElapsedMilliseconds}ms");
- }
-
- private void Train(ExperimentModifier settings)
- {
- var setup = MulticlassExperimentSettingsHelper.SetupExperiment(_logger, _mLContext, settings, settings.Paths, settings.ForPrs);
-
- // Start experiment
- var textLoader = _mLContext.Data.CreateTextLoader(setup.columnInference.TextLoaderOptions);
- var paths = settings.Paths;
-
- // train once:
- var experimentResult = MulticlassExperimentHelper.Train(
- _logger, _mLContext, setup.experimentSettings, new MulticlassExperimentProgressHandler(_logger), paths, textLoader, setup.columnInference);
-
- // train twice
- _ = MulticlassExperimentHelper.Retrain(experimentResult,
- "refit model",
- new MultiFileSource(paths.TrainPath, paths.ValidatePath),
- paths.ValidatePath,
- paths.FittedModelPath, textLoader, _logger, _mLContext);
-
- // final train:
- _ = MulticlassExperimentHelper.Retrain(_logger, _mLContext, experimentResult, setup.columnInference, paths);
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentHelper.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentHelper.cs
deleted file mode 100644
index d0fb768866a..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentHelper.cs
+++ /dev/null
@@ -1,248 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using IssueLabeler.Shared;
-using Microsoft.ML;
-using Microsoft.ML.AutoML;
-using Microsoft.ML.Data;
-using Microsoft.ML.Transforms.Text;
-
-namespace CreateMikLabelModel.ML
-{
- public static class MulticlassExperimentHelper
- {
- public static ExperimentResult RunAutoMLExperiment(
- ILogger logger, MLContext mlContext, MulticlassExperimentSettings experimentSettings,
- MulticlassExperimentProgressHandler progressHandler, IDataView dataView, ColumnInferenceResults columnInference)
- {
- new LoggingHelper(logger).ConsoleWriteHeader("=============== Running AutoML experiment ===============");
- logger.LogInformation($"Running AutoML multiclass classification experiment for {experimentSettings.MaxExperimentTimeInSeconds} seconds...");
-
- // Pre-featurize the title and description, and remove features that have less then 2.
- IEstimator preFeaturizer =
- preFeaturizer = mlContext.Transforms.Text.FeaturizeText("TextFeatures",
- new TextFeaturizingEstimator.Options(),
- new[] { "Title", "Description" })
- .Append(mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("TextFeatures", "TextFeatures", 2))
- .AppendCacheCheckpoint(mlContext);
-
- var experimentResult = mlContext.Auto()
- .CreateMulticlassClassificationExperiment(experimentSettings)
- .Execute(dataView, columnInference.ColumnInformation, progressHandler: progressHandler, preFeaturizer: preFeaturizer);
-
- logger.LogInformation(Environment.NewLine);
- logger.LogInformation($"num models created: {experimentResult.RunDetails.Count()}");
-
- // Get top few runs ranked by accuracy
- var topRuns = experimentResult.RunDetails
- .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.MicroAccuracy))
- .OrderByDescending(r => r.ValidationMetrics.MicroAccuracy)
- .Take(3)
- .ToArray();
-
- logger.LogInformation("Top models ranked by accuracy --");
- logger.LogInformation(CreateRow($"{"",-4} {"Trainer",-35} {"MicroAccuracy",14} {"MacroAccuracy",14} {"Duration",9}", Width));
-
- for (var i = 0; i < topRuns.Length; i++)
- {
- var run = topRuns[i];
- logger.LogInformation(CreateRow($"{i,-4} {run.TrainerName,-35} {run.ValidationMetrics?.MicroAccuracy ?? double.NaN,14:F4} {run.ValidationMetrics?.MacroAccuracy ?? double.NaN,14:F4} {run.RuntimeInSeconds,9:F1}", Width));
- }
- return experimentResult;
- }
-
- public static ExperimentResult Train(
- ILogger logger, MLContext mlContext, MulticlassExperimentSettings experimentSettings,
- MulticlassExperimentProgressHandler progressHandler, TrainingDataFilePaths paths, TextLoader textLoader, ColumnInferenceResults columnInference)
- {
- var data = mlContext.Data.TrainTestSplit(textLoader.Load(paths.TrainPath, paths.ValidatePath), seed: 0);
- var experimentResult = RunAutoMLExperiment(logger, mlContext, experimentSettings, progressHandler, data.TrainSet, columnInference);
-
- EvaluateTrainedModelAndPrintMetrics(logger, mlContext, experimentResult.BestRun.Model, experimentResult.BestRun.TrainerName, data.TestSet);
- SaveModel(logger, mlContext, experimentResult.BestRun.Model, paths.ModelPath, data.TrainSet);
- return experimentResult;
- }
-
- public static ITransformer Retrain(ExperimentResult experimentResult,
- string trainerName, MultiFileSource multiFileSource, string dataPath, string modelPath, TextLoader textLoader, ILogger logger, MLContext mlContext)
- {
- var dataView = textLoader.Load(dataPath);
- new LoggingHelper(logger).ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
-
- var combinedDataView = textLoader.Load(multiFileSource);
- var bestRun = experimentResult.BestRun;
- var refitModel = bestRun.Estimator.Fit(combinedDataView);
-
- EvaluateTrainedModelAndPrintMetrics(logger, mlContext, refitModel, trainerName, dataView);
- SaveModel(logger, mlContext, refitModel, modelPath, dataView);
- return refitModel;
- }
-
- public static ITransformer Retrain(ILogger logger, MLContext mlContext, ExperimentResult experimentResult,
- ColumnInferenceResults columnInference, TrainingDataFilePaths paths, bool fixedBug = false)
- {
- new LoggingHelper(logger).ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
-
- var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
- var combinedDataView = textLoader.Load(new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.TestPath));
- var bestRun = experimentResult.BestRun;
- if (fixedBug)
- {
- // TODO: retry: below gave error but I thought it would work:
- //refitModel = MulticlassExperiment.Retrain(experimentResult,
- // "final model",
- // new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.FittedPath),
- // paths.TestPath,
- // paths.FinalPath, textLoader, mlContext);
- // but if failed before fixing this maybe the problem was in *EvaluateTrainedModelAndPrintMetrics*
-
- }
- var refitModel = bestRun.Estimator.Fit(combinedDataView);
-
- EvaluateTrainedModelAndPrintMetrics(logger, mlContext, refitModel, "production model", textLoader.Load(paths.TestPath));
- // Save the re-fit model to a.ZIP file
- SaveModel(logger, mlContext, refitModel, paths.FinalModelPath, textLoader.Load(paths.TestPath));
-
- logger.LogInformation($"The model is saved to {paths.FinalModelPath}");
- return refitModel;
- }
-
- private const int Width = 114;
-
- private static string CreateRow(string message, int width) => "|" + message.PadRight(width - 2) + "|";
-
- ///
- /// Evaluate the model and print metrics.
- ///
- private static void EvaluateTrainedModelAndPrintMetrics(ILogger logger, MLContext mlContext, ITransformer model, string trainerName, IDataView dataView)
- {
- logger.LogInformation("===== Evaluating model's accuracy with test data =====");
- var predictions = model.Transform(dataView);
- var metrics = mlContext.MulticlassClassification.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
-
- logger.LogInformation($"************************************************************");
- logger.LogInformation($"* Metrics for {trainerName} multi-class classification model ");
- logger.LogInformation($"*-----------------------------------------------------------");
- logger.LogInformation($" MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
- logger.LogInformation($" MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better");
- logger.LogInformation($" LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better");
- for (int i = 0; i < metrics.PerClassLogLoss.Count; i++)
- {
- logger.LogInformation($" LogLoss for class {i+1} = {metrics.PerClassLogLoss[i]:0.####}, the closer to 0, the better");
- }
- logger.LogInformation($"************************************************************");
- }
-
- private static void SaveModel(ILogger logger, MLContext mlContext, ITransformer model, string modelPath, IDataView dataview)
- {
- // Save the re-fit model to a.ZIP file
- var consoleHelper = new LoggingHelper(logger);
- consoleHelper.ConsoleWriteHeader("=============== Saving the model ===============");
- mlContext.Model.Save(model, dataview.Schema, modelPath);
- logger.LogInformation($"The model is saved to {modelPath}");
- }
-
- public static void TestPrediction(ILogger logger, MLContext mlContext, TrainingDataFilePaths files, bool forPrs, double threshold = 0.4)
- {
- var trainedModel = mlContext.Model.Load(files.FittedModelPath, out _);
- IEnumerable<(string knownLabel, GitHubIssuePrediction predictedResult, string issueNumber)> predictions = null;
- string Legend1 = $"(includes not labeling issues with confidence lower than threshold. (here {threshold * 100.0f:#,0.00}%))";
- const string Legend2 = "(includes items that could be labeled if threshold was lower.)";
- const string Legend3 = "(those incorrectly labeled)";
- if (forPrs)
- {
- var testData = GetPullRequests(mlContext, files.TestPath);
- logger.LogInformation($"{Environment.NewLine}Number of PRs tested: {testData.Length}");
-
- var prEngine = mlContext.Model.CreatePredictionEngine(trainedModel);
- predictions = testData
- .Select(x => (
- knownLabel: x.Label,
- predictedResult: prEngine.Predict(x),
- issueNumber: x.ID.ToString()
- ));
- }
- else
- {
- var testData = GetIssues(mlContext, files.TestPath);
- logger.LogInformation($"{Environment.NewLine}\tNumber of issues tested: {testData.Length}");
-
- var issueEngine = mlContext.Model.CreatePredictionEngine(trainedModel);
- predictions = testData
- .Select(x => (
- knownLabel: x.Label,
- predictedResult: issueEngine.Predict(x),
- issueNumber: x.ID.ToString()
- ));
- }
-
- var analysis =
- predictions.Select(x =>
- (
- knownLabel: x.knownLabel,
- predictedArea: x.predictedResult.Area,
- maxScore: x.predictedResult.Score.Max(),
- confidentInPrediction: x.predictedResult.Score.Max() >= threshold,
- issueNumber: x.issueNumber
- ));
-
- var countSuccess = analysis.Where(x =>
- (x.confidentInPrediction && x.predictedArea.Equals(x.knownLabel, StringComparison.Ordinal)) ||
- (!x.confidentInPrediction && !x.predictedArea.Equals(x.knownLabel, StringComparison.Ordinal))).Count();
-
- var missedOpportunity = analysis
- .Where(x => !x.confidentInPrediction && x.knownLabel.Equals(x.predictedArea, StringComparison.Ordinal)).Count();
-
- var mistakes = analysis
- .Where(x => x.confidentInPrediction && !x.knownLabel.Equals(x.predictedArea, StringComparison.Ordinal))
- .Select(x => new { Pair = $"\tPredicted: {x.predictedArea}, Actual:{x.knownLabel}", IssueNumbers = x.issueNumber, MaxConfidencePercentage = x.maxScore * 100.0f })
- .GroupBy(x => x.Pair)
- .Select(x => new
- {
- Count = x.Count(),
- PerdictedVsActual = x.Key,
- Items = x,
- })
- .OrderByDescending(x => x.Count);
- int remaining = predictions.Count() - countSuccess - missedOpportunity;
-
- logger.LogInformation($"{Environment.NewLine}\thandled correctly: {countSuccess}{Environment.NewLine}\t{Legend1}{Environment.NewLine}");
- logger.LogInformation($"{Environment.NewLine}\tmissed: {missedOpportunity}{Environment.NewLine}\t{Legend2}{Environment.NewLine}");
- logger.LogInformation($"{Environment.NewLine}\tremaining: {remaining}{Environment.NewLine}\t{Legend3}{Environment.NewLine}");
-
- foreach (var mismatch in mistakes.AsEnumerable())
- {
- logger.LogInformation($"{mismatch.PerdictedVsActual}, NumFound: {mismatch.Count}");
- var sampleIssues = string.Join(Environment.NewLine, mismatch.Items.Select(x => $"\t\tFor #{x.IssueNumbers} was {x.MaxConfidencePercentage:#,0.00}% confident"));
- logger.LogInformation($"{Environment.NewLine}{ sampleIssues }{Environment.NewLine}");
- }
- }
-
- public static GitHubIssue[] GetIssues(MLContext mlContext, string dataFilePath)
- {
- var dataView = mlContext.Data.LoadFromTextFile(
- path: dataFilePath,
- hasHeader: true,
- separatorChar: '\t',
- allowQuoting: true,
- allowSparse: false);
-
- return mlContext.Data.CreateEnumerable(dataView, false).ToArray();
- }
-
- public static GitHubPullRequest[] GetPullRequests(MLContext mlContext, string dataFilePath)
- {
- var dataView = mlContext.Data.LoadFromTextFile(
- path: dataFilePath,
- hasHeader: true,
- separatorChar: '\t',
- allowQuoting: true,
- allowSparse: false);
-
- return mlContext.Data.CreateEnumerable(dataView, false).ToArray();
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentProgressHandler.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentProgressHandler.cs
deleted file mode 100644
index a10513bbce4..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentProgressHandler.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.ML.AutoML;
-using Microsoft.ML.Data;
-using System;
-
-namespace CreateMikLabelModel.ML
-{
- ///
- /// Progress handler that AutoML will invoke after each model it produces and evaluates.
- ///
- public class MulticlassExperimentProgressHandler : IProgress>
- {
- private readonly LoggingHelper _consoleHelper;
- private int _iterationIndex;
-
- public MulticlassExperimentProgressHandler(ILogger logger) => _consoleHelper = new LoggingHelper(logger);
-
-
- public void Report(RunDetail iterationResult)
- {
- if (_iterationIndex++ == 0)
- {
- _consoleHelper.PrintMulticlassClassificationMetricsHeader();
- }
-
- if (iterationResult.Exception != null)
- {
- _consoleHelper.PrintIterationException(iterationResult.Exception);
- }
- else
- {
- _consoleHelper.PrintIterationMetrics(_iterationIndex, iterationResult.TrainerName,
- iterationResult.ValidationMetrics, iterationResult.RuntimeInSeconds);
- }
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentSettingsHelper.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentSettingsHelper.cs
deleted file mode 100644
index 1ae769c7e96..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/MulticlassExperimentSettingsHelper.cs
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.ML;
-using Microsoft.ML.AutoML;
-using System.IO;
-
-namespace CreateMikLabelModel.ML
-{
- public static class MulticlassExperimentSettingsHelper
- {
- public static (ColumnInferenceResults columnInference, MulticlassExperimentSettings experimentSettings) SetupExperiment(
- ILogger logger, MLContext mlContext, ExperimentModifier st, TrainingDataFilePaths paths, bool forPrs)
- {
- var columnInference = InferColumns(logger, mlContext, paths.TrainPath, st.LabelColumnName);
- var columnInformation = columnInference.ColumnInformation;
- st.ColumnSetup(columnInformation, forPrs);
-
- var experimentSettings = new MulticlassExperimentSettings();
- st.TrainerSetup(experimentSettings.Trainers);
- experimentSettings.MaxExperimentTimeInSeconds = st.ExperimentTime;
-
- var cts = new System.Threading.CancellationTokenSource();
- experimentSettings.CancellationToken = cts.Token;
-
- // Set the cache directory to null.
- // This will cause all models produced by AutoML to be kept in memory
- // instead of written to disk after each run, as AutoML is training.
- // (Please note: for an experiment on a large dataset, opting to keep all
- // models trained by AutoML in memory could cause your system to run out
- // of memory.)
- experimentSettings.CacheDirectoryName = Path.GetTempPath();
- experimentSettings.OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy;
- return (columnInference, experimentSettings);
- }
-
- ///
- /// Infer columns in the dataset with AutoML.
- ///
- private static ColumnInferenceResults InferColumns(ILogger logger, MLContext mlContext, string dataPath, string labelColumnName)
- {
- new LoggingHelper(logger).ConsoleWriteHeader("=============== Inferring columns in dataset ===============");
- var columnInference = mlContext.Auto().InferColumns(dataPath, labelColumnName, groupColumns: false);
- return columnInference;
- }
- }
-
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataFilePaths.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataFilePaths.cs
deleted file mode 100644
index c908f6ddb4e..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataFilePaths.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System.IO;
-
-namespace CreateMikLabelModel.ML
-{
- public readonly struct TrainingDataFilePaths
- {
- public TrainingDataFilePaths(string folder, string commonPrefix, bool forPrs, bool skip) : this(folder, commonPrefix, string.Empty, forPrs, skip)
- {
- }
-
- public TrainingDataFilePaths(string folder, string commonPrefix, string modelPrefix, bool forPrs, bool skip)
- {
- Folder = folder;
- SkipProcessing = skip;
- InputPath = Path.Combine(Folder, commonPrefix + "-IssueAndPrData.tsv");
- var prefix = forPrs ? "-only-prs" : "-only-issues";
-
- TrainPath = Path.Combine(Folder, commonPrefix + prefix + "-part1.tsv");
- ValidatePath = Path.Combine(Folder, commonPrefix + prefix + "-part2.tsv");
- TestPath = Path.Combine(Folder, commonPrefix + prefix + "-part3.tsv");
- ModelPath = Path.Combine(Folder, commonPrefix + prefix + modelPrefix + "-model.zip");
- FittedModelPath = Path.Combine(Folder, commonPrefix + prefix + modelPrefix + "-fitted-model.zip");
- FinalModelPath = Path.Combine(Folder, commonPrefix + prefix + modelPrefix + "-final-model.zip");
- }
-
- public readonly string Folder;
- public readonly bool SkipProcessing;
- public readonly string TrainPath;
- public readonly string ValidatePath;
- public readonly string TestPath;
- public readonly string ModelPath;
- public readonly string FittedModelPath;
- public readonly string FinalModelPath;
- public readonly string InputPath;
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataSegment.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataSegment.cs
deleted file mode 100644
index 598e54fe830..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataSegment.cs
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace CreateMikLabelModel.ML
-{
- public record TrainingDataSegment(TrainingDataFilePaths Issues, TrainingDataFilePaths PullRequests);
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataset.cs b/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataset.cs
deleted file mode 100644
index 4a2d8f579be..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/ML/TrainingDataset.cs
+++ /dev/null
@@ -1,265 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using System.Text.RegularExpressions;
-using IssueLabeler.Shared;
-
-namespace CreateMikLabelModel.ML
-{
- internal static class TrainingDataset
- {
- private const int TrainingDataLineMinimum = 250;
- private const string DataSetBasicHeaders = "CombinedID\tID\tLabel\tTitle\tDescription\tAuthor\tIsPR\tNumMentions\tUserMentions";
- private const string DataSetFileHeaders = DataSetBasicHeaders + "\tFileCount\tFiles\tFilenames\tFileExtensions\tFolderNames\tFolders";
-
- private static readonly Regex UserMentionsExpression = new Regex(@"@[a-zA-Z0-9_//-]+", RegexOptions.Compiled);
- private static readonly DiffHelper DiffHelper = new DiffHelper();
-
- private static readonly Dictionary TrainingDataIndexes = new()
- {
- { "CombinedID", 0 },
- { "ID", 1 },
- { "Label", 2 },
- { "Title", 3 },
- { "Description", 4 },
- { "Author", 5 },
- { "IsPR", 6 },
- { "FilePaths", 7 }
- };
-
- public static IEnumerable ProcessIssueTrainingData(string trainingDataFilePath, bool includeFileColumns = false) =>
- ProcessTrainingData(trainingDataFilePath, includeFileColumns, line => line[TrainingDataIndexes["IsPR"]] != "1");
-
- public static IEnumerable ProcessPullRequestTrainingData(string trainingDataFilePath, bool includeFileColumns = true) =>
- ProcessTrainingData(trainingDataFilePath, includeFileColumns, line => line[TrainingDataIndexes["IsPR"]] == "1");
-
- public static void WriteDataset(
- TrainingDataFilePaths filePaths,
- string[] dataLines)
- {
- if (dataLines.Length < TrainingDataLineMinimum)
- {
- throw new ApplicationException($"At least { TrainingDataLineMinimum } training items are needed to create a training dataset; only { dataLines.Length - 1 } are available.");
- }
-
- var trainingSetCount = (int)Math.Floor(dataLines.Length * 0.8);
- var validateSetCount = (int)Math.Floor(dataLines.Length * 0.1);
- var currentCount = 0;
- var currentIndex = 1;
-
- FileStream datasetFile;
- StreamWriter datasetWriter;
-
- // Create the training set.
-
- using (datasetFile = File.Open(Path.GetFullPath(filePaths.TrainPath), FileMode.OpenOrCreate, FileAccess.Write, FileShare.None))
- using (datasetWriter = new StreamWriter(datasetFile))
- {
- // Write the header.
-
- datasetWriter.WriteLine(dataLines[0]);
-
- // Write the lines that belong in the set.
-
- while (currentCount < trainingSetCount)
- {
- datasetWriter.WriteLine(dataLines[currentIndex]);
-
- ++currentIndex;
- ++currentCount;
- }
- }
-
- // Create the validate set.
-
- currentCount = 0;
-
- using (datasetFile = File.Open(Path.GetFullPath(filePaths.ValidatePath), FileMode.OpenOrCreate, FileAccess.Write, FileShare.None))
- using (datasetWriter = new StreamWriter(datasetFile))
- {
- // Write the header.
-
- datasetWriter.WriteLine(dataLines[0]);
-
- // Write the lines that belong in the set.
-
- while (currentCount < validateSetCount)
- {
- datasetWriter.WriteLine(dataLines[currentIndex]);
-
- ++currentIndex;
- ++currentCount;
- }
- }
-
- // Create the test set using all remaining data.
-
- using (datasetFile = File.Open(Path.GetFullPath(filePaths.TestPath), FileMode.OpenOrCreate, FileAccess.Write, FileShare.None))
- using (datasetWriter = new StreamWriter(datasetFile))
- {
- // Write the header.
-
- datasetWriter.WriteLine(dataLines[0]);
-
- // Write the lines that belong in the set.
-
- while (currentIndex < dataLines.Length)
- {
- datasetWriter.WriteLine(dataLines[currentIndex]);
- ++currentIndex;
- }
- }
- }
-
- private static IEnumerable ProcessTrainingData(
- string trainingDataFilePath,
- bool includeFileColumns,
- Func lineFilter)
- {
- using var dataFileStream = File.Open(Path.GetFullPath(trainingDataFilePath), FileMode.Open, FileAccess.Read, FileShare.Read);
- using var dataFileReader = new StreamReader(dataFileStream);
-
- // Read and validate the training data headers
-
- var dataHeaders = dataFileReader.ReadLine();
-
- if (!ValidateTrainingDataHeaders(dataHeaders))
- {
- throw new ApplicationException("The training data file was not in the expected format.");
- }
-
- // Emit the headers.
-
- yield return (includeFileColumns) ? DataSetFileHeaders : DataSetBasicHeaders;
-
- // Process each line of training data.
-
- var lineCount = 0;
- var lineBuilder = new StringBuilder();
- var line = dataFileReader.ReadLine();
-
- while (line != null)
- {
- var dataElements = line.Split('\t');
-
- // Only process the line if it is accepted by the filter.
-
- if (lineFilter(dataElements))
- {
- if (!byte.TryParse(dataElements[TrainingDataIndexes["IsPR"]], out var isPrBit))
- {
- throw new ApplicationException($"Malformed training data for line '{ lineCount + 1 }'. The 'IsPR' flag could not be parsed.");
- }
-
- if ((isPrBit < 0) || (isPrBit > 1))
- {
- throw new ApplicationException($"Malformed training data for line '{ lineCount + 1 }'. The 'IsPR' flag has an invalid value: '{ isPrBit }' It should be either 0 or 1.");
- }
-
- var mentions = GetUserMentions(dataElements[TrainingDataIndexes["Description"]]);
-
- lineBuilder
- .Append(dataElements[TrainingDataIndexes["CombinedID"]])
- .Append('\t')
- .Append(dataElements[TrainingDataIndexes["ID"]])
- .Append('\t')
- .Append(dataElements[TrainingDataIndexes["Label"]])
- .Append('\t')
- .Append(dataElements[TrainingDataIndexes["Title"]])
- .Append('\t')
- .Append(dataElements[TrainingDataIndexes["Description"]])
- .Append('\t')
- .Append(dataElements[TrainingDataIndexes["Author"]])
- .Append('\t')
- .Append(isPrBit)
- .Append('\t')
- .Append(mentions.Length)
- .Append('\t')
- .Append(string.Join(' ', mentions));
-
- if (includeFileColumns)
- {
- var filePaths = TrainingData.SplitFilePaths(dataElements[TrainingDataIndexes["FilePaths"]] ?? string.Empty)
- .Where(path => !string.IsNullOrWhiteSpace(path))
- .ToArray();
-
- AddFileInformationToLine(lineBuilder, filePaths, (isPrBit == 1));
- }
-
- // Emit the current line.
-
- yield return lineBuilder.ToString();
- }
-
- // Reset state for the next iteration.
-
- lineBuilder.Clear();
- line = dataFileReader.ReadLine();
-
- ++lineCount;
- }
- }
-
- private static string[] GetUserMentions(string description) =>
- UserMentionsExpression
- .Matches(description)
- .Select(match => match.Value)
- .ToArray();
-
- private static void AddFileInformationToLine(
- StringBuilder lineBuilder,
- string[] filePaths,
- bool isPullRequest)
- {
- // If the line is not being added for a pull request or there were no files, then file
- // information will not be included. Add empty placeholder slugs and take no further action.
-
- if ((!isPullRequest) || filePaths.Length == 0)
- {
- lineBuilder
- .Append('\t')
- .Append(0)
- .Append('\t', 5);
-
- return;
- }
-
- var segmentedDiff = DiffHelper.SegmentDiff(filePaths);
-
- lineBuilder
- .Append('\t')
- .Append(string.Join(' ', filePaths))
- .Append('\t')
- .Append(string.Join(' ', segmentedDiff.Filenames))
- .Append('\t')
- .Append(string.Join(' ', segmentedDiff.Extensions))
- .Append('\t')
- .Append(string.Join(' ', segmentedDiff.FolderNames))
- .Append('\t')
- .Append(string.Join(' ', segmentedDiff.Folders));
- }
-
- private static bool ValidateTrainingDataHeaders(string headerLine)
- {
- var index = 0;
-
- foreach (var header in headerLine.Split('\t'))
- {
- if (TrainingDataIndexes[header] != index)
- {
- return false;
- }
-
- ++index;
- }
-
- return true;
- }
-
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/Models/PullRequestWithFiles.cs b/tools/issue-labeler/src/CreateMikLabelModel/Models/PullRequestWithFiles.cs
deleted file mode 100644
index 008aab73906..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/Models/PullRequestWithFiles.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Octokit;
-
-namespace CreateMikLabelModel.Models
-{
- public class PullRequestWithFiles
- {
- public PullRequest PullRequest { get; init; }
- public string[] FilePaths { get; init; }
- public PullRequestWithFiles(PullRequest pullRequest, string[] filePaths) => (PullRequest, FilePaths) = (pullRequest, filePaths);
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/Models/RepositoryInformation.cs b/tools/issue-labeler/src/CreateMikLabelModel/Models/RepositoryInformation.cs
deleted file mode 100644
index baa029bbee2..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/Models/RepositoryInformation.cs
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace CreateMikLabelModel.Models
-{
- public record RepositoryInformation(string Owner, string Name)
- {
- ///
- /// Creates a new instance of the by parsing a repository
- /// path.
- ///
- ///
- /// The full repository path, in the format "Owner/repository-name".
- ///
- ///
- ///
- /// var info = RepositoryInformation.Parse("Azure/azure-sdk-for-net");
- ///
- ///
- ///
- public static RepositoryInformation Parse(string repositoryPath)
- {
- var parts = repositoryPath.Split('/');
- return new(parts[0], parts[1]);
- }
- }
-}
diff --git a/tools/issue-labeler/src/CreateMikLabelModel/Models/TrainingDataItem.cs b/tools/issue-labeler/src/CreateMikLabelModel/Models/TrainingDataItem.cs
deleted file mode 100644
index ac977a88501..00000000000
--- a/tools/issue-labeler/src/CreateMikLabelModel/Models/TrainingDataItem.cs
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using Octokit;
-
-namespace CreateMikLabelModel.Models
-{
- public record TrainingDataItem(DateTimeOffset CreatedAt, long Identifier, string RepositoryName, string LabelName, string SegmentName, string Data)
- {
- public TrainingDataItem(string labelName, string segmentName, string repositoryName, Issue source) : this(source.CreatedAt, source.Id, repositoryName, labelName, segmentName, TrainingData.CreateTrainingData(labelName, repositoryName, source))
- {
- }
-
- public TrainingDataItem(string labelName, string segmentName, string repositoryName, PullRequestWithFiles source) : this(source.PullRequest.CreatedAt, source.PullRequest.Id, repositoryName, labelName, segmentName, TrainingData.CreateTrainingData(labelName, repositoryName, source))
- {
- }
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/FullPrediction.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/FullPrediction.cs
deleted file mode 100644
index 495127a491e..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/FullPrediction.cs
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace Hubbup.MikLabelModel
-{
- public class FullPrediction
- {
- public string PredictedLabel;
- public float Score;
- public int OriginalSchemaIndex;
-
- public FullPrediction(string predictedLabel, float score, int originalSchemaIndex)
- {
- PredictedLabel = predictedLabel;
- Score = score;
- OriginalSchemaIndex = originalSchemaIndex;
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/GitHubIssueTransformed.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/GitHubIssueTransformed.cs
deleted file mode 100644
index a6a250b3d08..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/GitHubIssueTransformed.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma warning disable 649 // We don't care about unused fields here, because they are mapped with the input file.
-
-namespace Hubbup.MikLabelModel
-{
- internal class GitHubIssueTransformed
- {
- public string ID;
- public string Area;
- public string Title;
- public string Description;
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/Hubbup.MikLabelModel.csproj b/tools/issue-labeler/src/Hubbup.MikLabelModel/Hubbup.MikLabelModel.csproj
deleted file mode 100644
index 18c62b9b1db..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/Hubbup.MikLabelModel.csproj
+++ /dev/null
@@ -1,14 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/ILabeler.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/ILabeler.cs
deleted file mode 100644
index 2e860d9f3e1..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/ILabeler.cs
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using IssueLabeler.Shared;
-using System.Threading.Tasks;
-
-namespace Hubbup.MikLabelModel
-{
- public interface ILabeler
- {
- Task DispatchLabelsAsync(string owner, string repo, int number);
- Task PredictUsingModelsFromStorageQueue(string owner, string repo, int number);
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/ILabelerLite.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/ILabelerLite.cs
deleted file mode 100644
index 5e9dde9f1ec..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/ILabelerLite.cs
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using IssueLabeler.Shared;
-using Octokit;
-using System.Threading.Tasks;
-using System;
-using System.Collections.Generic;
-
-namespace Hubbup.MikLabelModel
-{
- public interface ILabelerLite
- {
- Task> QueryLabelPrediction(int issueNumber, string title, string body, string issueUserLogin, string repositoryName, string repositoryOwnerName);
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/IMikLabelerPathProvider.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/IMikLabelerPathProvider.cs
deleted file mode 100644
index 866004f1985..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/IMikLabelerPathProvider.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace Hubbup.MikLabelModel
-{
- public interface IMikLabelerPathProvider
- {
- (string issuePath, string prPath) GetModelPath();
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/IModelHolderFactory.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/IModelHolderFactory.cs
deleted file mode 100644
index 8dfb0fca480..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/IModelHolderFactory.cs
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Hubbup.MikLabelModel;
-using Microsoft.Extensions.Configuration;
-using Microsoft.Extensions.Logging;
-using System;
-using System.Collections.Concurrent;
-
-namespace IssueLabeler.Shared.Models
-{
- public interface IModelHolderFactory
- {
- IModelHolder CreateModelHolder(string owner, string repo);
- IPredictor GetPredictor(string owner, string repo);
- }
- public class ModelHolderFactory : IModelHolderFactory
- {
- private readonly ConcurrentDictionary<(string, string), IModelHolder> _models = new ConcurrentDictionary<(string, string), IModelHolder>();
- private readonly ILogger _logger;
- private IConfiguration _configuration;
- private readonly IBackgroundTaskQueue _backgroundTaskQueue;
- public ModelHolderFactory(
- ILogger logger,
- IConfiguration configuration,
- IBackgroundTaskQueue backgroundTaskQueue)
- {
- _backgroundTaskQueue = backgroundTaskQueue;
- _configuration = configuration;
- _logger = logger;
- }
-
- public IModelHolder CreateModelHolder(string owner, string repo)
- {
- if (!IsConfigured(repo))
- return null;
- return _models.TryGetValue((owner, repo), out IModelHolder modelHolder) ?
- modelHolder :
- _models.GetOrAdd((owner, repo), InitFor(repo));
- }
-
- private bool IsConfigured(string repo)
- {
- // the following four configuration values are per repo values.
- string configSection = $"IssueModel:{repo}:BlobName";
- if (!string.IsNullOrEmpty(_configuration[configSection]))
- {
- configSection = $"IssueModel:{repo}:BlobName";
- if (!string.IsNullOrEmpty(_configuration[configSection]))
- {
- configSection = $"PrModel:{repo}:PathPrefix";
- if (!string.IsNullOrEmpty(_configuration[configSection]))
- {
- // has both pr and issue config - allowed
- configSection = $"PrModel:{repo}:BlobName";
- return !string.IsNullOrEmpty(_configuration[configSection]);
- }
- else
- {
- // has issue config only - allowed
- configSection = $"PrModel:{repo}:BlobName";
- return string.IsNullOrEmpty(_configuration[configSection]);
- }
- }
- }
- return false;
- }
-
- private IModelHolder InitFor(string repo)
- {
- var mh = new ModelHolder(_logger, _configuration, repo);
- if (!mh.LoadRequested)
- {
- _backgroundTaskQueue.QueueBackgroundWorkItem((ct) => mh.LoadEnginesAsync());
- }
- return mh;
- }
-
- public IPredictor GetPredictor(string owner, string repo)
- {
- var modelHolder = CreateModelHolder(owner, repo);
- if (modelHolder == null)
- {
- throw new InvalidOperationException($"Repo {owner}/{repo} is not yet configured for label prediction.");
- }
- if (!modelHolder.IsIssueEngineLoaded || (!modelHolder.UseIssuesForPrsToo && !modelHolder.IsPrEngineLoaded))
- {
- throw new InvalidOperationException("Issue engine must be loaded.");
- }
- return new Predictor(_logger, modelHolder);
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/IModelHolderFactoryLite.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/IModelHolderFactoryLite.cs
deleted file mode 100644
index 8f0d9c86dcb..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/IModelHolderFactoryLite.cs
+++ /dev/null
@@ -1,184 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Hubbup.MikLabelModel;
-using IssueLabeler.Shared;
-using Microsoft.Extensions.Configuration;
-using Microsoft.Extensions.Logging;
-using System;
-using System.Collections.Concurrent;
-using System.Threading;
-using System.Threading.Tasks;
-
-namespace Hubbup.MikLabelModel
-{
- public interface IModelHolderFactoryLite
- {
- Task CreateModelHolders(string owner, string repo, string[] modelConfigNames);
- Task CreateModelHolder(string owner, string repo, string modelBlobConfigName = null);
- Task GetPredictor(string owner, string repo, string modelBlobConfigName = null);
- }
- public class ModelHolderFactoryLite : IModelHolderFactoryLite
- {
- private readonly ConcurrentDictionary<(string, string, string), IModelHolder> _models = new ConcurrentDictionary<(string, string, string), IModelHolder>();
- private readonly ILogger _logger;
- private IConfiguration _configuration;
- private SemaphoreSlim _sem = new SemaphoreSlim(1,1);
-
- public ModelHolderFactoryLite(
- ILogger logger,
- IConfiguration configuration)
- {
- _configuration = configuration;
- _logger = logger;
- }
-
- public async Task CreateModelHolders(string owner, string repo, string[] modelConfigNames)
- {
- var modelHolders = new IModelHolder[modelConfigNames.Length];
- var allHeld = true;
-
- // If all of the models are already held, return them.
- for (int index = 0; index < modelConfigNames.Length; ++index)
- {
- if (_models.TryGetValue((owner, repo, modelConfigNames[index]), out var holder))
- {
- modelHolders[index] = holder;
- }
- else
- {
- // At least one model is not held. No sense in checking the rest.
- allHeld = false;
- break;
- }
- }
-
- if (allHeld)
- {
- return modelHolders;
- }
-
- // Some models need to be initialized; acquire the semaphore and initialize.
- try
- {
- if (!_sem.Wait(0))
- {
- await _sem.WaitAsync().ConfigureAwait(false);
- }
-
- for (int index = 0; index < modelConfigNames.Length; ++index)
- {
- modelHolders[index] = await CreateModelHolderInternal(owner, repo, modelConfigNames[index]);
- }
- }
- finally
- {
- if (_sem.CurrentCount <= 0)
- {
- _sem.Release();
- }
- }
-
- return modelHolders;
- }
-
- public async Task CreateModelHolder(string owner, string repo, string modelBlobConfigName = null)
- {
- if (_models.TryGetValue((owner, repo, modelBlobConfigName), out var modelHolder))
- {
- return modelHolder;
- }
-
- try
- {
- if (!_sem.Wait(0))
- {
- await _sem.WaitAsync().ConfigureAwait(false);
- }
-
- return await CreateModelHolderInternal(owner, repo, modelBlobConfigName).ConfigureAwait(false);
- }
- finally
- {
- if (_sem.CurrentCount <= 0)
- {
- _sem.Release();
- }
- }
- }
-
- public async Task CreateModelHolderInternal(string owner, string repo, string modelBlobConfigName)
- {
- IModelHolder modelHolder = null;
-
- if (IsConfigured(repo))
- {
- if (_models.TryGetValue((owner, repo, modelBlobConfigName), out modelHolder))
- {
- return modelHolder;
- }
-
- modelHolder = await InitFor(repo, modelBlobConfigName);
- _models.GetOrAdd((owner, repo, modelBlobConfigName), modelHolder);
- }
-
- return modelHolder;
- }
-
- public async Task GetPredictor(string owner, string repo, string modelBlobConfigName = null)
- {
- var modelHolder = await CreateModelHolder(owner, repo, modelBlobConfigName);
- if (modelHolder == null)
- {
- throw new InvalidOperationException($"Repo {owner}/{repo} is not yet configured for label prediction.");
- }
- if (!modelHolder.IsIssueEngineLoaded || (!modelHolder.UseIssuesForPrsToo && !modelHolder.IsPrEngineLoaded))
- {
- throw new InvalidOperationException("Issue engine must be loaded.");
- }
- return new Predictor(_logger, modelHolder) { ModelName = modelBlobConfigName };
- }
-
- private bool IsConfigured(string repo)
- {
- // the following four configuration values are per repo values.
- string configSection = $"IssueModel.{repo.Replace("-", "_")}.BlobConfigNames";
- if (string.IsNullOrEmpty(_configuration[configSection]))
- {
- configSection = $"IssueModel:{repo}:BlobName";
- if (!string.IsNullOrEmpty(_configuration[configSection]))
- {
- configSection = $"IssueModel:{repo}:BlobName";
- if (!string.IsNullOrEmpty(_configuration[configSection]))
- {
- configSection = $"PrModel:{repo}:PathPrefix";
- if (!string.IsNullOrEmpty(_configuration[configSection]))
- {
- // has both pr and issue config - allowed
- configSection = $"PrModel:{repo}:BlobName";
- return !string.IsNullOrEmpty(_configuration[configSection]);
- }
- else
- {
- // has issue config only - allowed
- configSection = $"PrModel:{repo}:BlobName";
- return string.IsNullOrEmpty(_configuration[configSection]);
- }
- }
- }
- }
- else { return true; }
- return false;
- }
-
- private async Task InitFor(string repo, string modelBlobConfigName = null)
- {
- var mh = new ModelHolder(_logger, _configuration, repo, modelBlobConfigName);
- if (!mh.LoadRequested)
- {
- await mh.LoadEnginesAsync();
- }
- return mh;
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/Labeler.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/Labeler.cs
deleted file mode 100644
index 735a86a4060..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/Labeler.cs
+++ /dev/null
@@ -1,548 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using IssueLabeler.Shared;
-using IssueLabeler.Shared.Models;
-using Microsoft.Extensions.Configuration;
-using Microsoft.Extensions.Logging;
-using Octokit;
-using System;
-using System.Collections.Concurrent;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Linq;
-using System.Net.Http;
-using System.Text.Json;
-using System.Text.RegularExpressions;
-using System.Threading.Tasks;
-
-namespace Hubbup.MikLabelModel
-{
- public class Labeler : ILabeler
- {
- private IQueueHelper _queueHelper;
- private Regex _regex;
- private readonly Regex _regexIssueMatch;
- private readonly IDiffHelper _diffHelper;
- private readonly ILogger _logger;
- private readonly IHttpClientFactory _httpClientFactory;
- private readonly IModelHolderFactory _modelHolderFactory;
- private readonly IConfiguration _configuration;
- private readonly bool _useIssueLabelerForPrsToo;
- private readonly IGitHubClientWrapper _gitHubClientWrapper;
- private readonly IBackgroundTaskQueue _backgroundTaskQueue;
-
- public Labeler(
- IQueueHelper queueHelper,
- IConfiguration configuration,
- IHttpClientFactory httpClientFactory,
- ILogger logger,
- IBackgroundTaskQueue backgroundTaskQueue,
- IGitHubClientWrapper gitHubClientWrapper,
- IModelHolderFactory modelHolderFactory,
- IDiffHelper diffHelper)
- {
- _queueHelper = queueHelper;
- _backgroundTaskQueue = backgroundTaskQueue;
- _gitHubClientWrapper = gitHubClientWrapper;
- _diffHelper = diffHelper;
- _regexIssueMatch = new Regex(@"[Ff]ix(?:ed|es|)( )+#(\d+)");
- _httpClientFactory = httpClientFactory;
- _logger = logger;
- _configuration = configuration;
- _useIssueLabelerForPrsToo = configuration.GetSection(("UseIssueLabelerForPrsToo")).Get();
- _modelHolderFactory = modelHolderFactory;
- }
-
- public async Task PredictUsingModelsFromStorageQueue(string owner, string repo, int number)
- {
- if (_regex == null)
- {
- _regex = new Regex(@"@[a-zA-Z0-9_//-]+");
- }
- var predictor = _modelHolderFactory.GetPredictor(owner, repo);
-
- var iop = await _gitHubClientWrapper.GetIssue(owner, repo, number);
- bool isPr = iop.PullRequest != null;
-
- string body = iop.Body ?? string.Empty;
- var userMentions = _regex.Matches(body).Select(x => x.Value).ToArray();
- LabelSuggestion labelSuggestion = null;
-
- if (isPr && !_useIssueLabelerForPrsToo)
- {
- var prModel = await CreatePullRequest(owner, repo, iop.Number, iop.Title, iop.Body, userMentions, iop.User.Login);
- labelSuggestion = await predictor.Predict(prModel);
- _logger.LogInformation("predicted with pr model the new way");
- _logger.LogInformation(string.Join(",", labelSuggestion.LabelScores.Select(x => x.LabelName)));
- return labelSuggestion;
- }
- var issueModel = CreateIssue(iop.Number, iop.Title, iop.Body, userMentions, iop.User.Login);
- labelSuggestion = await predictor.Predict(issueModel);
- _logger.LogInformation("predicted with issue model the new way");
- _logger.LogInformation(string.Join(",", labelSuggestion.LabelScores.Select(x => x.LabelName)));
- return labelSuggestion;
- }
-
-
- public Task DispatchLabelsAsync(string owner, string repo, int number)
- {
- var tasks = new List();
- tasks.Add(InnerTask(owner, repo, number));
- return tasks.First();
- }
-
- private readonly ConcurrentDictionary<(string, string), LabelerOptions> _options =
- new ConcurrentDictionary<(string, string), LabelerOptions>();
-
- private LabelerOptions GetOptionsFor(string owner, string repo)
- {
- try
- {
- return _options.TryGetValue((owner, repo), out LabelerOptions options) ?
- options :
- _options.GetOrAdd((owner, repo), new LabelerOptions()
- {
- LabelRetriever = new LabelRetriever(owner, repo),
- PredictionUrl = string.Format(
- CultureInfo.InvariantCulture,
- "{0}/api/WebhookIssue/{1}/{2}/", _configuration[$"{owner}:{repo}:prediction_url"],
- owner, repo),
- Threshold = double.Parse(_configuration[$"{owner}:{repo}:threshold"]),
- CanUpdateIssue = _configuration.GetSection($"{owner}:{repo}:can_update_labels").Get(),
- CanCommentOnIssue = _configuration.GetSection($"{owner}:{repo}:can_comment_on").Get()
- });
- }
- catch
- {
- // the repo is not configured, return null to skip
- _logger.LogError($"{owner}/{repo} is not yet configured.");
- return null;
- }
- }
-
- private class LabelerOptions
- {
- public ILabelRetriever LabelRetriever { get; set; }
- public string PredictionUrl { get; set; }
- public double Threshold { get; set; }
- public bool CanCommentOnIssue { get; set; }
- public bool CanUpdateIssue { get; set; }
- }
-
- private async Task InnerTask(string owner, string repo, int number)
- {
- var options = GetOptionsFor(owner, repo);
- if (options == null)
- {
- return;
- }
- var labelRetriever = options.LabelRetriever;
- string msg = $"! dispatcher app - started query for {owner}/{repo}#{number}";
- _logger.LogInformation(msg);
-
- var iop = await _gitHubClientWrapper.GetIssue(owner, repo, number);
-
- var labels = new HashSet();
- GithubObjectType issueOrPr = iop.PullRequest != null ? GithubObjectType.PullRequest : GithubObjectType.Issue;
-
- if (labelRetriever.ShouldSkipUpdatingLabels(iop.User.Login))
- {
- _logger.LogInformation($"! dispatcher app - skipped for racing for {issueOrPr} {number}.");
- return;
- }
-
- // get non area labels
- labels = await GetNonAreaLabelsAsync(labelRetriever, owner, repo, iop);
-
- bool foundArea = false;
- string theFoundLabel = default;
- if (!labelRetriever.SkipPrediction)
- {
- // find shortcut to get label
- if (iop.PullRequest != null)
- {
- string body = iop.Body ?? string.Empty;
- if (labelRetriever.AllowTakingLinkedIssueLabel)
- {
- (string label, int number) linkedIssue = await GetAnyLinkedIssueLabel(owner, repo, body);
- if (!string.IsNullOrEmpty(linkedIssue.label))
- {
- _logger.LogInformation($"! dispatcher app - PR number {iop.Number} fixes issue number {linkedIssue.number} with area label {linkedIssue.label}.");
- foundArea = true;
- theFoundLabel = linkedIssue.label;
- }
- }
- }
-
- // then try ML prediction
- if (!foundArea)
- {
- var labelSuggestion = await GetLabelSuggestion(options.PredictionUrl, owner, repo, number);
- if (labelSuggestion == null)
- {
- _backgroundTaskQueue.QueueBackgroundWorkItem((ct) => _queueHelper.InsertMessageTask($"TODO - Dispatch labels for: /{owner}/{repo}#{number}"));
- return;
- }
- var topChoice = labelSuggestion.LabelScores.OrderByDescending(x => x.Score).First();
- if (labelRetriever.PreferManualLabelingFor(topChoice.LabelName))
- {
- _logger.LogInformation($"# dispatcher app - skipped: prefer manual prediction instead.");
- }
- else if (topChoice.Score >= options.Threshold || labelRetriever.OkToIgnoreThresholdFor(topChoice.LabelName))
- {
- foundArea = true;
- theFoundLabel = topChoice.LabelName;
- }
- else
- {
- _logger.LogInformation($"! dispatcher app - The Model was not able to assign the label to the {issueOrPr} {number} confidently.");
- }
- }
- }
- await UpdateTask(options, owner, repo, number, foundArea, labels, theFoundLabel, issueOrPr, labelRetriever);
- }
-
- private async Task UpdateTask(
- LabelerOptions options,
- string owner, string repo,
- int number,
- bool foundArea,
- HashSet labels,
- string theFoundLabel,
- GithubObjectType issueOrPr,
- ILabelRetriever labelRetriever)
- {
-
- if (labelRetriever.AddDelayBeforeUpdatingLabels)
- {
- // to avoid race with dotnet-bot
- await Task.Delay(TimeSpan.FromSeconds(10));
- }
-
- // get iop again
- var iop = await _gitHubClientWrapper.GetIssue(owner, repo, number);
-
- var existingLabelList = iop?.Labels?.Where(x => !string.IsNullOrEmpty(x.Name)).Select(x => x.Name).ToList();
- bool issueMissingAreaLabel = !existingLabelList.Where(x => x.StartsWith("area-", StringComparison.OrdinalIgnoreCase)).Any();
-
- // update section
- if (labels.Count > 0 || (foundArea && issueMissingAreaLabel))
- {
- //var issueUpdate = iop.ToUpdate();
- var issueUpdate = new IssueUpdate();
-
- if (foundArea && issueMissingAreaLabel)
- {
- // no area label yet
- issueUpdate.AddLabel(theFoundLabel);
- }
-
- var existingLabelNames = existingLabelList.ToHashSet();
- foreach (var newLabel in labels)
- {
- if (!existingLabelNames.Contains(newLabel))
- {
- issueUpdate.AddLabel(newLabel);
- }
- }
-
- if (options.CanUpdateIssue && issueUpdate.Labels != null && issueUpdate.Labels.Count > 0)
- {
- issueUpdate.Milestone = iop.Milestone?.Number; // The number of milestone associated with the issue.
- foreach (var existingLabel in existingLabelNames)
- {
- issueUpdate.AddLabel(existingLabel);
- }
- await _gitHubClientWrapper.UpdateIssue(owner, repo, number, issueUpdate);
- }
- else if (!options.CanUpdateIssue && issueUpdate.Labels != null && issueUpdate.Labels.Count > 0)
- {
- _logger.LogInformation($"! skipped updating labels for {issueOrPr} {number}. would have become: {string.Join(",", issueUpdate.Labels)}");
- }
- else
- {
- _logger.LogInformation($"! dispatcher app - No update made to labels for {issueOrPr} {number}.");
- }
- }
-
- // comment section
- if (options.CanCommentOnIssue)
- {
- foreach (var labelFound in labels)
- {
- if (!string.IsNullOrEmpty(labelRetriever.CommentFor(labelFound)))
- {
- await _gitHubClientWrapper.CommentOn(owner, repo, iop.Number, labelRetriever.CommentFor(labelFound));
- }
- }
-
- // if newlabels has no area-label and existing does not also. then comment
- if (!foundArea && issueMissingAreaLabel && labelRetriever.CommentWhenMissingAreaLabel)
- {
- if (issueOrPr == GithubObjectType.Issue)
- {
- await _gitHubClientWrapper.CommentOn(owner, repo, iop.Number, labelRetriever.MessageToAddAreaLabelForIssue);
- }
- else
- {
- await _gitHubClientWrapper.CommentOn(owner, repo, iop.Number, labelRetriever.MessageToAddAreaLabelForPr);
- }
- }
- }
- else
- {
- _logger.LogInformation($"! dispatcher app - No comment made to labels for {issueOrPr} {number}.");
- }
- }
-
- private async Task GetLabelSuggestion(string partUrl, string owner, string repo, int number)
- {
- var predictionUrl = @$"{partUrl}{number}";
- var request = new HttpRequestMessage(HttpMethod.Get, predictionUrl);
- var client = _httpClientFactory.CreateClient();
- var response = await client.SendAsync(request);
-
- if (response.IsSuccessStatusCode)
- {
- using var responseStream = await response.Content.ReadAsStreamAsync();
- var remotePrediction = await JsonSerializer.DeserializeAsync(responseStream, new JsonSerializerOptions { PropertyNameCaseInsensitive = true });
- var predictionList = remotePrediction.LabelScores.Select(ls => new LabelScore()
- {
- ScoredLabel = new ScoredLabel { LabelName = ls.LabelName, Score = ls.Score },
- Label = default
- }).Select(x => x.ScoredLabel).ToList();
-
- _logger.LogInformation("! received prediction: {0}", string.Join(",", predictionList.Select(x => x.LabelName)));
-
- return new LabelSuggestion()
- {
- LabelScores = predictionList,
- };
- }
- else
- {
- // queue task again until the suggestion comes back safe
- _logger.LogError($"Could not retrieve label predictions for this issue. Remote HTTP prediction status code {response.StatusCode} from URL '{predictionUrl}'.");
- return null;
- }
- }
-
- private async Task<(string label, int number)> GetAnyLinkedIssueLabel(string owner, string repo, string body)
- {
- Match match = _regexIssueMatch.Match(body);
- if (match.Success && int.TryParse(match.Groups[2].Value, out int issueNumber))
- {
- return (await TryGetIssueLabelForPrAsync(owner, repo, issueNumber), issueNumber);
- }
- return await Task.FromResult<(string, int)>(default);
- }
-
- private async Task> GetNonAreaLabelsAsync(ILabelRetriever labelRetriever, string owner, string repo, Octokit.Issue iop)
- {
- if (_regex == null)
- {
- _regex = new Regex(@"@[a-zA-Z0-9_//-]+");
- }
- string body = iop.Body ?? string.Empty;
- var userMentions = _regex.Matches(body).Select(x => x.Value).ToArray();
- GitHubIssue iopModel = null;
- if (iop.PullRequest != null)
- {
- iopModel = await CreatePullRequest(owner, repo, iop.Number, iop.Title, iop.Body, userMentions, iop.User.Login);
- }
- else
- {
- iopModel = CreateIssue(iop.Number, iop.Title, iop.Body, userMentions, iop.User.Login);
- }
- return labelRetriever.GetNonAreaLabelsForIssueAsync(iopModel);
- }
-
- private static GitHubIssue CreateIssue(int number, string title, string body, string[] userMentions, string author)
- {
- return new GitHubIssue()
- {
- ID = number,
- Title = title,
- Description = body,
- IsPR = 0,
- Author = author,
- UserMentions = string.Join(' ', userMentions),
- NumMentions = userMentions.Length
- };
- }
-
- private async Task CreatePullRequest(string owner, string repo, int number, string title, string body, string[] userMentions, string author)
- {
- var pr = new GitHubPullRequest()
- {
- ID = number,
- Title = title,
- Description = body,
- IsPR = 1,
- Author = author,
- UserMentions = string.Join(' ', userMentions),
- NumMentions = userMentions.Length,
- };
- IReadOnlyList prFiles = await _gitHubClientWrapper.GetPullRequestFiles(owner, repo, number);
- if (prFiles.Count != 0)
- {
- string[] filePaths = prFiles.Select(x => x.FileName).ToArray();
- var segmentedDiff = _diffHelper.SegmentDiff(filePaths);
- pr.Files = string.Join(' ', segmentedDiff.FileDiffs);
- pr.Filenames = string.Join(' ', segmentedDiff.Filenames);
- pr.FileExtensions = string.Join(' ', segmentedDiff.Extensions);
- pr.Folders = _diffHelper.FlattenWithWhitespace(segmentedDiff.Folders);
- pr.FolderNames = _diffHelper.FlattenWithWhitespace(segmentedDiff.FolderNames);
- }
- pr.FileCount = prFiles.Count;
- return pr;
- }
-
- private async Task DoesPrAddNewApiAsync(string owner, string repo, int prNumber)
- {
- var pr = await _gitHubClientWrapper.GetPullRequest(owner, repo, prNumber);
- var diff = new Uri(pr.DiffUrl);
- var httpclient = _httpClientFactory.CreateClient();
- // TODO: fix failure here seen in logs.
- var response = await httpclient.GetAsync(diff.LocalPath);
- response.EnsureSuccessStatusCode();
- var content = await response.Content.ReadAsStringAsync();
- return TakeDiffContentReturnMeaning(content.Split("\n"));
- }
-
- private async Task TryGetIssueLabelForPrAsync(string owner, string repo, int issueNumber)
- {
- var issue = await _gitHubClientWrapper.GetIssue(owner, repo, issueNumber);
- return issue?.Labels?
- .Where(x => !string.IsNullOrEmpty(x.Name))
- .Select(x => x.Name)
- .Where(x => x.StartsWith("area-", StringComparison.OrdinalIgnoreCase)).FirstOrDefault();
- }
-
- private enum DiffContentLineReadingStatus
- {
- readyToStartOver = 0,
- expectingIndex,
- expectingTripleMinus,
- expectingTriplePlus,
- expectingDoubleAtSign
- }
-
- private bool TakeDiffContentReturnMeaning(string[] contentLines)
- {
- string curFile = string.Empty;
- var refFilesWithAdditions = new Dictionary();
- int additions = 0, deletions = 0;
- bool lookingAtRefDiff = false;
- var stat = DiffContentLineReadingStatus.readyToStartOver;
- for (int i = 0; i < contentLines.Length; i++)
- {
- var line = contentLines[i];
- switch (stat)
- {
- case DiffContentLineReadingStatus.readyToStartOver:
- if (ContainsRefChanges(line))
- {
- if (!string.IsNullOrEmpty(curFile) && additions > deletions)
- {
- refFilesWithAdditions.Add(curFile, additions - deletions);
- // reset
- additions = 0;
- deletions = 0;
- }
- lookingAtRefDiff = true;
- curFile = line.Substring(13, line.IndexOf(@".cs b/") + 3 - 13);
- stat = DiffContentLineReadingStatus.expectingIndex;
- }
- else if (line.StartsWith("diff --git"))
- {
- lookingAtRefDiff = false;
- }
- else if (lookingAtRefDiff)
- {
- if (line.StartsWith("+") && !IsUnwantedDiff(line))
- {
- additions++;
- }
- else if (line.StartsWith("-") && !IsUnwantedDiff(line))
- {
- deletions++;
- }
- }
- break;
- case DiffContentLineReadingStatus.expectingIndex:
- if (line.StartsWith("index "))
- {
- stat = DiffContentLineReadingStatus.expectingTripleMinus;
- }
- break;
- case DiffContentLineReadingStatus.expectingTripleMinus:
- if (line.StartsWith("--- "))
- {
- stat = DiffContentLineReadingStatus.expectingTriplePlus;
- }
- break;
- case DiffContentLineReadingStatus.expectingTriplePlus:
- if (line.StartsWith("+++ "))
- {
- stat = DiffContentLineReadingStatus.expectingDoubleAtSign;
- }
- break;
- case DiffContentLineReadingStatus.expectingDoubleAtSign:
- if (line.StartsWith("@@ "))
- {
- stat = DiffContentLineReadingStatus.readyToStartOver;
- }
- break;
- default:
- break;
- }
- }
- if (!string.IsNullOrEmpty(curFile) && additions > deletions)
- {
- refFilesWithAdditions.Add(curFile, additions - deletions);
- }
- return refFilesWithAdditions.Any();
- // given a diff content
- // readyToStartOver = true
- // additions = 0, deletions = 0
- // read all lines
- // for each line, if readyToStartOver and starts with diff: set expectingIndex to true
- // for each line, if expectingIndex and starts with index: set expectingTripleMinus
- // for each line, if expectingTripleMinus and starts ---: set expectingTriplePlus
- // for each line, if expectingTriplePlus and starts with +++: set expectingDoubleAtSign
- // for each line, if expectingTriplePlus and starts with @@: set readyToStartOver
- // for each line, if readyToStartOver and starts with +: additions++ and if starts with - deletions++
- // for each line, if readyToStartOver and starts with +: additions++ and if starts with - deletions++
- // for each line, if readyToStartOver and starts with diff: ... (already planned for)
- //
-
-
- }
-
- private bool IsUnwantedDiff(string line)
- {
- if (string.IsNullOrWhiteSpace(line.Substring(1)))
- {
- return true;
- }
- var trimmed = line.Substring(1).TrimStart();
- if (trimmed.StartsWith("[") || trimmed.StartsWith("#") || trimmed.StartsWith("//") || trimmed.StartsWith("using "))
- {
- return true;
- }
- return false;
- }
-
- private bool ContainsRefChanges(string content)
- {
- if (content.Contains(@"/ref/") && content.Contains(".cs b/src/libraries"))
- {
- return true;
- }
- return false; // diff --git a/src/libraries/(.*)/ref/(.*).cs b/src/libraries/(.*)/ref/(.*).cs
- }
-
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/LabelerLite.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/LabelerLite.cs
deleted file mode 100644
index c4f67f1d64a..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/LabelerLite.cs
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Text.RegularExpressions;
-using System.Threading.Tasks;
-using IssueLabeler.Shared;
-using Microsoft.Extensions.Configuration;
-using Microsoft.Extensions.Logging;
-using Octokit;
-
-namespace Hubbup.MikLabelModel
-{
- public class LabelerLite : ILabelerLite
- {
- private static Regex MentionsRegex { get; } = new Regex(@"@[a-zA-Z0-9_//-]+", RegexOptions.Compiled);
-
- private readonly ILogger _logger;
- private readonly IModelHolderFactoryLite _modelHolderFactory;
- private readonly IConfiguration _config;
- private const float defaultConfidenceThreshold = 0.60f;
- private const string defaultModel = "default model";
-
- public LabelerLite(
- ILogger logger,
- IModelHolderFactoryLite modelHolderFactory,
- IConfiguration config)
- {
- _logger = logger;
- _modelHolderFactory = modelHolderFactory;
- _config = config;
- }
-
- public async Task> QueryLabelPrediction(
- int issueNumber,
- string title,
- string body,
- string issueUserLogin,
- string repositoryName,
- string repositoryOwnerName)
- {
- AssertNotNullOrEmpty(title, nameof(title));
- AssertNotNullOrEmpty(body, nameof(body));
- AssertNotNullOrEmpty(issueUserLogin, nameof(issueUserLogin));
- AssertNotNullOrEmpty(repositoryName, nameof(repositoryName));
- AssertNotNullOrEmpty(repositoryOwnerName, nameof(repositoryOwnerName));
-
- _logger.LogInformation($"Predict Labels started query for {repositoryOwnerName}/{repositoryName}#{issueNumber}");
-
- // Query raw predictions
- var issueModel = CreateIssue(issueNumber, title, body, issueUserLogin);
- var predictions = await GetPredictions(repositoryOwnerName, repositoryName, issueNumber, issueModel);
-
- // Determine the confidence threshold to use for filtering predictions
- float confidenceThreshold;
-
- if (!float.TryParse(_config["ConfidenceThreshold"], out confidenceThreshold))
- {
- confidenceThreshold = defaultConfidenceThreshold;
- _logger.LogInformation($"Prediction confidence default threshold of {confidenceThreshold} will be used as no value was configured. {repositoryOwnerName}/{repositoryName}#{issueNumber}");
- }
- else
- {
- _logger.LogInformation($"Prediction confidence threshold of {confidenceThreshold} will be used. {repositoryOwnerName}/{repositoryName}#{issueNumber}");
- }
-
- // Filter predictions based on the confidence threshold.
- var predictedLabels = new List();
-
- foreach (var labelSuggestion in predictions)
- {
- var topChoice = labelSuggestion.LabelScores.OrderByDescending(x => x.Score).First();
-
- if (topChoice.Score >= confidenceThreshold)
- {
- predictedLabels.Add(topChoice.LabelName);
- }
- else
- {
- _logger.LogWarning($"Label prediction was below confidence level `{confidenceThreshold}` for Model:`{labelSuggestion.ModelConfigName ?? defaultModel}`: '{string.Join(", ", labelSuggestion.LabelScores.Select(x => $"{x.LabelName}:[{x.Score}]"))}'");
- }
- }
-
- _logger.LogInformation($"Predict Labels query for {repositoryOwnerName}/{repositoryName}#{issueNumber} suggested {predictedLabels.Count} labels.");
- return predictedLabels;
- }
-
- private async Task> GetPredictions(string owner, string repo, int number, GitHubIssue issueModel)
- {
- List predictions = new List();
- List predictors = new List();
-
- if (_config.TryGetConfigValue($"IssueModel.{repo.Replace("-", "_")}.BlobConfigNames", out var blobConfig))
- {
- var blobConfigs = blobConfig.Split(';', StringSplitOptions.RemoveEmptyEntries);
- foreach (var blobConfigName in blobConfigs)
- {
- // get a prediction for each model
- var predictor = await _modelHolderFactory.GetPredictor(owner, repo, blobConfigName);
- predictors.Add(predictor);
- }
- }
- else
- {
- // Add just the default predictor
- var predictor = await _modelHolderFactory.GetPredictor(owner, repo);
- predictors.Add(predictor);
- }
-
- foreach (var predictor in predictors)
- {
- var labelSuggestion = await predictor.Predict(issueModel);
- labelSuggestion.ModelConfigName = predictor.ModelName;
- if (labelSuggestion == null)
- {
- _logger.LogCritical($"Failed: Unable to get prediction for {owner}/{repo}#{number}. ModelName:{predictor.ModelName}");
- return null;
- }
- _logger.LogInformation($"Prediction results for {owner}/{repo}#{number}, Model:{labelSuggestion.ModelConfigName ?? defaultModel}: '{string.Join(",", labelSuggestion.LabelScores.Select(x => $"{x.LabelName}:{x.Score}"))}'");
- predictions.Add(labelSuggestion);
- }
-
- return predictions;
- }
-
- private static GitHubIssue CreateIssue(int number, string title, string body, string author)
- {
- var userMentions = MentionsRegex.Matches(body ?? string.Empty).Select(x => x.Value).ToArray();
-
- return new GitHubIssue()
- {
- ID = number,
- Title = title,
- Description = body,
- IsPR = 0,
- Author = author,
- UserMentions = string.Join(' ', userMentions),
- NumMentions = userMentions.Length
- };
- }
-
- private static void AssertNotNullOrEmpty(string value, string paramName)
- {
- if (string.IsNullOrEmpty(value))
- {
- throw new ArgumentException($"{paramName} cannot be null or empty.", paramName);
- }
- }
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerModel.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerModel.cs
deleted file mode 100644
index d4d37749394..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerModel.cs
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using IssueLabeler.Shared;
-using Microsoft.ML;
-
-namespace Hubbup.MikLabelModel
-{
- //This "Labeler" class could be used in a different End-User application (Web app, other console app, desktop app, etc.)
- public class MikLabelerModel
- {
- private readonly PredictionEngine _issuePredictionEngine;
- private readonly PredictionEngine _prPredictionEngine;
-
- public MikLabelerModel((string modelPath, string prModelPath) paths)
- {
- var modelPath = paths.modelPath;
- var prModelPath = paths.prModelPath;
- var mlContext = new MLContext(seed: 1);
-
- // Load model from file
- var trainedModel = mlContext.Model.Load(modelPath, inputSchema: out _);
- var trainedPrModel = mlContext.Model.Load(prModelPath, inputSchema: out _);
-
- _issuePredictionEngine = mlContext.Model.CreatePredictionEngine(trainedModel);
- _prPredictionEngine = mlContext.Model.CreatePredictionEngine(trainedPrModel);
- }
-
- public MikLabelerPredictor GetPredictor()
- {
- // Create prediction engine related to the loaded trained model
- return new MikLabelerPredictor(_issuePredictionEngine, _prPredictionEngine);
- }
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerPredictor.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerPredictor.cs
deleted file mode 100644
index 666c1bb2c44..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerPredictor.cs
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using IssueLabeler.Shared;
-using Microsoft.ML;
-using Microsoft.ML.Data;
-using Octokit;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text.RegularExpressions;
-
-namespace Hubbup.MikLabelModel
-{
- public class MikLabelerPredictor
- {
- private readonly PredictionEngine _predictionEngine;
- private readonly PredictionEngine _prPredictionEngine;
- private readonly Regex _regex = new Regex(@"@[a-zA-Z0-9_//-]+");
- private readonly DiffHelper _diffHelper = new DiffHelper();
-
- public MikLabelerPredictor(PredictionEngine predictionEngine,
- PredictionEngine prPredictionEngine)
- {
- _predictionEngine = predictionEngine;
- _prPredictionEngine = prPredictionEngine;
- }
-
- public LabelSuggestion PredictLabel(Issue issue, string[] filePaths = null)
- {
- var userMentions = issue.Body != null ? _regex.Matches(issue.Body).Select(x => x.Value).ToArray() : new string[0];
-
- List labelPredictions;
- if (filePaths == null)
- {
- var aspnetIssue = new GitHubIssue
- {
- ID = issue.Number,
- Title = issue.Title,
- Description = issue.Body,
- IsPR = 0,
- Author = issue.User.Login,
- UserMentions = string.Join(' ', userMentions),
- NumMentions = userMentions.Length,
- };
- var prediction = _predictionEngine.Predict(aspnetIssue);
- labelPredictions = GetBestThreePredictions(prediction, forPrs: false);
- }
- else
- {
- var segmentedDiff = _diffHelper.SegmentDiff(filePaths);
- var aspnetIssue = new GitHubPullRequest
- {
- ID = issue.Number,
- Title = issue.Title,
- Description = issue.Body,
- IsPR = 1,
- Author = issue.User.Login,
- UserMentions = string.Join(' ', userMentions),
- NumMentions = userMentions.Length,
- FileCount = filePaths.Length,
- Files = string.Join(' ', segmentedDiff.FileDiffs),
- Filenames = string.Join(' ', segmentedDiff.Filenames),
- FileExtensions = string.Join(' ', segmentedDiff.Extensions),
- FolderNames = _diffHelper.FlattenWithWhitespace(segmentedDiff.FolderNames),
- Folders = _diffHelper.FlattenWithWhitespace(segmentedDiff.Folders)
- };
- var prediction = _prPredictionEngine.Predict(aspnetIssue);
- labelPredictions = GetBestThreePredictions(prediction, forPrs: true);
- }
-
- return new LabelSuggestion
- {
- LabelScores = labelPredictions,
- };
- }
-
- public static List GetBestThreePredictions(float[] scores, VBuffer> slotNames)
- {
- var topThreeScores = GetIndexesOfTopScores(scores, 3);
-
- return new List
- {
- new ScoredLabel {LabelName=slotNames.GetItemOrDefault(topThreeScores[0]).ToString(), Score = scores[topThreeScores[0]] },
- new ScoredLabel {LabelName=slotNames.GetItemOrDefault(topThreeScores[1]).ToString(), Score = scores[topThreeScores[1]] },
- new ScoredLabel {LabelName=slotNames.GetItemOrDefault(topThreeScores[2]).ToString(), Score = scores[topThreeScores[2]] },
- };
- }
-
- private List GetBestThreePredictions(GitHubIssuePrediction prediction, bool forPrs)
- {
- var scores = prediction.Score;
-
- VBuffer> slotNames = default;
- if (forPrs)
- {
- _prPredictionEngine.OutputSchema[nameof(GitHubIssuePrediction.Score)].GetSlotNames(ref slotNames);
- }
- else
- {
- _predictionEngine.OutputSchema[nameof(GitHubIssuePrediction.Score)].GetSlotNames(ref slotNames);
- }
-
- var topThreeScores = GetIndexesOfTopScores(scores, 3);
-
- return new List
- {
- new ScoredLabel {LabelName=slotNames.GetItemOrDefault(topThreeScores[0]).ToString(), Score = scores[topThreeScores[0]] },
- new ScoredLabel {LabelName=slotNames.GetItemOrDefault(topThreeScores[1]).ToString(), Score = scores[topThreeScores[1]] },
- new ScoredLabel {LabelName=slotNames.GetItemOrDefault(topThreeScores[2]).ToString(), Score = scores[topThreeScores[2]] },
- };
- }
-
- private static IReadOnlyList GetIndexesOfTopScores(float[] scores, int n)
- {
- var indexedScores = scores
- .Zip(Enumerable.Range(0, scores.Length), (score, index) => new IndexedScore(index, score));
-
- var indexedScoresSortedByScore = indexedScores
- .OrderByDescending(indexedScore => indexedScore.Score);
-
- return indexedScoresSortedByScore
- .Take(n)
- .Select(indexedScore => indexedScore.Index)
- .ToList()
- .AsReadOnly();
- }
-
- private struct IndexedScore
- {
- public IndexedScore(int index, float score) => (Index, Score) = (index, score);
-
- public int Index { get; }
- public float Score { get; }
- }
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerProvider.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerProvider.cs
deleted file mode 100644
index 94164b5967f..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerProvider.cs
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.Extensions.Logging;
-using System.Collections.Concurrent;
-using System.Diagnostics;
-
-namespace Hubbup.MikLabelModel
-{
- public class MikLabelerProvider
- {
- private readonly ConcurrentDictionary<(string, string), MikLabelerModel> _mikLabelers = new ConcurrentDictionary<(string, string), MikLabelerModel>();
- private readonly ILogger _logger;
-
- public MikLabelerProvider(ILogger logger)
- {
- _logger = logger;
- }
-
- public MikLabelerModel GetMikLabeler(IMikLabelerPathProvider pathProvider)
- {
- var paths = pathProvider.GetModelPath();
- return _mikLabelers.GetOrAdd(
- paths,
- p =>
- {
- var stopwatch = new Stopwatch();
- stopwatch.Start();
- var model = new MikLabelerModel(p);
- stopwatch.Stop();
- _logger.LogInformation("Creating new MikLabelerModel for paths {PATH} and {PR_PATH} in {TIME}ms", p.Item1, p.Item2, stopwatch.ElapsedMilliseconds);
- return model;
- });
- }
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerStringPathProvider.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerStringPathProvider.cs
deleted file mode 100644
index fb9cdee0302..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/MikLabelerStringPathProvider.cs
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace Hubbup.MikLabelModel
-{
- public class MikLabelerStringPathProvider : IMikLabelerPathProvider
- {
- private readonly string _path;
- private readonly string _prPath;
-
- public MikLabelerStringPathProvider(string issuePath, string prPath)
- {
- _path = issuePath;
- _prPath = prPath;
- }
-
- (string issuePath, string prPath) IMikLabelerPathProvider.GetModelPath()
- {
- return (_path, _prPath);
- }
- }
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/MyTrainerStrategy.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/MyTrainerStrategy.cs
deleted file mode 100644
index 64536d7ae08..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/MyTrainerStrategy.cs
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-namespace Hubbup.MikLabelModel
-{
- public enum MyTrainerStrategy
- {
- SdcaMultiClassTrainer = 1,
- OVAAveragedPerceptronTrainer = 2,
- };
-}
diff --git a/tools/issue-labeler/src/Hubbup.MikLabelModel/Predictor.cs b/tools/issue-labeler/src/Hubbup.MikLabelModel/Predictor.cs
deleted file mode 100644
index ddce0c8b65b..00000000000
--- a/tools/issue-labeler/src/Hubbup.MikLabelModel/Predictor.cs
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using IssueLabeler.Shared;
-using Microsoft.Extensions.Logging;
-using Microsoft.ML;
-using Microsoft.ML.Data;
-using System;
-using System.Linq;
-using System.Threading;
-using System.Threading.Tasks;
-
-namespace Hubbup.MikLabelModel
-{
- public class Predictor : IPredictor
- {
- private static SemaphoreSlim sem = new SemaphoreSlim(1);
- private readonly ILogger logger;
- private readonly IModelHolder modelHolder;
-
- public string ModelName { get; set; }
-
- public Predictor(ILogger logger, IModelHolder modelHolder)
- {
- this.logger = logger;
- this.modelHolder = modelHolder;
- }
-
- public Task Predict(GitHubIssue issue)
- {
- return Predict(issue, modelHolder.IssuePredEngine, logger);
- }
-
- public Task Predict(GitHubPullRequest issue)
- {
- if (modelHolder.UseIssuesForPrsToo)
- {
- return Predict(issue, modelHolder.IssuePredEngine, logger);
- }
- return Predict(issue, modelHolder.PrPredEngine, logger);
- }
-
- private static async Task Predict(
- T issueOrPr,
- PredictionEngine predEngine,
- ILogger logger)
- where T : GitHubIssue
- {
- if (predEngine == null)
- {
- throw new InvalidOperationException("expected prediction engine loaded.");
- }
- GitHubIssuePrediction prediction;
- bool acquired = false;
-
- try
- {
- await sem.WaitAsync();
- acquired = true;
- prediction = predEngine.Predict(issueOrPr);
- }
- finally
- {
- if (acquired)
- {
- sem.Release();
- }
- }
-
- VBuffer> slotNames = default;
- predEngine.OutputSchema[nameof(GitHubIssuePrediction.Score)].GetSlotNames(ref slotNames);
-
- float[] probabilities = prediction.Score;
- var labelPredictions = MikLabelerPredictor.GetBestThreePredictions(probabilities, slotNames);
-
- float maxProbability = probabilities.Max();
- logger.LogInformation($"MaxProbability: {maxProbability} for #{issueOrPr.ID} - '{issueOrPr.Title}'");
- return new LabelSuggestion
- {
- LabelScores = labelPredictions,
- };
- }
- }
-}
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/AIOutput.cs b/tools/issue-labeler/src/IssueLabeler.Shared/AIOutput.cs
deleted file mode 100644
index 6cea611df25..00000000000
--- a/tools/issue-labeler/src/IssueLabeler.Shared/AIOutput.cs
+++ /dev/null
@@ -1,15 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
-namespace IssueLabeler.Shared
-{
- public class AIOutput
- {
- public string Category { get; set; }
- public string Service { get; set; }
- public string Response { get; set; }
- }
-}
diff --git a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkLabel.cs b/tools/issue-labeler/src/IssueLabeler.Shared/AzureSdkLabel.cs
similarity index 80%
rename from tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkLabel.cs
rename to tools/issue-labeler/src/IssueLabeler.Shared/AzureSdkLabel.cs
index 7f6dce4e5c9..ebedc6ae2e9 100644
--- a/tools/issue-labeler/src/Azure.Sdk.Labels/AzureSdkLabel.cs
+++ b/tools/issue-labeler/src/IssueLabeler.Shared/AzureSdkLabel.cs
@@ -1,12 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
-using System;
-using Octokit;
+using IssueLabeler.Shared.Models;
-namespace Azure.Sdk.LabelTrainer
+namespace IssueLabeler.Shared
{
- internal static class AzureSdkLabel
+ public static class AzureSdkLabel
{
public static bool IsServiceLabel(Label label) =>
string.Equals(label.Color, "e99695", StringComparison.InvariantCultureIgnoreCase);
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/BackgroundTaskQueue.cs b/tools/issue-labeler/src/IssueLabeler.Shared/BackgroundTaskQueue.cs
deleted file mode 100644
index bbb73a96dc4..00000000000
--- a/tools/issue-labeler/src/IssueLabeler.Shared/BackgroundTaskQueue.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.Extensions.Logging;
-using System.Collections.Concurrent;
-
-namespace IssueLabeler.Shared
-{
- public interface IBackgroundTaskQueue
- {
- void QueueBackgroundWorkItem(Func workItem);
-
- Task> DequeueAsync(
- CancellationToken cancellationToken);
- }
-
- public class BackgroundTaskQueue : IBackgroundTaskQueue
- {
- private readonly ILogger _logger;
- private ConcurrentQueue> _workItems =
- new ConcurrentQueue>();
- private SemaphoreSlim _signal = new SemaphoreSlim(0);
-
- public BackgroundTaskQueue(
- ILogger logger)
- {
- _logger = logger;
- }
-
- public void QueueBackgroundWorkItem(
- Func workItem)
- {
- if (workItem == null)
- {
- throw new ArgumentNullException(nameof(workItem));
- }
-
- _workItems.Enqueue(workItem);
- _signal.Release();
- }
-
- public async Task> DequeueAsync(
- CancellationToken cancellationToken)
- {
- await _signal.WaitAsync(cancellationToken);
- _workItems.TryDequeue(out var workItem);
- _logger.LogInformation("dequeued work item");
-
- return workItem;
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/ConfigHelper.cs b/tools/issue-labeler/src/IssueLabeler.Shared/ConfigHelper.cs
deleted file mode 100644
index a387cd1448d..00000000000
--- a/tools/issue-labeler/src/IssueLabeler.Shared/ConfigHelper.cs
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.Extensions.Configuration;
-
-namespace IssueLabeler.Shared
-{
- public static class ConfigHelper
- {
- public static bool TryGetConfigValue(this IConfiguration config, string configName, out string? configValue, string? defaultValue = null)
- {
-
- if (string.IsNullOrEmpty(config[configName]))
- {
- configValue = defaultValue;
- return defaultValue != null;
- }
- configValue = config[configName];
- return true;
- }
- }
-}
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/DiffHelper.cs b/tools/issue-labeler/src/IssueLabeler.Shared/DiffHelper.cs
deleted file mode 100644
index 35232e4ecb5..00000000000
--- a/tools/issue-labeler/src/IssueLabeler.Shared/DiffHelper.cs
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using System.Diagnostics;
-using System.Text;
-
-namespace IssueLabeler.Shared
-{
- public struct SegmentedDiff
- {
- public string[] FileDiffs { get; set; }
- public IEnumerable Filenames { get; set; }
- public IEnumerable Extensions { get; set; }
- public Dictionary Folders { get; set; }
- public Dictionary FolderNames { get; set; }
- public bool AddDocInfo { get; set; }
- public bool PossiblyExtensionsLabel { get; set; }
- }
-
- public class DiffHelper : IDiffHelper
- {
- ///
- /// name of files taken from fileDiffs
- ///
- public IEnumerable FilenamesOf(string[] fileDiffs) => fileDiffs.Select(fileWithDiff => Path.GetFileNameWithoutExtension(fileWithDiff));
-
- ///
- /// file extensions taken from fileDiffs
- ///
- public IEnumerable ExtensionsOf(string[] fileDiffs) => fileDiffs.Select(file => Path.GetExtension(file)).
- Select(extension => string.IsNullOrEmpty(extension) ? "no_extension" : extension);
-
- public SegmentedDiff SegmentDiff(string[] fileDiffs)
- {
- if (fileDiffs == null || string.IsNullOrEmpty(string.Join(';', fileDiffs)))
- {
- throw new ArgumentNullException(nameof(fileDiffs));
- }
- var folderNames = new Dictionary();
- var folders = new Dictionary();
- bool addDocInfo = false, possiblyExtensionsLabel = false;
- string folderWithDiff, subfolder;
- string[] folderNamesInPr;
- foreach (var fileWithDiff in fileDiffs)
- {
- folderWithDiff = Path.GetDirectoryName(fileWithDiff) ?? string.Empty;
- folderNamesInPr = folderWithDiff.Split(Path.DirectorySeparatorChar);
- subfolder = string.Empty;
- if (!string.IsNullOrEmpty(folderWithDiff))
- {
- foreach (var folderNameInPr in folderNamesInPr)
- {
- if (folderNameInPr.Equals("ref", StringComparison.Ordinal) &&
- subfolder.StartsWith("src" + Path.DirectorySeparatorChar + "libraries", StringComparison.Ordinal) &&
- Path.GetExtension(fileWithDiff).Equals(".cs", StringComparison.OrdinalIgnoreCase))
- {
- addDocInfo = true;
- }
- if (subfolder.StartsWith("src" + Path.DirectorySeparatorChar + "libraries" + Path.DirectorySeparatorChar + "Microsoft.Extensions.", StringComparison.Ordinal) &&
- Path.GetExtension(fileWithDiff).Equals(".cs", StringComparison.OrdinalIgnoreCase))
- {
- possiblyExtensionsLabel = true;
- }
- subfolder += folderNameInPr;
- if (folderNames.ContainsKey(folderNameInPr))
- {
- folderNames[folderNameInPr] += 1;
- }
- else
- {
- folderNames.Add(folderNameInPr, 1);
- }
- if (folders.ContainsKey(subfolder))
- {
- folders[subfolder] += 1;
- }
- else
- {
- folders.Add(subfolder, 1);
- }
- subfolder += Path.DirectorySeparatorChar;
- }
- }
- }
- return new SegmentedDiff()
- {
- FileDiffs = fileDiffs,
- Filenames = FilenamesOf(fileDiffs),
- Extensions = ExtensionsOf(fileDiffs),
- Folders = folders,
- FolderNames = folderNames,
- AddDocInfo = addDocInfo,
- PossiblyExtensionsLabel = possiblyExtensionsLabel
- };
- }
-
- ///
- /// flattens a dictionary to be repeated in a space separated format
- ///
- /// a dictionary containing text and number of times they were repeated
- /// space delimited text
- public string FlattenWithWhitespace(Dictionary folder)
- {
- var folderSb = new StringBuilder();
- foreach (var f in folder.OrderByDescending(x => x.Value))
- {
- Debug.Assert(f.Value >= 1);
- folderSb.Append(f.Key);
- for (var j = 0; j < f.Value - 1; j++)
- {
- folderSb.Append(" ").Append(f.Key);
- }
- folderSb.Append(" ");
- }
- if (folderSb.Length == 0)
- {
- return string.Empty;
- }
- folderSb.Length--;
- return folderSb.ToString();
- }
- }
-}
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/GitHubClientFactory.cs b/tools/issue-labeler/src/IssueLabeler.Shared/GitHubClientFactory.cs
deleted file mode 100644
index 86e42b03f39..00000000000
--- a/tools/issue-labeler/src/IssueLabeler.Shared/GitHubClientFactory.cs
+++ /dev/null
@@ -1,84 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Azure.Identity;
-using Azure.Security.KeyVault.Secrets;
-using GitHubJwt;
-using Microsoft.Extensions.Configuration;
-using Octokit;
-
-namespace IssueLabeler.Shared
-{
- public sealed class GitHubClientFactory
- {
- private readonly IConfiguration _configuration;
-
- public GitHubClientFactory(IConfiguration configuration)
- {
- _configuration = configuration;
- }
-
- public async Task CreateAsync()
- {
- // See: https://octokitnet.readthedocs.io/en/latest/github-apps/ for details.
-
- string localDevPAT = _configuration["GitHubDeveloperPAT"];
- if (localDevPAT != null)
- {
- return new GitHubClient(new ProductHeaderValue("GHNotif"))
- {
- Credentials = new Credentials(localDevPAT)
- };
- }
- else
- {
- var appId = Convert.ToInt32(_configuration["GitHubAppId"]);
- SecretClient secretClient = new SecretClient(new Uri(_configuration["KeyVaultUri"]), new DefaultAzureCredential());
- KeyVaultSecret secret = await secretClient.GetSecretAsync(_configuration["AppSecretName"]).ConfigureAwait(false);
- string privateKey = secret.Value;
-
-
- var privateKeySource = new PlainStringPrivateKeySource(privateKey);
- var generator = new GitHubJwtFactory(
- privateKeySource,
- new GitHubJwtFactoryOptions
- {
- AppIntegrationId = appId,
- ExpirationSeconds = 8 * 60 // 600 is apparently too high
- });
- var token = generator.CreateEncodedJwtToken();
-
- var client = CreateForToken(token, AuthenticationType.Bearer);
- await client.GitHubApps.GetAllInstallationsForCurrent();
- var installationTokenResult = await client.GitHubApps.CreateInstallationToken(long.Parse(_configuration["InstallationId"]));
-
- return CreateForToken(installationTokenResult.Token, AuthenticationType.Oauth);
- }
- }
-
- private static GitHubClient CreateForToken(string token, AuthenticationType authenticationType)
- {
- var productInformation = new ProductHeaderValue("issuelabelertemplate");
- var client = new GitHubClient(productInformation)
- {
- Credentials = new Credentials(token, authenticationType)
- };
- return client;
- }
-
- public sealed class PlainStringPrivateKeySource : IPrivateKeySource
- {
- private readonly string _key;
-
- public PlainStringPrivateKeySource(string key)
- {
- _key = key;
- }
-
- public TextReader GetPrivateKeyReader()
- {
- return new StringReader(_key);
- }
- }
- }
-}
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/GitHubClientWrapper.cs b/tools/issue-labeler/src/IssueLabeler.Shared/GitHubClientWrapper.cs
deleted file mode 100644
index 0d042a7cc1e..00000000000
--- a/tools/issue-labeler/src/IssueLabeler.Shared/GitHubClientWrapper.cs
+++ /dev/null
@@ -1,136 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-using Microsoft.Extensions.Configuration;
-using Microsoft.Extensions.Logging;
-using Octokit;
-
-namespace IssueLabeler.Shared
-{
- public interface IGitHubClientWrapper
- {
- Task GetIssue(string owner, string repo, int number);
- Task GetPullRequest(string owner, string repo, int number);
- Task> GetPullRequestFiles(string owner, string repo, int number);
- Task CommentOn(string owner, string repo, int number, string v);
- Task UpdateIssue(string owner, string repo, int number, IssueUpdate issueUpdate);
- }
-
- public class GitHubClientWrapper : IGitHubClientWrapper
- {
- private readonly ILogger _logger;
- private GitHubClient _client;
- private readonly GitHubClientFactory _gitHubClientFactory;
-
- public GitHubClientWrapper(
- ILogger logger,
- GitHubClientFactory gitHubClientFactory)
- {
- _gitHubClientFactory = gitHubClientFactory;
- _logger = logger;
-
- }
-
- // TODO add lambda to remove repetetive logic in this class
- // -> call and pass a lambda calls create, and if fails remake and call it again.
-
- public async Task GetIssue(string owner, string repo, int number)
- {
- if (_client == null)
- {
- _client = await _gitHubClientFactory.CreateAsync();
- }
- Octokit.Issue iop = null;
- try
- {
- iop = await _client.Issue.Get(owner, repo, number);
- }
- catch (Exception ex)
- {
- _logger.LogError($"ex was of type {ex.GetType()}, message: {ex.Message}");
- _client = await _gitHubClientFactory.CreateAsync();
- iop = await _client.Issue.Get(owner, repo, number);
- }
- return iop;
- }
-
- public async Task GetPullRequest(string owner, string repo, int number)
- {
- if (_client == null)
- {
- _client = await _gitHubClientFactory.CreateAsync();
- }
- Octokit.PullRequest iop = null;
- try
- {
- iop = await _client.PullRequest.Get(owner, repo, number);
- }
- catch (Exception ex)
- {
- _logger.LogError($"ex was of type {ex.GetType()}, message: {ex.Message}");
- _client = await _gitHubClientFactory.CreateAsync();
- iop = await _client.PullRequest.Get(owner, repo, number);
- }
- return iop;
- }
-
- public async Task> GetPullRequestFiles(string owner, string repo, int number)
- {
- if (_client == null)
- {
- _client = await _gitHubClientFactory.CreateAsync();
- }
- IReadOnlyList prFiles = null;
- try
- {
- prFiles = await _client.PullRequest.Files(owner, repo, number);
-
- }
- catch (Exception ex)
- {
- _logger.LogError($"ex was of type {ex.GetType()}, message: {ex.Message}");
- _client = await _gitHubClientFactory.CreateAsync();
- prFiles = await _client.PullRequest.Files(owner, repo, number);
- }
- return prFiles;
- }
-
- public async Task UpdateIssue(string owner, string repo, int number, IssueUpdate issueUpdate)
- {
- if (_client == null)
- {
- _client = await _gitHubClientFactory.CreateAsync();
- }
- try
- {
- await _client.Issue.Update(owner, repo, number, issueUpdate);
- }
- catch (Exception ex)
- {
- _logger.LogError($"ex was of type {ex.GetType()}, message: {ex.Message}");
- _client = await _gitHubClientFactory.CreateAsync();
- await _client.Issue.Update(owner, repo, number, issueUpdate);
- }
- }
-
- // lambda -> call and pass a lambda calls create, and if fails remake and call it again.
-
- public async Task CommentOn(string owner, string repo, int number, string comment)
- {
- if (_client == null)
- {
- _client = await _gitHubClientFactory.CreateAsync();
- }
- try
- {
- await _client.Issue.Comment.Create(owner, repo, number, comment);
- }
- catch (Exception ex)
- {
- _logger.LogError($"ex was of type {ex.GetType()}, message: {ex.Message}");
- _client = await _gitHubClientFactory.CreateAsync();
- await _client.Issue.Comment.Create(owner, repo, number, comment);
- }
- }
- }
-}
\ No newline at end of file
diff --git a/tools/issue-labeler/src/IssueLabeler.Shared/GitHubIssue.cs b/tools/issue-labeler/src/IssueLabeler.Shared/GitHubIssue.cs
index 05614f33b67..9fcfb6279ac 100644
--- a/tools/issue-labeler/src/IssueLabeler.Shared/GitHubIssue.cs
+++ b/tools/issue-labeler/src/IssueLabeler.Shared/GitHubIssue.cs
@@ -4,111 +4,26 @@
#pragma warning disable 649 // We don't care about unsused fields here, because they are mapped with the input file.
using Microsoft.ML.Data;
-using Octokit;
namespace IssueLabeler.Shared
{
-
- public class RepoIssueResult
- {
- public string Repo { get; set; }
- public string Owner { get; set; }
- public IReadOnlyList Issues { get; set; }
- public int TotalCount { get; set; }
- public List