diff --git a/build.proj b/build.proj index ef922a7148..786fe9304d 100644 --- a/build.proj +++ b/build.proj @@ -79,6 +79,10 @@ + + diff --git a/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs index 97db2a8a07..2ddc9c4ffa 100644 --- a/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs @@ -7,3 +7,4 @@ using System.Runtime.InteropServices; [assembly: InternalsVisibleTo("Microsoft.ML.TestFramework, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")] +[assembly: InternalsVisibleTo("Microsoft.ML.Benchmarks, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")] \ No newline at end of file diff --git a/test/Microsoft.ML.Benchmarks/BigramAndTrigramBenchMark.cs b/test/Microsoft.ML.Benchmarks/BigramAndTrigramBenchMark.cs new file mode 100644 index 0000000000..f4c947abda --- /dev/null +++ b/test/Microsoft.ML.Benchmarks/BigramAndTrigramBenchMark.cs @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+
+using BenchmarkDotNet.Attributes;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.RunTests;
+using Microsoft.ML.Runtime.Tools;
+using System.IO;
+using System.Text;
+
+namespace Microsoft.ML.Benchmarks
+{
+    /// <summary>
+    /// Writer handed to <see cref="TlcEnvironment"/> as its output writer so that the
+    /// benchmarks do not print anything to the console.
+    /// This is required for the current version of BenchmarkDotNet.
+    /// </summary>
+    internal class EmptyWriter : TextWriter
+    {
+        // Single shared instance used by every benchmark in this file.
+        internal static readonly EmptyWriter Instance = new EmptyWriter();
+
+        // NOTE(review): this deliberately reports no encoding; callers that dereference
+        // Encoding would fail - confirm nothing in the environment does so.
+        public override Encoding Encoding => null;
+    }
+
+    /// <summary>
+    /// BenchmarkDotNet benchmarks that drive MAML command lines over the WikiDetox data set:
+    /// two cross-validation (training) runs and one scoring run against a previously
+    /// trained model.
+    /// </summary>
+    public class BigramAndTrigramBenchmark
+    {
+        // Loader and transform part of the command line. It is identical for every CV run,
+        // so it is kept in one place instead of being repeated in each benchmark.
+        private const string WikiPipeline = " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns}";
+
+        // Trainer arguments appended after the shared pipeline.
+        private const string AveragedPerceptronTrainer = " tr=OVA{p=AveragedPerceptron{iter=10}}";
+        private const string LightGbmTrainer = " tr=LightGBMMulticlass{}";
+
+        // Full path of the WikiDetox training file; set by SetupTrainingSpeedTests.
+        private string _dataPath_Wiki;
+
+        // Full path of the model file loaded by the scoring benchmark; set by SetupScoringSpeedTests.
+        private string _modelPath_Wiki;
+
+        /// <summary>
+        /// Resolves the WikiDetox data file and fails fast when it has not been downloaded.
+        /// </summary>
+        /// <exception cref="FileNotFoundException">The data file does not exist.</exception>
+        [GlobalSetup(Targets = new string[] {
+            nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron),
+            nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass) })]
+        public void SetupTrainingSpeedTests()
+        {
+            _dataPath_Wiki = Path.GetFullPath(TestDatasets.WikiDetox.trainFilename);
+
+            if (!File.Exists(_dataPath_Wiki))
+            {
+                throw new FileNotFoundException(
+                    $"Could not find {_dataPath_Wiki}. Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root.",
+                    _dataPath_Wiki);
+            }
+        }
+
+        /// <summary>
+        /// Trains the OVA averaged-perceptron pipeline once, outside the measured region, so
+        /// that the scoring benchmark has a model file to load.
+        /// </summary>
+        /// <exception cref="FileNotFoundException">
+        /// The data file is missing, or the model file the scoring benchmark loads is not
+        /// present after the training run.
+        /// </exception>
+        [GlobalSetup(Target = nameof(Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron))]
+        public void SetupScoringSpeedTests()
+        {
+            SetupTrainingSpeedTests();
+
+            string workingDirectory = Directory.GetCurrentDirectory();
+            string outputModelPath = Path.Combine(workingDirectory, @"WikiModel.zip");
+            RunMaml(BuildCvCommand(_dataPath_Wiki, AveragedPerceptronTrainer) + " out={" + outputModelPath + "}");
+
+            // The scoring benchmark loads the first fold's model rather than WikiModel.zip itself.
+            _modelPath_Wiki = Path.Combine(workingDirectory, @"WikiModel.fold000.zip");
+
+            // The training run reports nothing (its output goes to EmptyWriter), so verify that
+            // the model is present instead of letting the benchmark time a failing command.
+            if (!File.Exists(_modelPath_Wiki))
+            {
+                throw new FileNotFoundException(
+                    $"Could not find {_modelPath_Wiki}. The cross-validation run in the benchmark setup did not produce the model used for scoring.",
+                    _modelPath_Wiki);
+            }
+        }
+
+        /// <summary>
+        /// Measures the <c>CV k=5</c> command with the OVA averaged-perceptron trainer.
+        /// </summary>
+        [Benchmark]
+        public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
+        {
+            RunMaml(BuildCvCommand(_dataPath_Wiki, AveragedPerceptronTrainer));
+        }
+
+        /// <summary>
+        /// Measures the <c>CV k=5</c> command with the LightGBM multiclass trainer.
+        /// </summary>
+        [Benchmark]
+        public void CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass()
+        {
+            RunMaml(BuildCvCommand(_dataPath_Wiki, LightGbmTrainer));
+        }
+
+        /// <summary>
+        /// Measures the <c>Test</c> command against the model prepared by
+        /// <see cref="SetupScoringSpeedTests"/>.
+        /// </summary>
+        [Benchmark]
+        public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
+        {
+            // This benchmark is profiling bulk scoring speed and not training speed.
+            RunMaml(@"Test data=" + _dataPath_Wiki + " in=" + _modelPath_Wiki);
+        }
+
+        // Builds the cross-validation command line for the given data file and trainer arguments.
+        private static string BuildCvCommand(string dataPath, string trainer)
+        {
+            return @"CV k=5 data=" + dataPath + WikiPipeline + trainer;
+        }
+
+        // Runs one MAML command line in a fresh, non-verbose environment whose output is discarded.
+        private static void RunMaml(string cmd)
+        {
+            using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
+            {
+                Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false);
+            }
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj index dfa673ea82..93e7eddab5 100644 --- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj +++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj @@ -13,8 +13,11 @@ + + + @@ -31,5 +34,10 @@ PreserveNewest + + PreserveNewest + \ No newline at end of file diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs index 272780021b..6e327b5c66 100644 --- a/test/Microsoft.ML.TestFramework/Datasets.cs +++ b/test/Microsoft.ML.TestFramework/Datasets.cs @@ -160,6 +160,13 @@ public static class TestDatasets loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+" }; + public static TestDataset WikiDetox = new TestDataset + { + name = "WikiDetox", + trainFilename = "external/WikiDetoxAnnotated160kRows.tsv", + testFilename = "external/WikiDetoxAnnotated160kRows.tsv" + }; + public static TestDataset winequality = new TestDataset { name = "wine",