diff --git a/build.proj b/build.proj
index ef922a7148..786fe9304d 100644
--- a/build.proj
+++ b/build.proj
@@ -79,6 +79,10 @@
+
+
diff --git a/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs
index 97db2a8a07..2ddc9c4ffa 100644
--- a/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs
+++ b/src/Microsoft.ML.Maml/Properties/AssemblyInfo.cs
@@ -7,3 +7,4 @@
using System.Runtime.InteropServices;
[assembly: InternalsVisibleTo("Microsoft.ML.TestFramework, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")]
+[assembly: InternalsVisibleTo("Microsoft.ML.Benchmarks, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")]
\ No newline at end of file
diff --git a/test/Microsoft.ML.Benchmarks/BigramAndTrigramBenchMark.cs b/test/Microsoft.ML.Benchmarks/BigramAndTrigramBenchMark.cs
new file mode 100644
index 0000000000..f4c947abda
--- /dev/null
+++ b/test/Microsoft.ML.Benchmarks/BigramAndTrigramBenchMark.cs
@@ -0,0 +1,125 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using BenchmarkDotNet.Attributes;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.RunTests;
+using Microsoft.ML.Runtime.Tools;
+using System.IO;
+using System.Text;
+
+namespace Microsoft.ML.Benchmarks
+{
+    /// <summary>
+    /// A <see cref="TextWriter"/> that overrides no Write method, passed as the output writer of
+    /// <see cref="TlcEnvironment"/> so that benchmark runs do not print anything to the console.
+    /// This is required for the current version of BenchmarkDotNet.
+    /// </summary>
+    internal class EmptyWriter : TextWriter
+    {
+        internal static readonly EmptyWriter Instance = new EmptyWriter();
+
+        // Report a real encoding instead of null so that reading TextWriter.Encoding cannot
+        // hand a null reference to the caller; nothing is ever encoded by this writer.
+        public override Encoding Encoding => Encoding.Unicode;
+    }
+
+    /// <summary>
+    /// BenchmarkDotNet benchmarks that run MAML command lines over the WikiDetox dataset: two
+    /// 5-fold cross-validation runs (OVA averaged perceptron and LightGBM multiclass) and one
+    /// Test run that scores with a model trained during setup.
+    /// </summary>
+    public class BigramAndTrigramBenchmark
+    {
+        // Loader and featurization arguments shared by every CV command below.
+        // NOTE(review): only a word extractor with ngram=2 is configured here; confirm that the
+        // "Trichar" part of the benchmark names is covered by TextTransform defaults.
+        private const string PipelineArgs = " loader=TextLoader{quote=- sparse=- col=Label:R4:0 col=rev_id:TX:1 col=comment:TX:2 col=logged_in:BL:4 col=ns:TX:5 col=sample:TX:6 col=split:TX:7 col=year:R4:3 header=+} xf=Convert{col=logged_in type=R4} xf=CategoricalTransform{col=ns} xf=TextTransform{col=FeaturesText:comment wordExtractor=NGramExtractorTransform{ngram=2}} xf=Concat{col=Features:FeaturesText,logged_in,ns}";
+
+        private const string OvaTrainerArgs = " tr=OVA{p=AveragedPerceptron{iter=10}}";
+        private const string LightGbmTrainerArgs = " tr=LightGBMMulticlass{}";
+
+        private string _dataPath_Wiki;
+        private string _modelPath_Wiki;
+        private string _foldModelPath_Wiki;
+
+        /// <summary>
+        /// Resolves the full path of the WikiDetox training file for the two CV benchmarks.
+        /// </summary>
+        /// <exception cref="FileNotFoundException">The data file does not exist on disk.</exception>
+        [GlobalSetup(Targets = new string[] {
+            nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron),
+            nameof(CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass) })]
+        public void SetupTrainingSpeedTests()
+        {
+            _dataPath_Wiki = Path.GetFullPath(TestDatasets.WikiDetox.trainFilename);
+
+            if (!File.Exists(_dataPath_Wiki))
+            {
+                throw new FileNotFoundException($"Could not find {_dataPath_Wiki}. Please ensure you have run 'build.cmd -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=true' from the root", _dataPath_Wiki);
+            }
+        }
+
+        /// <summary>
+        /// Prepares the scoring benchmark: resolves the data path, then runs the OVA averaged
+        /// perceptron CV command with an out= path so that a model exists to score with.
+        /// </summary>
+        /// <exception cref="FileNotFoundException">The data file or the trained fold model is missing.</exception>
+        [GlobalSetup(Target = nameof(Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron))]
+        public void SetupScoringSpeedTests()
+        {
+            SetupTrainingSpeedTests();
+            _modelPath_Wiki = Path.Combine(Directory.GetCurrentDirectory(), @"WikiModel.zip");
+
+            // The scoring benchmark reads "WikiModel.fold000.zip"; derive it from the out= path once,
+            // here, so that the measured method does no path handling.
+            _foldModelPath_Wiki = Path.ChangeExtension(_modelPath_Wiki, "fold000.zip");
+
+            // Drop a model left over from an earlier run so the existence check below is meaningful.
+            File.Delete(_foldModelPath_Wiki);
+
+            RunMaml("CV k=5 data=" + _dataPath_Wiki + PipelineArgs + OvaTrainerArgs + " out={" + _modelPath_Wiki + "}");
+
+            // Console output is discarded and MainCore's result is not inspected, so verify
+            // explicitly that the model the scoring benchmark depends on was written.
+            if (!File.Exists(_foldModelPath_Wiki))
+            {
+                throw new FileNotFoundException($"Training did not produce {_foldModelPath_Wiki}", _foldModelPath_Wiki);
+            }
+        }
+
+        /// <summary>Times 5-fold cross-validation with the OVA averaged perceptron trainer.</summary>
+        [Benchmark]
+        public void CV_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
+        {
+            RunMaml("CV k=5 data=" + _dataPath_Wiki + PipelineArgs + OvaTrainerArgs);
+        }
+
+        /// <summary>Times 5-fold cross-validation with the LightGBM multiclass trainer.</summary>
+        [Benchmark]
+        public void CV_Multiclass_WikiDetox_BigramsAndTrichar_LightGBMMulticlass()
+        {
+            RunMaml("CV k=5 data=" + _dataPath_Wiki + PipelineArgs + LightGbmTrainerArgs);
+        }
+
+        /// <summary>Times the Test command over the data file with the fold model trained in setup.</summary>
+        [Benchmark]
+        public void Test_Multiclass_WikiDetox_BigramsAndTrichar_OVAAveragedPerceptron()
+        {
+            // This benchmark is profiling bulk scoring speed and not training speed.
+            RunMaml("Test data=" + _dataPath_Wiki + " in=" + _foldModelPath_Wiki);
+        }
+
+        // Runs one MAML command line in its own non-verbose environment, which is disposed afterwards.
+        // NOTE(review): paths are spliced into the command unquoted; confirm behavior when they contain spaces.
+        private static void RunMaml(string cmd)
+        {
+            using (var tlc = new TlcEnvironment(verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
+            {
+                Maml.MainCore(tlc, cmd, alwaysPrintStacktrace: false);
+            }
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
index dfa673ea82..93e7eddab5 100644
--- a/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
+++ b/test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
@@ -13,8 +13,11 @@
+
+
+
@@ -31,5 +34,10 @@
PreserveNewest
+
+ PreserveNewest
+
\ No newline at end of file
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
index 272780021b..6e327b5c66 100644
--- a/test/Microsoft.ML.TestFramework/Datasets.cs
+++ b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -160,6 +160,13 @@ public static class TestDatasets
loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"
};
+ public static TestDataset WikiDetox = new TestDataset // Dataset entry whose trainFilename is read by BigramAndTrigramBenchmark.
+ {
+ name = "WikiDetox",
+ trainFilename = "external/WikiDetoxAnnotated160kRows.tsv", // Presumably fetched by the DownloadExternalTestFiles build target - confirm.
+ testFilename = "external/WikiDetoxAnnotated160kRows.tsv" // Same file as trainFilename.
+ };
+
public static TestDataset winequality = new TestDataset
{
name = "wine",