diff --git a/.gitignore b/.gitignore index d1c557c2ad..36b327cc99 100644 --- a/.gitignore +++ b/.gitignore @@ -328,3 +328,5 @@ ASALocalRun/ # MSBuild Binary and Structured Log *.binlog +# Ignore external test datasets. +/test/data/external/ diff --git a/build.proj b/build.proj index 65aa05fb5b..77b82dea2c 100644 --- a/build.proj +++ b/build.proj @@ -8,7 +8,7 @@ - + true @@ -33,6 +33,7 @@ RestoreProjects; BuildNative; $(TraversalBuildDependsOn); + DownloadExternalTestFiles; RunTests; @@ -56,13 +57,26 @@ - + + + + + + + + + + diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index 66e241163f..96de75fdd8 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -4,6 +4,7 @@ using ML = Microsoft.ML; using Microsoft.ML.Runtime; +using Microsoft.ML.Data; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.TestFramework; @@ -269,10 +270,10 @@ public void TestCrossValidationBinaryMacro() } } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact] public void TestCrossValidationMacro() { - var dataPath = GetDataPath(@"housing.txt"); + var dataPath = GetDataPath(TestDatasets.winequality.trainFilename); using (var env = new TlcEnvironment()) { var subGraph = env.CreateExperiment(); @@ -295,7 +296,30 @@ public void TestCrossValidationMacro() var modelCombineOutput = subGraph.Add(modelCombine); var experiment = env.CreateExperiment(); - var importInput = new ML.Data.TextLoader(dataPath); + var importInput = new ML.Data.TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { ';' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(11) }, + Type = DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "Features", + Source = new [] { new TextLoaderRange(0,10) }, + Type = DataKind.Num + } + } + } + }; var importOutput = experiment.Add(importInput); var crossValidate = new ML.Models.CrossValidator @@ -324,7 +348,7 @@ public void TestCrossValidationMacro() Assert.True(b); double val = 0; getter(ref val); - Assert.Equal(3.32, val, 1); + Assert.Equal(0.58, val, 1); b = cursor.MoveNext(); Assert.False(b); } diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 24e8374b4c..b40e9599e9 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -910,7 +910,7 @@ public void EntryPointTextToKeyToText() } private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath, - string instanceMetricsPath, string confusionMatrixPath = null) + string instanceMetricsPath, string confusionMatrixPath = null, string loader = null) { string inputGraph = string.Format(@" {{ @@ -919,6 +919,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data 'Name': 'Data.CustomTextLoader', 'Inputs': {{ 'InputFile': '$file' + {8} }}, 'Outputs': {{ 'Data': '$AllData' @@ -978,7 +979,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data }} }}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath), confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "", - confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : ""); + confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "", + string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader)); var jsonPath = DeleteOutputPath("graph.json"); File.WriteAllLines(jsonPath, new[] { inputGraph }); @@ -1036,15 +1038,16 @@ public void EntryPointEvaluateMultiClass() Assert.Equal(3, CountRows(loader)); } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact] public void EntryPointEvaluateRegression() { - var dataPath = GetDataPath("housing.txt"); + var dataPath = GetDataPath(TestDatasets.winequality.trainFilename); var warningsPath = DeleteOutputPath("warnings.idv"); var overallMetricsPath = DeleteOutputPath("overall.idv"); var instanceMetricsPath = DeleteOutputPath("instance.idv"); - RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath); + RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", + dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: TestDatasets.winequality.loaderSettings); using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath)) Assert.Equal(0, CountRows(loader)); @@ -1053,7 +1056,7 @@ public void EntryPointEvaluateRegression() Assert.Equal(1, CountRows(loader)); using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath)) - Assert.Equal(104, CountRows(loader)); + Assert.Equal(975, CountRows(loader)); } [Fact] @@ -1068,10 +1071,10 @@ public void EntryPointSDCAMultiClass() TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier"); } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact()] public void EntryPointSDCARegression() { - TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor"); + TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.StochasticDualCoordinateAscentRegressor", loader: TestDatasets.winequality.loaderSettings); } [Fact] @@ -1142,10 +1145,10 @@ public void EntryPointHogwildSGD() TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier"); } - [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")] + [Fact()] public void EntryPointPoissonRegression() { - TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor"); + TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.PoissonRegressor", loader: TestDatasets.winequality.loaderSettings); } [Fact] diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs index fddb70a8a5..e84109101e 100644 --- a/test/Microsoft.ML.TestFramework/Datasets.cs +++ b/test/Microsoft.ML.TestFramework/Datasets.cs @@ -152,6 +152,14 @@ public static class TestDatasets testFilename = "housing.txt" }; + public static TestDataset winequality = new TestDataset + { + name = "wine", + trainFilename = "external/winequality-white.csv", + testFilename = "external/winequality-white.csv", + loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+" + }; + public static TestDataset msm = new TestDataset { // REVIEW: Why is the MSM train set smaller than the test set? Reverse these! diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 59c6d8f6c6..b2e97a3134 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -8,6 +8,6 @@ - + \ No newline at end of file