diff --git a/.gitignore b/.gitignore
index d1c557c2ad..36b327cc99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -328,3 +328,5 @@ ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
+# Ignore external test datasets.
+/test/data/external/
diff --git a/build.proj b/build.proj
index 65aa05fb5b..77b82dea2c 100644
--- a/build.proj
+++ b/build.proj
@@ -8,7 +8,7 @@
-
+
true
@@ -33,6 +33,7 @@
RestoreProjects;
BuildNative;
$(TraversalBuildDependsOn);
+ DownloadExternalTestFiles;
RunTests;
@@ -56,13 +57,26 @@
-
+
+
+
+
+
+
+
+
+
+
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
index 66e241163f..96de75fdd8 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
@@ -4,6 +4,7 @@
using ML = Microsoft.ML;
using Microsoft.ML.Runtime;
+using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.TestFramework;
@@ -269,10 +270,10 @@ public void TestCrossValidationBinaryMacro()
}
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact]
public void TestCrossValidationMacro()
{
- var dataPath = GetDataPath(@"housing.txt");
+ var dataPath = GetDataPath(TestDatasets.winequality.trainFilename);
using (var env = new TlcEnvironment())
{
var subGraph = env.CreateExperiment();
@@ -295,7 +296,30 @@ public void TestCrossValidationMacro()
var modelCombineOutput = subGraph.Add(modelCombine);
var experiment = env.CreateExperiment();
- var importInput = new ML.Data.TextLoader(dataPath);
+ var importInput = new ML.Data.TextLoader(dataPath)
+ {
+ Arguments = new TextLoaderArguments
+ {
+ Separator = new[] { ';' },
+ HasHeader = true,
+ Column = new[]
+ {
+ new TextLoaderColumn()
+ {
+ Name = "Label",
+ Source = new [] { new TextLoaderRange(11) },
+ Type = DataKind.Num
+ },
+
+ new TextLoaderColumn()
+ {
+ Name = "Features",
+ Source = new [] { new TextLoaderRange(0,10) },
+ Type = DataKind.Num
+ }
+ }
+ }
+ };
var importOutput = experiment.Add(importInput);
var crossValidate = new ML.Models.CrossValidator
@@ -324,7 +348,7 @@ public void TestCrossValidationMacro()
Assert.True(b);
double val = 0;
getter(ref val);
- Assert.Equal(3.32, val, 1);
+ Assert.Equal(0.58, val, 1);
b = cursor.MoveNext();
Assert.False(b);
}
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index 24e8374b4c..b40e9599e9 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -910,7 +910,7 @@ public void EntryPointTextToKeyToText()
}
private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath,
- string instanceMetricsPath, string confusionMatrixPath = null)
+ string instanceMetricsPath, string confusionMatrixPath = null, string loader = null)
{
string inputGraph = string.Format(@"
{{
@@ -919,6 +919,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
'Name': 'Data.CustomTextLoader',
'Inputs': {{
'InputFile': '$file'
+ {8}
}},
'Outputs': {{
'Data': '$AllData'
@@ -978,7 +979,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
}}
}}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath),
confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "",
- confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "");
+ confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "",
+ string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader));
var jsonPath = DeleteOutputPath("graph.json");
File.WriteAllLines(jsonPath, new[] { inputGraph });
@@ -1036,15 +1038,16 @@ public void EntryPointEvaluateMultiClass()
Assert.Equal(3, CountRows(loader));
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact]
public void EntryPointEvaluateRegression()
{
- var dataPath = GetDataPath("housing.txt");
+ var dataPath = GetDataPath(TestDatasets.winequality.trainFilename);
var warningsPath = DeleteOutputPath("warnings.idv");
var overallMetricsPath = DeleteOutputPath("overall.idv");
var instanceMetricsPath = DeleteOutputPath("instance.idv");
- RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath);
+ RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator",
+ dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: TestDatasets.winequality.loaderSettings);
using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath))
Assert.Equal(0, CountRows(loader));
@@ -1053,7 +1056,7 @@ public void EntryPointEvaluateRegression()
Assert.Equal(1, CountRows(loader));
using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath))
- Assert.Equal(104, CountRows(loader));
+ Assert.Equal(975, CountRows(loader));
}
[Fact]
@@ -1068,10 +1071,10 @@ public void EntryPointSDCAMultiClass()
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier");
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact] // Runs against the wine-quality CSV referenced by TestDatasets.winequality.
public void EntryPointSDCARegression()
{
- TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor");
+ TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.StochasticDualCoordinateAscentRegressor", loader: TestDatasets.winequality.loaderSettings);
}
[Fact]
@@ -1142,10 +1145,10 @@ public void EntryPointHogwildSGD()
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier");
}
- [Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
+ [Fact] // Runs against the wine-quality CSV referenced by TestDatasets.winequality.
public void EntryPointPoissonRegression()
{
- TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor");
+ TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.PoissonRegressor", loader: TestDatasets.winequality.loaderSettings);
}
[Fact]
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
index fddb70a8a5..e84109101e 100644
--- a/test/Microsoft.ML.TestFramework/Datasets.cs
+++ b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -152,6 +152,14 @@ public static class TestDatasets
testFilename = "housing.txt"
};
+ public static TestDataset winequality = new TestDataset // Wine-quality (white) CSV dataset.
+ {
+ name = "wine",
+ trainFilename = "external/winequality-white.csv",
+ testFilename = "external/winequality-white.csv", // Same file as trainFilename.
+ loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+" // Label: column 11; Features: columns 0-10; ';'-separated; header row present.
+ };
+
public static TestDataset msm = new TestDataset
{
// REVIEW: Why is the MSM train set smaller than the test set? Reverse these!
diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
index 59c6d8f6c6..b2e97a3134 100644
--- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
+++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
@@ -8,6 +8,6 @@
-
+
\ No newline at end of file