diff --git a/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs b/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs index 543f2ac718..734cf93e30 100644 --- a/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs +++ b/src/Microsoft.ML.Sweeper/Algorithms/Grid.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. - using System.Collections.Generic; using System.Linq; using Microsoft.ML.Runtime; diff --git a/src/Microsoft.ML/LearningPipeline.cs b/src/Microsoft.ML/LearningPipeline.cs index 0e554734ea..02a13b702a 100644 --- a/src/Microsoft.ML/LearningPipeline.cs +++ b/src/Microsoft.ML/LearningPipeline.cs @@ -49,12 +49,27 @@ public ScorerPipelineStep(Var data, Var model) public class LearningPipeline : ICollection { private List Items { get; } = new List(); + private readonly int? _seed; + private readonly int _conc; /// /// Construct an empty object. /// public LearningPipeline() { + _seed = null; + _conc = 0; + } + + /// + /// Construct an empty object. + /// + /// Specify seed for random generator + /// Specify concurrency factor (default value - autoselection) + internal LearningPipeline(int? seed=null, int conc=0) + { + _seed = seed; + _conc = conc; } /// @@ -137,8 +152,7 @@ public PredictionModel Train() where TInput : class where TOutput : class, new() { - - using (var environment = new TlcEnvironment()) + using (var environment = new TlcEnvironment(seed:_seed, conc:_conc)) { Experiment experiment = environment.CreateExperiment(); ILearningPipelineStep step = null; diff --git a/src/Microsoft.ML/Properties/AssemblyInfo.cs b/src/Microsoft.ML/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..52835b2f81 --- /dev/null +++ b/src/Microsoft.ML/Properties/AssemblyInfo.cs @@ -0,0 +1,9 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +[assembly: InternalsVisibleTo("Microsoft.ML.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")] diff --git a/test/Microsoft.ML.Tests/LearningPipelineTests.cs b/test/Microsoft.ML.Tests/LearningPipelineTests.cs index 3ccc36255f..63a8db17ee 100644 --- a/test/Microsoft.ML.Tests/LearningPipelineTests.cs +++ b/test/Microsoft.ML.Tests/LearningPipelineTests.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; @@ -34,9 +33,9 @@ public void CanAddAndRemoveFromPipeline() { var pipeline = new LearningPipeline() { - new Transforms.CategoricalOneHotVectorizer("String1", "String2"), - new Transforms.ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"), - new Trainers.StochasticDualCoordinateAscentRegressor() + new CategoricalOneHotVectorizer("String1", "String2"), + new ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"), + new StochasticDualCoordinateAscentRegressor() }; Assert.NotNull(pipeline); Assert.Equal(3, pipeline.Count); @@ -66,7 +65,7 @@ private class TransformedData public void TransformOnlyPipeline() { const string _dataPath = @"..\..\Data\breast-cancer.txt"; - var pipeline = new LearningPipeline(); + var pipeline = new LearningPipeline(seed: 1, conc: 1); pipeline.Add(new ML.Data.TextLoader(_dataPath).CreateFrom(useHeader: false)); pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag }); var model = pipeline.Train(); @@ -103,9 +102,11 @@ public class Prediction public void NoTransformPipeline() { var data = new Data[1]; - data[0] = new Data(); - data[0].Features = new float[] { 0.0f, 1.0f }; - data[0].Label = 0f; + data[0] = new Data + { + Features = new float[] { 0.0f, 1.0f }, + Label = 0f + }; var pipeline = new LearningPipeline(); pipeline.Add(CollectionDataSource.Create(data)); pipeline.Add(new FastForestBinaryClassifier()); @@ -126,9 +127,11 @@ public class BooleanLabelData public void BooleanLabelPipeline() { var data = new BooleanLabelData[1]; - data[0] = new BooleanLabelData(); - data[0].Features = new float[] { 0.0f, 1.0f }; - data[0].Label = false; + data[0] = new BooleanLabelData + { + Features = new float[] { 0.0f, 1.0f }, + Label = false + }; var pipeline = new LearningPipeline(); pipeline.Add(CollectionDataSource.Create(data)); pipeline.Add(new FastForestBinaryClassifier()); @@ -149,12 +152,16 @@ public class NullableBooleanLabelData public void NullableBooleanLabelPipeline() { var data = new NullableBooleanLabelData[2]; - data[0] = new NullableBooleanLabelData(); - data[0].Features = new float[] { 0.0f, 1.0f }; - data[0].Label = null; - data[1] = new NullableBooleanLabelData(); - data[1].Features = new float[] { 1.0f, 0.0f }; - data[1].Label = false; + data[0] = new NullableBooleanLabelData + { + Features = new float[] { 0.0f, 1.0f }, + Label = null + }; + data[1] = new NullableBooleanLabelData + { + Features = new float[] { 1.0f, 0.0f }, + Label = false + }; var pipeline = new LearningPipeline(); pipeline.Add(CollectionDataSource.Create(data)); pipeline.Add(new FastForestBinaryClassifier()); diff --git a/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs b/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs index b9c6713af7..560ee11d28 100644 --- a/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs @@ -16,8 +16,8 @@ public void PredictNewsCluster() { string dataPath = GetDataPath(@"external/20newsgroups.txt"); - var pipeline = new LearningPipeline(); - pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false, allowQuotedStrings:true, supportSparse:false)); + var pipeline = new LearningPipeline(seed: 1, conc: 1); + pipeline.Add(new TextLoader(dataPath).CreateFrom(useHeader: false, allowQuotedStrings: true, supportSparse: false)); pipeline.Add(new ColumnConcatenator("AllText", "Subject", "Content")); pipeline.Add(new TextFeaturizer("Features", "AllText") { @@ -81,8 +81,8 @@ public class ClusteringData public void PredictClusters() { int n = 1000; - int k = 5; - var rand = new Random(); + int k = 4; + var rand = new Random(1); var clusters = new ClusteringData[k]; var data = new ClusteringData[n]; for (int i = 0; i < k; i++) @@ -94,7 +94,7 @@ public void PredictClusters() for (int i = 0; i < n; i++) { var index = rand.Next(0, k); - var shift = (rand.NextDouble() - 0.5) / k; + var shift = (rand.NextDouble() - 0.5) / 10; data[i] = new ClusteringData { Points = new float[2] @@ -104,7 +104,7 @@ public void PredictClusters() } }; } - var pipeline = new LearningPipeline(); + var pipeline = new LearningPipeline(seed: 1, conc: 1); pipeline.Add(CollectionDataSource.Create(data)); pipeline.Add(new KMeansPlusPlusClusterer() { K = k }); var model = pipeline.Train();