diff --git a/.vsts-dotnet-ci.yml b/.vsts-dotnet-ci.yml
index 130288690b..0870cd1970 100644
--- a/.vsts-dotnet-ci.yml
+++ b/.vsts-dotnet-ci.yml
@@ -27,28 +27,31 @@ jobs:
         _config_short: RI
         _includeBenchmarkData: true
         _targetFramework: netcoreapp3.1
-    innerLoop: true
+    innerLoop: false
     pool:
       name: NetCorePublic-Pool
       queue: BuildPool.Ubuntu.1604.Amd64.Open
+    runSpecific: true

 - template: /build/ci/job-template.yml
   parameters:
     name: Ubuntu_x64_NetCoreApp21
     buildScript: ./build.sh
     container: UbuntuContainer
-    innerLoop: true
+    innerLoop: false
     pool:
       name: NetCorePublic-Pool
       queue: BuildPool.Ubuntu.1604.Amd64.Open
+    runSpecific: true

 - template: /build/ci/job-template.yml
   parameters:
     name: MacOS_x64_NetCoreApp21
     buildScript: ./build.sh
-    innerLoop: true
+    innerLoop: false
     pool:
       name: Hosted macOS
+    runSpecific: true

 - template: /build/ci/job-template.yml
   parameters:
@@ -65,19 +68,21 @@ jobs:
         _config_short: RI
         _includeBenchmarkData: true
         _targetFramework: netcoreapp3.1
-    innerLoop: true
+    innerLoop: false
     vsTestConfiguration: "/Framework:.NETCoreApp,Version=v3.0"
     pool:
       name: Hosted VS2017
+    runSpecific: true

 - template: /build/ci/job-template.yml
   parameters:
     name: Windows_x64_NetCoreApp21
     buildScript: build.cmd
-    innerLoop: true
+    innerLoop: false
     vsTestConfiguration: "/Framework:.NETCoreApp,Version=v2.1"
     pool:
       name: Hosted VS2017
+    runSpecific: true

 - template: /build/ci/job-template.yml
   parameters:
@@ -94,17 +99,8 @@ jobs:
         _config_short: RFX
         _includeBenchmarkData: false
         _targetFramework: win-x64
-    innerLoop: true
+    innerLoop: false
     vsTestConfiguration: "/Framework:.NETCoreApp,Version=v4.0"
     pool:
       name: Hosted VS2017
-
-- template: /build/ci/job-template.yml
-  parameters:
-    name: Windows_x86_NetCoreApp21
-    architecture: x86
-    buildScript: build.cmd
-    innerLoop: true
-    vsTestConfiguration: "/Framework:.NETCoreApp,Version=v2.1"
-    pool:
-      name: Hosted VS2017
+    runSpecific: true
diff --git a/build/ci/job-template.yml b/build/ci/job-template.yml
index e5641a9139..6745aaa0b6 100644
--- a/build/ci/job-template.yml
+++ b/build/ci/job-template.yml
@@ -7,7 +7,7 @@ parameters:
   codeCoverage: false
   nightlyBuild: false
   innerLoop: false
-  runSpecific: false
+  runSpecific: true
   container: ''
   useVSTestTask: false
diff --git a/init-tools.cmd b/init-tools.cmd
index 4c7893ec49..caa023b94a 100644
--- a/init-tools.cmd
+++ b/init-tools.cmd
@@ -67,6 +67,9 @@ if NOT [%AGENT_ID%] == [] (
   reg add "HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps" /f /v DumpFolder /t REG_SZ /d "%~dp0CrashDumps"
 )

+:: Temp - get total RAM size
+powershell -Command "(systeminfo | Select-String 'Total Physical Memory:').ToString().Split(':')[1].Trim()"
+
 :: install procdump.exe to take process dump when test crashes, hangs or fails
 echo Installing procdump.exe
 powershell -Command "Invoke-WebRequest https://download.sysinternals.com/files/Procdump.zip -UseBasicParsing -outfile procdump.zip | Out-Null"
diff --git a/init-tools.sh b/init-tools.sh
index 492df5711f..fffee462b6 100755
--- a/init-tools.sh
+++ b/init-tools.sh
@@ -24,6 +24,9 @@ if [ -e "$__BUILD_TOOLS_SEMAPHORE" ]; then
     return #return instead of exit because this script is inlined in other scripts which we don't want to exit
 fi

+# Temp - Get total RAM size
+cat /proc/meminfo
+
 if [ -e "$__TOOLRUNTIME_DIR" ]; then rm -rf -- "$__TOOLRUNTIME_DIR"; fi

 if [ -d "${DotNetBuildToolsDir:-}" ]; then
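Note: the two temporary init-tools probes above only report machine-level RAM when the build starts; the AutoFitTests changes later in this diff log per-process numbers from inside the test instead. A minimal, self-contained sketch of that in-process logging is below; the MemoryProbe helper name is illustrative and not part of the patch.

```csharp
using System;
using System.Diagnostics;

internal static class MemoryProbe
{
    // Logs the same two figures the modified AutoFitImageClassificationTrainTest prints
    // per iteration: private bytes for the process and bytes currently tracked by the GC.
    public static void Log(string tag)
    {
        using Process proc = Process.GetCurrentProcess();
        double gb = 1024.0 * 1024.0 * 1024.0;
        Console.WriteLine($"{tag} - process private memory (GB): {proc.PrivateMemorySize64 / gb:F2}");
        Console.WriteLine($"{tag} - GC heap (GB): {GC.GetTotalMemory(forceFullCollection: false) / gb:F2}");
    }
}
```

Calling something like MemoryProbe.Log($"Iteration {i}") at the top of a looped test would produce output comparable to the Console.WriteLine calls added to the test below.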
diff --git a/src/Microsoft.ML.AutoML/API/ExperimentResults/CrossValidationExperimentResult.cs b/src/Microsoft.ML.AutoML/API/ExperimentResults/CrossValidationExperimentResult.cs
index 5c1db48a59..3b3c7f5f3c 100644
--- a/src/Microsoft.ML.AutoML/API/ExperimentResults/CrossValidationExperimentResult.cs
+++ b/src/Microsoft.ML.AutoML/API/ExperimentResults/CrossValidationExperimentResult.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.

+using System;
 using System.Collections.Generic;
 using Microsoft.ML.Data;

@@ -11,7 +12,7 @@ namespace Microsoft.ML.AutoML
     /// Result of an AutoML experiment that includes cross validation details.
     /// </summary>
     /// <typeparam name="TMetrics">Metrics type for the experiment (like <see cref="BinaryClassificationMetrics"/>).</typeparam>
-    public class CrossValidationExperimentResult<TMetrics>
+    public class CrossValidationExperimentResult<TMetrics> : IDisposable
     {
         /// <summary>
         /// Details of the cross validation runs in this experiment.
@@ -36,5 +37,44 @@ internal CrossValidationExperimentResult(IEnumerable<CrossValidationRunDetail<TMetrics>> runDetails,
             RunDetails = runDetails;
             BestRun = bestRun;
         }
+
+        #region IDisposable Support
+        private bool _disposed;
+        private bool _disposedRunDetails;
+
+        /// <summary>
+        /// Releases unmanaged Tensor objects in models stored in RunDetail and BestRun instances
+        /// </summary>
+        /// <remarks>
+        /// Invocation of Dispose() is necessary to clean up remaining C library Tensor objects and
+        /// avoid a memory leak
+        /// </remarks>
+        public void Dispose()
+        {
+            if (_disposed)
+                return;
+            if (!_disposedRunDetails)
+                DisposeRunDetails();
+            (BestRun as IDisposable)?.Dispose();
+            _disposed = true;
+        }
+
+        /// <summary>
+        /// Releases unmanaged Tensor objects in models stored in RunDetail instances
+        /// </summary>
+        /// <remarks>
+        /// Invocation of DisposeRunDetails() is necessary to clean up remaining C library
+        /// Tensor objects and avoid a memory leak. Compared to Dispose(), DisposeRunDetails()
+        /// only disposes of RunDetails so that the best determined model in BestRun can have
+        /// a different lifetime than models in other experimental runs.
+        /// </remarks>
+        public void DisposeRunDetails()
+        {
+            if (_disposedRunDetails || _disposed)
+                return;
+            (RunDetails as IDisposable)?.Dispose();
+            _disposedRunDetails = true;
+        }
+        #endregion
     }
 }
diff --git a/src/Microsoft.ML.AutoML/API/ExperimentResults/ExperimentResult.cs b/src/Microsoft.ML.AutoML/API/ExperimentResults/ExperimentResult.cs
index 85eecfdb7f..904c5bb54f 100644
--- a/src/Microsoft.ML.AutoML/API/ExperimentResults/ExperimentResult.cs
+++ b/src/Microsoft.ML.AutoML/API/ExperimentResults/ExperimentResult.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.

+using System;
 using System.Collections.Generic;
 using Microsoft.ML.Data;

@@ -11,7 +12,7 @@ namespace Microsoft.ML.AutoML
     /// Result of an AutoML experiment.
     /// </summary>
     /// <typeparam name="TMetrics">Metrics type for the experiment (like <see cref="BinaryClassificationMetrics"/>).</typeparam>
-    public class ExperimentResult<TMetrics>
+    public class ExperimentResult<TMetrics> : IDisposable
     {
         /// <summary>
         /// Details of the runs in this experiment.
@@ -36,5 +37,44 @@ internal ExperimentResult(IEnumerable<RunDetail<TMetrics>> runDetails,
             RunDetails = runDetails;
             BestRun = bestRun;
         }
+
+        #region IDisposable Support
+        private bool _disposed;
+        private bool _disposedRunDetails;
+
+        /// <summary>
+        /// Releases unmanaged Tensor objects in models stored in RunDetail and BestRun instances
+        /// </summary>
+        /// <remarks>
+        /// Invocation of Dispose() is necessary to clean up remaining C library Tensor objects and
+        /// avoid a memory leak
+        /// </remarks>
+        public void Dispose()
+        {
+            if (_disposed)
+                return;
+            if (!_disposedRunDetails)
+                DisposeRunDetails();
+            (BestRun as IDisposable)?.Dispose();
+            _disposed = true;
+        }
+
+        /// <summary>
+        /// Releases unmanaged Tensor objects in models stored in RunDetail instances
+        /// </summary>
+        /// <remarks>
+        /// Invocation of DisposeRunDetails() is necessary to clean up remaining C library
+        /// Tensor objects and avoid a memory leak. Compared to Dispose(), DisposeRunDetails()
+        /// only disposes of RunDetails so that the best determined model in BestRun can have
+        /// a different lifetime than models in other experimental runs.
+        /// </remarks>
+        public void DisposeRunDetails()
+        {
+            if (_disposedRunDetails || _disposed)
+                return;
+            (RunDetails as IDisposable)?.Dispose();
+            _disposedRunDetails = true;
+        }
+        #endregion
     }
 }
\ No newline at end of file
diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValRunner.cs
index fa12e10ada..7312bd1725 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValRunner.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/CrossValRunner.cs
@@ -53,7 +53,7 @@ public CrossValRunner(MLContext context,
                 var modelFileInfo = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, i + 1);
                 var trainResult = RunnerUtil.TrainAndScorePipeline(_context, pipeline, _trainDatasets[i], _validDatasets[i], _labelColumn,
                     _metricsAgent, _preprocessorTransforms?[i], modelFileInfo, _modelInputSchema, _logger);
-                trainResults.Add(new SuggestedPipelineTrainResult<TMetrics>(trainResult.model, trainResult.metrics, trainResult.exception, trainResult.score));
+                trainResults.Add(new SuggestedPipelineTrainResult<TMetrics>(trainResult.modelContainer, trainResult.metrics, trainResult.exception, trainResult.score));
             }

             var avgScore = CalcAverageScore(trainResults.Select(r => r.Score));
diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/RunnerUtil.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/RunnerUtil.cs
index c6aeef49a5..a1625a56a0 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/RunnerUtil.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/RunnerUtil.cs
@@ -10,7 +10,7 @@ namespace Microsoft.ML.AutoML
 {
     internal static class RunnerUtil
     {
-        public static (ModelContainer model, TMetrics metrics, Exception exception, double score)
+        public static (ModelContainer modelContainer, TMetrics metrics, Exception exception, double score)
             TrainAndScorePipeline<TMetrics>(MLContext context,
                 SuggestedPipeline pipeline,
                 IDataView trainData,
diff --git a/src/Microsoft.ML.AutoML/Experiment/Runners/TrainValidateRunner.cs b/src/Microsoft.ML.AutoML/Experiment/Runners/TrainValidateRunner.cs
index d608f7dd2f..4289f45a0c 100644
--- a/src/Microsoft.ML.AutoML/Experiment/Runners/TrainValidateRunner.cs
+++ b/src/Microsoft.ML.AutoML/Experiment/Runners/TrainValidateRunner.cs
@@ -50,7 +50,7 @@ public TrainValidateRunner(MLContext context,
                 trainResult.score,
                 trainResult.exception == null,
                 trainResult.metrics,
-                trainResult.model,
+                trainResult.modelContainer,
                 trainResult.exception);
             var runDetail = suggestedPipelineRunDetail.ToIterationResult(_preFeaturizer);
             return (suggestedPipelineRunDetail, runDetail);
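Note: the IDisposable surface added to ExperimentResult<TMetrics> and CrossValidationExperimentResult<TMetrics> above (together with the modelContainer rename in the runners) is what the AutoFitTests changes below exercise. A minimal caller-side sketch under the API shapes shown in this diff; the class, method, and argument values here are illustrative only.

```csharp
using System;
using Microsoft.ML;
using Microsoft.ML.AutoML;

internal static class ExperimentDisposalSketch
{
    public static void Run(MLContext context, IDataView trainData, string labelColumn)
    {
        // `using` works here because ExperimentResult<TMetrics> now implements IDisposable.
        using var result = context.Auto()
            .CreateBinaryClassificationExperiment(15)
            .Execute(trainData, new ColumnInformation() { LabelColumnName = labelColumn });

        // Release the native Tensor memory held by the non-best models right away,
        // while keeping BestRun usable for scoring or saving.
        result.DisposeRunDetails();

        Console.WriteLine($"Best trainer: {result.BestRun.TrainerName}");
    } // Dispose() runs here and also releases the BestRun model.
}
```

This mirrors the new AutoFitBinaryTestDisposeNonBestModels test below: run details are dropped as soon as the best run is chosen, and the best model's lifetime is controlled separately by the enclosing using scope.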
diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
index 6523f89f0d..7baca4ddfc 100644
--- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
+++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
@@ -12,6 +12,8 @@
 using static Microsoft.ML.DataOperationsCatalog;
 using Microsoft.ML.TestFramework;
 using Xunit.Abstractions;
+using Microsoft.ML.TestFrameworkCommon.Attributes;
+using System.Diagnostics;

 namespace Microsoft.ML.AutoML.Test
 {
@@ -29,7 +31,7 @@ public void AutoFitBinaryTest()
             var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
             var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
             var trainData = textLoader.Load(dataPath);
-            var result = context.Auto()
+            using var result = context.Auto()
                 .CreateBinaryClassificationExperiment(0)
                 .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });
             Assert.True(result.BestRun.ValidationMetrics.Accuracy > 0.70);
@@ -38,6 +40,27 @@ public void AutoFitBinaryTest()
             Assert.NotNull(result.BestRun.TrainerName);
         }

+        [Fact]
+        public void AutoFitBinaryTestDisposeNonBestModels()
+        {
+            var context = new MLContext(1);
+            var dataPath = DatasetUtil.GetUciAdultDataset();
+            var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
+            var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
+            var trainData = textLoader.Load(dataPath);
+            // Result will have more than 1 model with a total experiment time of 15 seconds
+            using var result = context.Auto()
+                .CreateBinaryClassificationExperiment(15)
+                .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel });
+            Assert.True(result.RunDetails.Count() > 1);
+            // Dispose of models that did not yield the best run
+            result.DisposeRunDetails();
+            Assert.True(result.BestRun.ValidationMetrics.Accuracy > 0.70);
+            Assert.NotNull(result.BestRun.Estimator);
+            Assert.NotNull(result.BestRun.Model);
+            Assert.NotNull(result.BestRun.TrainerName);
+        }
+
         [Fact]
         public void AutoFitMultiTest()
         {
@@ -45,7 +68,7 @@ public void AutoFitMultiTest()
             var columnInference = context.Auto().InferColumns(DatasetUtil.TrivialMulticlassDatasetPath, DatasetUtil.TrivialMulticlassDatasetLabel);
             var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
             var trainData = textLoader.Load(DatasetUtil.TrivialMulticlassDatasetPath);
-            var result = context.Auto()
+            using var result = context.Auto()
                 .CreateMulticlassClassificationExperiment(0)
                 .Execute(trainData, 5, DatasetUtil.TrivialMulticlassDatasetLabel);
             Assert.True(result.BestRun.Results.First().ValidationMetrics.MicroAccuracy >= 0.7);
@@ -53,11 +76,14 @@ public void AutoFitMultiTest()
             var scoredData = result.BestRun.Results.First().Model.Transform(trainData);
             Assert.Equal(NumberDataViewType.Single, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
         }
-        [TensorFlowFact]
-        //Skipping test temporarily. This test will be re-enabled once the cause of failures has been determined
-        [Trait("Category", "SkipInCI")]
-        public void AutoFitImageClassificationTrainTest()
+        [Theory, TestCategory("RunSpecificTest"), IterationData(100)]
+        public void AutoFitImageClassificationTrainTest(int iterations)
         {
+            Console.WriteLine(String.Format("AutoFitImageClassificationTrainTest Iteration: {0}", iterations));
+            Process proc = Process.GetCurrentProcess();
+            Console.WriteLine(String.Format("Iteration {0} - Total memory usage in GBs (proc): {1}", iterations, proc.PrivateMemorySize64 / (1024.0 * 1024.0 * 1024.0)));
+            proc.Dispose();
+            Console.WriteLine(String.Format("Iteration {0} - Total memory usage in GBs (GC): {1}", iterations, GC.GetTotalMemory(false) / (1024.0 * 1024.0 * 1024.0)));
             var context = new MLContext(seed: 1);
             var datasetPath = DatasetUtil.GetFlowersDataset();
             var columnInference = context.Auto().InferColumns(datasetPath, "Label");
@@ -67,7 +93,7 @@ public void AutoFitImageClassificationTrainTest()
             TrainTestData trainTestData = context.Data.TrainTestSplit(trainData, testFraction: 0.2, seed: 1);
             IDataView trainDataset = SplitUtil.DropAllColumnsExcept(context, trainTestData.TrainSet, originalColumnNames);
             IDataView testDataset = SplitUtil.DropAllColumnsExcept(context, trainTestData.TestSet, originalColumnNames);
-            var result = context.Auto()
+            using var result = context.Auto()
                 .CreateMulticlassClassificationExperiment(0)
                 .Execute(trainDataset, testDataset, columnInference.ColumnInformation);

@@ -89,7 +115,7 @@ public void AutoFitImageClassification()
             var columnInference = context.Auto().InferColumns(datasetPath, "Label");
             var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
             var trainData = textLoader.Load(datasetPath);
-            var result = context.Auto()
+            using var result = context.Auto()
                 .CreateMulticlassClassificationExperiment(0)
                 .Execute(trainData, columnInference.ColumnInformation);

@@ -113,7 +139,7 @@ public void AutoFitRegressionTest()
             var trainData = textLoader.Load(dataPath);
             var validationData = context.Data.TakeRows(trainData, 20);
             trainData = context.Data.SkipRows(trainData, 20);
-            var result = context.Auto()
+            using var result = context.Auto()
                 .CreateRegressionExperiment(0)
                 .Execute(trainData, validationData, new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

@@ -137,7 +163,7 @@ public void AutoFitRecommendationTest()
             var testDataView = reader.Load(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));

             // STEP 2: Run AutoML experiment
-            ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
+            using ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
                 .CreateRecommendationExperiment(5)
                 .Execute(trainDataView, testDataView, new ColumnInformation()