From 8a18c9faaf1f9e3a3916978b7621060da6be78b5 Mon Sep 17 00:00:00 2001 From: Antonio Velazquez Date: Thu, 23 Apr 2020 15:27:24 -0700 Subject: [PATCH 1/4] Let Image Loader dispose the last image it loaded, instead of image resizer --- src/Microsoft.ML.ImageAnalytics/ImageLoader.cs | 14 +++++++++++++- src/Microsoft.ML.ImageAnalytics/ImageResizer.cs | 1 - 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs b/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs index a8120cd524..c2f2173f1c 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs @@ -217,7 +217,17 @@ private Delegate MakeGetterImageDataViewType(DataViewRow input, int iinfo, Func< { Contracts.AssertValue(input); Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); - disposer = null; + var lastImage = default(Bitmap); + + disposer = () => + { + if (lastImage != null) + { + lastImage.Dispose(); + lastImage = null; + } + }; + var getSrc = input.GetGetter>(input.Schema[ColMapNewToOld[iinfo]]); ReadOnlyMemory src = default; ValueGetter del = @@ -247,6 +257,8 @@ private Delegate MakeGetterImageDataViewType(DataViewRow input, int iinfo, Func< if (dst.PixelFormat == System.Drawing.Imaging.PixelFormat.DontCare) throw Host.Except($"Failed to load image {src.ToString()}."); } + + lastImage = dst; }; return del; diff --git a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs index 626f5fee6e..00abbf5ad3 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageResizer.cs @@ -285,7 +285,6 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func Date: Thu, 23 Apr 2020 15:27:36 -0700 Subject: [PATCH 2/4] Added Tests --- test/Microsoft.ML.Tests/ImagesTests.cs | 79 +++++++++++++++++++ .../TensorflowTests.cs | 76 ++++++++++++++++++ 2 files changed, 155 insertions(+) diff --git a/test/Microsoft.ML.Tests/ImagesTests.cs b/test/Microsoft.ML.Tests/ImagesTests.cs index 7ab0035dc2..8b1f0d73d8 100644 --- a/test/Microsoft.ML.Tests/ImagesTests.cs +++ b/test/Microsoft.ML.Tests/ImagesTests.cs @@ -908,5 +908,84 @@ private class DataPoint [VectorType(InputSize)] public double[] Features { get; set; } } + + public class InMemoryImage + { + [ImageType(229, 299)] + public Bitmap LoadedImage; + public string Label; + + public static List LoadFromTsv(MLContext mlContext, string tsvPath, string imageFolder) + { + var inMemoryImages = new List(); + var tsvFile = mlContext.Data.LoadFromTextFile(tsvPath, columns: new[] + { + new TextLoader.Column("ImagePath", DataKind.String, 0), + new TextLoader.Column("Label", DataKind.String, 1), + } + ); + + using (var cursor = tsvFile.GetRowCursorForAllColumns()) + { + var pathBuffer = default(ReadOnlyMemory); + var labelBuffer = default(ReadOnlyMemory); + var pathGetter = cursor.GetGetter>(tsvFile.Schema["ImagePath"]); + var labelGetter = cursor.GetGetter>(tsvFile.Schema["Label"]); + while (cursor.MoveNext()) + { + pathGetter(ref pathBuffer); + labelGetter(ref labelBuffer); + + var label = labelBuffer.ToString(); + var fileName = pathBuffer.ToString(); + var imagePath = Path.Combine(imageFolder, fileName); + + inMemoryImages.Add( + new InMemoryImage() + { + Label = label, + LoadedImage = (Bitmap)Image.FromFile(imagePath) + } + ); + } + } + + return inMemoryImages; + + } + } + + [Fact] + public void ResizeInMemoryImages() + { + var mlContext = new MLContext(seed: 1); + var dataFile = GetDataPath("images/images.tsv"); + var imageFolder = Path.GetDirectoryName(dataFile); + var dataObjects = InMemoryImage.LoadFromTsv(mlContext, dataFile, imageFolder); + + var dataView = mlContext.Data.LoadFromEnumerable(dataObjects); + var pipeline = mlContext.Transforms.ResizeImages("ResizedImage", 100, 100, nameof(InMemoryImage.LoadedImage)); + + // Check that the output is resized, and that it didn't resize the original image object + var resizedDV = pipeline.Fit(dataView).Transform(dataView); + var rowView = resizedDV.Preview().RowView; + var resizedImage = (Bitmap)rowView.First().Values.Last().Value; + Assert.Equal(100, resizedImage.Height); + Assert.NotEqual(100, dataObjects[0].LoadedImage.Height); + + // Check that the last in-memory image hasn't been disposed + // By running ResizeImageTransformer (see https://github.com/dotnet/machinelearning/issues/4126) + bool disposed = false; + try + { + int i = dataObjects.Last().LoadedImage.Height; + } + catch + { + disposed = true; + } + + Assert.False(disposed, "The last in memory image had been disposed by running ResizeImageTransformer"); + } } } diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index d2357d138a..97625c9b67 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.Drawing; using System.IO; using System.IO.Compression; using System.Linq; @@ -1126,6 +1127,81 @@ public void TensorFlowTransformCifarSavedModel() } } + public class InMemoryImage + { + [ImageType(229,299)] + public Bitmap LoadedImage; + public string Label; + + public static List LoadFromTsv(MLContext mlContext, string tsvPath, string imageFolder) + { + var inMemoryImages = new List(); + var tsvFile = mlContext.Data.LoadFromTextFile(tsvPath, columns: new[] + { + new TextLoader.Column("ImagePath", DataKind.String, 0), + new TextLoader.Column("Label", DataKind.String, 1), + } + ); + + using (var cursor = tsvFile.GetRowCursorForAllColumns()) + { + var pathBuffer = default(ReadOnlyMemory); + var labelBuffer = default(ReadOnlyMemory); + var pathGetter = cursor.GetGetter>(tsvFile.Schema["ImagePath"]); + var labelGetter = cursor.GetGetter>(tsvFile.Schema["Label"]); + while (cursor.MoveNext()) + { + pathGetter(ref pathBuffer); + labelGetter(ref labelBuffer); + + var label = labelBuffer.ToString(); + var fileName = pathBuffer.ToString(); + var imagePath = Path.Combine(imageFolder, fileName); + + inMemoryImages.Add( + new InMemoryImage() + { + Label = label, + LoadedImage = (Bitmap)Image.FromFile(imagePath) + } + ); + } + } + + return inMemoryImages; + + } + } + + // This test doesn't really check the values of the results + // Simply checks that CrossValidation is doable with in-memory images + // See issue https://github.com/dotnet/machinelearning/issues/4126 + [TensorFlowFact] + public void TensorFlowTransformCifarCrossValidationWithInMemoryImages() + { + var modelLocation = "cifar_saved_model"; + var mlContext = new MLContext(seed: 1); + using var tensorFlowModel = mlContext.Model.LoadTensorFlowModel(modelLocation); + var schema = tensorFlowModel.GetInputSchema(); + Assert.True(schema.TryGetColumnIndex("Input", out int column)); + var type = (VectorDataViewType)schema[column].Type; + var imageHeight = type.Dimensions[0]; + var imageWidth = type.Dimensions[1]; + var dataFile = GetDataPath("images/images.tsv"); + var imageFolder = Path.GetDirectoryName(dataFile); + var dataObjects = InMemoryImage.LoadFromTsv(mlContext, dataFile, imageFolder); + + var dataView = mlContext.Data.LoadFromEnumerable(dataObjects); + var pipeline = mlContext.Transforms.ResizeImages("ResizedImage", imageWidth, imageHeight, nameof(InMemoryImage.LoadedImage)) + .Append(mlContext.Transforms.ExtractPixels("Input", "ResizedImage", interleavePixelColors: true)) + .Append(tensorFlowModel.ScoreTensorFlowModel("Output", "Input")) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.MulticlassClassification.Trainers.NaiveBayes("Label", "Output")); + + var cross = mlContext.MulticlassClassification.CrossValidate(dataView, pipeline, 2); + Assert.Equal(2, cross.Count()); + } + // This test has been created as result of https://github.com/dotnet/machinelearning/issues/2156. [TensorFlowFact] public void TensorFlowGettingSchemaMultipleTimes() From 63a29af168c52362ae156ddc1433a395d46a956c Mon Sep 17 00:00:00 2001 From: Antonio Velazquez Date: Thu, 23 Apr 2020 16:10:29 -0700 Subject: [PATCH 3/4] Added a PredictionEngine to tests --- test/Microsoft.ML.Tests/ImagesTests.cs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/test/Microsoft.ML.Tests/ImagesTests.cs b/test/Microsoft.ML.Tests/ImagesTests.cs index 8b1f0d73d8..729f81dccd 100644 --- a/test/Microsoft.ML.Tests/ImagesTests.cs +++ b/test/Microsoft.ML.Tests/ImagesTests.cs @@ -955,6 +955,12 @@ public static List LoadFromTsv(MLContext mlContext, string tsvPat } } + public class InMemoryImageOutput : InMemoryImage + { + [ImageType(100, 100)] + public Bitmap ResizedImage; + } + [Fact] public void ResizeInMemoryImages() { @@ -967,12 +973,25 @@ public void ResizeInMemoryImages() var pipeline = mlContext.Transforms.ResizeImages("ResizedImage", 100, 100, nameof(InMemoryImage.LoadedImage)); // Check that the output is resized, and that it didn't resize the original image object - var resizedDV = pipeline.Fit(dataView).Transform(dataView); + var model = pipeline.Fit(dataView); + var resizedDV = model.Transform(dataView); var rowView = resizedDV.Preview().RowView; var resizedImage = (Bitmap)rowView.First().Values.Last().Value; Assert.Equal(100, resizedImage.Height); Assert.NotEqual(100, dataObjects[0].LoadedImage.Height); + // Also check usage of prediction Engine + // And that the references to the original image objects aren't lost + var predEngine = mlContext.Model.CreatePredictionEngine(model); + for(int i = 0; i < dataObjects.Count(); i++) + { + var prediction = predEngine.Predict(dataObjects[i]); + Assert.Equal(100, prediction.ResizedImage.Height); + Assert.NotEqual(100, prediction.LoadedImage.Height); + Assert.True(prediction.LoadedImage == dataObjects[i].LoadedImage); + Assert.False(prediction.ResizedImage == dataObjects[i].LoadedImage); + } + // Check that the last in-memory image hasn't been disposed // By running ResizeImageTransformer (see https://github.com/dotnet/machinelearning/issues/4126) bool disposed = false; From 6ee4df7abb7a695c5dd030fdc3592b05e91afdaa Mon Sep 17 00:00:00 2001 From: Antonio Velazquez Date: Thu, 23 Apr 2020 19:34:14 -0700 Subject: [PATCH 4/4] Change using statement to reuse test class --- .../TensorflowTests.cs | 47 +------------------ 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 97625c9b67..31050f5d6d 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -19,6 +19,7 @@ using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Image; using Microsoft.ML.TensorFlow; +using InMemoryImage = Microsoft.ML.Tests.ImageTests.InMemoryImage; using Xunit; using Xunit.Abstractions; using static Microsoft.ML.DataOperationsCatalog; @@ -1127,52 +1128,6 @@ public void TensorFlowTransformCifarSavedModel() } } - public class InMemoryImage - { - [ImageType(229,299)] - public Bitmap LoadedImage; - public string Label; - - public static List LoadFromTsv(MLContext mlContext, string tsvPath, string imageFolder) - { - var inMemoryImages = new List(); - var tsvFile = mlContext.Data.LoadFromTextFile(tsvPath, columns: new[] - { - new TextLoader.Column("ImagePath", DataKind.String, 0), - new TextLoader.Column("Label", DataKind.String, 1), - } - ); - - using (var cursor = tsvFile.GetRowCursorForAllColumns()) - { - var pathBuffer = default(ReadOnlyMemory); - var labelBuffer = default(ReadOnlyMemory); - var pathGetter = cursor.GetGetter>(tsvFile.Schema["ImagePath"]); - var labelGetter = cursor.GetGetter>(tsvFile.Schema["Label"]); - while (cursor.MoveNext()) - { - pathGetter(ref pathBuffer); - labelGetter(ref labelBuffer); - - var label = labelBuffer.ToString(); - var fileName = pathBuffer.ToString(); - var imagePath = Path.Combine(imageFolder, fileName); - - inMemoryImages.Add( - new InMemoryImage() - { - Label = label, - LoadedImage = (Bitmap)Image.FromFile(imagePath) - } - ); - } - } - - return inMemoryImages; - - } - } - // This test doesn't really check the values of the results // Simply checks that CrossValidation is doable with in-memory images // See issue https://github.com/dotnet/machinelearning/issues/4126