diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningEarlyStopping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningEarlyStopping.cs index 3e7785b5fc..a7192a3157 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningEarlyStopping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningEarlyStopping.cs @@ -57,18 +57,23 @@ public static void Example() IDataView trainDataset = trainTestData.TrainSet; IDataView testDataset = trainTestData.TestSet; - - var pipeline = mlContext.Model.ImageClassification( - "ImagePath", "Label", - // Just by changing/selecting InceptionV3 here instead of - // ResnetV2101 you can try a different architecture/pre-trained - // model. - arch: ImageClassificationEstimator.Architecture.ResnetV2101, - batchSize: 10, - learningRate: 0.01f, - earlyStopping: new ImageClassificationEstimator.EarlyStopping(minDelta: 0.001f, patience:20, metric:ImageClassificationEstimator.EarlyStoppingMetric.Loss), - metricsCallback: (metrics) => Console.WriteLine(metrics), - validationSet: testDataset); + + var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Fit(testDataset) + .Transform(testDataset); + + var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Append(mlContext.Model.ImageClassification( + "Image", "Label", + // Just by changing/selecting InceptionV3 here instead of + // ResnetV2101 you can try a different architecture/pre-trained + // model. + arch: ImageClassificationEstimator.Architecture.ResnetV2101, + batchSize: 10, + learningRate: 0.01f, + earlyStopping: new ImageClassificationEstimator.EarlyStopping(minDelta: 0.001f, patience: 20, metric: ImageClassificationEstimator.EarlyStoppingMetric.Loss), + metricsCallback: (metrics) => Console.WriteLine(metrics), + validationSet: validationSet)); Console.WriteLine("*** Training the image classification model with " + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs index 8846bfbf5e..3bb6238cbd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs @@ -30,7 +30,6 @@ public static void Example() //Download the image set and unzip string finalImagesFolderName = DownloadImageSet( imagesDownloadFolderPath); - string fullImagesetFolderPath = Path.Combine( imagesDownloadFolderPath, finalImagesFolderName); @@ -58,19 +57,25 @@ public static void Example() IDataView trainDataset = trainTestData.TrainSet; IDataView testDataset = trainTestData.TestSet; - var pipeline = mlContext.Model.ImageClassification( - "ImagePath", "Label", - // Just by changing/selecting InceptionV3 here instead of - // ResnetV2101 you can try a different architecture/pre-trained - // model. 
- arch: ImageClassificationEstimator.Architecture.ResnetV2101, - epoch: 50, - batchSize: 10, - learningRate: 0.01f, - metricsCallback: (metrics) => Console.WriteLine(metrics), - validationSet: testDataset, - disableEarlyStopping: true) - .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel")); + var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Fit(testDataset) + .Transform(testDataset); + + var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Append(mlContext.Model.ImageClassification( + "Image", "Label", + // Just by changing/selecting InceptionV3 here instead of + // ResnetV2101 you can try a different architecture/pre-trained + // model. + arch: ImageClassificationEstimator.Architecture.ResnetV2101, + epoch: 50, + batchSize: 10, + learningRate: 0.01f, + metricsCallback: (metrics) => Console.WriteLine(metrics), + validationSet: validationSet, + disableEarlyStopping: true) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel"))); + Console.WriteLine("*** Training the image classification model with " + "DNN Transfer Learning on top of the selected pre-trained " + @@ -98,6 +103,7 @@ public static void Example() EvaluateModel(mlContext, testDataset, loadedModel); watch = System.Diagnostics.Stopwatch.StartNew(); + TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel); watch.Stop(); @@ -125,6 +131,9 @@ private static void TrySinglePrediction(string imagesForPredictions, IEnumerable testImages = LoadImagesFromDirectory( imagesForPredictions, false); + byte[] imgBytes = File.ReadAllBytes(testImages.First().ImagePath); + VBuffer imgData = new VBuffer(imgBytes.Length, imgBytes); + ImageData imageToPredict = new ImageData { ImagePath = testImages.First().ImagePath @@ -160,13 +169,12 @@ private static void EvaluateModel(MLContext mlContext, Console.WriteLine("Predicting and Evaluation took: " + (elapsed2Ms / 1000).ToString() + " seconds"); } - + public static IEnumerable LoadImagesFromDirectory(string folder, bool useFolderNameAsLabel = true) { var files = Directory.GetFiles(folder, "*", searchOption: SearchOption.AllDirectories); - foreach (var file in files) { if (Path.GetExtension(file) != ".jpg") @@ -186,7 +194,7 @@ public static IEnumerable LoadImagesFromDirectory(string folder, } } } - + yield return new ImageData() { ImagePath = file, @@ -299,4 +307,3 @@ public class ImagePrediction } } } - diff --git a/pkg/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.nupkgproj b/pkg/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.nupkgproj index 8bdef45d07..763256ff51 100644 --- a/pkg/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.nupkgproj +++ b/pkg/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.nupkgproj @@ -8,6 +8,5 @@ - - + diff --git a/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs b/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs index f3eaf6c544..fe0ab3d61f 100644 --- a/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs +++ b/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs @@ -203,6 +203,27 @@ private void CheckTrainingParameters(ImageClassificationEstimator.Options option return (jpegData, resizedImage); } + private static Tensor EncodeByteAsString(VBuffer buffer) + { + int length = 
buffer.Length; + var size = c_api.TF_StringEncodedSize((UIntPtr)length); + var handle = c_api.TF_AllocateTensor(TF_DataType.TF_STRING, IntPtr.Zero, 0, (UIntPtr)((ulong)size + 8)); + + IntPtr tensor = c_api.TF_TensorData(handle); + Marshal.WriteInt64(tensor, 0); + + var status = new Status(); + unsafe + { + fixed (byte* src = buffer.GetValues()) + c_api.TF_StringEncode(src, (UIntPtr)length, (sbyte*)(tensor + sizeof(Int64)), size, status); + } + + status.Check(true); + status.Dispose(); + return new Tensor(handle); + } + private sealed class ImageProcessor { private Runner _imagePreprocessingRunner; @@ -214,16 +235,16 @@ public ImageProcessor(ImageClassificationTransformer transformer) _imagePreprocessingRunner.AddOutputs(transformer._resizedImageTensorName); } - public Tensor ProcessImage(string path) + public Tensor ProcessImage(in VBuffer imageBuffer) { - var imageTensor = new Tensor(File.ReadAllBytes(path), TF_DataType.TF_STRING); + var imageTensor = EncodeByteAsString(imageBuffer); var processedTensor = _imagePreprocessingRunner.AddInput(imageTensor, 0).Run()[0]; imageTensor.Dispose(); return processedTensor; } } - private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName, string imagepathColumnName, + private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName, string imageColumnName, ImageProcessor imageProcessor, string inputTensorName, string outputTensorName, string cacheFilePath, ImageClassificationMetrics.Dataset dataset, ImageClassificationMetricsCallback metricsCallback) { @@ -234,17 +255,17 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName labelColumnName, typeof(uint).ToString(), labelColumn.Type.RawType.ToString()); - var imagePathColumn = input.Schema[imagepathColumnName]; + var imageColumn = input.Schema[imageColumnName]; Runner runner = new Runner(_session); runner.AddOutputs(outputTensorName); using (TextWriter writer = File.CreateText(cacheFilePath)) - using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imagePathColumn.Index))) + using (var cursor = input.GetRowCursor(input.Schema.Where(c => c.Index == labelColumn.Index || c.Index == imageColumn.Index))) { var labelGetter = cursor.GetGetter(labelColumn); - var imagePathGetter = cursor.GetGetter>(imagePathColumn); + var imageGetter = cursor.GetGetter>(imageColumn); UInt32 label = UInt32.MaxValue; - ReadOnlyMemory imagePath = default; + VBuffer image = default; runner.AddInput(inputTensorName); ImageClassificationMetrics metrics = new ImageClassificationMetrics(); metrics.Bottleneck = new BottleneckMetrics(); @@ -253,9 +274,8 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName while (cursor.MoveNext()) { labelGetter(ref label); - imagePathGetter(ref imagePath); - var imagePathStr = imagePath.ToString(); - var imageTensor = imageProcessor.ProcessImage(imagePathStr); + imageGetter(ref image); + var imageTensor = imageProcessor.ProcessImage(image); runner.AddInput(imageTensor, 0); var featurizedImage = runner.Run()[0]; // Reuse memory featurizedImage.ToArray(ref imageArray); @@ -264,7 +284,6 @@ private void CacheFeaturizedImagesToDisk(IDataView input, string labelColumnName featurizedImage.Dispose(); imageTensor.Dispose(); metrics.Bottleneck.Index++; - metrics.Bottleneck.Name = imagePathStr; metricsCallback?.Invoke(metrics); } } @@ -878,8 +897,8 @@ public Mapper(ImageClassificationTransformer parent, DataViewSchema inputSchema) private class OutputCache { public 
long Position; - private ValueGetter> _imagePathGetter; - private ReadOnlyMemory _imagePath; + private ValueGetter> _imageGetter; + private VBuffer _image; private Runner _runner; private ImageProcessor _imageProcessor; private long _predictedLabel; @@ -890,8 +909,8 @@ private class OutputCache public OutputCache(DataViewRow input, ImageClassificationTransformer transformer) { - _imagePath = default; - _imagePathGetter = input.GetGetter>(input.Schema[transformer._inputs[0]]); + _image = default; + _imageGetter = input.GetGetter>(input.Schema[transformer._inputs[0]]); _runner = new Runner(transformer._session); _runner.AddInput(transformer._inputTensorName); _runner.AddOutputs(transformer._softmaxTensorName); @@ -908,8 +927,8 @@ public void UpdateCacheIfNeeded() if (_inputRow.Position != Position) { Position = _inputRow.Position; - _imagePathGetter(ref _imagePath); - var processedTensor = _imageProcessor.ProcessImage(_imagePath.ToString()); + _imageGetter(ref _image); + var processedTensor = _imageProcessor.ProcessImage(_image); var outputTensor = _runner.AddInput(processedTensor, 0).Run(); outputTensor[0].ToArray(ref _classProbability); outputTensor[1].ToScalar(ref _predictedLabel); @@ -1365,7 +1384,7 @@ internal sealed class Options : TransformInputBase private readonly IHost _host; private readonly Options _options; private readonly DnnModel _dnnModel; - private readonly TF_DataType[] _tfInputTypes; + private readonly DataViewType[] _inputTypes; private ImageClassificationTransformer _transformer; internal ImageClassificationEstimator(IHostEnvironment env, Options options, DnnModel dnnModel) @@ -1373,7 +1392,7 @@ internal ImageClassificationEstimator(IHostEnvironment env, Options options, Dnn _host = Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageClassificationEstimator)); _options = options; _dnnModel = dnnModel; - _tfInputTypes = new[] { TF_DataType.TF_STRING }; + _inputTypes = new[] { new VectorDataViewType(NumberDataViewType.Byte) }; } private static Options CreateArguments(DnnModel tensorFlowModel, string[] outputColumnNames, string[] inputColumnName, bool addBatchDimensionInput) @@ -1399,8 +1418,8 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) var input = _options.InputColumns[i]; if (!inputSchema.TryFindColumn(input, out var col)) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input); - var expectedType = DnnUtils.Tf2MlNetType(_tfInputTypes[i]); - if (col.ItemType != expectedType) + var expectedType = _inputTypes[i]; + if (!col.ItemType.Equals(expectedType.GetItemType())) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString()); } diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs index 60e92e79a2..e114446c0c 100644 --- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs @@ -69,34 +69,27 @@ internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCata /// ]]> /// public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, string inputColumnName = null) - => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, true, new[] { (outputColumnName, inputColumnName ?? 
outputColumnName) }); /// - /// Loads the images from the into memory. + /// Create a , which loads the data from the column specified in + /// as an image to a new column: . /// - /// - /// The image get loaded in memory as a type. - /// Loading is the first step of almost every pipeline that does image processing, and further analysis on images. - /// The images to load need to be in the formats supported by . - /// For end-to-end image processing pipelines, and scenarios in your applications, see the - /// examples in the machinelearning-samples github repository. - /// /// The transform's catalog. + /// Name of the column resulting from the transformation of . + /// This column's data type will be . + /// Name of the column with paths to the images to load. + /// This estimator operates over text data. /// Folder where to look for images. - /// Specifies the names of the input columns for the transformation, and their respective output column names. + /// Image type flag - if true, the image is loaded as an ImageDataViewType; if false, it is loaded as a VectorDataViewType of bytes. /// /// /// /// - [BestFriend] - internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params InputOutputColumnPair[] columns) - { - var env = CatalogUtils.GetEnvironment(catalog); - env.CheckValue(columns, nameof(columns)); - return new ImageLoadingEstimator(env, imageFolder, InputOutputColumnPair.ConvertToValueTuples(columns)); - } + public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string outputColumnName, string imageFolder, bool useImageType, string inputColumnName = null) => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, useImageType, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); /// /// Create a , which extracts pixels values from the data specified in column: diff --git a/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs b/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs index 6712d0111c..5be7476b2c 100644 --- a/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs +++ b/src/Microsoft.ML.ImageAnalytics/ImageLoader.cs @@ -3,10 +3,13 @@ // See the LICENSE file in the project root for more information. using System; +using System.Collections.Concurrent; using System.Collections.Generic; +using System.Diagnostics.Contracts; using System.Drawing; using System.IO; using System.Linq; +using System.Runtime.InteropServices; using System.Text; using Microsoft.ML; using Microsoft.ML.CommandLine; @@ -69,6 +72,11 @@ internal sealed class Options : TransformInputBase /// The folder to load the images from. /// public readonly string ImageFolder; + /// + /// The flag that determines the DataViewType of the loaded image. If true, the image is loaded as an ImageDataViewType; if false, it is loaded as a VectorDataViewType of bytes. + /// If not specified, it defaults to true, i.e. ImageDataViewType. + /// + public readonly bool UseImageType; /// /// The columns passed to this . @@ -85,6 +93,21 @@ internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingTransformer)), columns) { ImageFolder = imageFolder; + UseImageType = true; + } + + /// + /// Initializes a new instance of . + /// + /// The host environment. + /// Folder where to look for images. + /// Image type flag - true for ImageDataViewType or false for VectorDataViewType. Defaults to true, i.e. ImageDataViewType, if not specified.
+ /// Names of input and output columns. + internal ImageLoadingTransformer(IHostEnvironment env, string imageFolder = null, bool type = true, params (string outputColumnName, string inputColumnName)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingTransformer)), columns) + { + ImageFolder = imageFolder; + UseImageType = type; } // Factory method for SignatureDataTransform. @@ -112,6 +135,10 @@ private ImageLoadingTransformer(IHost host, ModelLoadContext ctx) // int: id of image folder ImageFolder = ctx.LoadStringOrNull(); + if (ctx.Header.ModelVerWritten >= 0x00010003) // do a version check + UseImageType = ctx.Reader.ReadBoolean(); + else + UseImageType = true; // It is an ImageDataViewType } // Factory method for SignatureLoadDataTransform. @@ -141,6 +168,7 @@ private protected override void SaveModel(ModelSaveContext ctx) base.SaveColumns(ctx); ctx.SaveStringOrNull(ImageFolder); + ctx.Writer.Write(UseImageType); } private static VersionInfo GetVersionInfo() @@ -148,32 +176,43 @@ private static VersionInfo GetVersionInfo() return new VersionInfo( modelSignature: "IMGLOADR", //verWrittenCur: 0x00010001, // Initial - verWrittenCur: 0x00010002, // Swith from OpenCV to Bitmap - verReadableCur: 0x00010002, - verWeCanReadBack: 0x00010002, + verWrittenCur: 0x00010003, // Added support for output type as byte array + verReadableCur: 0x00010003, + verWeCanReadBack: 0x00010003, loaderSignature: LoaderSignature, loaderAssemblyName: typeof(ImageLoadingTransformer).Assembly.FullName); } - private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); + private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema, UseImageType); private sealed class Mapper : OneToOneMapperBase { private readonly ImageLoadingTransformer _parent; - private readonly ImageDataViewType _imageType; + private readonly bool _type; + private readonly ConcurrentBag _bufferPool; - public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema) + public Mapper(ImageLoadingTransformer parent, DataViewSchema inputSchema, bool type) : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) { - _imageType = new ImageDataViewType(); + _type = type; _parent = parent; + _bufferPool = new ConcurrentBag(); } protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) + { + disposer = null; + // Check for the type of Image, if true load images as ImageDataViewType else load images as VBuffer + if (_type) + return MakeGetterImageDataViewType(input, iinfo, activeOutput, out disposer); + else + return MakeGetterVectorDataViewByteType(input, iinfo, activeOutput, out disposer); + } + + private Delegate MakeGetterImageDataViewType(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) { Contracts.AssertValue(input); Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); - disposer = null; var getSrc = input.GetGetter>(input.Schema[ColMapNewToOld[iinfo]]); ReadOnlyMemory src = default; @@ -201,11 +240,103 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) + { + Contracts.AssertValue(input); + Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); + + disposer = null; + var getSrc = input.GetGetter>(input.Schema[ColMapNewToOld[iinfo]]); + ReadOnlyMemory src = default; + ValueGetter> del = + (ref VBuffer dst) => + { + byte[] buffer = null; + if 
(!_bufferPool.TryTake(out buffer)) + { + buffer = new byte[4096]; + } + + getSrc(ref src); + + if (src.Length > 0) + { + string path = src.ToString(); + if (!string.IsNullOrWhiteSpace(_parent.ImageFolder)) + path = Path.Combine(_parent.ImageFolder, path); + if (!TryLoadDataIntoBuffer(path, ref dst, buffer)) + throw Host.Except($"Failed to load image {src.ToString()}."); + } + + Contract.Assert(buffer != null); + _bufferPool.Add(buffer); + }; + return del; } + private static bool TryLoadDataIntoBuffer(string path, ref VBuffer imgData, byte[] readBuffer) + { + int count = -1; + int bytesread = -1; + // bufferSize == 1 used to avoid unnecessary buffer in FileStream + using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1)) + { + long fileLength = fs.Length; + if (fileLength > int.MaxValue) + throw new IOException($"File {path} too big to open."); + else if (fileLength == 0) + { + byte[] imageBuffer; + // Some file systems (e.g. procfs on Linux) return 0 for length even when there's content. + // Thus we need to assume 0 doesn't mean empty. + imageBuffer = File.ReadAllBytes(path); + count = imageBuffer.Length; + imgData = new VBuffer(count, imageBuffer); + return (count> 0); + } + + count = (int)fileLength; + var editor = VBufferEditor.Create(ref imgData, count); + bytesread = ReadToEnd(fs, editor.Values, readBuffer); + imgData = editor.Commit(); + return (count > 0); + } + + } + + private static int ReadToEnd(System.IO.Stream stream, Span bufferSpan, byte[] readBuffer) + { + int totalBytesRead = 0; + int bytesRead; + var readBufferSpan = readBuffer.AsSpan(); + var srcSpan = readBufferSpan; + while ((bytesRead = stream.Read(readBuffer, 0, readBuffer.Length)) > 0) + { + if (bytesRead != srcSpan.Length) + srcSpan = readBufferSpan.Slice(0, bytesRead); + var dstSpan = bufferSpan.Slice(totalBytesRead, bytesRead); + Contract.Assert(srcSpan.Length == dstSpan.Length); + srcSpan.CopyTo(dstSpan); + totalBytesRead += bytesRead; + } + return totalBytesRead; + } + + private DataViewType GetDataViewType() + { + if (_type) + return new ImageDataViewType(); + else + return new VectorDataViewType(NumberDataViewType.Byte); + } + protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() - => _parent.ColumnPairs.Select(x => new DataViewSchema.DetachedColumn(x.outputColumnName, _imageType, null)).ToArray(); + => _parent.ColumnPairs.Select(x => new DataViewSchema.DetachedColumn(x.outputColumnName, GetDataViewType(), null)).ToArray(); } } @@ -238,8 +369,7 @@ protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() public sealed class ImageLoadingEstimator : TrivialEstimator { - private readonly ImageDataViewType _imageType; - + private readonly bool _type; /// /// Load images in memory. /// @@ -247,14 +377,26 @@ public sealed class ImageLoadingEstimator : TrivialEstimatorFolder where to look for images. /// Names of input and output columns. internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, params (string outputColumnName, string inputColumnName)[] columns) - : this(env, new ImageLoadingTransformer(env, imageFolder, columns)) + : this(env, new ImageLoadingTransformer(env, imageFolder, true, columns), true) + { + } + + /// + /// Load images in memory. + /// + /// The host environment. + /// Folder where to look for images. + /// Image type flag - true for ImageDataViewType or false for VectorDataView. Defaults to true i.e ImageDataViewType if not specified. + /// Names of input and output columns. 
+ internal ImageLoadingEstimator(IHostEnvironment env, string imageFolder, bool type = true, params (string outputColumnName, string inputColumnName)[] columns) + : this(env, new ImageLoadingTransformer(env, imageFolder, type, columns), type) { } - internal ImageLoadingEstimator(IHostEnvironment env, ImageLoadingTransformer transformer) + internal ImageLoadingEstimator(IHostEnvironment env, ImageLoadingTransformer transformer, bool type = true) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageLoadingEstimator)), transformer) { - _imageType = new ImageDataViewType(); + _type = type; } /// @@ -272,7 +414,10 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) if (!(col.ItemType is TextDataViewType) || col.Kind != SchemaShape.Column.VectorKind.Scalar) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", inputColumnName, TextDataViewType.Instance.ToString(), col.GetTypeString()); - result[outputColumnName] = new SchemaShape.Column(outputColumnName, SchemaShape.Column.VectorKind.Scalar, _imageType, false); + if (_type) + result[outputColumnName] = new SchemaShape.Column(outputColumnName, SchemaShape.Column.VectorKind.Scalar, new ImageDataViewType(), false); + else + result[outputColumnName] = new SchemaShape.Column(outputColumnName, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Byte, false); } return new SchemaShape(result.Values); diff --git a/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj b/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj index db04a98c57..0bc4d94f1a 100644 --- a/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj +++ b/src/Microsoft.ML.ImageAnalytics/Microsoft.ML.ImageAnalytics.csproj @@ -12,6 +12,5 @@ - - + diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index a64336477b..bb70334956 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -1242,26 +1242,25 @@ public void TensorFlowImageClassification() IDataView trainDataset = trainTestData.TrainSet; IDataView testDataset = trainTestData.TestSet; - - var pipeline = mlContext.Model.ImageClassification( - "ImagePath", "Label", - // Just by changing/selecting InceptionV3 here instead of - // ResnetV2101 you can try a different architecture/pre-trained - // model. - // Uncomment reuseTrainSetBottleneckCachedValues and - // reuseValidationSetBottleneckCachedValues to reuse trained model - // for faster debugging. 
- arch: ImageClassificationEstimator.Architecture.ResnetV2101, - epoch: 50, - batchSize: 10, - learningRate: 0.01f, - metricsCallback: (metrics) => Console.WriteLine(metrics), - // reuseTrainSetBottleneckCachedValues: true, - // reuseValidationSetBottleneckCachedValues: true, - validationSet: testDataset, - testOnTrainSet: false, - disableEarlyStopping: true) - .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel")); + var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Fit(testDataset) + .Transform(testDataset); + + var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Append(mlContext.Model.ImageClassification( + "Image", "Label", + // Just by changing/selecting InceptionV3 here instead of + // ResnetV2101 you can try a different architecture/pre-trained + // model. + arch: ImageClassificationEstimator.Architecture.ResnetV2101, + epoch: 50, + batchSize: 10, + learningRate: 0.01f, + metricsCallback: (metrics) => Console.WriteLine(metrics), + testOnTrainSet: false, + validationSet: validationSet, + disableEarlyStopping: true) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel"))); var trainedModel = pipeline.Fit(trainDataset); @@ -1378,16 +1377,24 @@ public void TensorFlowImageClassificationEarlyStoppingIncreasing() IDataView testDataset = trainTestData.TestSet; int lastEpoch = 0; - var pipeline = mlContext.Model.ImageClassification( - "ImagePath", "Label", - arch: ImageClassificationEstimator.Architecture.ResnetV2101, - epoch: 100, - batchSize: 5, - learningRate: 0.01f, - earlyStopping: new ImageClassificationEstimator.EarlyStopping(), - metricsCallback: metric => { Console.WriteLine(metric); lastEpoch = metric.Train != null ? metric.Train.Epoch : 0; }, - testOnTrainSet: false, - validationSet: testDataset); + var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Fit(testDataset) + .Transform(testDataset); + + var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Append(mlContext.Model.ImageClassification( + "Image", "Label", + // Just by changing/selecting InceptionV3 here instead of + // ResnetV2101 you can try a different architecture/pre-trained + // model. + arch: ImageClassificationEstimator.Architecture.ResnetV2101, + epoch: 100, + batchSize: 5, + learningRate: 0.01f, + earlyStopping: new ImageClassificationEstimator.EarlyStopping(), + metricsCallback: (metrics) => { Console.WriteLine(metrics); lastEpoch = metrics.Train != null ? 
metrics.Train.Epoch : 0; }, + testOnTrainSet: false, + validationSet: validationSet)); var trainedModel = pipeline.Fit(trainDataset); mlContext.Model.Save(trainedModel, shuffledFullImagesDataset.Schema, @@ -1459,16 +1466,24 @@ public void TensorFlowImageClassificationEarlyStoppingDecreasing() IDataView testDataset = trainTestData.TestSet; int lastEpoch = 0; - var pipeline = mlContext.Model.ImageClassification( - "ImagePath", "Label", - arch: ImageClassificationEstimator.Architecture.ResnetV2101, - epoch: 100, - batchSize: 5, - learningRate: 0.01f, - earlyStopping: new ImageClassificationEstimator.EarlyStopping(metric: ImageClassificationEstimator.EarlyStoppingMetric.Loss), - metricsCallback: (metric) => { Console.WriteLine(metric); lastEpoch = metric.Train != null ? metric.Train.Epoch : 0; }, - testOnTrainSet: false, - validationSet: testDataset); + var validationSet = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Fit(testDataset) + .Transform(testDataset); + + var pipeline = mlContext.Transforms.LoadImages("Image", fullImagesetFolderPath, false, "ImagePath") // false indicates we want the image as a VBuffer + .Append(mlContext.Model.ImageClassification( + "Image", "Label", + // Just by changing/selecting InceptionV3 here instead of + // ResnetV2101 you can try a different architecture/pre-trained + // model. + arch: ImageClassificationEstimator.Architecture.ResnetV2101, + epoch: 100, + batchSize: 5, + learningRate: 0.01f, + earlyStopping: new ImageClassificationEstimator.EarlyStopping(metric: ImageClassificationEstimator.EarlyStoppingMetric.Loss), + metricsCallback: (metrics) => {Console.WriteLine(metrics); lastEpoch = metrics.Train != null ? metrics.Train.Epoch : 0;}, + testOnTrainSet: false, + validationSet: validationSet)); var trainedModel = pipeline.Fit(trainDataset); mlContext.Model.Save(trainedModel, shuffledFullImagesDataset.Schema, @@ -1508,7 +1523,6 @@ public static IEnumerable LoadImagesFromDirectory(string folder, { var files = Directory.GetFiles(folder, "*", searchOption: SearchOption.AllDirectories); - foreach (var file in files) { if (Path.GetExtension(file) != ".jpg")
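For reviewers, a minimal usage sketch of the two modes of the new public LoadImages overload this change introduces. The folder path, file name, column names, and the ImageRow/LoadImagesSketch types below are placeholders for illustration only, not part of the diff:

```csharp
using System;
using Microsoft.ML;

// Placeholder row type: a single text column holding the relative path to an image.
class ImageRow
{
    public string ImagePath { get; set; }
}

class LoadImagesSketch
{
    static void Main()
    {
        var mlContext = new MLContext();

        // Placeholder input: one row pointing at a jpg (the path is illustrative only;
        // nothing reads the file until a cursor over the transformed data is consumed).
        var data = mlContext.Data.LoadFromEnumerable(new[] { new ImageRow { ImagePath = "cat.jpg" } });
        var imageFolder = "images";

        // useImageType: true keeps the pre-existing behavior: the output column is an
        // ImageDataViewType (Bitmap-backed image).
        var asImage = mlContext.Transforms.LoadImages("ImageObject", imageFolder, true, "ImagePath");

        // useImageType: false (new in this change) loads the raw file bytes into a
        // VBuffer<byte> column, which is the input type ImageClassification now expects.
        var asBytes = mlContext.Transforms.LoadImages("ImageBytes", imageFolder, false, "ImagePath");

        var loaded = asBytes.Fit(data).Transform(data);
        Console.WriteLine(loaded.Schema["ImageBytes"].Type); // a vector of Byte
    }
}
```

Note that the samples and tests above apply the byte-mode LoadImages to the validation set up front (Fit/Transform on testDataset) before passing it as validationSet, because the trainer consumes the validation data directly rather than through the pipeline.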