diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs index 914eebe6dd..8846bfbf5e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ImageClassification/ResnetV2101TransferLearningTrainTestSplit.cs @@ -41,7 +41,7 @@ public static void Example() //Load all the original images info IEnumerable images = LoadImagesFromDirectory( - folder: fullImagesetFolderPath, useFolderNameasLabel: true); + folder: fullImagesetFolderPath, useFolderNameAsLabel: true); IDataView shuffledFullImagesDataset = mlContext.Data.ShuffleRows( mlContext.Data.LoadFromEnumerable(images)); @@ -63,14 +63,14 @@ public static void Example() // Just by changing/selecting InceptionV3 here instead of // ResnetV2101 you can try a different architecture/pre-trained // model. 
- arch: ImageClassificationEstimator.Architecture.ResnetV2101, + arch: ImageClassificationEstimator.Architecture.ResnetV2101, epoch: 50, batchSize: 10, learningRate: 0.01f, metricsCallback: (metrics) => Console.WriteLine(metrics), validationSet: testDataset, - disableEarlyStopping: true); - + disableEarlyStopping: true) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel")); Console.WriteLine("*** Training the image classification model with " + "DNN Transfer Learning on top of the selected pre-trained " + @@ -84,7 +84,7 @@ public static void Example() watch.Stop(); long elapsedMs = watch.ElapsedMilliseconds; - Console.WriteLine("Training with transfer learning took: " + + Console.WriteLine("Training with transfer learning took: " + (elapsedMs / 1000).ToString() + " seconds"); mlContext.Model.Save(trainedModel, shuffledFullImagesDataset.Schema, @@ -97,12 +97,8 @@ public static void Example() EvaluateModel(mlContext, testDataset, loadedModel); - VBuffer> keys = default; - loadedModel.GetOutputSchema(schema)["Label"].GetKeyValues(ref keys); - watch = System.Diagnostics.Stopwatch.StartNew(); - TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel, - keys.DenseValues().ToArray()); + TrySinglePrediction(fullImagesetFolderPath, mlContext, loadedModel); watch.Stop(); elapsedMs = watch.ElapsedMilliseconds; @@ -120,8 +116,7 @@ public static void Example() } private static void TrySinglePrediction(string imagesForPredictions, - MLContext mlContext, ITransformer trainedModel, - ReadOnlyMemory[] originalLabels) + MLContext mlContext, ITransformer trainedModel) { // Create prediction function to try one prediction var predictionEngine = mlContext.Model @@ -136,12 +131,11 @@ private static void TrySinglePrediction(string imagesForPredictions, }; var prediction = predictionEngine.Predict(imageToPredict); - var index = prediction.PredictedLabel; Console.WriteLine($"ImageFile : " + 
$"[{Path.GetFileName(imageToPredict.ImagePath)}], " + $"Scores : [{string.Join(",", prediction.Score)}], " + - $"Predicted Label : {originalLabels[index]}"); + $"Predicted Label : {prediction.PredictedLabel}"); } @@ -168,7 +162,7 @@ private static void EvaluateModel(MLContext mlContext, } public static IEnumerable LoadImagesFromDirectory(string folder, - bool useFolderNameasLabel = true) + bool useFolderNameAsLabel = true) { var files = Directory.GetFiles(folder, "*", searchOption: SearchOption.AllDirectories); @@ -179,7 +173,7 @@ public static IEnumerable LoadImagesFromDirectory(string folder, continue; var label = Path.GetFileName(file); - if (useFolderNameasLabel) + if (useFolderNameAsLabel) label = Directory.GetParent(file).Name; else { @@ -301,7 +295,7 @@ public class ImagePrediction public float[] Score; [ColumnName("PredictedLabel")] - public UInt32 PredictedLabel; + public string PredictedLabel; } } } diff --git a/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs b/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs index 1b80c9f6ba..f3eaf6c544 100644 --- a/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs +++ b/src/Microsoft.ML.Dnn/ImageClassificationTransform.cs @@ -64,6 +64,7 @@ public sealed class ImageClassificationTransformer : RowToRowTransformerBase private Graph Graph => _session.graph; private readonly string[] _inputs; private readonly string[] _outputs; + private ReadOnlyMemory[] _keyValueAnnotations; private readonly string _labelColumnName; private readonly string _finalModelPrefix; private readonly Architecture _arch; @@ -105,11 +106,24 @@ private static ImageClassificationTransformer Create(IHostEnvironment env, Model // int: number of output columns // for each output column // int: id of output column name - // stream: tensorFlow model. 
+ // string: value of label column name + // string: prefix of final model and checkpoint files/folder for storing graph files + // int: value of the utilized model architecture for transfer learning + // string: value of score column name + // string: value of predicted label column name + // float: value of learning rate + // int: number of prediction classes + // for each key value annotation column + // string: value of key value annotations + // string: name of prediction tensor + // string: name of softmax tensor + // string: name of JPEG data tensor + // string: name of resized image tensor + // stream (byte): tensorFlow model. GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool addBatchDimensionInput, out string labelColumn, out string checkpointName, out Architecture arch, out string scoreColumnName, - out string predictedColumnName, out float learningRate, out int classCount, out string predictionTensorName, out string softMaxTensorName, + out string predictedColumnName, out float learningRate, out int classCount, out string[] keyValueAnnotations, out string predictionTensorName, out string softMaxTensorName, out string jpegDataTensorName, out string resizeTensorName); byte[] modelBytes = null; @@ -119,7 +133,7 @@ private static ImageClassificationTransformer Create(IHostEnvironment env, Model return new ImageClassificationTransformer(env, DnnUtils.LoadTFSession(env, modelBytes), outputs, inputs, null, addBatchDimensionInput, 1, labelColumn, checkpointName, arch, scoreColumnName, predictedColumnName, learningRate, null, classCount, true, predictionTensorName, - softMaxTensorName, jpegDataTensorName, resizeTensorName); + softMaxTensorName, jpegDataTensorName, resizeTensorName, keyValueAnnotations); } @@ -628,7 +642,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat private static void GetModelInfo(IHostEnvironment env, ModelLoadContext ctx, out string[] inputs, out string[] outputs, out bool 
addBatchDimensionInput, out string labelColumn, out string checkpointName, out Architecture arch, - out string scoreColumnName, out string predictedColumnName, out float learningRate, out int classCount, out string predictionTensorName, out string softMaxTensorName, + out string scoreColumnName, out string predictedColumnName, out float learningRate, out int classCount, out string[] keyValueAnnotations, out string predictionTensorName, out string softMaxTensorName, out string jpegDataTensorName, out string resizeTensorName) { addBatchDimensionInput = ctx.Reader.ReadBoolByte(); @@ -652,6 +666,12 @@ private static void GetModelInfo(IHostEnvironment env, ModelLoadContext ctx, out predictedColumnName = ctx.Reader.ReadString(); learningRate = ctx.Reader.ReadFloat(); classCount = ctx.Reader.ReadInt32(); + + env.CheckDecode(classCount > 0); + keyValueAnnotations = new string[classCount]; + for (int j = 0; j < keyValueAnnotations.Length; j++) + keyValueAnnotations[j] = ctx.LoadNonEmptyString(); + predictionTensorName = ctx.Reader.ReadString(); softMaxTensorName = ctx.Reader.ReadString(); jpegDataTensorName = ctx.Reader.ReadString(); @@ -662,7 +682,7 @@ internal ImageClassificationTransformer(IHostEnvironment env, Session session, s string[] inputColumnNames, string modelLocation, bool? addBatchDimensionInput, int batchSize, string labelColumnName, string finalModelPrefix, Architecture arch, string scoreColumnName, string predictedLabelColumnName, float learningRate, DataViewSchema inputSchema, int? 
classCount = null, bool loadModel = false, - string predictionTensorName = null, string softMaxTensorName = null, string jpegDataTensorName = null, string resizeTensorName = null) + string predictionTensorName = null, string softMaxTensorName = null, string jpegDataTensorName = null, string resizeTensorName = null, string[] labelAnnotations = null) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ImageClassificationTransformer))) { @@ -731,6 +751,24 @@ internal ImageClassificationTransformer(IHostEnvironment env, Session session, s (_evaluationStep, _) = AddEvaluationStep(_softMaxTensor, _labelTensor); _softmaxTensorName = _softMaxTensor.name; _predictionTensorName = _prediction.name; + + // Add annotations as key values, if they exist. + VBuffer> keysVBuffer = default; + if (inputSchema[labelColumnName].HasKeyValues()) + { + inputSchema[labelColumnName].GetKeyValues(ref keysVBuffer); + _keyValueAnnotations = keysVBuffer.DenseValues().ToArray(); + } + else + { + _keyValueAnnotations = Enumerable.Range(0, _classCount).Select(x => x.ToString().AsMemory()).ToArray(); + } + } + else + { + // Load annotations as key values, if they exist + if (labelAnnotations != null) + _keyValueAnnotations = labelAnnotations.Select(v => v.AsMemory()).ToArray(); } } @@ -738,6 +776,30 @@ internal ImageClassificationTransformer(IHostEnvironment env, Session session, s private protected override void SaveModel(ModelSaveContext ctx) { + // *** Binary format *** + // byte: indicator for frozen models + // byte: indicator for adding batch dimension in input + // int: number of input columns + // for each input column + // int: id of input column name + // int: number of output columns + // for each output column + // int: id of output column name + // string: value of label column name + // string: prefix of final model and checkpoint files/folder for storing graph files + // int: value of the utilized model architecture for transfer learning + // string: value of score column name 
+ // string: value of predicted label column name + // float: value of learning rate + // int: number of prediction classes + // for each key value annotation column + // string: value of key value annotations + // string: name of prediction tensor + // string: name of softmax tensor + // string: name of JPEG data tensor + // string: name of resized image tensor + // stream (byte): tensorFlow model. + Host.AssertValue(ctx); ctx.CheckAtModel(); ctx.SetVersionInfo(GetVersionInfo()); @@ -760,6 +822,12 @@ private protected override void SaveModel(ModelSaveContext ctx) ctx.Writer.Write(_predictedLabelColumnName); ctx.Writer.Write(_learningRate); ctx.Writer.Write(_classCount); + + Host.AssertNonEmpty(_keyValueAnnotations); + Host.Assert(_keyValueAnnotations.Length == _classCount); + for (int j = 0; j < _classCount; j++) + ctx.SaveNonEmptyString(_keyValueAnnotations[j]); + ctx.Writer.Write(_predictionTensorName); ctx.Writer.Write(_softmaxTensorName); ctx.Writer.Write(_jpegDataTensorName); @@ -845,6 +913,7 @@ public void UpdateCacheIfNeeded() var outputTensor = _runner.AddInput(processedTensor, 0).Run(); outputTensor[0].ToArray(ref _classProbability); outputTensor[1].ToScalar(ref _predictedLabel); + _predictedLabel += 1; outputTensor[0].Dispose(); outputTensor[1].Dispose(); processedTensor.Dispose(); @@ -890,9 +959,18 @@ private protected override Func GetDependenciesCore(Func a protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() { + var annotationBuilder = new DataViewSchema.Annotations.Builder(); + annotationBuilder.AddKeyValues(_parent._classCount, TextDataViewType.Instance, (ref VBuffer> dst) => + { + var editor = VBufferEditor.Create(ref dst, _parent._classCount); + for (int i = 0; i < _parent._classCount; i++) + editor.Values[i] = _parent._keyValueAnnotations[i]; + dst = editor.Commit(); + }); + var info = new DataViewSchema.DetachedColumn[_parent._outputs.Length]; - info[0] = new DataViewSchema.DetachedColumn(_parent._outputs[0], new 
VectorDataViewType(NumberDataViewType.Single, _parent._classCount), null); - info[1] = new DataViewSchema.DetachedColumn(_parent._outputs[1], NumberDataViewType.UInt32, null); + info[0] = new DataViewSchema.DetachedColumn(_parent._scoreColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._classCount), null); + info[1] = new DataViewSchema.DetachedColumn(_parent._predictedLabelColumnName, new KeyDataViewType(typeof(uint), _parent._classCount), annotationBuilder.ToAnnotations()); return info; } } @@ -1288,7 +1366,6 @@ internal sealed class Options : TransformInputBase private readonly Options _options; private readonly DnnModel _dnnModel; private readonly TF_DataType[] _tfInputTypes; - private readonly DataViewType[] _outputTypes; private ImageClassificationTransformer _transformer; internal ImageClassificationEstimator(IHostEnvironment env, Options options, DnnModel dnnModel) @@ -1297,7 +1374,6 @@ internal ImageClassificationEstimator(IHostEnvironment env, Options options, Dnn _options = options; _dnnModel = dnnModel; _tfInputTypes = new[] { TF_DataType.TF_STRING }; - _outputTypes = new[] { new VectorDataViewType(NumberDataViewType.Single), NumberDataViewType.UInt32.GetItemType() }; } private static Options CreateArguments(DnnModel tensorFlowModel, string[] outputColumnNames, string[] inputColumnName, bool addBatchDimensionInput) @@ -1327,12 +1403,16 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) if (col.ItemType != expectedType) throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString()); } - for (var i = 0; i < _options.OutputColumns.Length; i++) - { - resultDic[_options.OutputColumns[i]] = new SchemaShape.Column(_options.OutputColumns[i], - _outputTypes[i].IsKnownSizeVector() ? 
SchemaShape.Column.VectorKind.Vector - : SchemaShape.Column.VectorKind.VariableVector, _outputTypes[i].GetItemType(), false); - } + + resultDic[_options.OutputColumns[0]] = new SchemaShape.Column(_options.OutputColumns[0], + SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); + + var metadata = new List(); + metadata.Add(new SchemaShape.Column(AnnotationUtils.Kinds.KeyValues, SchemaShape.Column.VectorKind.Vector, TextDataViewType.Instance, false)); + + resultDic[_options.OutputColumns[1]] = new SchemaShape.Column(_options.OutputColumns[1], + SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.UInt32, true, new SchemaShape(metadata.ToArray())); + return new SchemaShape(resultDic.Values); } @@ -1342,8 +1422,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema) public ImageClassificationTransformer Fit(IDataView input) { _host.CheckValue(input, nameof(input)); - if (_transformer == null) - _transformer = new ImageClassificationTransformer(_host, _options, _dnnModel, input); + _transformer = new ImageClassificationTransformer(_host, _options, _dnnModel, input); // Validate input schema. _transformer.GetOutputSchema(input.Schema); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index ce851b5acd..a64336477b 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -1245,12 +1245,23 @@ public void TensorFlowImageClassification() var pipeline = mlContext.Model.ImageClassification( "ImagePath", "Label", + // Just by changing/selecting InceptionV3 here instead of + // ResnetV2101 you can try a different architecture/pre-trained + // model. + // Uncomment reuseTrainSetBottleneckCachedValues and + // reuseValidationSetBottleneckCachedValues to reuse trained model + // for faster debugging. 
arch: ImageClassificationEstimator.Architecture.ResnetV2101, - epoch: 5, - batchSize: 5, + epoch: 50, + batchSize: 10, learningRate: 0.01f, + metricsCallback: (metrics) => Console.WriteLine(metrics), + // reuseTrainSetBottleneckCachedValues: true, + // reuseValidationSetBottleneckCachedValues: true, + validationSet: testDataset, testOnTrainSet: false, - disableEarlyStopping: true); + disableEarlyStopping: true) + .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel")); var trainedModel = pipeline.Fit(trainDataset); @@ -1262,6 +1273,7 @@ public void TensorFlowImageClassification() using (var file = File.OpenRead("model.zip")) loadedModel = mlContext.Model.Load(file, out schema); + // Testing EvaluateModel: group testing on test dataset IDataView predictions = trainedModel.Transform(testDataset); var metrics = mlContext.MulticlassClassification.Evaluate(predictions); @@ -1281,6 +1293,52 @@ public void TensorFlowImageClassification() Assert.Equal(1, metrics.MicroAccuracy); Assert.Equal(1, metrics.MacroAccuracy); } + + // Testing TrySinglePrediction: Utilizing PredictionEngine for single + // predictions. Here, two pre-selected images are utilized in testing + // the Prediction engine. 
+ var predictionEngine = mlContext.Model + .CreatePredictionEngine(loadedModel); + + IEnumerable testImages = LoadImagesFromDirectory( + fullImagesetFolderPath, true); + + string[] directories = Directory.GetDirectories(fullImagesetFolderPath); + string[] labels = new string[directories.Length]; + for(int j = 0; j < labels.Length; j++) + { + var dir = new DirectoryInfo(directories[j]); + labels[j] = dir.Name; + } + + // Test daisy image + ImageData firstImageToPredict = new ImageData + { + ImagePath = Path.Combine(fullImagesetFolderPath, "daisy", "5794835_d15905c7c8_n.jpg") + }; + + // Test rose image + ImageData secondImageToPredict = new ImageData + { + ImagePath = Path.Combine(fullImagesetFolderPath, "roses", "12240303_80d87f77a3_n.jpg") + }; + + var predictionFirst = predictionEngine.Predict(firstImageToPredict); + var predictionSecond = predictionEngine.Predict(secondImageToPredict); + + var labelColumnFirst = schema.GetColumnOrNull("Label").Value; + var labelTypeFirst = labelColumnFirst.Type; + var labelCountFirst = labelTypeFirst.GetKeyCount(); + var labelColumnSecond = schema.GetColumnOrNull("Label").Value; + var labelTypeSecond = labelColumnSecond.Type; + var labelCountSecond = labelTypeSecond.GetKeyCount(); + + Assert.Equal((int)labelCountFirst, predictionFirst.Score.Length); + Assert.Equal((int)labelCountSecond, predictionSecond.Score.Length); + Assert.Equal("daisy", predictionFirst.PredictedLabel); + Assert.Equal("roses", predictionSecond.PredictedLabel); + Assert.True(Array.IndexOf(labels, predictionFirst.PredictedLabel) > -1); + Assert.True(Array.IndexOf(labels, predictionSecond.PredictedLabel) > -1); } [TensorFlowFact] @@ -1544,7 +1602,7 @@ public class ImagePrediction public float[] Score; [ColumnName("PredictedLabel")] - public UInt32 PredictedLabel; + public string PredictedLabel; } }