From 577afc9d7c85e00255d860080281796be775d5bc Mon Sep 17 00:00:00 2001 From: Aishwarya Bhandare Date: Fri, 15 Nov 2019 16:51:27 -0800 Subject: [PATCH 1/2] remove wrong check in Mapper, add sample for issue repro --- .../Dynamic/TensorFlow/ModelScorer.cs | 330 ++++++++++++++++++ .../TensorflowTransform.cs | 59 +++- 2 files changed, 370 insertions(+), 19 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs new file mode 100644 index 0000000000..6065282ba2 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs @@ -0,0 +1,330 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class ModelScorerSample + { + public static void Example() + { + string assetsRelativePath = @"../../../assets"; + string assetsPath = GetAbsolutePath(assetsRelativePath); + + var tagsTsv = Path.Combine(assetsPath, "inputs", "images", "tags.tsv"); + var imagesFolder = Path.Combine(assetsPath, "inputs", "images"); + //var inceptionPb = Path.Combine(assetsPath, "inputs", "inception", "tensorflow_inception_graph.pb"); + var inceptionPb = @"C:\Users\aibhanda\Downloads\PotatoDetector.pb"; + var labelsTxt = Path.Combine(assetsPath, "inputs", "inception", "imagenet_comp_graph_label_strings.txt"); + + try + { + var modelScorer = new TFModelScorer(tagsTsv, imagesFolder, inceptionPb, labelsTxt); + modelScorer.Score(); + + } + catch (Exception ex) + { + ConsoleHelpers.ConsoleWriteException(ex.ToString()); + } + + ConsoleHelpers.ConsolePressAnyKey(); + } + + public static string GetAbsolutePath(string relativePath) + { + FileInfo _dataRoot = new FileInfo(typeof(ModelScorerSample).Assembly.Location); + string assemblyFolderPath = _dataRoot.Directory.FullName; + string fullPath = Path.Combine(assemblyFolderPath, relativePath); + return fullPath; + } + } + public class TFModelScorer + { + private readonly string dataLocation; + private readonly string imagesFolder; + private readonly string modelLocation; + private readonly string labelsLocation; + private readonly MLContext mlContext; + private static string ImageReal = nameof(ImageReal); + + public TFModelScorer(string dataLocation, string imagesFolder, string modelLocation, string labelsLocation) + { + this.dataLocation = dataLocation; + this.imagesFolder = imagesFolder; + this.modelLocation = modelLocation; + this.labelsLocation = labelsLocation; + mlContext = new MLContext(); + } + + public struct ImageNetSettings + { + public const int imageHeight = 224; + public const int imageWidth = 224; + public const float mean = 117; + public const bool channelsLast = true; + } + + public struct InceptionSettings + { + // for checking tensor names, you can use tools like Netron, + // which is installed by Visual Studio AI Tools + + // input tensor name + public const string inputTensorName = "input"; + + // output tensor name + public const string outputTensorName = "softmax2"; + } + + public void Score() + { + var model = LoadModel(dataLocation, imagesFolder, modelLocation); + + var predictions = PredictDataUsingModel(dataLocation, imagesFolder, labelsLocation, model).ToArray(); + + } + + public PredictionEngine LoadModel(string dataLocation, string imagesFolder, string modelLocation) + { + ConsoleHelpers.ConsoleWriteHeader("Read model"); + Console.WriteLine($"Model location: {modelLocation}"); + Console.WriteLine($"Images folder: {imagesFolder}"); + Console.WriteLine($"Training file: {dataLocation}"); + Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}"); + + var data = mlContext.Data.LoadFromTextFile(dataLocation, hasHeader: true); + + + var pipeline = mlContext.Transforms.LoadImages(outputColumnName: "input", imageFolder: imagesFolder, inputColumnName: nameof(ImageNetData.ImagePath)) + .Append(mlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: "input")) + .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "image_tensor", inputColumnName: "input", interleavePixelColors: true, outputAsFloatArray: false)) + .Append(mlContext.Model.LoadTensorFlowModel(modelLocation). + ScoreTensorFlowModel(outputColumnNames: new[] { "detection_boxes", "detection_classes", "detection_scores", "num_detections" }, + inputColumnNames: new[] { "image_tensor" }, addBatchDimensionInput: true)); + + /* + var pipeline = mlContext.Transforms.LoadImages(outputColumnName: "input", imageFolder: imagesFolder, inputColumnName: nameof(ImageNetData.ImagePath)) + .Append(mlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: "input")) + .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input", interleavePixelColors: ImageNetSettings.channelsLast, offsetImage: ImageNetSettings.mean, outputAsFloatArray: true)) + .Append(mlContext.Model.LoadTensorFlowModel(modelLocation). + ScoreTensorFlowModel(outputColumnNames: new[] { "softmax2" }, + inputColumnNames: new[] { "input" }, addBatchDimensionInput: true)); + */ + ITransformer model = pipeline.Fit(data); + + var predictionEngine = mlContext.Model.CreatePredictionEngine(model); + + return predictionEngine; + } + + public IEnumerable PredictDataUsingModel(string testLocation, + string imagesFolder, + string labelsLocation, + PredictionEngine model) + { + ConsoleHelpers.ConsoleWriteHeader("Classificate images"); + Console.WriteLine($"Images folder: {imagesFolder}"); + Console.WriteLine($"Training file: {testLocation}"); + Console.WriteLine($"Labels file: {labelsLocation}"); + + var labels = ModelHelpers.ReadLabels(labelsLocation); + + var testData = ImageNetData.ReadFromCsv(testLocation, imagesFolder); + + foreach (var sample in testData) + { + var probs = model.Predict(sample).PredictedLabels; + var imageData = new ImageNetDataProbability() + { + ImagePath = sample.ImagePath, + Label = sample.Label + }; + (imageData.PredictedLabel, imageData.Probability) = ModelHelpers.GetBestLabel(labels, probs); + imageData.ConsoleWrite(); + yield return imageData; + } + } + public class ImageNetPrediction + { + [ColumnName(TFModelScorer.InceptionSettings.outputTensorName)] + public float[] PredictedLabels; + } + + public class ImageNetData + { + [LoadColumn(0)] + public string ImagePath; + + [LoadColumn(1)] + public string Label; + + public static IEnumerable ReadFromCsv(string file, string folder) + { + return File.ReadAllLines(file) + .Select(x => x.Split('\t')) + .Select(x => new ImageNetData { ImagePath = Path.Combine(folder, x[0]), Label = x[1] }); + } + } + public class ImageNetDataProbability : ImageNetData + { + public string PredictedLabel; + public float Probability { get; set; } + } + } + + + public static class ConsoleHelpers + { + public static void ConsoleWriteHeader(params string[] lines) + { + var defaultColor = Console.ForegroundColor; + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine(" "); + foreach (var line in lines) + { + Console.WriteLine(line); + } + var maxLength = lines.Select(x => x.Length).Max(); + Console.WriteLine(new String('#', maxLength)); + Console.ForegroundColor = defaultColor; + } + + public static void ConsolePressAnyKey() + { + var defaultColor = Console.ForegroundColor; + Console.ForegroundColor = ConsoleColor.Green; + Console.WriteLine(" "); + Console.WriteLine("Press any key to finish."); + Console.ForegroundColor = defaultColor; + Console.ReadKey(); + } + + public static void ConsoleWriteException(params string[] lines) + { + var defaultColor = Console.ForegroundColor; + const string exceptionTitle = "EXCEPTION"; + + Console.WriteLine(" "); + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine(exceptionTitle); + Console.WriteLine(new String('#', exceptionTitle.Length)); + Console.ForegroundColor = defaultColor; + foreach (var line in lines) + { + Console.WriteLine(line); + } + } + + public static void ConsoleWrite(this TFModelScorer.ImageNetDataProbability self) + { + var defaultForeground = Console.ForegroundColor; + var labelColor = ConsoleColor.Magenta; + var probColor = ConsoleColor.Blue; + var exactLabel = ConsoleColor.Green; + var failLabel = ConsoleColor.Red; + + Console.Write("ImagePath: "); + Console.ForegroundColor = labelColor; + Console.Write($"{Path.GetFileName(self.ImagePath)}"); + Console.ForegroundColor = defaultForeground; + Console.Write(" labeled as "); + Console.ForegroundColor = labelColor; + Console.Write(self.Label); + Console.ForegroundColor = defaultForeground; + Console.Write(" predicted as "); + if (self.Label.Equals(self.PredictedLabel)) + { + Console.ForegroundColor = exactLabel; + Console.Write($"{self.PredictedLabel}"); + } + else + { + Console.ForegroundColor = failLabel; + Console.Write($"{self.PredictedLabel}"); + } + Console.ForegroundColor = defaultForeground; + Console.Write(" with probability "); + Console.ForegroundColor = probColor; + Console.Write(self.Probability); + Console.ForegroundColor = defaultForeground; + Console.WriteLine(""); + } + + } + + public static class ModelHelpers + { + static FileInfo _dataRoot = new FileInfo(typeof(ModelScorerSample).Assembly.Location); + + public static string GetAssetsPath(params string[] paths) + { + if (paths == null || paths.Length == 0) + return null; + + return Path.Combine(paths.Prepend(_dataRoot.Directory.FullName).ToArray()); + } + + public static string DeleteAssets(params string[] paths) + { + var location = GetAssetsPath(paths); + + if (!string.IsNullOrWhiteSpace(location) && File.Exists(location)) + File.Delete(location); + return location; + } + + public static (string, float) GetBestLabel(string[] labels, float[] probs) + { + var max = probs.Max(); + var index = probs.AsSpan().IndexOf(max); + return (labels[index], max); + } + + public static string[] ReadLabels(string labelsLocation) + { + return File.ReadAllLines(labelsLocation); + } + + public static IEnumerable Columns() where T : class + { + return typeof(T).GetProperties().Select(p => p.Name); + } + + public static IEnumerable Columns() where T : class + { + var typeofU = typeof(U); + return typeof(T).GetProperties().Where(c => c.PropertyType == typeofU).Select(p => p.Name); + } + + public static IEnumerable Columns() where T : class + { + var typeofUV = new[] { typeof(U), typeof(V) }; + return typeof(T).GetProperties().Where(c => typeofUV.Contains(c.PropertyType)).Select(p => p.Name); + } + + public static IEnumerable Columns() where T : class + { + var typeofUVW = new[] { typeof(U), typeof(V), typeof(W) }; + return typeof(T).GetProperties().Where(c => typeofUVW.Contains(c.PropertyType)).Select(p => p.Name); + } + + public static string[] ColumnsNumerical() where T : class + { + return Columns().ToArray(); + } + + public static string[] ColumnsString() where T : class + { + return Columns().ToArray(); + } + + public static string[] ColumnsDateTime() where T : class + { + return Columns().ToArray(); + } + } +} diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 3db6296eb0..1f688f55ed 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -513,7 +513,17 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : var colTypeDims = vecType.Dimensions.Select(dim => (int)dim).ToArray(); if (shape == null || (shape.Length == 0)) + { _fullySpecifiedShapes[i] = new TensorShape(colTypeDims); + if (_parent._addBatchDimensionInput) + { + var l = new int[_fullySpecifiedShapes[i].ndim + 1]; + l[0] = 1; + for (int ishape = 1; ishape < l.Length; ishape++) + l[ishape] = _fullySpecifiedShapes[i].dims[ishape - 1]; + _fullySpecifiedShapes[i] = new TensorShape(l); + } + } else { // If the column is one dimension we make sure that the total size of the TF shape matches. @@ -532,28 +542,39 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : if (typeValueCount % valCount != 0) throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {typeValueCount}."); - // If the shape is multi-dimensional, we should be able to create the length of the vector by plugging - // in a single value for the unknown shapes. For example, if the shape is [?,?,3], then there should exist a value - // d such that d*d*3 is equal to the length of the input column. - var d = numOfUnkDim > 0 ? Math.Pow(typeValueCount / valCount, 1.0 / numOfUnkDim) : 0; - if (d - (int)d != 0) - throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {typeValueCount}."); - - // Fill in the unknown dimensions. var originalShapeNdim = originalShape.ndim; var originalShapeDims = originalShape.dims; - var l = new int[originalShapeNdim]; - for (int ishape = 0; ishape < originalShapeNdim; ishape++) - l[ishape] = originalShapeDims[ishape] == -1 ? (int)d : originalShapeDims[ishape]; - _fullySpecifiedShapes[i] = new TensorShape(l); - } + var inputDataShapeNdim = colTypeDims.Length; + int[] l = new int[originalShapeNdim]; + int inputDataIndex = 0; + int tensorShapeIndex = 0; + + //If the input data passed has one dimension less than the expected input tensor shape + // and _addBatchDimensionInput option is set to true, set the batch size as 1. + if (originalShapeNdim - inputDataShapeNdim == 1 && _parent._addBatchDimensionInput) + { + l[0] = 1; + tensorShapeIndex = 1; + } + // If the number of input data dimensions do not match with that of tensor dimensions, throw. + else if(originalShapeNdim != inputDataShapeNdim) + throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape {colTypeDims.ToString()}."); - if (_parent._addBatchDimensionInput) - { - var l = new int[_fullySpecifiedShapes[i].ndim + 1]; - l[0] = 1; - for (int ishape = 1; ishape < l.Length; ishape++) - l[ishape] = _fullySpecifiedShapes[i].dims[ishape - 1]; + for (; tensorShapeIndex < originalShapeNdim; tensorShapeIndex++) + { + //Fill in tensor shape for unknown dims with input data shape. + if (originalShapeDims[tensorShapeIndex] == -1) + { + l[tensorShapeIndex] = colTypeDims[inputDataIndex]; + inputDataIndex++; + } + // If the tensor shape dim is known, assert that input data dim matches with + // expected tensor shape dim. + else if(originalShapeDims[tensorShapeIndex] == colTypeDims[inputDataIndex]) + l[tensorShapeIndex] = originalShapeDims[tensorShapeIndex]; + else + throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape {colTypeDims.ToString()}."); + } _fullySpecifiedShapes[i] = new TensorShape(l); } } From 78823f96e57d31ea16d72342b49278c10c0cae98 Mon Sep 17 00:00:00 2001 From: Aishwarya Bhandare Date: Wed, 27 Nov 2019 15:32:13 -0800 Subject: [PATCH 2/2] added comprehensive conditions for inferring input tensor shape, fixed tests. --- .../Dynamic/TensorFlow/ModelScorer.cs | 330 ------------------ .../TensorflowTransform.cs | 144 ++++++-- .../TensorflowTests.cs | 2 +- 3 files changed, 116 insertions(+), 360 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs deleted file mode 100644 index 6065282ba2..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ModelScorer.cs +++ /dev/null @@ -1,330 +0,0 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Microsoft.ML; -using Microsoft.ML.Data; - -namespace Samples.Dynamic -{ - public static class ModelScorerSample - { - public static void Example() - { - string assetsRelativePath = @"../../../assets"; - string assetsPath = GetAbsolutePath(assetsRelativePath); - - var tagsTsv = Path.Combine(assetsPath, "inputs", "images", "tags.tsv"); - var imagesFolder = Path.Combine(assetsPath, "inputs", "images"); - //var inceptionPb = Path.Combine(assetsPath, "inputs", "inception", "tensorflow_inception_graph.pb"); - var inceptionPb = @"C:\Users\aibhanda\Downloads\PotatoDetector.pb"; - var labelsTxt = Path.Combine(assetsPath, "inputs", "inception", "imagenet_comp_graph_label_strings.txt"); - - try - { - var modelScorer = new TFModelScorer(tagsTsv, imagesFolder, inceptionPb, labelsTxt); - modelScorer.Score(); - - } - catch (Exception ex) - { - ConsoleHelpers.ConsoleWriteException(ex.ToString()); - } - - ConsoleHelpers.ConsolePressAnyKey(); - } - - public static string GetAbsolutePath(string relativePath) - { - FileInfo _dataRoot = new FileInfo(typeof(ModelScorerSample).Assembly.Location); - string assemblyFolderPath = _dataRoot.Directory.FullName; - string fullPath = Path.Combine(assemblyFolderPath, relativePath); - return fullPath; - } - } - public class TFModelScorer - { - private readonly string dataLocation; - private readonly string imagesFolder; - private readonly string modelLocation; - private readonly string labelsLocation; - private readonly MLContext mlContext; - private static string ImageReal = nameof(ImageReal); - - public TFModelScorer(string dataLocation, string imagesFolder, string modelLocation, string labelsLocation) - { - this.dataLocation = dataLocation; - this.imagesFolder = imagesFolder; - this.modelLocation = modelLocation; - this.labelsLocation = labelsLocation; - mlContext = new MLContext(); - } - - public struct ImageNetSettings - { - public const int imageHeight = 224; - public const int imageWidth = 224; - public const float mean = 117; - public const bool channelsLast = true; - } - - public struct InceptionSettings - { - // for checking tensor names, you can use tools like Netron, - // which is installed by Visual Studio AI Tools - - // input tensor name - public const string inputTensorName = "input"; - - // output tensor name - public const string outputTensorName = "softmax2"; - } - - public void Score() - { - var model = LoadModel(dataLocation, imagesFolder, modelLocation); - - var predictions = PredictDataUsingModel(dataLocation, imagesFolder, labelsLocation, model).ToArray(); - - } - - public PredictionEngine LoadModel(string dataLocation, string imagesFolder, string modelLocation) - { - ConsoleHelpers.ConsoleWriteHeader("Read model"); - Console.WriteLine($"Model location: {modelLocation}"); - Console.WriteLine($"Images folder: {imagesFolder}"); - Console.WriteLine($"Training file: {dataLocation}"); - Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}"); - - var data = mlContext.Data.LoadFromTextFile(dataLocation, hasHeader: true); - - - var pipeline = mlContext.Transforms.LoadImages(outputColumnName: "input", imageFolder: imagesFolder, inputColumnName: nameof(ImageNetData.ImagePath)) - .Append(mlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: "input")) - .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "image_tensor", inputColumnName: "input", interleavePixelColors: true, outputAsFloatArray: false)) - .Append(mlContext.Model.LoadTensorFlowModel(modelLocation). - ScoreTensorFlowModel(outputColumnNames: new[] { "detection_boxes", "detection_classes", "detection_scores", "num_detections" }, - inputColumnNames: new[] { "image_tensor" }, addBatchDimensionInput: true)); - - /* - var pipeline = mlContext.Transforms.LoadImages(outputColumnName: "input", imageFolder: imagesFolder, inputColumnName: nameof(ImageNetData.ImagePath)) - .Append(mlContext.Transforms.ResizeImages(outputColumnName: "input", imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: "input")) - .Append(mlContext.Transforms.ExtractPixels(outputColumnName: "input", interleavePixelColors: ImageNetSettings.channelsLast, offsetImage: ImageNetSettings.mean, outputAsFloatArray: true)) - .Append(mlContext.Model.LoadTensorFlowModel(modelLocation). - ScoreTensorFlowModel(outputColumnNames: new[] { "softmax2" }, - inputColumnNames: new[] { "input" }, addBatchDimensionInput: true)); - */ - ITransformer model = pipeline.Fit(data); - - var predictionEngine = mlContext.Model.CreatePredictionEngine(model); - - return predictionEngine; - } - - public IEnumerable PredictDataUsingModel(string testLocation, - string imagesFolder, - string labelsLocation, - PredictionEngine model) - { - ConsoleHelpers.ConsoleWriteHeader("Classificate images"); - Console.WriteLine($"Images folder: {imagesFolder}"); - Console.WriteLine($"Training file: {testLocation}"); - Console.WriteLine($"Labels file: {labelsLocation}"); - - var labels = ModelHelpers.ReadLabels(labelsLocation); - - var testData = ImageNetData.ReadFromCsv(testLocation, imagesFolder); - - foreach (var sample in testData) - { - var probs = model.Predict(sample).PredictedLabels; - var imageData = new ImageNetDataProbability() - { - ImagePath = sample.ImagePath, - Label = sample.Label - }; - (imageData.PredictedLabel, imageData.Probability) = ModelHelpers.GetBestLabel(labels, probs); - imageData.ConsoleWrite(); - yield return imageData; - } - } - public class ImageNetPrediction - { - [ColumnName(TFModelScorer.InceptionSettings.outputTensorName)] - public float[] PredictedLabels; - } - - public class ImageNetData - { - [LoadColumn(0)] - public string ImagePath; - - [LoadColumn(1)] - public string Label; - - public static IEnumerable ReadFromCsv(string file, string folder) - { - return File.ReadAllLines(file) - .Select(x => x.Split('\t')) - .Select(x => new ImageNetData { ImagePath = Path.Combine(folder, x[0]), Label = x[1] }); - } - } - public class ImageNetDataProbability : ImageNetData - { - public string PredictedLabel; - public float Probability { get; set; } - } - } - - - public static class ConsoleHelpers - { - public static void ConsoleWriteHeader(params string[] lines) - { - var defaultColor = Console.ForegroundColor; - Console.ForegroundColor = ConsoleColor.Yellow; - Console.WriteLine(" "); - foreach (var line in lines) - { - Console.WriteLine(line); - } - var maxLength = lines.Select(x => x.Length).Max(); - Console.WriteLine(new String('#', maxLength)); - Console.ForegroundColor = defaultColor; - } - - public static void ConsolePressAnyKey() - { - var defaultColor = Console.ForegroundColor; - Console.ForegroundColor = ConsoleColor.Green; - Console.WriteLine(" "); - Console.WriteLine("Press any key to finish."); - Console.ForegroundColor = defaultColor; - Console.ReadKey(); - } - - public static void ConsoleWriteException(params string[] lines) - { - var defaultColor = Console.ForegroundColor; - const string exceptionTitle = "EXCEPTION"; - - Console.WriteLine(" "); - Console.ForegroundColor = ConsoleColor.Red; - Console.WriteLine(exceptionTitle); - Console.WriteLine(new String('#', exceptionTitle.Length)); - Console.ForegroundColor = defaultColor; - foreach (var line in lines) - { - Console.WriteLine(line); - } - } - - public static void ConsoleWrite(this TFModelScorer.ImageNetDataProbability self) - { - var defaultForeground = Console.ForegroundColor; - var labelColor = ConsoleColor.Magenta; - var probColor = ConsoleColor.Blue; - var exactLabel = ConsoleColor.Green; - var failLabel = ConsoleColor.Red; - - Console.Write("ImagePath: "); - Console.ForegroundColor = labelColor; - Console.Write($"{Path.GetFileName(self.ImagePath)}"); - Console.ForegroundColor = defaultForeground; - Console.Write(" labeled as "); - Console.ForegroundColor = labelColor; - Console.Write(self.Label); - Console.ForegroundColor = defaultForeground; - Console.Write(" predicted as "); - if (self.Label.Equals(self.PredictedLabel)) - { - Console.ForegroundColor = exactLabel; - Console.Write($"{self.PredictedLabel}"); - } - else - { - Console.ForegroundColor = failLabel; - Console.Write($"{self.PredictedLabel}"); - } - Console.ForegroundColor = defaultForeground; - Console.Write(" with probability "); - Console.ForegroundColor = probColor; - Console.Write(self.Probability); - Console.ForegroundColor = defaultForeground; - Console.WriteLine(""); - } - - } - - public static class ModelHelpers - { - static FileInfo _dataRoot = new FileInfo(typeof(ModelScorerSample).Assembly.Location); - - public static string GetAssetsPath(params string[] paths) - { - if (paths == null || paths.Length == 0) - return null; - - return Path.Combine(paths.Prepend(_dataRoot.Directory.FullName).ToArray()); - } - - public static string DeleteAssets(params string[] paths) - { - var location = GetAssetsPath(paths); - - if (!string.IsNullOrWhiteSpace(location) && File.Exists(location)) - File.Delete(location); - return location; - } - - public static (string, float) GetBestLabel(string[] labels, float[] probs) - { - var max = probs.Max(); - var index = probs.AsSpan().IndexOf(max); - return (labels[index], max); - } - - public static string[] ReadLabels(string labelsLocation) - { - return File.ReadAllLines(labelsLocation); - } - - public static IEnumerable Columns() where T : class - { - return typeof(T).GetProperties().Select(p => p.Name); - } - - public static IEnumerable Columns() where T : class - { - var typeofU = typeof(U); - return typeof(T).GetProperties().Where(c => c.PropertyType == typeofU).Select(p => p.Name); - } - - public static IEnumerable Columns() where T : class - { - var typeofUV = new[] { typeof(U), typeof(V) }; - return typeof(T).GetProperties().Where(c => typeofUV.Contains(c.PropertyType)).Select(p => p.Name); - } - - public static IEnumerable Columns() where T : class - { - var typeofUVW = new[] { typeof(U), typeof(V), typeof(W) }; - return typeof(T).GetProperties().Where(c => typeofUVW.Contains(c.PropertyType)).Select(p => p.Name); - } - - public static string[] ColumnsNumerical() where T : class - { - return Columns().ToArray(); - } - - public static string[] ColumnsString() where T : class - { - return Columns().ToArray(); - } - - public static string[] ColumnsDateTime() where T : class - { - return Columns().ToArray(); - } - } -} diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index 1f688f55ed..d74b4da1e9 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -8,6 +8,7 @@ using System.IO; using System.Linq; using System.Text; +using System.Xml.Schema; using Microsoft.ML; using Microsoft.ML.CommandLine; using Microsoft.ML.Data; @@ -509,10 +510,13 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : if (type.GetItemType() != expectedType) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.Inputs[i], expectedType.ToString(), type.ToString()); var originalShape = _parent.TFInputShapes[i]; - var shape = originalShape.dims; + var originalShapeNdim = originalShape.ndim; + var originalShapeDims = originalShape.dims; var colTypeDims = vecType.Dimensions.Select(dim => (int)dim).ToArray(); - if (shape == null || (shape.Length == 0)) + var inputDataShapeNdim = colTypeDims.Length; + + if (originalShapeDims == null || (originalShapeDims.Length == 0)) { _fullySpecifiedShapes[i] = new TensorShape(colTypeDims); if (_parent._addBatchDimensionInput) @@ -530,7 +534,7 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : // Compute the total size of the known dimensions of the shape. int valCount = 1; int numOfUnkDim = 0; - foreach (var s in shape) + foreach (var s in originalShapeDims) { if (s > 0) valCount *= s; @@ -542,40 +546,122 @@ public Mapper(TensorFlowTransformer parent, DataViewSchema inputSchema) : if (typeValueCount % valCount != 0) throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of length {typeValueCount}."); - var originalShapeNdim = originalShape.ndim; - var originalShapeDims = originalShape.dims; - var inputDataShapeNdim = colTypeDims.Length; - int[] l = new int[originalShapeNdim]; - int inputDataIndex = 0; - int tensorShapeIndex = 0; + // If the AddBatchDimensionInput is set to true, one unknown dimension(i.e. batch size) is inferrable + var trueNumOfUnkDim = _parent._addBatchDimensionInput && originalShapeDims[0]==-1 ? (numOfUnkDim - 1) : numOfUnkDim; - //If the input data passed has one dimension less than the expected input tensor shape - // and _addBatchDimensionInput option is set to true, set the batch size as 1. - if (originalShapeNdim - inputDataShapeNdim == 1 && _parent._addBatchDimensionInput) + //all dimensions are known(except batch dimension which can be unknown). Eg: + // originalShape = [-1,2,2], AddBatchDimensionInput = true + // Inferred shape:[1,2,2] + // originalShape = [2,2] + // Inferred shape:[2,2] + if (trueNumOfUnkDim == 0) { - l[0] = 1; - tensorShapeIndex = 1; + int[] l = new int[originalShapeNdim]; + int tensorShapeIndex = 0; + if (_parent._addBatchDimensionInput) + { + l[0] = 1; + tensorShapeIndex = 1; + } + for (; tensorShapeIndex < l.Length ; tensorShapeIndex++) + l[tensorShapeIndex] = originalShapeDims[tensorShapeIndex]; + _fullySpecifiedShapes[i] = new TensorShape(l); } - // If the number of input data dimensions do not match with that of tensor dimensions, throw. - else if(originalShapeNdim != inputDataShapeNdim) - throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape {colTypeDims.ToString()}."); + // One unknown dimension, which can be inferred from input. Eg: + // originalShape = [-1], input length=(5) + // Inferred shape:[5] + // originalShape = [1, -1, 2, 2], input length = 8. + // Inferred shape:[1,2,2,2] + // originalShape = [-1,-1, 2, 2], AddBatchDimensionInput = true and input length = 8. + // Inferred shape:[1,2,2,2] + else if (trueNumOfUnkDim == 1) + { + int[] l = new int[originalShapeNdim]; + int tensorShapeIndex = 0; + //attempt to infer single missing dimension from passed vector input + int missingDim = typeValueCount / valCount; + + if (_parent._addBatchDimensionInput) + { + l[0] = 1; + tensorShapeIndex = 1; + } + + for (; tensorShapeIndex < originalShapeNdim; tensorShapeIndex++) + { + //Fill in tensor shape for known dims with expected tensor shape. + if (originalShapeDims[tensorShapeIndex] != -1) + { + l[tensorShapeIndex] = originalShapeDims[tensorShapeIndex]; + } + else + { + l[tensorShapeIndex] = missingDim; + } + } + _fullySpecifiedShapes[i] = new TensorShape(l); - for (; tensorShapeIndex < originalShapeNdim; tensorShapeIndex++) + } + // For more than one unknown dimension, try to infer shape from input. Eg: + // originalShape = [-1,-1, 2, 2], AddBatchDimensionInput = false, inputShape = [1,2,2,2]. + // Inferred shape:[1,2,2,2] + // originalShape = [-1, -1, -1, 2], AddBatchDimensionInput = true, inputShape = [2,2,2]. + // Inferred shape:[1,2,2,2] + // originalShape = [1,-1,-1, 2], AddBatchDimensionInput = false, inputShape = [2,2,2]. + // Inferred shape:[1,2,2,2] + // originalShape = [2,-1,-1, 2], AddBatchDimensionInput = true, inputShape = [2,2,2]. + // Inferred shape:[2, 2, 2, 2]- use batch dim from the graph + else { - //Fill in tensor shape for unknown dims with input data shape. - if (originalShapeDims[tensorShapeIndex] == -1) + //attempt to fill unknown dims from input shape + int[] l = new int[originalShapeNdim]; + int inputDataIndex = 0; + int tensorShapeIndex = 0; + + //If the input data passed has one dimension less than the expected input tensor shape + if (originalShapeNdim - inputDataShapeNdim == 1) { - l[tensorShapeIndex] = colTypeDims[inputDataIndex]; - inputDataIndex++; + + // If _addBatchDimensionInput option is set to false, + // and batch dimension is unknown, suggest setting it to true. eg: + // originalShape = [-1,-1, 2, 2], AddBatchDimensionInput = false, inputShape = [2,2,2]. + if (!_parent._addBatchDimensionInput && originalShapeDims[0] == -1) + throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape ({String.Join(",", colTypeDims)}). Consider setting addBatchDimensionInput to true."); + + // Eg: + // originalShape = [-1,-1,-1, 2], AddBatchDimensionInput = true, inputShape = [2,2,2]. + // Inferred shape:[1,2,2,2] + else if (_parent._addBatchDimensionInput && originalShapeDims[0] == -1) + { + l[0] = 1; + tensorShapeIndex = 1; + } } - // If the tensor shape dim is known, assert that input data dim matches with - // expected tensor shape dim. - else if(originalShapeDims[tensorShapeIndex] == colTypeDims[inputDataIndex]) - l[tensorShapeIndex] = originalShapeDims[tensorShapeIndex]; - else - throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape {colTypeDims.ToString()}."); + // If the number of input data dimensions do not match with that of tensor dimensions, throw. Eg: + // originalShape = [1,-1,-1, 2], inputShape = [4,2]. + // originalShape = [1,-1,-1, 2], inputShape = [8]. + // originalShape = [-1, 2], inputShape = [2,2,2]. + else if (originalShapeNdim != inputDataShapeNdim) + throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape ({String.Join(",", colTypeDims)})."); + + for (; tensorShapeIndex < originalShapeNdim; tensorShapeIndex++, inputDataIndex++) + { + //Fill in tensor shape for unknown dims with input data shape. + if (originalShapeDims[tensorShapeIndex] == -1) + { + l[tensorShapeIndex] = colTypeDims[inputDataIndex]; + } + // If the tensor shape dim is known, assert that input data dim matches with + // expected tensor shape dim. + else if (originalShapeDims[tensorShapeIndex] == colTypeDims[inputDataIndex]) + { + l[tensorShapeIndex] = originalShapeDims[tensorShapeIndex]; + } + else + throw Contracts.Except($"Input shape mismatch: Input '{_parent.Inputs[i]}' has shape {originalShape.ToString()}, but input data is of shape ({String.Join(",", colTypeDims)})."); + } + _fullySpecifiedShapes[i] = new TensorShape(l); } - _fullySpecifiedShapes[i] = new TensorShape(l); } } diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 3938bfd733..3ba94434da 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -187,7 +187,7 @@ private class ShapeData // Data will be passed as flat vector. // Intended data shape [1, 2, 2, 3], model shape [1, None, None, 3] - [VectorType(12)] + [VectorType(1, 2, 2, 3)] public float[] FourDim; // Data will be passed as 4-D vector.