diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 776c20a23b..0b1c32d2ae 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -314,8 +314,11 @@ private class DataPoint public float[] Features { get; set; } } - [Fact] - public void LpNormOnnxConversionTest() + [Theory] + [CombinatorialData] + public void LpNormOnnxConversionTest( + bool ensureZeroMean, + LpNormNormalizingEstimatorBase.NormFunction norm) { var mlContext = new MLContext(seed: 1); @@ -329,42 +332,27 @@ public void LpNormOnnxConversionTest() }; var dataView = mlContext.Data.LoadFromEnumerable(samples); - LpNormNormalizingEstimatorBase.NormFunction[] norms = - { - LpNormNormalizingEstimatorBase.NormFunction.L1, - LpNormNormalizingEstimatorBase.NormFunction.L2, - LpNormNormalizingEstimatorBase.NormFunction.Infinity, - LpNormNormalizingEstimatorBase.NormFunction.StandardDeviation - }; + var pipe = mlContext.Transforms.NormalizeLpNorm(nameof(DataPoint.Features), norm:norm, ensureZeroMean: ensureZeroMean); - bool[] ensureZeroMeans = { true, false}; - foreach (var ensureZeroMean in ensureZeroMeans) - { - foreach (var norm in norms) - { - var pipe = mlContext.Transforms.NormalizeLpNorm(nameof(DataPoint.Features), norm:norm, ensureZeroMean: ensureZeroMean); - - var model = pipe.Fit(dataView); - var transformedData = model.Transform(dataView); - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); + var model = pipe.Fit(dataView); + var transformedData = model.Transform(dataView); + var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); - var onnxFileName = $"LpNorm-{norm.ToString()}-{ensureZeroMean}.onnx"; - var onnxModelPath = GetOutputPath(onnxFileName); + var onnxFileName = $"LpNorm-{norm.ToString()}-{ensureZeroMean}.onnx"; + var onnxModelPath = GetOutputPath(onnxFileName); - SaveOnnxModel(onnxModel, onnxModelPath, null); + SaveOnnxModel(onnxModel, onnxModelPath, null); - // Compare results produced by ML.NET and ONNX's runtime. - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) - { - // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); - var onnxTransformer = onnxEstimator.Fit(dataView); - var onnxResult = onnxTransformer.Transform(dataView); - CompareSelectedR4VectorColumns(nameof(DataPoint.Features), outputNames[0], transformedData, onnxResult, 3); - } - } + // Compare results produced by ML.NET and ONNX's runtime. + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) + { + // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. + string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); + var onnxTransformer = onnxEstimator.Fit(dataView); + var onnxResult = onnxTransformer.Transform(dataView); + CompareSelectedR4VectorColumns(nameof(DataPoint.Features), outputNames[0], transformedData, onnxResult, 3); } Done(); @@ -791,81 +779,72 @@ public void WordEmbeddingsTest() Done(); } - [Fact] - public void OnnxTypeConversionTest() + [Theory] + // These are the supported conversions + // ML.NET does not allow any conversions between signed and unsigned numeric types + // Onnx does not seem to support casting a string to any type + // Though the onnx docs claim support for byte and sbyte, + // CreateNamedOnnxValue in OnnxUtils.cs throws a NotImplementedException for those two + [InlineData(DataKind.Int16, DataKind.Int16)] + [InlineData(DataKind.Int16, DataKind.Int32)] + [InlineData(DataKind.Int16, DataKind.Int64)] + [InlineData(DataKind.Int16, DataKind.Single)] + [InlineData(DataKind.Int16, DataKind.Double)] + [InlineData(DataKind.UInt16, DataKind.UInt16)] + [InlineData(DataKind.UInt16, DataKind.UInt32)] + [InlineData(DataKind.UInt16, DataKind.UInt64)] + [InlineData(DataKind.UInt16, DataKind.Single)] + [InlineData(DataKind.UInt16, DataKind.Double)] + [InlineData(DataKind.Int32, DataKind.Int16)] + [InlineData(DataKind.Int32, DataKind.Int32)] + [InlineData(DataKind.Int32, DataKind.Int64)] + [InlineData(DataKind.Int32, DataKind.Single)] + [InlineData(DataKind.Int32, DataKind.Double)] + [InlineData(DataKind.Int64, DataKind.Int16)] + [InlineData(DataKind.Int64, DataKind.Int32)] + [InlineData(DataKind.Int64, DataKind.Int64)] + [InlineData(DataKind.Int64, DataKind.Single)] + [InlineData(DataKind.Int64, DataKind.Double)] + [InlineData(DataKind.UInt64, DataKind.UInt16)] + [InlineData(DataKind.UInt64, DataKind.UInt32)] + [InlineData(DataKind.UInt64, DataKind.UInt64)] + [InlineData(DataKind.UInt64, DataKind.Single)] + [InlineData(DataKind.UInt64, DataKind.Double)] + [InlineData(DataKind.Single, DataKind.Single)] + [InlineData(DataKind.Single, DataKind.Double)] + [InlineData(DataKind.Double, DataKind.Single)] + [InlineData(DataKind.Double, DataKind.Double)] + public void OnnxTypeConversionTest(DataKind fromKind, DataKind toKind) { var mlContext = new MLContext(seed: 1); string filePath = GetDataPath("type-conversion.txt"); - // These are the supported conversions - // ML.NET does not allow any conversions between signed and unsigned numeric types - // Onnx does not seem to support casting a string to any type - // Though the onnx docs claim support for byte and sbyte, - // CreateNamedOnnxValue in OnnxUtils.cs throws a NotImplementedException for those two - DataKind[,] supportedConversions = new[,] + TextLoader.Column[] columns = new [] { - { DataKind.Int16, DataKind.Int16}, - { DataKind.Int16, DataKind.Int32}, - { DataKind.Int16, DataKind.Int64}, - { DataKind.Int16, DataKind.Single}, - { DataKind.Int16, DataKind.Double}, - { DataKind.UInt16, DataKind.UInt16}, - { DataKind.UInt16, DataKind.UInt32}, - { DataKind.UInt16, DataKind.UInt64}, - { DataKind.UInt16, DataKind.Single}, - { DataKind.UInt16, DataKind.Double}, - { DataKind.Int32, DataKind.Int16}, - { DataKind.Int32, DataKind.Int32}, - { DataKind.Int32, DataKind.Int64}, - { DataKind.Int32, DataKind.Single}, - { DataKind.Int32, DataKind.Double}, - { DataKind.Int64, DataKind.Int16}, - { DataKind.Int64, DataKind.Int32}, - { DataKind.Int64, DataKind.Int64}, - { DataKind.Int64, DataKind.Single}, - { DataKind.Int64, DataKind.Double}, - { DataKind.UInt64, DataKind.UInt16}, - { DataKind.UInt64, DataKind.UInt32}, - { DataKind.UInt64, DataKind.UInt64}, - { DataKind.UInt64, DataKind.Single}, - { DataKind.UInt64, DataKind.Double}, - { DataKind.Single, DataKind.Single}, - { DataKind.Single, DataKind.Double}, - { DataKind.Double, DataKind.Single}, - { DataKind.Double, DataKind.Double} + new TextLoader.Column("Value", fromKind, 0, 0) }; + var dataView = mlContext.Data.LoadFromTextFile(filePath, columns); - for (int i = 0; i < supportedConversions.GetLength(0); i++) - { - var fromKind = supportedConversions[i, 0]; - var toKind = supportedConversions[i, 1]; - - TextLoader.Column[] columns = new [] - { - new TextLoader.Column("Value", fromKind, 0, 0) - }; - var dataView = mlContext.Data.LoadFromTextFile(filePath, columns); - - var pipeline = mlContext.Transforms.Conversion.ConvertType("ValueConverted", "Value", outputKind: toKind); - var model = pipeline.Fit(dataView); - var mlnetResult = model.Transform(dataView); + var pipeline = mlContext.Transforms.Conversion.ConvertType("ValueConverted", "Value", outputKind: toKind); + var model = pipeline.Fit(dataView); + var mlnetResult = model.Transform(dataView); - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); - var onnxFileName = "typeconversion.onnx"; - var onnxModelPath = GetOutputPath(onnxFileName); - SaveOnnxModel(onnxModel, onnxModelPath, null); + var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); + var onnxFileName = "typeconversion.onnx"; + var onnxModelPath = GetOutputPath(onnxFileName); + SaveOnnxModel(onnxModel, onnxModelPath, null); - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) - { - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); - var onnxTransformer = onnxEstimator.Fit(dataView); - var onnxResult = onnxTransformer.Transform(dataView); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) + { + string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); + var onnxTransformer = onnxEstimator.Fit(dataView); + var onnxResult = onnxTransformer.Transform(dataView); - CompareResults(model.ColumnPairs[0].outputColumnName, outputNames[1], mlnetResult, onnxResult); - } + CompareResults(model.ColumnPairs[0].outputColumnName, outputNames[1], mlnetResult, onnxResult); } + Done(); } @@ -969,48 +948,40 @@ public void IndicateMissingValuesOnnxConversionTest() Done(); } - [Fact] - public void ValueToKeyMappingOnnxConversionTest() + [Theory] + [InlineData(DataKind.Single)] + [InlineData(DataKind.String)] + public void ValueToKeyMappingOnnxConversionTest(DataKind valueType) { var mlContext = new MLContext(seed: 1); string filePath = GetDataPath("type-conversion.txt"); - DataKind[] supportedValueTypes = new[] + TextLoader.Column[] columns = new[] { - DataKind.Single, - DataKind.String + new TextLoader.Column("Value", valueType, 0, 0) }; + var dataView = mlContext.Data.LoadFromTextFile(filePath, columns); - for (int i = 0; i < supportedValueTypes.Length; i++) - { - var valueType = supportedValueTypes[i]; - - TextLoader.Column[] columns = new[] - { - new TextLoader.Column("Value", valueType, 0, 0) - }; - var dataView = mlContext.Data.LoadFromTextFile(filePath, columns); - - var pipeline = mlContext.Transforms.Conversion.MapValueToKey("Key", "Value"); - var model = pipeline.Fit(dataView); - var mlnetResult = model.Transform(dataView); + var pipeline = mlContext.Transforms.Conversion.MapValueToKey("Key", "Value"); + var model = pipeline.Fit(dataView); + var mlnetResult = model.Transform(dataView); - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); - var onnxFileName = "ValueToKey.onnx"; - var onnxModelPath = GetOutputPath(onnxFileName); - SaveOnnxModel(onnxModel, onnxModelPath, null); + var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); + var onnxFileName = "ValueToKey.onnx"; + var onnxModelPath = GetOutputPath(onnxFileName); + SaveOnnxModel(onnxModel, onnxModelPath, null); - if (IsOnnxRuntimeSupported()) - { - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); - var onnxTransformer = onnxEstimator.Fit(dataView); - var onnxResult = onnxTransformer.Transform(dataView); + if (IsOnnxRuntimeSupported()) + { + string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); + var onnxTransformer = onnxEstimator.Fit(dataView); + var onnxResult = onnxTransformer.Transform(dataView); - CompareSelectedVectorColumns(model.ColumnPairs[0].outputColumnName, outputNames[1], mlnetResult, onnxResult); - } + CompareSelectedVectorColumns(model.ColumnPairs[0].outputColumnName, outputNames[1], mlnetResult, onnxResult); } + Done(); } @@ -1056,8 +1027,12 @@ public void WordTokenizerOnnxConversionTest() Done(); } - [Fact] - public void NgramOnnxConnversionTest() + [Theory] + [CombinatorialData] + public void NgramOnnxConnversionTest( + [CombinatorialValues(1, 2, 3)] int ngramLength, + bool useAllLength, + NgramExtractingEstimator.WeightingCriteria weighting) { var mlContext = new MLContext(seed: 1); @@ -1068,64 +1043,46 @@ public void NgramOnnxConnversionTest() new TextData(){ Text = "cat think mat bad" }, }; - int[] ngramLengths = { 1, 2, 3 }; - bool[] useAllLengths = { true, false}; - NgramExtractingEstimator.WeightingCriteria[] weightingCriteria = + // Convert training data to IDataView. + var dataView = mlContext.Data.LoadFromEnumerable(samples); + + IEstimator[] pipelines = { - NgramExtractingEstimator.WeightingCriteria.Tf, - NgramExtractingEstimator.WeightingCriteria.Idf, - NgramExtractingEstimator.WeightingCriteria.TfIdf + mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text", new[] { ' ' }) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens")) + .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens", + ngramLength: ngramLength, + useAllLengths: useAllLength, + weighting: weighting)), + + mlContext.Transforms.Text.ProduceWordBags("Tokens", "Text", + ngramLength: ngramLength, + useAllLengths: useAllLength, + weighting: weighting) }; - - var paramCombinations = from ngramLength in ngramLengths - from useAllLength in useAllLengths - from weighting in weightingCriteria - select new { ngramLength, useAllLength, weighting }; - - - foreach (var combination in paramCombinations) + for (int i = 0; i < pipelines.Length; i++) { - // Convert training data to IDataView. - var dataView = mlContext.Data.LoadFromEnumerable(samples); + var pipe = pipelines[i]; + var model = pipe.Fit(dataView); + var transformedData = model.Transform(dataView); - IEstimator[] pipelines = - { - mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text", new[] { ' ' }) - .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens")) - .Append(mlContext.Transforms.Text.ProduceNgrams("NGrams", "Tokens", - ngramLength: combination.ngramLength, - useAllLengths:combination.useAllLength, - weighting: combination.weighting)), - - mlContext.Transforms.Text.ProduceWordBags("Tokens", "Text", - ngramLength: combination.ngramLength, - useAllLengths: combination.useAllLength, - weighting: combination.weighting) - }; - - for (int i = 0; i < pipelines.Length; i++) + var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); + var onnxFilename = $"Ngram-{i}-{ngramLength}-{useAllLength}-{weighting}.onnx"; + var onnxFilePath = GetOutputPath(onnxFilename); + SaveOnnxModel(onnxModel, onnxFilePath, null); + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) { - var pipe = pipelines[i]; - var model = pipe.Fit(dataView); - var transformedData = model.Transform(dataView); - - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); - var onnxFilename = $"Ngram-{i}-{combination.ngramLength}-{combination.useAllLength}-{combination.weighting}.onnx"; - var onnxFilePath = GetOutputPath(onnxFilename); - SaveOnnxModel(onnxModel, onnxFilePath, null); - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) - { - // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxFilePath); - var onnxTransformer = onnxEstimator.Fit(dataView); - var onnxResult = onnxTransformer.Transform(dataView); - CompareSelectedR4VectorColumns(transformedData.Schema[3].Name, outputNames[outputNames.Length-1], transformedData, onnxResult, 3); - } + // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. + string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); + var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxFilePath); + var onnxTransformer = onnxEstimator.Fit(dataView); + var onnxResult = onnxTransformer.Transform(dataView); + CompareSelectedR4VectorColumns(transformedData.Schema[3].Name, outputNames[outputNames.Length-1], transformedData, onnxResult, 3); } } + Done(); }