From d3aea0c2bb2a9b9b7c7e3a052af60b8c125e9f98 Mon Sep 17 00:00:00 2001 From: Keren Fuentes Date: Thu, 23 Jan 2020 15:46:19 -0800 Subject: [PATCH 1/5] ova fix --- .../Standard/LinearModelParameters.cs | 2 +- .../OneVersusAllTrainer.cs | 19 ++++++++++++------- test/Microsoft.ML.Tests/OnnxConversionTest.cs | 17 ++++++++++++++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/LinearModelParameters.cs b/src/Microsoft.ML.StandardTrainers/Standard/LinearModelParameters.cs index d2dea6f60e..8f1745b2a0 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/LinearModelParameters.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/LinearModelParameters.cs @@ -138,7 +138,7 @@ private protected virtual bool SaveAsOnnx(OnnxContext ctx, string[] outputs, str Host.CheckValue(ctx, nameof(ctx)); Host.Check(Utils.Size(outputs) >= 1); string opType = "LinearRegressor"; - string scoreVarName = (Utils.Size(outputs) == 2) ? outputs[1] : outputs[0]; // Get Score from PredictedLabel and/or Score columns + string scoreVarName = (Utils.Size(outputs) >= 2) ? outputs[1] : outputs[0]; // Get Score from PredictedLabel and/or Score columns var node = ctx.CreateNode(opType, new[] { featureColumn }, new[] { scoreVarName }, ctx.GetNodeName(opType)); // Selection of logit or probit output transform. enum {'NONE', 'LOGIT', 'PROBIT} node.AddAttribute("post_transform", "NONE"); diff --git a/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs b/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs index 185da7a4eb..5c6bd75532 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs @@ -546,7 +546,6 @@ public string[] SaveAsOnnxPreProcess(OnnxContext ctx, string featureColumn, bool predictorOutputNames[0] = ctx.AddIntermediateVariable(NumberDataViewType.UInt32, $"{DefaultColumnNames.PredictedLabel}_{i}", true); predictorOutputNames[1] = ctx.AddIntermediateVariable(NumberDataViewType.Single, $"{DefaultColumnNames.Score}_{i}", true); predictorOutputNames[2] = ctx.AddIntermediateVariable(NumberDataViewType.Single, $"{DefaultColumnNames.Probability}_{i}", true); - string clipInput = predictorOutputNames[2]; var pred = Predictors[i] as ISingleCanSaveOnnx; @@ -563,7 +562,7 @@ public string[] SaveAsOnnxPreProcess(OnnxContext ctx, string featureColumn, bool var clipNode = ctx.CreateNode(opType, new[] { clipInput, zeroVar }, new[] { outputs[i] }, ctx.GetNodeName(opType), ""); } else - outputs[i] = predictorOutputNames[2]; + outputs[i] = predictorOutputNames[1]; } return outputs; } @@ -659,7 +658,8 @@ public override JToken SaveAsPfa(BoundPfaContext ctx, JToken input) public override bool SaveAsOnnx(OnnxContext ctx, string[] outputNames, string featureColumn) { - var probabilityOutputs = base.SaveAsOnnxPreProcess(ctx, featureColumn, true); + + var probabilityOutputs = base.SaveAsOnnxPreProcess(ctx, featureColumn, false); string opType = "Concat"; var concatOutput = ctx.AddIntermediateVariable(NumberDataViewType.Single, "ConcatOutput", true); @@ -794,22 +794,27 @@ public override bool SaveAsOnnx(OnnxContext ctx, string[] outputNames, string fe opType = "Sum"; var sumOutput = ctx.AddIntermediateVariable(NumberDataViewType.Single, "SumOfScores", true); - var sumNode = ctx.CreateNode(opType, probabilityOutputs, new[] { sumOutput }, ctx.GetNodeName(opType), ""); + ctx.CreateNode(opType, probabilityOutputs, new[] { sumOutput }, ctx.GetNodeName(opType), ""); opType = "Cast"; - var castOutput = ctx.AddIntermediateVariable(BooleanDataViewType.Instance, "IsSumZero", true); + var castOutput = ctx.AddIntermediateVariable(BooleanDataViewType.Instance, "CastOutput", true); var castNode = ctx.CreateNode(opType, sumOutput, castOutput, ctx.GetNodeName(opType), ""); var t = InternalDataKindExtensions.ToInternalDataKind(DataKind.Boolean).ToType(); castNode.AddAttribute("to", t); + opType = "Not"; + var notOutput = ctx.AddIntermediateVariable(null, "IsSumZero", true); + ctx.CreateNode(opType, castOutput, notOutput, ctx.GetNodeName(opType), ""); + + opType = "Cast"; var castIsZeroSumToFloat = ctx.AddIntermediateVariable(BooleanDataViewType.Instance, "IsSumZeroAsFloat", true); - var castIsZeroSumToFloatNode = ctx.CreateNode(opType, castOutput, castIsZeroSumToFloat, ctx.GetNodeName(opType), ""); + var castIsZeroSumToFloatNode = ctx.CreateNode(opType, notOutput, castIsZeroSumToFloat, ctx.GetNodeName(opType), ""); var t1 = InternalDataKindExtensions.ToInternalDataKind(DataKind.Single).ToType(); castIsZeroSumToFloatNode.AddAttribute("to", t1); opType = "Sum"; var sumOutputNonZero = ctx.AddIntermediateVariable(NumberDataViewType.Single, "SumOfScoresNonZero", true); - var sumOutputNonZeroNode = ctx.CreateNode(opType, new[] { sumOutput, castIsZeroSumToFloat }, + ctx.CreateNode(opType, new[] { sumOutput, castIsZeroSumToFloat }, new[] { sumOutputNonZero }, ctx.GetNodeName(opType), ""); string[] divOutputs = new string[Predictors.Length]; diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index e22b82133f..eacd8695b0 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -1319,8 +1319,22 @@ public void MulticlassTrainersOnnxConversionTest() { mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(), mlContext.MulticlassClassification.Trainers.NaiveBayes(), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.AveragedPerceptron()), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.AveragedPerceptron(), useProbabilities:false), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression()), mlContext.MulticlassClassification.Trainers.OneVersusAll( mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(), useProbabilities:false), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.LinearSvm()), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.LinearSvm(), useProbabilities:false), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.FastForest()), + mlContext.MulticlassClassification.Trainers.OneVersusAll( + mlContext.BinaryClassification.Trainers.FastForest(), useProbabilities:false), mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(), mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated() }; @@ -1357,7 +1371,8 @@ public void MulticlassTrainersOnnxConversionTest() var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); var onnxTransformer = onnxEstimator.Fit(dataView); var onnxResult = onnxTransformer.Transform(dataView); - CompareSelectedScalarColumns(transformedData.Schema[5].Name, outputNames[2], transformedData, onnxResult); + CompareSelectedScalarColumns(transformedData.Schema[5].Name, outputNames[2], transformedData, onnxResult); //compare predicted labels + CompareSelectedR4VectorColumns(transformedData.Schema[6].Name, outputNames[3], transformedData, onnxResult, 5); //compare scores } } Done(); From e9a7af41525bb234f7f8c74abe152196da126ba9 Mon Sep 17 00:00:00 2001 From: Keren Fuentes Date: Thu, 23 Jan 2020 15:52:41 -0800 Subject: [PATCH 2/5] reformatted spacing --- .../Standard/MulticlassClassification/OneVersusAllTrainer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs b/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs index 5c6bd75532..54f74abad0 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/MulticlassClassification/OneVersusAllTrainer.cs @@ -546,6 +546,7 @@ public string[] SaveAsOnnxPreProcess(OnnxContext ctx, string featureColumn, bool predictorOutputNames[0] = ctx.AddIntermediateVariable(NumberDataViewType.UInt32, $"{DefaultColumnNames.PredictedLabel}_{i}", true); predictorOutputNames[1] = ctx.AddIntermediateVariable(NumberDataViewType.Single, $"{DefaultColumnNames.Score}_{i}", true); predictorOutputNames[2] = ctx.AddIntermediateVariable(NumberDataViewType.Single, $"{DefaultColumnNames.Probability}_{i}", true); + string clipInput = predictorOutputNames[2]; var pred = Predictors[i] as ISingleCanSaveOnnx; @@ -658,7 +659,6 @@ public override JToken SaveAsPfa(BoundPfaContext ctx, JToken input) public override bool SaveAsOnnx(OnnxContext ctx, string[] outputNames, string featureColumn) { - var probabilityOutputs = base.SaveAsOnnxPreProcess(ctx, featureColumn, false); string opType = "Concat"; From d3957ea727ab52180cebce0973822e3a45c34732 Mon Sep 17 00:00:00 2001 From: Keren Fuentes Date: Fri, 24 Jan 2020 11:32:28 -0800 Subject: [PATCH 3/5] reverted offset name -since another PR is resolving this, and tolerance change --- src/Microsoft.ML.Data/Prediction/Calibrator.cs | 2 +- src/Microsoft.ML.FastTree/FastTree.cs | 2 +- test/Microsoft.ML.Tests/OnnxConversionTest.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index 2f947fab24..4be879fb8b 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -1750,7 +1750,7 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] scoreProbablityColu var node = ctx.CreateNode(opType, new[] { scoreProbablityColumnNames[0], slopVar }, new[] { mulNodeOutput }, ctx.GetNodeName(opType), ""); opType = "Add"; - var betaVar = ctx.AddInitializer((float)(-Offset), "Offset"); + var betaVar = ctx.AddInitializer((float)(-Offset), "Slope"); var linearOutput = ctx.AddIntermediateVariable(null, "linearOutput", true); node = ctx.CreateNode(opType, new[] { mulNodeOutput, betaVar }, new[] { linearOutput }, ctx.GetNodeName(opType), ""); diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index d45f3ab469..e61a61e59c 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -3112,7 +3112,7 @@ private protected virtual bool SaveAsOnnx(OnnxContext ctx, string[] outputNames, } string opType = "TreeEnsembleRegressor"; - string scoreVarName = (Utils.Size(outputNames) == 2) ? outputNames[1] : outputNames[0]; // Get Score from PredictedLabel and/or Score columns + string scoreVarName = (Utils.Size(outputNames) >= 2) ? outputNames[1] : outputNames[0]; // Get Score from PredictedLabel and/or Score columns var node = ctx.CreateNode(opType, new[] { featureColumn }, new[] { scoreVarName }, ctx.GetNodeName(opType)); node.AddAttribute("post_transform", PostTransform.None.GetDescription()); diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index eacd8695b0..b52196a56e 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -1372,7 +1372,7 @@ public void MulticlassTrainersOnnxConversionTest() var onnxTransformer = onnxEstimator.Fit(dataView); var onnxResult = onnxTransformer.Transform(dataView); CompareSelectedScalarColumns(transformedData.Schema[5].Name, outputNames[2], transformedData, onnxResult); //compare predicted labels - CompareSelectedR4VectorColumns(transformedData.Schema[6].Name, outputNames[3], transformedData, onnxResult, 5); //compare scores + CompareSelectedR4VectorColumns(transformedData.Schema[6].Name, outputNames[3], transformedData, onnxResult, 4); //compare scores } } Done(); From afa875c6ed0b38ab242cf65a72c4df6fd1ed431d Mon Sep 17 00:00:00 2001 From: Keren Fuentes Date: Tue, 28 Jan 2020 10:50:52 -0800 Subject: [PATCH 4/5] rebase change --- src/Microsoft.ML.Data/Prediction/Calibrator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index 4be879fb8b..87ba1278b4 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -1750,7 +1750,7 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] scoreProbablityColu var node = ctx.CreateNode(opType, new[] { scoreProbablityColumnNames[0], slopVar }, new[] { mulNodeOutput }, ctx.GetNodeName(opType), ""); opType = "Add"; - var betaVar = ctx.AddInitializer((float)(-Offset), "Slope"); + var betaVar = ctx.AddInitializer(-0.0000001f, "Slope"); var linearOutput = ctx.AddIntermediateVariable(null, "linearOutput", true); node = ctx.CreateNode(opType, new[] { mulNodeOutput, betaVar }, new[] { linearOutput }, ctx.GetNodeName(opType), ""); From 3ddceded1238195e484b4f4fcb7a98507aa1ed87 Mon Sep 17 00:00:00 2001 From: Keren Fuentes Date: Tue, 28 Jan 2020 10:57:09 -0800 Subject: [PATCH 5/5] rebase change --- src/Microsoft.ML.Data/Prediction/Calibrator.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Prediction/Calibrator.cs b/src/Microsoft.ML.Data/Prediction/Calibrator.cs index 87ba1278b4..2f947fab24 100644 --- a/src/Microsoft.ML.Data/Prediction/Calibrator.cs +++ b/src/Microsoft.ML.Data/Prediction/Calibrator.cs @@ -1750,7 +1750,7 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] scoreProbablityColu var node = ctx.CreateNode(opType, new[] { scoreProbablityColumnNames[0], slopVar }, new[] { mulNodeOutput }, ctx.GetNodeName(opType), ""); opType = "Add"; - var betaVar = ctx.AddInitializer(-0.0000001f, "Slope"); + var betaVar = ctx.AddInitializer((float)(-Offset), "Offset"); var linearOutput = ctx.AddIntermediateVariable(null, "linearOutput", true); node = ctx.CreateNode(opType, new[] { mulNodeOutput, betaVar }, new[] { linearOutput }, ctx.GetNodeName(opType), "");