Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions src/Microsoft.ML.Data/Prediction/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1742,12 +1742,17 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] scoreProbablityColu
_host.CheckValue(scoreProbablityColumnNames, nameof(scoreProbablityColumnNames));
_host.Check(Utils.Size(scoreProbablityColumnNames) == 2);

string opType = "Affine";
string linearOutput = ctx.AddIntermediateVariable(null, "linearOutput", true);
var node = ctx.CreateNode(opType, new[] { scoreProbablityColumnNames[0] },
new[] { linearOutput }, ctx.GetNodeName(opType), "");
node.AddAttribute("alpha", Slope * -1);
node.AddAttribute("beta", -0.0000001);
// The Affine operator is no longer supported in the v11 opset,
// so we have to decompose it into a Mul node followed by an Add node.
string opType = "Mul";
var slopVar = ctx.AddInitializer((float)(-Slope), "Slope");
var mulNodeOutput = ctx.AddIntermediateVariable(null, "MulNodeOutput", true);
var node = ctx.CreateNode(opType, new[] { scoreProbablityColumnNames[0], slopVar }, new[] { mulNodeOutput }, ctx.GetNodeName(opType), "");

opType = "Add";
var betaVar = ctx.AddInitializer(-0.0000001f, "Slope");
var linearOutput = ctx.AddIntermediateVariable(null, "linearOutput", true);
node = ctx.CreateNode(opType, new[] { mulNodeOutput, betaVar }, new[] { linearOutput }, ctx.GetNodeName(opType), "");

opType = "Sigmoid";
node = ctx.CreateNode(opType, new[] { linearOutput },
Expand Down
60 changes: 57 additions & 3 deletions src/Microsoft.ML.Data/Transforms/SlotsDroppingTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using Microsoft.ML.Data;
using Microsoft.ML.Internal.Internallearn;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Model.OnnxConverter;
using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;

Expand Down Expand Up @@ -442,11 +443,12 @@ private static bool AreRangesValid(int[][] slotsMin, int[][] slotsMax)
/// <summary>
/// Builds the row mapper for this transformer over the given input schema.
/// </summary>
/// <param name="schema">The input schema handed to the new <see cref="Mapper"/>.</param>
/// <returns>A new <see cref="Mapper"/> constructed from this transformer and <paramref name="schema"/>.</returns>
private protected override IRowMapper MakeRowMapper(DataViewSchema schema)
{
    return new Mapper(this, schema);
}

private sealed class Mapper : OneToOneMapperBase
private sealed class Mapper : OneToOneMapperBase, ISaveAsOnnx
{
private readonly SlotsDroppingTransformer _parent;
private readonly int[] _cols;
private readonly DataViewType[] _srcTypes;
private readonly DataViewType[] _rawTypes;
private readonly DataViewType[] _dstTypes;
private readonly SlotDropper[] _slotDropper;
// Track if all the slots of the column are to be dropped.
Expand All @@ -459,6 +461,7 @@ public Mapper(SlotsDroppingTransformer parent, DataViewSchema inputSchema)
_parent = parent;
_cols = new int[_parent.ColumnPairs.Length];
_srcTypes = new DataViewType[_parent.ColumnPairs.Length];
_rawTypes = new DataViewType[_parent.ColumnPairs.Length];
_dstTypes = new DataViewType[_parent.ColumnPairs.Length];
_slotDropper = new SlotDropper[_parent.ColumnPairs.Length];
_suppressed = new bool[_parent.ColumnPairs.Length];
Expand All @@ -471,8 +474,8 @@ public Mapper(SlotsDroppingTransformer parent, DataViewSchema inputSchema)
_srcTypes[i] = inputSchema[_cols[i]].Type;
VectorDataViewType srcVectorType = _srcTypes[i] as VectorDataViewType;

DataViewType itemType = srcVectorType?.ItemType ?? _srcTypes[i];
if (!IsValidColumnType(itemType))
_rawTypes[i] = srcVectorType?.ItemType ?? _srcTypes[i];
if (!IsValidColumnType(_rawTypes[i]))
throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", _parent.ColumnPairs[i].inputColumnName);

int valueCount = srcVectorType?.Size ?? 1;
Expand Down Expand Up @@ -868,6 +871,57 @@ protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
}
return result;
}

/// <summary>
/// Reports whether this mapper can be exported to ONNX.
/// </summary>
/// <param name="ctx">The ONNX export context; it is not inspected.</param>
/// <returns>Always <see langword="true"/>.</returns>
public bool CanSaveOnnx(OnnxContext ctx)
{
    return true;
}

/// <summary>
/// Exports every column pair of the parent transformer whose input column is
/// present in <paramref name="ctx"/>. Pairs whose input column is absent are ignored.
/// </summary>
/// <param name="ctx">The ONNX export context that receives the variables and nodes.</param>
public void SaveAsOnnx(OnnxContext ctx)
{
    Host.CheckValue(ctx, nameof(ctx));

    int pairCount = _cols.Length;
    for (int col = 0; col < pairCount; col++)
    {
        var pair = _parent.ColumnPairs[col];
        string sourceColumn = pair.inputColumnName;
        if (ctx.ContainsColumn(sourceColumn))
        {
            string sourceVariable = ctx.GetVariableName(sourceColumn);
            string targetVariable = ctx.AddIntermediateVariable(_dstTypes[col], pair.outputColumnName);

            // Withdraw the freshly registered output when the per-column export reports failure.
            bool exported = SaveAsOnnxCore(ctx, col, sourceVariable, targetVariable);
            if (!exported)
                ctx.RemoveColumn(targetVariable);
        }
    }
}

/// <summary>
/// Emits the ONNX node for the column pair at index <paramref name="iinfo"/>.
/// A vector-typed source is exported as a "GatherElements" node (axis 1) whose index
/// initializer holds the preserved slot indices. Any other source is exported as an
/// "Identity" node fed by a constant [1, 1] initializer; in that case
/// <paramref name="srcVariableName"/> is not referenced by the emitted node.
/// </summary>
/// <param name="ctx">The ONNX export context that receives the initializer and the node.</param>
/// <param name="iinfo">Index of the column pair being exported.</param>
/// <param name="srcVariableName">ONNX variable name of the input column.</param>
/// <param name="dstVariableName">ONNX variable name of the output column.</param>
/// <returns>Always <see langword="true"/>.</returns>
public bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, string srcVariableName, string dstVariableName)
{
    string opType;
    if (_srcTypes[iinfo] is VectorDataViewType)
    {
        opType = "GatherElements";

        // Materialize once: the sequence is needed both for its length (the initializer
        // shape) and for its contents, and re-enumerating a deferred query would
        // recompute it each time.
        long[] preservedSlots = _slotDropper[iinfo].GetPreservedSlots().ToArray();
        var slotsVar = ctx.AddInitializer(preservedSlots, new long[] { 1, preservedSlots.Length }, "PreservedSlots");
        var node = ctx.CreateNode(opType, new[] { srcVariableName, slotsVar }, new[] { dstVariableName }, ctx.GetNodeName(opType), "");
        node.AddAttribute("axis", 1);
    }
    else
    {
        // Only the constant matching the item type is allocated.
        long[] dims = { 1, 1 };
        string constVal;
        if (_rawTypes[iinfo] is TextDataViewType)
            constVal = ctx.AddInitializer(new string[] { "" }, dims);
        else if (_rawTypes[iinfo] is KeyDataViewType)
            constVal = ctx.AddInitializer(new long[] { 0 }, dims);
        else
            constVal = ctx.AddInitializer(new float[] { 0.0f }, dims);

        opType = "Identity";
        ctx.CreateNode(opType, constVal, dstVariableName, ctx.GetNodeName(opType), "");
    }
    return true;
}

}
}
}
15 changes: 15 additions & 0 deletions src/Microsoft.ML.Data/Utilities/SlotDropper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
Expand All @@ -16,6 +18,7 @@ namespace Microsoft.ML.Internal.Internallearn
internal sealed class SlotDropper
{
private readonly int[] _lengthReduction;
private readonly int _srcLength;

/// <summary>
/// Returns -1 for non vector and unknown length vectors.
Expand Down Expand Up @@ -43,6 +46,7 @@ public SlotDropper(int srcLength, int[] slotsMin, int[] slotsMax)

SlotsMin = slotsMin;
SlotsMax = slotsMax;
_srcLength = srcLength;
_lengthReduction = ComputeLengthReduction();

Contracts.Check(SlotsMin.Length == _lengthReduction.Length);
Expand Down Expand Up @@ -212,5 +216,16 @@ public void DropSlots<TDst>(ref VBuffer<TDst> src, ref VBuffer<TDst> dst)

dst = editor.CommitTruncated(iiDst);
}

/// <summary>
/// Returns, in ascending order, the indices in [0, source length) that are not covered
/// by any of the inclusive ranges [SlotsMin[i], SlotsMax[i]].
/// </summary>
/// <remarks>
/// The result is a fully materialized array, so callers may enumerate it repeatedly
/// (for example once to count and once to copy) without recomputing it. Every range is
/// clamped to the source length before it is visited, so the work done is bounded by
/// the source length per range rather than by the width of the range.
/// </remarks>
/// <returns>The preserved slot indices; empty when the source length is not positive.</returns>
public IEnumerable<long> GetPreservedSlots()
{
    if (_srcLength <= 0)
        return Array.Empty<long>();

    // Flag each dropped slot, ignoring any part of a range that lies outside the vector.
    var isDropped = new bool[_srcLength];
    int droppedCount = 0;
    for (int i = 0; i < SlotsMin.Length; i++)
    {
        int first = Math.Max(SlotsMin[i], 0);
        int last = Math.Min(SlotsMax[i], _srcLength - 1);
        for (int slot = first; slot <= last; slot++)
        {
            // Guard against double counting in case two ranges cover the same slot.
            if (!isDropped[slot])
            {
                isDropped[slot] = true;
                droppedCount++;
            }
        }
    }

    var preserved = new long[_srcLength - droppedCount];
    int next = 0;
    for (int slot = 0; slot < _srcLength; slot++)
    {
        if (!isDropped[slot])
            preserved[next++] = slot;
    }
    return preserved;
}
}
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.OnnxConverter/OnnxUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ public static ModelProto MakeModel(List<NodeProto> nodes, string producerName, s
model.IrVersion = (long)OnnxCSharpToProtoWrapper.Version.IrVersion;
model.ModelVersion = modelVersion;
model.OpsetImport.Add(new OperatorSetIdProto() { Domain = "ai.onnx.ml", Version = 2 });
model.OpsetImport.Add(new OperatorSetIdProto() { Domain = "", Version = 9 });
model.OpsetImport.Add(new OperatorSetIdProto() { Domain = "", Version = 11 });
model.Graph = new GraphProto();
var graph = model.Graph;
graph.Node.Add(nodes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1004,10 +1004,18 @@ private bool SaveAsOnnxCore(OnnxContext ctx, string[] outputs, string featureCol

// Onnx outputs an Int64, but ML.NET outputs UInt32. So cast the Onnx output here
opType = "Cast";
var castNode = ctx.CreateNode(opType, predictedLabelInt64, predictedLabelUint32, ctx.GetNodeName(opType), "");
var castNodeOutput = ctx.AddIntermediateVariable(NumberDataViewType.UInt32, "CastNodeOutput", true);
var castNode = ctx.CreateNode(opType, predictedLabelInt64, castNodeOutput, ctx.GetNodeName(opType), "");
var t = InternalDataKindExtensions.ToInternalDataKind(DataKind.UInt32).ToType();
castNode.AddAttribute("to", t);

// The predictedLabel is a scalar, but ML.NET expects the ONNX output to be a [1x1] tensor, so reshape it here.
opType = "Reshape";
long[] shape = { 1, 1 };
long[] shapeDim = { 2 };
var shapeVar = ctx.AddInitializer(shape, shapeDim, "ShapeVar");
var reshapeNode = ctx.CreateNode(opType, new[] { castNodeOutput, shapeVar }, new[] { predictedLabelUint32 }, ctx.GetNodeName(opType), "");

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,8 @@ public string[] SaveAsOnnxPreProcess(OnnxContext ctx, string featureColumn, bool
outputs[i] = clipOutput;

string opType = "Clip";
var clipNode = ctx.CreateNode(opType, clipInput, outputs[i], ctx.GetNodeName(opType), "");
clipNode.AddAttribute("min", 0.0);
var zeroVar = ctx.AddInitializer(0.0f, "Zero");
var clipNode = ctx.CreateNode(opType, new[] { clipInput, zeroVar }, new[] { outputs[i] }, ctx.GetNodeName(opType), "");
}
else
outputs[i] = predictorOutputNames[2];
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Transforms/CountFeatureSelection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace Microsoft.ML.Transforms
/// | Does this estimator need to look at the data to train its parameters? | Yes |
/// | Input column data type | Vector or scalar of numeric, [text](xref:Microsoft.ML.Data.TextDataViewType) or [key](xref:Microsoft.ML.Data.KeyDataViewType) data types|
/// | Output column data type | Same as the input column|
/// | Exportable to ONNX | No |
/// | Exportable to ONNX | Yes |
///
/// This transform uses a set of aggregators to count the number of values for each slot (vector element)
/// that are non-default and non-missing (for the definitions of default and missing, refer to the remarks section
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace Microsoft.ML.Transforms
/// | Does this estimator need to look at the data to train its parameters? | Yes |
/// | Input column data type | Vector or scalar of numeric, [text](xref:Microsoft.ML.Data.TextDataViewType) or [key](xref:Microsoft.ML.Data.KeyDataViewType) data types|
/// | Output column data type | Same as the input column|
/// | Exportable to ONNX | No |
/// | Exportable to ONNX | Yes |
///
/// Formally, the mutual information can be written as:
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,25 +381,25 @@
},
{
"input": [
"Score"
"Score",
"Slope"
],
"output": [
"MulNodeOutput"
],
"name": "Mul",
"opType": "Mul"
},
{
"input": [
"MulNodeOutput",
"Slope0"
],
"output": [
"linearOutput"
],
"name": "Affine",
"opType": "Affine",
"attribute": [
{
"name": "alpha",
"f": 0.4,
"type": "FLOAT"
},
{
"name": "beta",
"f": -1E-07,
"type": "FLOAT"
}
]
"name": "Add",
"opType": "Add"
},
{
"input": [
Expand Down Expand Up @@ -478,6 +478,22 @@
}
],
"name": "A Simple Pipeline",
"initializer": [
{
"dataType": 1,
"floatData": [
0.4
],
"name": "Slope"
},
{
"dataType": 1,
"floatData": [
-1E-07
],
"name": "Slope0"
}
],
"input": [
{
"name": "F1",
Expand Down Expand Up @@ -671,7 +687,7 @@
"version": "2"
},
{
"version": "9"
"version": "11"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@
"version": "2"
},
{
"version": "9"
"version": "11"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@
"version": "2"
},
{
"version": "9"
"version": "11"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -707,25 +707,25 @@
},
{
"input": [
"Score"
"Score",
"Slope"
],
"output": [
"MulNodeOutput"
],
"name": "Mul",
"opType": "Mul"
},
{
"input": [
"MulNodeOutput",
"Slope0"
],
"output": [
"linearOutput"
],
"name": "Affine",
"opType": "Affine",
"attribute": [
{
"name": "alpha",
"f": 0.4,
"type": "FLOAT"
},
{
"name": "beta",
"f": -1E-07,
"type": "FLOAT"
}
]
"name": "Add",
"opType": "Add"
},
{
"input": [
Expand Down Expand Up @@ -804,6 +804,22 @@
}
],
"name": "modelWithLessIO",
"initializer": [
{
"dataType": 1,
"floatData": [
0.4
],
"name": "Slope"
},
{
"dataType": 1,
"floatData": [
-1E-07
],
"name": "Slope0"
}
],
"input": [
{
"name": "F1",
Expand Down Expand Up @@ -961,7 +977,7 @@
"version": "2"
},
{
"version": "9"
"version": "11"
}
]
}
Loading