diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index c8fa8b8e84..c1b573c922 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -271,6 +271,14 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Samples EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Samples.GPU", "docs\samples\Microsoft.ML.Samples.GPU\Microsoft.ML.Samples.GPU.csproj", "{3C8F910B-7F23-4D25-B521-6D5AC9570ADD}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Featurizers", "src\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.csproj", "{E2DD0721-5B0F-4606-8182-4C7EFB834518}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.Featurizers", "Microsoft.ML.Featurizers", "{1BA5C784-52E8-4A87-8525-26B2452F2882}" + ProjectSection(SolutionItems) = preProject + pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj = pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj + pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.symbols.nupkgproj = pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.symbols.nupkgproj + EndProjectSection +EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeGenerator", "src\Microsoft.ML.CodeGenerator\Microsoft.ML.CodeGenerator.csproj", "{56CB0850-7341-4D71-9AE4-9EFC472D93DD}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeGenerator.Tests", "test\Microsoft.ML.CodeGenerator.Tests\Microsoft.ML.CodeGenerator.Tests.csproj", "{46CC5637-3DDF-4100-93FC-44BB87B2DB81}" @@ -1763,6 +1771,30 @@ Global {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + 
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -1857,6 +1889,8 @@ Global {C8DB58DC-6434-4431-A81F-263D86E2A5F3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {C91F81E3-B900-4968-A6DF-F53B515E97E1} = {BF66A305-DF10-47E4-8D81-42049B149D2B} {027DBA48-85B6-46F1-9487-0B49B5057FC0} = {C91F81E3-B900-4968-A6DF-F53B515E97E1} + {E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 9947f4ceb9..9b66d0a6ae 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -11,6 +11,7 @@ + diff --git a/pkg/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.nupkgproj b/pkg/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.nupkgproj new file mode 100644 index 0000000000..86cc32ace1 --- /dev/null +++ b/pkg/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.nupkgproj @@ -0,0 +1,14 @@ + + + + netstandard2.0 + ML.NET featurizers with native code implementation + + + + + + + + + diff --git a/pkg/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.symbols.nupkgproj b/pkg/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.symbols.nupkgproj new file mode 100644 index 0000000000..483e51c61a --- /dev/null +++ b/pkg/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.symbols.nupkgproj @@ -0,0 +1,5 @@ + + + + + diff --git a/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs index 
31dade4d29..cd57925925 100644 --- a/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Core/Properties/AssemblyInfo.cs @@ -40,9 +40,8 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Vision" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Transforms" + PublicKey.Value)] - [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.AutoML" + PublicKey.Value)] - +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Featurizers" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "TreeVisualizer" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "TMSNlearnPrediction" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs index fbdb3a791d..506fd27962 100644 --- a/src/Microsoft.ML.Core/Utilities/Utils.cs +++ b/src/Microsoft.ML.Core/Utilities/Utils.cs @@ -966,6 +966,14 @@ private static MethodInfo MarshalInvokeCheckAndCreate(Type genArg, Delegat return meth; } + private static MethodInfo MarshalInvokeCheckAndCreate<TRet>(Type[] genArgs, Delegate func) + { + var meth = MarshalActionInvokeCheckAndCreate(genArgs, func); + if (meth.ReturnType != typeof(TRet)) // callers cast the invocation result to TRet, so the closed method must return exactly TRet + throw Contracts.ExceptParam(nameof(func), "Cannot be generic on return type"); + return meth; + } + // REVIEW: n-argument versions? The multi-column re-application problem? // Think about how to address these. @@ -1092,6 +1100,28 @@ public static TRet MarshalInvoke + /// <summary> + /// A 1 argument and n type version of <c>MarshalInvoke</c>: the generic method behind <paramref name="func"/> is re-created over <paramref name="genArgs"/> and invoked on the delegate's target with <paramref name="arg1"/>. 
+ /// </summary> + public static TRet MarshalInvoke<TArg1, TRet>( + Func<TArg1, TRet> func, + Type[] genArgs, TArg1 arg1) + { + var meth = MarshalInvokeCheckAndCreate<TRet>(genArgs, func); + return (TRet)meth.Invoke(func.Target, new object[] { arg1 }); + } + + /// <summary> + /// A 2 argument and n type version of <c>MarshalInvoke</c>: the generic method behind <paramref name="func"/> is re-created over <paramref name="genArgs"/> and invoked on the delegate's target with <paramref name="arg1"/> and <paramref name="arg2"/>. + /// </summary> + public static TRet MarshalInvoke<TArg1, TArg2, TRet>( + Func<TArg1, TArg2, TRet> func, + Type[] genArgs, TArg1 arg1, TArg2 arg2) + { + var meth = MarshalInvokeCheckAndCreate<TRet>(genArgs, func); + return (TRet)meth.Invoke(func.Target, new object[] { arg1, arg2 }); + } + private static MethodInfo MarshalActionInvokeCheckAndCreate(Type genArg, Delegate func) { Contracts.CheckValue(genArg, nameof(genArg)); @@ -1104,6 +1134,18 @@ private static MethodInfo MarshalActionInvokeCheckAndCreate(Type genArg, Delegat return meth; } + private static MethodInfo MarshalActionInvokeCheckAndCreate(Type[] typeArguments, Delegate func) + { + Contracts.CheckValue(typeArguments, nameof(typeArguments)); + Contracts.CheckValue(func, nameof(func)); + var meth = func.GetMethodInfo(); + Contracts.CheckParam(meth.IsGenericMethod, nameof(func), "Should be generic but is not"); + Contracts.CheckParam(meth.GetGenericArguments().Length == typeArguments.Length, nameof(func), + "Method should have exactly the same number of generic type parameters as list passed in but it does not."); + meth = meth.GetGenericMethodDefinition().MakeGenericMethod(typeArguments); // re-close the open generic definition over the requested type arguments + return meth; + } + /// /// This is akin to , except applied to /// instead of . 
diff --git a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs index 06fe680348..06791fefde 100644 --- a/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs +++ b/src/Microsoft.ML.Data/Properties/AssemblyInfo.cs @@ -44,9 +44,8 @@ [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet101" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet18" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet50" + PublicKey.Value)] - [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Experimental" + PublicKey.Value)] - +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Featurizers" + PublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "TMSNlearnPrediction" + InternalPublicKey.Value)] [assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CntkWrapper" + InternalPublicKey.Value)] diff --git a/src/Microsoft.ML.Featurizers/Common.cs b/src/Microsoft.ML.Featurizers/Common.cs new file mode 100644 index 0000000000..0d2111d8a2 --- /dev/null +++ b/src/Microsoft.ML.Featurizers/Common.cs @@ -0,0 +1,223 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Security; +using System.Text; +using Microsoft.Win32.SafeHandles; + +namespace Microsoft.ML.Featurizers +{ + internal enum FitResult : byte + { + Complete = 1, + Continue = 2, + ResetAndContinue = 3 + } + + // Not all of these types are currently supported. The values are taken directly from the native code implementation, so they must not be renumbered here. 
+ internal enum TypeId : uint + { + // Enumeration values are in the following format: + // + // 0xVTTTXXXX + // ^^^^^^^^ + // || |- Id + // ||- Number of trailing types + // |- Has trailing types + // + String = 1, + SByte = 2, + Short = 3, + Int = 4, + Long = 5, + Byte = 6, + UShort = 7, + UInt = 8, + ULong = 9, + Float16 = 10, + Float32 = 11, + Double = 12, + Complex64 = 13, + Complex128 = 14, + BFloat16 = 15, + Bool = 16, + Timepoint = 17, + Duration = 18, + + LastStaticValue = 19, + + // The following values have N number of trailing types. NOTE(review): `+` binds tighter than `|`, so each value is 0x1001 | (LastStaticValue + n) and bit 0 of the literal absorbs the low bit of the id: Tensor == SparseTensor (0x1015) and Tabular == Nullable (0x1017). The literal also lands in the low 16 bits rather than in the V/TTT positions sketched above. TODO: confirm against the native header before changing anything. + Tensor = 0x1001 | LastStaticValue + 1, + SparseTensor = 0x1001 | LastStaticValue + 2, + Tabular = 0x1001 | LastStaticValue + 3, + + Nullable = 0x1001 | LastStaticValue + 4, + Vector = 0x1001 | LastStaticValue + 5, + MapId = 0x1002 | LastStaticValue + 6 + }; + + // A struct mirroring the native struct. + // It is used to pass binary data between ML.NET and the native code. + [StructLayout(LayoutKind.Sequential, Pack = 1)] + internal unsafe struct NativeBinaryArchiveData + { + public byte* Data; + public IntPtr DataSize; + } + + #region SafeHandles + + // Safe handle that frees the memory for a native error returned to ML.NET. + internal class ErrorInfoSafeHandle : SafeHandleZeroOrMinusOneIsInvalid + { + [DllImport("Featurizers", EntryPoint = "DestroyErrorInfo", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] + private static extern bool DestroyErrorInfo(IntPtr error); + + public ErrorInfoSafeHandle(IntPtr handle) : base(true) + { + SetHandle(handle); + } + + protected override bool ReleaseHandle() + { + return DestroyErrorInfo(handle); + } + } + + // Safe handle that frees the memory for error strings returned from the native code to ML.NET. 
+ internal class ErrorInfoStringSafeHandle : SafeHandleZeroOrMinusOneIsInvalid + { + [DllImport("Featurizers", EntryPoint = "DestroyErrorInfoString", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] + private static extern bool DestroyErrorInfoString(IntPtr errorString, IntPtr errorStringSize); + + private readonly IntPtr _length; + public ErrorInfoStringSafeHandle(IntPtr handle, IntPtr length) : base(true) + { + SetHandle(handle); + _length = length; + } + + protected override bool ReleaseHandle() + { + return DestroyErrorInfoString(handle, _length); + } + } + + // Safe handle that frees the memory for the transformed data through the native destroy delegate supplied at construction. + // Is called automatically after each call to transform. + internal delegate bool DestroyTransformedDataNative(IntPtr output, IntPtr outputSize, out IntPtr errorHandle); + internal class TransformedDataSafeHandle : SafeHandleZeroOrMinusOneIsInvalid + { + private readonly DestroyTransformedDataNative _destroySaveDataHandler; + private readonly IntPtr _dataSize; + + public TransformedDataSafeHandle(IntPtr handle, IntPtr dataSize, DestroyTransformedDataNative destroyCppTransformerEstimator) : base(true) + { + SetHandle(handle); + _dataSize = dataSize; + _destroySaveDataHandler = destroyCppTransformerEstimator; + } + + protected override bool ReleaseHandle() + { + // Not sure what to do with an error here, so the native error handle is discarded. There shouldn't ever be one though. NOTE(review): if one is reported it is never destroyed - confirm this is acceptable. + return _destroySaveDataHandler(handle, _dataSize, out _); + } + } + + // Safe handle that frees the memory for a native estimator or transformer. + // Is called automatically at the end of life for a transformer or estimator. 
+ internal delegate bool DestroyNativeTransformerEstimator(IntPtr estimator, out IntPtr errorHandle); + internal class TransformerEstimatorSafeHandle : SafeHandleZeroOrMinusOneIsInvalid + { + private readonly DestroyNativeTransformerEstimator _destroyNativeTransformerEstimator; + public TransformerEstimatorSafeHandle(IntPtr handle, DestroyNativeTransformerEstimator destroyNativeTransformerEstimator) : base(true) + { + SetHandle(handle); + _destroyNativeTransformerEstimator = destroyNativeTransformerEstimator; + } + + protected override bool ReleaseHandle() + { + // Not sure what to do with an error here, so the native error handle is discarded. There shouldn't ever be one though. + return _destroyNativeTransformerEstimator(handle, out _); + } + } + + // Safe handle that frees the memory for the internal state of a native transformer. + // Is called automatically after we save the model. + internal delegate bool DestroyTransformerSaveData(IntPtr buffer, IntPtr bufferSize, out IntPtr errorHandle); + internal class SaveDataSafeHandle : SafeHandleZeroOrMinusOneIsInvalid + { + private readonly IntPtr _dataSize; + + [DllImport("Featurizers", EntryPoint = "DestroyTransformerSaveData", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] + private static extern bool DestroyTransformerSaveDataNative(IntPtr buffer, IntPtr bufferSize, out IntPtr error); + + public SaveDataSafeHandle(IntPtr handle, IntPtr dataSize) : base(true) + { + SetHandle(handle); + _dataSize = dataSize; + } + + protected override bool ReleaseHandle() + { + // Not sure what to do with an error here, so the native error handle is discarded. There shouldn't ever be one though. 
+ return DestroyTransformerSaveDataNative(handle, _dataSize, out _); + } + } + + #endregion + + // Static extension class with common methods used in multiple featurizers. + internal static class CommonExtensions + { + [DllImport("Featurizers", EntryPoint = "GetErrorInfoString", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] + private static extern bool GetErrorInfoString(IntPtr error, out IntPtr errorHandleString, out IntPtr errorHandleStringSize); + + internal static string GetErrorDetailsAndFreeNativeMemory(IntPtr errorHandle) + { + using (var error = new ErrorInfoSafeHandle(errorHandle)) // takes ownership of errorHandle; disposing it destroys the native error info + { + GetErrorInfoString(errorHandle, out IntPtr errorHandleString, out IntPtr errorHandleStringSize); // NOTE(review): the bool result is ignored - confirm the out values are usable when the native call fails + using (var errorString = new ErrorInfoStringSafeHandle(errorHandleString, errorHandleStringSize)) // frees the native string once it has been copied + { + byte[] buffer = new byte[errorHandleStringSize.ToInt32()]; + Marshal.Copy(errorHandleString, buffer, 0, buffer.Length); + + return Encoding.UTF8.GetString(buffer); // the copied bytes are decoded as UTF-8 + } + } + } + + internal static TypeId GetNativeTypeIdFromType(this Type type) + { + if (type == typeof(sbyte)) + return TypeId.SByte; + else if (type == typeof(short)) + return TypeId.Short; + else if (type == typeof(int)) + return TypeId.Int; + else if (type == typeof(long)) + return TypeId.Long; + else if (type == typeof(byte)) + return TypeId.Byte; + else if (type == typeof(ushort)) + return TypeId.UShort; + else if (type == typeof(uint)) + return TypeId.UInt; + else if (type == typeof(ulong)) + return TypeId.ULong; + else if (type == typeof(float)) + return TypeId.Float32; + else if (type == typeof(double)) + return TypeId.Double; + else if (type == typeof(bool)) + return TypeId.Bool; + else if (type == typeof(ReadOnlyMemory<char>)) + return TypeId.String; + + throw new InvalidOperationException($"Unsupported type {type}"); + } + } +} diff --git a/src/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.csproj b/src/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.csproj new file mode 
100644 index 0000000000..f3a35c5d85 --- /dev/null +++ b/src/Microsoft.ML.Featurizers/Microsoft.ML.Featurizers.csproj @@ -0,0 +1,18 @@ + + + + netstandard2.0 + Microsoft.ML.Featurizers + true + + + + + + + + + + + + diff --git a/src/Microsoft.ML.Featurizers/Properties/AssemblyInfo.cs b/src/Microsoft.ML.Featurizers/Properties/AssemblyInfo.cs new file mode 100644 index 0000000000..f7c9d934e5 --- /dev/null +++ b/src/Microsoft.ML.Featurizers/Properties/AssemblyInfo.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using Microsoft.ML; + +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)] +[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EntryPoints" + PublicKey.Value)] + +[assembly: WantsToBeBestFriends] diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 634c8fa8e9..1c28480f50 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -11,6 +11,7 @@ +