-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Initial featurizers project #4413
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d537954
b321ab5
2560986
79f3296
b652acd
2565afd
7fc97ae
fdd8ae7
47963b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| <Project Sdk="Microsoft.NET.Sdk" DefaultTargets="Pack"> | ||
|
|
||
| <PropertyGroup> | ||
| <TargetFramework>netstandard2.0</TargetFramework> | ||
| <PackageDescription>ML.NET featurizers with native code implementation</PackageDescription> | ||
| </PropertyGroup> | ||
|
|
||
| <ItemGroup> | ||
| <ProjectReference Include="../Microsoft.ML/Microsoft.ML.nupkgproj" /> | ||
|
|
||
| <PackageReference Include="Microsoft.MLFeaturizers" Version="0.3.5" /> | ||
| </ItemGroup> | ||
|
|
||
| </Project> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| <Project DefaultTargets="Pack"> | ||
|
|
||
| <Import Project="Microsoft.ML.Featurizers.nupkgproj" /> | ||
|
|
||
| </Project> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,223 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Runtime.InteropServices; | ||
| using System.Security; | ||
| using System.Text; | ||
| using Microsoft.Win32.SafeHandles; | ||
|
|
||
| namespace Microsoft.ML.Featurizers | ||
| { | ||
| // Byte-sized result codes; values start at 1, so 0 is not a defined member. NOTE(review): presumably mirrors the result of a native fit call - confirm against the native header. | ||
| internal enum FitResult : byte | ||
| { | ||
| Complete = 1, | ||
| Continue = 2, | ||
| ResetAndContinue = 3 | ||
| } | ||
|
|
||
| // Type identifiers taken directly from the native code implementation so both sides share one mapping. Not all of these types are currently supported. | ||
| internal enum TypeId : uint | ||
| { | ||
| // Enumeration values are in the following format: | ||
| // | ||
| // 0xVTTTXXXX | ||
| // ^^^^^^^^ | ||
| // || |- Id | ||
| // ||- Number of trailing types | ||
| // |- Has trailing types | ||
| // NOTE(review): the literals used below (0x1001, 0x1002) only occupy the low 16 bits, so they overlap the Id digits instead of the V/TTT digits shown above - confirm against the native header. | ||
| String = 1, | ||
| SByte = 2, | ||
| Short = 3, | ||
| Int = 4, | ||
| Long = 5, | ||
| Byte = 6, | ||
| UShort = 7, | ||
| UInt = 8, | ||
| ULong = 9, | ||
| Float16 = 10, | ||
| Float32 = 11, | ||
| Double = 12, | ||
| Complex64 = 13, | ||
| Complex128 = 14, | ||
| BFloat16 = 15, | ||
| Bool = 16, | ||
| Timepoint = 17, | ||
| Duration = 18, | ||
|
|
||
| LastStaticValue = 19, | ||
|
|
||
| // The following values have N number of trailing types. NOTE(review): '+' binds tighter than '|', so Tensor and SparseTensor both evaluate to 0x1015 and Tabular and Nullable both evaluate to 0x1017; the expressions are kept as written to mirror the native code - confirm the collisions are intended there. | ||
| Tensor = 0x1001 | LastStaticValue + 1, | ||
| SparseTensor = 0x1001 | LastStaticValue + 2, | ||
| Tabular = 0x1001 | LastStaticValue + 3, | ||
|
|
||
| Nullable = 0x1001 | LastStaticValue + 4, | ||
| Vector = 0x1001 | LastStaticValue + 5, | ||
| MapId = 0x1002 | LastStaticValue + 6 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
please add comments for this #Resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does this add Why the math? Seems more simple to directly map to the values. Why is the base of the last one
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All of these are taken directly as is from the Native code base. They are here just so we have a correct mapping on our side.
The math is there just to keep it identical to the native code. I could directly map the values, but then I would have to map the values every time I need to check if it has changed in the native code. Leaving it like this makes comparisons much easier. The number at the end of In reply to: 348115118 [](ancestors = 348115118) |
||
| }; | ||
|
|
||
| // Struct mirroring the native struct. | ||
| // It is used to pass binary data between ML.NET and the native code as a Data pointer plus a DataSize; Sequential layout with Pack = 1 keeps the declared field order with no padding. | ||
| [StructLayout(LayoutKind.Sequential, Pack = 1)] | ||
| internal unsafe struct NativeBinaryArchiveData | ||
| { | ||
| public byte* Data; | ||
| public IntPtr DataSize; | ||
| } | ||
|
|
||
| #region SafeHandles | ||
|
|
||
| // Safe handle that frees the memory for a native error returned to ML.NET by calling the native DestroyErrorInfo export on release. NOTE(review): the import returns bool without [return: MarshalAs(UnmanagedType.I1)]; if the native function returns a 1-byte C++ bool the default 4-byte marshalling may misread it - confirm against the native signature. | ||
| internal class ErrorInfoSafeHandle : SafeHandleZeroOrMinusOneIsInvalid | ||
| { | ||
| [DllImport("Featurizers", EntryPoint = "DestroyErrorInfo", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] | ||
michaelgsharp marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| private static extern bool DestroyErrorInfo(IntPtr error); | ||
|
|
||
| public ErrorInfoSafeHandle(IntPtr handle) : base(true) | ||
| { | ||
| SetHandle(handle); | ||
| } | ||
|
|
||
| protected override bool ReleaseHandle() | ||
| { | ||
| return DestroyErrorInfo(handle); | ||
| } | ||
| } | ||
|
|
||
| // Safe handle that frees the memory for error strings returned from the native code to ML.NET. The string length is stored alongside the pointer because the native DestroyErrorInfoString export takes both. NOTE(review): the bool return has no [return: MarshalAs(UnmanagedType.I1)] - confirm against the native signature. | ||
| internal class ErrorInfoStringSafeHandle : SafeHandleZeroOrMinusOneIsInvalid | ||
| { | ||
| [DllImport("Featurizers", EntryPoint = "DestroyErrorInfoString", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] | ||
| private static extern bool DestroyErrorInfoString(IntPtr errorString, IntPtr errorStringSize); | ||
|
|
||
| private IntPtr _length; | ||
| public ErrorInfoStringSafeHandle(IntPtr handle, IntPtr length) : base(true) | ||
| { | ||
| SetHandle(handle); | ||
| _length = length; | ||
| } | ||
|
|
||
| protected override bool ReleaseHandle() | ||
| { | ||
| return DestroyErrorInfoString(handle, _length); | ||
| } | ||
| } | ||
|
|
||
| // Safe handle that frees the memory for the transformed data by invoking the destroy delegate supplied at construction with the data pointer and its size. | ||
| // Is called automatically after each call to transform. NOTE(review): the field _destroySaveDataHandler and the constructor parameter destroyCppTransformerEstimator are named after other handles although both hold the transformed-data destroy delegate. | ||
| internal delegate bool DestroyTransformedDataNative(IntPtr output, IntPtr outputSize, out IntPtr errorHandle); | ||
| internal class TransformedDataSafeHandle : SafeHandleZeroOrMinusOneIsInvalid | ||
| { | ||
| private DestroyTransformedDataNative _destroySaveDataHandler; | ||
| private IntPtr _dataSize; | ||
|
|
||
| public TransformedDataSafeHandle(IntPtr handle, IntPtr dataSize, DestroyTransformedDataNative destroyCppTransformerEstimator) : base(true) | ||
| { | ||
| SetHandle(handle); | ||
| _dataSize = dataSize; | ||
| _destroySaveDataHandler = destroyCppTransformerEstimator; | ||
| } | ||
|
|
||
| protected override bool ReleaseHandle() | ||
| { | ||
| // The native error handle is discarded: no error is expected here. NOTE(review): if the native call ever does report one, the handle is never read or freed - confirm that is acceptable. | ||
| return _destroySaveDataHandler(handle, _dataSize, out IntPtr errorHandle); | ||
| } | ||
| } | ||
|
|
||
| // Safe handle that frees the memory for a native estimator or transformer by invoking the destroy delegate supplied at construction. | ||
| // Is called automatically at the end of life for a transformer or estimator. | ||
| internal delegate bool DestroyNativeTransformerEstimator(IntPtr estimator, out IntPtr errorHandle); | ||
| internal class TransformerEstimatorSafeHandle : SafeHandleZeroOrMinusOneIsInvalid | ||
| { | ||
| private DestroyNativeTransformerEstimator _destroyNativeTransformerEstimator; | ||
| public TransformerEstimatorSafeHandle(IntPtr handle, DestroyNativeTransformerEstimator destroyNativeTransformerEstimator) : base(true) | ||
| { | ||
| SetHandle(handle); | ||
| _destroyNativeTransformerEstimator = destroyNativeTransformerEstimator; | ||
| } | ||
|
|
||
| protected override bool ReleaseHandle() | ||
| { | ||
| // The native error handle is discarded: no error is expected here. NOTE(review): if the native call ever does report one, the handle is never read or freed - confirm that is acceptable. | ||
| return _destroyNativeTransformerEstimator(handle, out IntPtr errorHandle); | ||
| } | ||
| } | ||
|
|
||
| // Safe handle that frees the memory for the internal state of a native transformer by calling the native DestroyTransformerSaveData export with the buffer pointer and its size. | ||
| // Is called automatically after we save the model. NOTE(review): the DestroyTransformerSaveData delegate declared below is not used by this class, which calls the DllImport directly - confirm it is needed elsewhere. | ||
| internal delegate bool DestroyTransformerSaveData(IntPtr buffer, IntPtr bufferSize, out IntPtr errorHandle); | ||
| internal class SaveDataSafeHandle : SafeHandleZeroOrMinusOneIsInvalid | ||
| { | ||
| private readonly IntPtr _dataSize; | ||
|
|
||
| [DllImport("Featurizers", EntryPoint = "DestroyTransformerSaveData", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] | ||
| private static extern bool DestroyTransformerSaveDataNative(IntPtr buffer, IntPtr bufferSize, out IntPtr error); | ||
|
|
||
| public SaveDataSafeHandle(IntPtr handle, IntPtr dataSize) : base(true) | ||
| { | ||
| SetHandle(handle); | ||
| _dataSize = dataSize; | ||
| } | ||
|
|
||
| protected override bool ReleaseHandle() | ||
| { | ||
| // The native error handle is discarded: no error is expected here. NOTE(review): if the native call ever does report one, the handle is never read or freed - confirm that is acceptable. | ||
| return DestroyTransformerSaveDataNative(handle, _dataSize, out _); | ||
| } | ||
| } | ||
|
|
||
| #endregion | ||
|
|
||
| // Static extension class with common methods used in multiple featurizers: reading a native error message as UTF-8 while freeing the native error memory, and mapping .NET types to native TypeId values (unsupported types throw InvalidOperationException). NOTE(review): GetErrorDetailsAndFreeNativeMemory ignores the bool returned by GetErrorInfoString and narrows the size with ToInt32 - confirm a failed call or a size above int.MaxValue cannot occur. | ||
| internal static class CommonExtensions | ||
| { | ||
| [DllImport("Featurizers", EntryPoint = "GetErrorInfoString", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity] | ||
| private static extern bool GetErrorInfoString(IntPtr error, out IntPtr errorHandleString, out IntPtr errorHandleStringSize); | ||
|
|
||
| internal static string GetErrorDetailsAndFreeNativeMemory(IntPtr errorHandle) | ||
| { | ||
| using (var error = new ErrorInfoSafeHandle(errorHandle)) | ||
| { | ||
| GetErrorInfoString(errorHandle, out IntPtr errorHandleString, out IntPtr errorHandleStringSize); | ||
| using (var errorString = new ErrorInfoStringSafeHandle(errorHandleString, errorHandleStringSize)) | ||
| { | ||
| byte[] buffer = new byte[errorHandleStringSize.ToInt32()]; | ||
| Marshal.Copy(errorHandleString, buffer, 0, buffer.Length); | ||
|
|
||
| return Encoding.UTF8.GetString(buffer); | ||
| } | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
new line #Resolved |
||
|
|
||
| internal static TypeId GetNativeTypeIdFromType(this Type type) | ||
| { | ||
| if (type == typeof(sbyte)) | ||
| return TypeId.SByte; | ||
| else if (type == typeof(short)) | ||
| return TypeId.Short; | ||
| else if (type == typeof(int)) | ||
| return TypeId.Int; | ||
| else if (type == typeof(long)) | ||
| return TypeId.Long; | ||
| else if (type == typeof(byte)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
sbyte #Resolved |
||
| return TypeId.Byte; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
SByte #Resolved |
||
| else if (type == typeof(ushort)) | ||
| return TypeId.UShort; | ||
| else if (type == typeof(uint)) | ||
| return TypeId.UInt; | ||
| else if (type == typeof(ulong)) | ||
| return TypeId.ULong; | ||
| else if (type == typeof(float)) | ||
| return TypeId.Float32; | ||
| else if (type == typeof(double)) | ||
| return TypeId.Double; | ||
| else if (type == typeof(bool)) | ||
| return TypeId.Bool; | ||
| else if (type == typeof(ReadOnlyMemory<char>)) | ||
| return TypeId.String; | ||
|
|
||
| throw new InvalidOperationException($"Unsupported type {type}"); | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think `method` is a reserved keyword in C#. Recommend the longer name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I kept it as `meth` because that is consistent with all the other methods here that do this same thing. I think if we want it to be `method` then we need to change them all.
In reply to: 347619466 [](ancestors = 347619466)