Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NimbusML

`nimbusml` is a Python module that provides experimental Python bindings for [ML.NET](https://github.com/dotnet/machinelearning).
`nimbusml` is a Python module that provides Python bindings for [ML.NET](https://github.com/dotnet/machinelearning).

ML.NET was originally developed in Microsoft Research and is used across many product groups in Microsoft like Windows, Bing, PowerPoint, Excel and others. `nimbusml` was built to enable data science teams that are more familiar with Python to take advantage of ML.NET's functionality and performance.

Expand Down
8 changes: 8 additions & 0 deletions build.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,14 @@ copy "%BuildOutputDir%%Configuration%\pybridge.pyd" "%__currentScriptDir%src\py

if %PythonVersion% == 2.7 (
copy "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\*.dll" "%__currentScriptDir%src\python\nimbusml\internal\libs\"
:: Remove the DataPrep DLLs, as they are not supported in Python 2.7.
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.DPrep.*"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.Data.*"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.ProgramSynthesis.*"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.DataPrep.dll"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\ExcelDataReader.dll"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.WindowsAzure.Storage.dll"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.Workbench.Messaging.SDK.dll"
) else (
for /F "tokens=*" %%A in (build/libs_win.txt) do copy "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\%%A" "%__currentScriptDir%src\python\nimbusml\internal\libs\"
)
Expand Down
8 changes: 8 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,14 @@ then
ext=*.dylib
fi
cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/${ext} "${__currentScriptDir}/src/python/nimbusml/internal/libs/"
# Remove the DataPrep libraries, as they are not supported in Python 2.7.
# The wildcard has to sit outside the double quotes: a quoted "*" is handed to rm
# as a literal file name, matches nothing, and "rm -f" silently ignores the miss.
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/"Microsoft.DPrep.*
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/"Microsoft.Data.*
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/"Microsoft.ProgramSynthesis.*
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.DataPrep.dll"
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/ExcelDataReader.dll"
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.WindowsAzure.Storage.dll"
rm -f "${__currentScriptDir}/src/python/nimbusml/internal/libs/Microsoft.Workbench.Messaging.SDK.dll"
else
libs_txt=libs_linux.txt
if [ "$(uname -s)" = "Darwin" ]
Expand Down
74 changes: 3 additions & 71 deletions src/DotNetBridge/Bridge.cs
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,6 @@ private enum FnId
Generic = 2,
}

#if !CORECLR
// The hosting code invokes this to get a specific entry point.
[UnmanagedFunctionPointer(CallingConvention.StdCall)]
private delegate IntPtr NativeFnGetter(FnId id);
#endif

#region Callbacks to native

// Call back to provide messages to native code.
Expand Down Expand Up @@ -236,8 +230,9 @@ private struct EnvironmentBlock
[FieldOffset(0x18)]
public readonly void* modelSink;

// Max slots to return for vector-valued columns (<= 0 to return all).
[FieldOffset(0x20)]
public readonly int maxThreadsAllowed;
public readonly int maxSlots;

// Call back to provide cancel flag.
[FieldOffset(0x28)]
Expand All @@ -252,41 +247,14 @@ private struct EnvironmentBlock
[UnmanagedFunctionPointer(CallingConvention.StdCall)]
private unsafe delegate int NativeGeneric(EnvironmentBlock* penv, sbyte* psz, int cdata, DataSourceBlock** ppdata);

#if !CORECLR
private static NativeFnGetter FnGetter;
#endif
private static NativeGeneric FnGeneric;

/// <summary>
/// Wraps a native function pointer in a managed delegate of type <typeparamref name="TDel"/>.
/// </summary>
/// <typeparam name="TDel">The delegate type to produce; asserted to derive from <see cref="Delegate"/>.</typeparam>
/// <param name="pv">The native function pointer; asserted to be non-null.</param>
/// <returns>A delegate of type <typeparamref name="TDel"/> bound to <paramref name="pv"/>.</returns>
private static TDel MarshalDelegate<TDel>(void* pv)
{
    Contracts.Assert(typeof(TDel).IsSubclassOf(typeof(Delegate)));
    Contracts.Assert(pv != null);

    var fnAddress = (IntPtr)pv;
#if CORECLR
    // The generic overload is used when building for CoreCLR.
    return Marshal.GetDelegateForFunctionPointer<TDel>(fnAddress);
#else
    // Otherwise use the non-generic overload and cast through object.
    Delegate untyped = Marshal.GetDelegateForFunctionPointer(fnAddress, typeof(TDel));
    return (TDel)(object)untyped;
#endif
}

#if !CORECLR
/// <summary>
/// This is the bootstrapping entry point. It's labeled private but is actually invoked from the native
/// code to poke the address of the FnGetter callback into the address encoded in the string parameter.
/// This odd way of doing things is because the most convenient way to call an initial managed method
/// imposes the signature of Func{string, int}, which doesn't allow us to return a function address.
/// </summary>
/// <param name="addr">Integer text (parsed with <c>long.Parse</c>) giving the address that receives the function pointer.</param>
/// <returns>Always 1.</returns>
private static unsafe int GetFnGetterCallback(string addr)
{
// Publish the GetFn delegate once; CompareExchange only stores it while FnGetter is still null.
if (FnGetter == null)
Interlocked.CompareExchange(ref FnGetter, (NativeFnGetter)GetFn, null);
long a = long.Parse(addr);
// Reinterpret the parsed integer as an IntPtr*: the storage of p is overwritten, through pp, with the value a.
IntPtr* p = null;
IntPtr** pp = &p;
*(long*)pp = a;
// Store the native-callable pointer for FnGetter at that address.
*p = Marshal.GetFunctionPointerForDelegate(FnGetter);
return 1;
}
#endif

/// <summary>
/// This is the main FnGetter function. Given an FnId value, it returns a native-callable
Expand Down Expand Up @@ -397,7 +365,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
// Wrap the data sets.
ch.Trace("Wrapping native data sources");
ch.Trace("Executing");
ExecCore(penv, host, ch, graph, cdata, ppdata);
RunGraphCore(penv, host, graph, cdata, ppdata);
}
catch (Exception e)
{
Expand All @@ -420,24 +388,6 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
return 0;
}

/// <summary>
/// Validates that the model at index <paramref name="i"/> has both a non-null byte buffer
/// and a positive length; otherwise the host's parameter check fails with "Model is missing".
/// </summary>
private static void CheckModel(IHost host, byte** ppModelBin, long* pllModelBinLen, int i)
{
    // Evaluated in the same short-circuit order as a single && chain: the length array is
    // only touched once the byte buffer for this index is known to be present.
    bool hasBytes = ppModelBin != null && ppModelBin[i] != null;
    bool hasModel = hasBytes && pllModelBinLen != null && pllModelBinLen[i] > 0;

    host.CheckParam(hasModel, "pModelBin", "Model is missing");
}

/// <summary>
/// Asserts the validity of its arguments on the supplied channel and then hands the graph
/// and the native data sources to <c>RunGraphCore</c>.
/// </summary>
private static void ExecCore(EnvironmentBlock* penv, IHost host, IChannel ch, string graph, int cdata, DataSourceBlock** ppdata)
{
    Contracts.AssertValue(ch);
    ch.AssertValue(host);
    ch.AssertNonEmpty(graph);
    ch.Assert(cdata >= 0);

    // A null source array is acceptable only when the source count is zero.
    bool sourcesMissing = ppdata == null && cdata != 0;
    ch.Assert(!sourcesMissing);

    RunGraphCore(penv, host, graph, cdata, ppdata);
}

/// <summary>
/// Convert UTF8 bytes with known length to ROM<char>. Negative length unsupported.
/// </summary>
Expand Down Expand Up @@ -483,25 +433,7 @@ internal static string BytesToString(sbyte* psz)

if (cch == 0)
return null;
#if CORECLR

return Encoding.UTF8.GetString((byte*)psz, cch);
#else
if (cch <= 0)
return "";

var decoder = Encoding.UTF8.GetDecoder();
var chars = new char[decoder.GetCharCount((byte*)psz, cch, true)];
int bytesUsed;
int charsUsed;
bool complete;
fixed (char* pchars = chars)
decoder.Convert((byte*)psz, cch, pchars, chars.Length, true, out bytesUsed, out charsUsed, out complete);
Contracts.Assert(bytesUsed == cch);
Contracts.Assert(charsUsed == chars.Length);
Contracts.Assert(complete);
return new string(chars);
#endif
}

/// <summary>
Expand Down
1 change: 0 additions & 1 deletion src/DotNetBridge/RmlEnvironment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ protected override IHost RegisterCore(HostEnvironmentBase<RmlEnvironment> source
/// <summary>
/// Creates an environment whose random generator comes from <c>RandomUtils.Create(seed)</c>
/// and stores <paramref name="checkDelegate"/> in <c>CheckCancelled</c>.
/// </summary>
/// <param name="checkDelegate">Callback assigned to <c>CheckCancelled</c>.</param>
/// <param name="seed">Optional seed passed to <c>RandomUtils.Create</c>.</param>
/// <param name="verbose">Forwarded unchanged to the chained constructor.</param>
public RmlEnvironment(Bridge.CheckCancelled checkDelegate, int? seed = null, bool verbose = false)
: this(RandomUtils.Create(seed), verbose)
{

CheckCancelled = checkDelegate;
}

Expand Down
38 changes: 3 additions & 35 deletions src/DotNetBridge/RunGraph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,6 @@ public unsafe static partial class Bridge
// std:null specifier in a graph, used to redirect output to std::null
const string STDNULL = "<null>";

/// <summary>
/// Argument holder for running a graph. The fields carry <c>Argument</c> attributes and are
/// never assigned directly in code, hence the suppression of warning 649 around them.
/// </summary>
private sealed class RunGraphArgs
{
#pragma warning disable 649 // never assigned
// The graph text; RunGraphCore parses it as JSON.
[Argument(ArgumentType.AtMostOnce)]
public string graph;

// Requested degree of parallelism; null when not specified.
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Desired degree of parallelism in the data pipeline", ShortName = "conc")]
public int? parallel;

// Random seed; null when not specified.
[Argument(ArgumentType.AtMostOnce, HelpText = "Random seed", ShortName = "seed")]
public int? randomSeed;

[Argument(ArgumentType.AtMostOnce, ShortName = "lab")]
public string labelColumn; // not used

[Argument(ArgumentType.Multiple, ShortName = "feat")]
public string[] featureColumn; // not used

// Defaults to -1, which per the help text means "return all slots".
[Argument(ArgumentType.AtMostOnce, HelpText = "Max slots to return for vector valued columns (<=0 to return all)")]
public int maxSlots = -1;

#pragma warning restore 649 // never assigned
}

private static void SaveIdvToFile(IDataView idv, string path, IHost host)
{
if (path == STDNULL)
Expand Down Expand Up @@ -90,19 +66,11 @@ private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s
{
Contracts.AssertValue(env);

var args = new RunGraphArgs();
string err = null;
if (!CmdParser.ParseArguments(env, graphStr, args, e => err = err ?? e))
throw env.Except(err);

int? maxThreadsAllowed = Math.Min(args.parallel > 0 ? args.parallel.Value : penv->maxThreadsAllowed, penv->maxThreadsAllowed);
maxThreadsAllowed = penv->maxThreadsAllowed > 0 ? maxThreadsAllowed : args.parallel;
var host = env.Register("RunGraph", args.randomSeed, null);

var host = env.Register("RunGraph", penv->seed, null);
JObject graph;
try
{
graph = JObject.Parse(args.graph);
graph = JObject.Parse(graphStr);
}
catch (JsonReaderException ex)
{
Expand Down Expand Up @@ -221,7 +189,7 @@ private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, s
}
else
{
var infos = ProcessColumns(ref idv, args.maxSlots, host);
var infos = ProcessColumns(ref idv, penv->maxSlots, host);
SendViewToNative(ch, penv, idv, infos);
}
break;
Expand Down
Loading