diff --git a/build.cmd b/build.cmd
index 8ed5005d..8ad4a127 100644
--- a/build.cmd
+++ b/build.cmd
@@ -173,6 +173,8 @@ if "%AzureBuild%" == "True" (
echo ##vso[task.prependpath]%_dotnetRoot%
)
+set LOCAL_NUGET_PACKAGES_DIR=.\local-nuget-packages
+
:: Build managed code
echo ""
echo "#################################"
@@ -311,6 +313,7 @@ copy "%BuildOutputDir%%Configuration%\pybridge.pyd" "%__currentScriptDir%src\py
if %PythonVersion% == 2.7 (
copy "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\*.dll" "%__currentScriptDir%src\python\nimbusml\internal\libs\"
+ xcopy /S /E /I "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\Data" "%__currentScriptDir%src\python\nimbusml\internal\libs\Data"
:: remove dataprep dlls as its not supported in python 2.7
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.DPrep.*"
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.Data.*"
@@ -321,6 +324,7 @@ if %PythonVersion% == 2.7 (
del "%__currentScriptDir%src\python\nimbusml\internal\libs\Microsoft.Workbench.Messaging.SDK.dll"
) else (
for /F "tokens=*" %%A in (build/libs_win.txt) do copy "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\%%A" "%__currentScriptDir%src\python\nimbusml\internal\libs\"
+ xcopy /S /E /I "%BuildOutputDir%%Configuration%\Platform\win-x64\publish\Data" "%__currentScriptDir%src\python\nimbusml\internal\libs\Data"
)
if "%DebugBuild%" == "True" (
diff --git a/build.sh b/build.sh
index 6d5221c9..e2292693 100755
--- a/build.sh
+++ b/build.sh
@@ -175,6 +175,8 @@ then
echo "Installing dotnet SDK ... "
curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Version 2.1.701 -InstallDir ./cli
+ export LOCAL_NUGET_PACKAGES_DIR=./local-nuget-packages
+
# Build managed code
echo "Building managed code ... "
_dotnet="${__currentScriptDir}/cli/dotnet"
@@ -213,6 +215,7 @@ then
cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/System.Native.a "${__currentScriptDir}/src/python/nimbusml/internal/libs/"
cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/createdump "${__currentScriptDir}/src/python/nimbusml/internal/libs/" || :
cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/sosdocsunix.txt "${__currentScriptDir}/src/python/nimbusml/internal/libs/"
+ cp -r "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/Data "${__currentScriptDir}/src/python/nimbusml/internal/libs/."
ext=*.so
if [ "$(uname -s)" = "Darwin" ]
then
@@ -241,6 +244,7 @@ then
cat build/${libs_txt} | while read i; do
cp "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/$i "${__currentScriptDir}/src/python/nimbusml/internal/libs/"
done
+ cp -r "${BuildOutputDir}/${__configuration}/Platform/${PublishDir}"/publish/Data "${__currentScriptDir}/src/python/nimbusml/internal/libs/."
fi
if [[ $__configuration = Dbg* ]]
diff --git a/build/libs_linux.txt b/build/libs_linux.txt
index 6ce4cbed..c2c7d848 100644
--- a/build/libs_linux.txt
+++ b/build/libs_linux.txt
@@ -1,6 +1,7 @@
Newtonsoft.Json.dll
libCpuMathNative.so
libFastTreeNative.so
+libFeaturizers.so
libLdaNative.so
libMklImports.so
libMklProxyNative.so
diff --git a/build/libs_mac.txt b/build/libs_mac.txt
index 85544169..1ebc1724 100644
--- a/build/libs_mac.txt
+++ b/build/libs_mac.txt
@@ -9,6 +9,7 @@ lib_lightgbm.dylib
libtensorflow.dylib
libonnxruntime.dylib
libtensorflow_framework.1.dylib
+Featurizers.dll
System.Drawing.Common.dll
TensorFlow.NET.dll
NumSharp.Core.dll
diff --git a/build/libs_win.txt b/build/libs_win.txt
index 7ef9cca7..e815e645 100644
--- a/build/libs_win.txt
+++ b/build/libs_win.txt
@@ -8,6 +8,7 @@ libiomp5md.dll
MklImports.dll
MklProxyNative.dll
SymSgdNative.dll
+Featurizers.dll
tensorflow.dll
TensorFlow.NET.dll
NumSharp.Core.dll
diff --git a/local-nuget-packages/MicrosoftMLFeaturizers.0.1.0.nupkg b/local-nuget-packages/MicrosoftMLFeaturizers.0.1.0.nupkg
new file mode 100644
index 00000000..0a8b2fbd
Binary files /dev/null and b/local-nuget-packages/MicrosoftMLFeaturizers.0.1.0.nupkg differ
diff --git a/local-nuget-packages/microsoft.extensions.ml.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.extensions.ml.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..2ceed83a
Binary files /dev/null and b/local-nuget-packages/microsoft.extensions.ml.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.extensions.ml.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.extensions.ml.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..845b027f
Binary files /dev/null and b/local-nuget-packages/microsoft.extensions.ml.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..a8debf72
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.automl.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.automl.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..f858c678
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.automl.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.automl.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.automl.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..3cf6ed34
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.automl.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.cpumath.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.cpumath.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..008df73c
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.cpumath.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.cpumath.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.cpumath.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..bdcd6852
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.cpumath.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.dataview.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.dataview.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..5729bfa7
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.dataview.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.dataview.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.dataview.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..beefe429
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.dataview.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.dnn.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.dnn.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..f728196c
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.dnn.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.dnn.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.dnn.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..73ffedf4
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.dnn.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.ensemble.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.ensemble.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..9cbdef31
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.ensemble.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.ensemble.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.ensemble.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..069b69d9
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.ensemble.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.entrypoints.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.entrypoints.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..8e27e3cc
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.entrypoints.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.entrypoints.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.entrypoints.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..f72c9382
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.entrypoints.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.experimental.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.experimental.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..554d2417
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.experimental.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.experimental.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.experimental.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..fc844210
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.experimental.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.fasttree.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.fasttree.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..820b48b3
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.fasttree.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.fasttree.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.fasttree.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..4174ee8e
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.fasttree.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.featurizers.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.featurizers.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..cb04dfd5
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.featurizers.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.featurizers.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.featurizers.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..5be74193
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.featurizers.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.imageanalytics.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.imageanalytics.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..7c5afeb9
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.imageanalytics.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.imageanalytics.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.imageanalytics.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..11d473a0
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.imageanalytics.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.lightgbm.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.lightgbm.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..381c705c
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.lightgbm.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.lightgbm.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.lightgbm.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..cbd0cf9d
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.lightgbm.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.mkl.components.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.mkl.components.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..7e448a72
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.mkl.components.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.mkl.components.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.mkl.components.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..c24c142e
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.mkl.components.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.mkl.redist.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.mkl.redist.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..42d18904
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.mkl.redist.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.onnxconverter.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.onnxconverter.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..045429c8
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.onnxconverter.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.onnxconverter.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.onnxconverter.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..4a1216b1
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.onnxconverter.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.onnxtransformer.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.onnxtransformer.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..0d97af5c
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.onnxtransformer.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.onnxtransformer.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.onnxtransformer.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..e8e99abc
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.onnxtransformer.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.parquet.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.parquet.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..8f51320e
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.parquet.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.parquet.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.parquet.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..765ce5f9
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.parquet.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.recommender.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.recommender.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..dffcf5c4
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.recommender.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.recommender.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.recommender.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..0c802cb0
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.recommender.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.sampleutils.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.sampleutils.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..88add318
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.sampleutils.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.sampleutils.symbols.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.sampleutils.symbols.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..6348fe79
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.sampleutils.symbols.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..6637e4ff
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.tensorflow.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.tensorflow.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..2b4619e7
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.tensorflow.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.tensorflow.redist.0.18.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.tensorflow.redist.0.18.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..2e943616
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.tensorflow.redist.0.18.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.tensorflow.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.tensorflow.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..88925eb0
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.tensorflow.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.timeseries.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.timeseries.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..036a2ca2
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.timeseries.1.6.2-preview2-28208-8.nupkg differ
diff --git a/local-nuget-packages/microsoft.ml.timeseries.symbols.1.6.2-preview2-28208-8.nupkg b/local-nuget-packages/microsoft.ml.timeseries.symbols.1.6.2-preview2-28208-8.nupkg
new file mode 100644
index 00000000..fcb211d3
Binary files /dev/null and b/local-nuget-packages/microsoft.ml.timeseries.symbols.1.6.2-preview2-28208-8.nupkg differ
diff --git a/nuget.config b/nuget.config
index cedba361..75ab3744 100644
--- a/nuget.config
+++ b/nuget.config
@@ -5,6 +5,7 @@
-
+
+
diff --git a/src/DotNetBridge/Bridge.cs b/src/DotNetBridge/Bridge.cs
index a7954355..00947124 100644
--- a/src/DotNetBridge/Bridge.cs
+++ b/src/DotNetBridge/Bridge.cs
@@ -7,6 +7,8 @@
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
+using Microsoft.ML;
+using Microsoft.ML.Featurizers;
using Microsoft.ML.Data;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Runtime;
@@ -300,6 +302,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
//env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly);
env.ComponentCatalog.RegisterAssembly(typeof(SsaChangePointDetector).Assembly);
+ env.ComponentCatalog.RegisterAssembly(typeof(CategoryImputerTransformer).Assembly);
env.ComponentCatalog.RegisterAssembly(typeof(DotNetBridgeEntrypoints).Assembly);
using (var ch = host.Start("Executing"))
diff --git a/src/DotNetBridge/DotNetBridge.csproj b/src/DotNetBridge/DotNetBridge.csproj
index 822db6aa..9985bb62 100644
--- a/src/DotNetBridge/DotNetBridge.csproj
+++ b/src/DotNetBridge/DotNetBridge.csproj
@@ -32,17 +32,19 @@
all
runtime; build; native; contentfiles; analyzers
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Platforms/build.csproj b/src/Platforms/build.csproj
index 3db67054..626822c9 100644
--- a/src/Platforms/build.csproj
+++ b/src/Platforms/build.csproj
@@ -11,17 +11,19 @@
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj
index a90735af..dc1a2c39 100644
--- a/src/python/nimbusml.pyproj
+++ b/src/python/nimbusml.pyproj
@@ -91,10 +91,14 @@
+
+
+
+
@@ -106,6 +110,7 @@
+
@@ -120,6 +125,7 @@
+
@@ -158,6 +164,7 @@
+
@@ -173,6 +180,7 @@
+
@@ -228,6 +236,7 @@
+
@@ -302,19 +311,24 @@
+
+
+
+
+
@@ -394,6 +408,7 @@
+
@@ -406,6 +421,7 @@
+
@@ -434,6 +450,7 @@
+
@@ -452,6 +469,8 @@
+
+
@@ -630,6 +649,7 @@
+
@@ -646,6 +666,7 @@
+
@@ -659,6 +680,8 @@
+
+
@@ -694,15 +717,20 @@
+
+
+
+
+
@@ -711,6 +739,7 @@
+
diff --git a/src/python/nimbusml/__init__.py b/src/python/nimbusml/__init__.py
index 0b508fcf..0fdadc02 100644
--- a/src/python/nimbusml/__init__.py
+++ b/src/python/nimbusml/__init__.py
@@ -2,7 +2,7 @@
Microsoft Machine Learning for Python
"""
-__version__ = '1.5.0'
+__version__ = '1.5.1'
# CoreCLR version of MicrosoftML is built on Windows.
# But file permissions are not preserved when it's copied to Linux.
diff --git a/src/python/nimbusml/examples/DateTimeSplitter.py b/src/python/nimbusml/examples/DateTimeSplitter.py
new file mode 100644
index 00000000..fd8612d3
--- /dev/null
+++ b/src/python/nimbusml/examples/DateTimeSplitter.py
@@ -0,0 +1,31 @@
+###############################################################################
+# DateTimeSplitter
+import pandas as pd
+from nimbusml import FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.preprocessing import DateTimeSplitter
+
+# data input (as a FileDataStream)
+path = get_dataset('infert').as_filepath()
+
+data = FileDataStream.read_csv(path, sep=',')
+
+# transform usage
+xf = DateTimeSplitter(prefix='dt_') << 'age'
+
+# fit and transform
+features = xf.fit_transform(data)
+
+features = features.drop(['row_num', 'education', 'parity', 'induced',
+ 'case', 'spontaneous', 'stratum', 'pooled.stratum'], axis=1)
+
+# print features
+pd.set_option('display.max_columns', None)
+pd.set_option('display.width', 1000)
+print(features.head())
+# age dt_Year dt_Month dt_Day dt_Hour dt_Minute dt_Second dt_AmPm dt_Hour12 dt_DayOfWeek dt_DayOfQuarter dt_DayOfYear dt_WeekOfMonth dt_QuarterOfYear dt_HalfOfYear dt_WeekIso dt_YearIso dt_MonthLabel dt_AmPmLabel dt_DayOfWeekLabel dt_HolidayName dt_IsPaidTimeOff
+# 0 26 1970 1 1 0 0 26 0 0 4 1 0 0 1 1 1 1970 January am Thursday None 0
+# 1 42 1970 1 1 0 0 42 0 0 4 1 0 0 1 1 1 1970 January am Thursday None 0
+# 2 39 1970 1 1 0 0 39 0 0 4 1 0 0 1 1 1 1970 January am Thursday None 0
+# 3 34 1970 1 1 0 0 34 0 0 4 1 0 0 1 1 1 1970 January am Thursday None 0
+# 4 35 1970 1 1 0 0 35 0 0 4 1 0 0 1 1 1 1970 January am Thursday None 0
\ No newline at end of file
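Reviewer note: in the example above the numeric `age` column is interpreted as seconds since the Unix epoch, which is why every row lands on 1970-01-01 and `dt_Second` mirrors `age`. A standard-library check of that interpretation, independent of nimbusml:

```python
from datetime import datetime, timezone

# 'age' values are treated as POSIX timestamps (seconds since 1970-01-01 UTC),
# so an input of 26 splits into year 1970, month 1, day 1, second 26.
print(datetime.fromtimestamp(26, tz=timezone.utc))  # 1970-01-01 00:00:26+00:00
print(datetime.fromtimestamp(42, tz=timezone.utc))  # 1970-01-01 00:00:42+00:00
```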
diff --git a/src/python/nimbusml/examples/RobustScaler.py b/src/python/nimbusml/examples/RobustScaler.py
new file mode 100644
index 00000000..4c6a6405
--- /dev/null
+++ b/src/python/nimbusml/examples/RobustScaler.py
@@ -0,0 +1,39 @@
+###############################################################################
+# RobustScaler
+import numpy
+from nimbusml import FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.preprocessing.normalization import RobustScaler
+
+# data input (as a FileDataStream)
+path = get_dataset('infert').as_filepath()
+data = FileDataStream.read_csv(path, sep=',')
+
+print(data.head())
+# row_num education age parity induced case spontaneous stratum pooled.stratum
+# 0 1 0-5yrs 26 6 1 1 2 1 3
+# 1 2 0-5yrs 42 1 1 1 0 2 1
+# 2 3 0-5yrs 39 6 2 1 0 3 4
+# 3 4 0-5yrs 34 4 2 1 0 4 2
+# 4 5 6-11yrs 35 3 1 1 1 5 32
+
+# transform usage
+xf = RobustScaler(
+ center=True, scale=True,
+ columns={'age_norm': 'age', 'par_norm': 'parity'})
+
+# fit and transform
+features = xf.fit_transform(data)
+
+print(features.head(n=10))
+# row_num education age parity induced case spontaneous stratum pooled.stratum age_norm par_norm
+# 0 1 0-5yrs 26 6 1 1 2 1 3 -0.434783 1.6
+# 1 2 0-5yrs 42 1 1 1 0 2 1 0.956522 -0.4
+# 2 3 0-5yrs 39 6 2 1 0 3 4 0.695652 1.6
+# 3 4 0-5yrs 34 4 2 1 0 4 2 0.260870 0.8
+# 4 5 6-11yrs 35 3 1 1 1 5 32 0.347826 0.4
+# 5 6 6-11yrs 36 4 2 1 1 6 36 0.434783 0.8
+# 6 7 6-11yrs 23 1 0 1 0 7 6 -0.695652 -0.4
+# 7 8 6-11yrs 32 2 0 1 0 8 22 0.086957 0.0
+# 8 9 6-11yrs 21 1 0 1 1 9 5 -0.869565 -0.4
+# 9 10 6-11yrs 28 2 0 1 0 10 19 -0.260870 0.0
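Reviewer note: with `center=True, scale=True` the transform computes `(x - median) / (q75 - q25)` per column. The constants below are the median/IQR implied by the printed output (age: median 31, IQR 11.5; parity: median 2, IQR 2.5), inferred from the numbers rather than documented anywhere; a plain-pandas sketch of the arithmetic:

```python
import pandas as pd

# Minimal sketch of RobustScaler(center=True, scale=True): subtract the column
# median, divide by the interquartile range. The 31 / 11.5 figures are implied
# by the example output for 'age', not constants documented by nimbusml.
age = pd.Series([26, 42, 39, 34, 35])
print((age - 31) / 11.5)  # -0.4348, 0.9565, 0.6957, 0.2609, 0.3478
```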
diff --git a/src/python/nimbusml/examples/ToKeyImputer.py b/src/python/nimbusml/examples/ToKeyImputer.py
new file mode 100644
index 00000000..820127f5
--- /dev/null
+++ b/src/python/nimbusml/examples/ToKeyImputer.py
@@ -0,0 +1,35 @@
+###############################################################################
+# ToKeyImputer
+import numpy
+from nimbusml import FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.preprocessing import ToKeyImputer
+
+# data input (as a FileDataStream)
+path = get_dataset('airquality').as_filepath()
+
+data = FileDataStream.read_csv(path, sep=',', numeric_dtype=numpy.float32,
+ names={0: 'id'})
+print(data.head(6))
+# id Ozone Solar_R Wind Temp Month Day
+# 0 1.0 41.0 190.0 7.4 67.0 5.0 1.0
+# 1 2.0 36.0 118.0 8.0 72.0 5.0 2.0
+# 2 3.0 12.0 149.0 12.6 74.0 5.0 3.0
+# 3 4.0 18.0 313.0 11.5 62.0 5.0 4.0
+# 4 5.0 NaN NaN 14.3 56.0 5.0 5.0
+# 5 6.0 28.0 NaN 14.9 66.0 5.0 6.0
+
+
+# transform usage
+xf = ToKeyImputer(columns={'Ozone_1': 'Ozone', 'Solar_R_1': 'Solar_R'})
+
+# fit and transform
+features = xf.fit_transform(data)
+print(features.head(6))
+# id Ozone Solar_R Wind Temp Month Day Ozone_1 Solar_R_1
+# 0 1.0 41.0 190.0 7.4 67.0 5.0 1.0 41.0 190.0
+# 1 2.0 36.0 118.0 8.0 72.0 5.0 2.0 36.0 118.0
+# 2 3.0 12.0 149.0 12.6 74.0 5.0 3.0 12.0 149.0
+# 3 4.0 18.0 313.0 11.5 62.0 5.0 4.0 18.0 313.0
+# 4 5.0 NaN NaN 14.3 56.0 5.0 5.0 23.0 238.0 <== Missing values have been updated
+# 5 6.0 28.0 NaN 14.9 66.0 5.0 6.0 28.0 238.0 <== Missing values have been updated
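Reviewer note: `ToKeyImputer` fills a missing entry with the column's most frequent value over the training data, so the 23.0 and 238.0 above should be the modes of `Ozone` and `Solar_R`. A consistency check with plain pandas, assuming the on-disk header matches the names shown here (the raw file may spell the column `Solar.R`):

```python
import pandas as pd

# If the example output is right, these modes come out as 23.0 and 238.0.
raw = pd.read_csv(path)  # 'path' from the example above
print(raw['Ozone'].mode().iloc[0])
print(raw['Solar_R'].mode().iloc[0])
```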
diff --git a/src/python/nimbusml/examples/ToString.py b/src/python/nimbusml/examples/ToString.py
new file mode 100644
index 00000000..82185d32
--- /dev/null
+++ b/src/python/nimbusml/examples/ToString.py
@@ -0,0 +1,45 @@
+###############################################################################
+# ToString
+import numpy
+from nimbusml import FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.preprocessing import ToString
+
+# data input (as a FileDataStream)
+path = get_dataset('infert').as_filepath()
+
+data = FileDataStream.read_csv(path, sep=',', numeric_dtype=numpy.float32,
+ names={0: 'id'})
+print(data.head())
+# id education age parity induced case spontaneous stratum pooled.stratum
+# 0 1.0 0-5yrs 26.0 6.0 1.0 1.0 2.0 1.0 3.0
+# 1 2.0 0-5yrs 42.0 1.0 1.0 1.0 0.0 2.0 1.0
+# 2 3.0 0-5yrs 39.0 6.0 2.0 1.0 0.0 3.0 4.0
+# 3 4.0 0-5yrs 34.0 4.0 2.0 1.0 0.0 4.0 2.0
+# 4 5.0 6-11yrs 35.0 3.0 1.0 1.0 1.0 5.0 32.0
+
+# transform usage
+xf = ToString(columns={'id_1': 'id', 'age_1': 'age'})
+
+# fit and transform
+features = xf.fit_transform(data)
+print(features.head())
+# id education age parity induced case spontaneous stratum pooled.stratum id_1 age_1
+# 0 1.0 0-5yrs 26.0 6.0 1.0 1.0 2.0 1.0 3.0 1.000000 26.000000
+# 1 2.0 0-5yrs 42.0 1.0 1.0 1.0 0.0 2.0 1.0 2.000000 42.000000
+# 2 3.0 0-5yrs 39.0 6.0 2.0 1.0 0.0 3.0 4.0 3.000000 39.000000
+# 3 4.0 0-5yrs 34.0 4.0 2.0 1.0 0.0 4.0 2.0 4.000000 34.000000
+# 4 5.0 6-11yrs 35.0 3.0 1.0 1.0 1.0 5.0 32.0 5.000000 35.000000
+
+print(features.dtypes)
+# id float32
+# education object
+# age float32
+# parity float32
+# induced float32
+# case float32
+# spontaneous float32
+# stratum float32
+# pooled.stratum float32
+# id_1 object <== string column
+# age_1 object <== string column
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/DateTimeSplitter_df.py b/src/python/nimbusml/examples/examples_from_dataframe/DateTimeSplitter_df.py
new file mode 100644
index 00000000..8e33ab7b
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/DateTimeSplitter_df.py
@@ -0,0 +1,31 @@
+###############################################################################
+# DateTimeSplitter
+import pandas
+from nimbusml.preprocessing import DateTimeSplitter
+
+df = pandas.DataFrame(data=dict(
+ tokens1=[1, 2, 3, 157161600],
+ tokens2=[10, 11, 12, 13]
+))
+
+cols_to_drop = [
+ 'Hour12', 'DayOfWeek', 'DayOfQuarter',
+ 'DayOfYear', 'WeekOfMonth', 'QuarterOfYear',
+ 'HalfOfYear', 'WeekIso', 'YearIso', 'MonthLabel',
+ 'AmPmLabel', 'DayOfWeekLabel', 'IsPaidTimeOff'
+]
+
+cd = DateTimeSplitter(prefix='dt',
+ country='Canada',
+ columns_to_drop=cols_to_drop) << 'tokens1'
+y = cd.fit_transform(df)
+
+# view the three columns
+pandas.set_option('display.max_columns', None)
+pandas.set_option('display.width', 1000)
+print(y)
+# tokens1 tokens2 dtYear dtMonth dtDay dtHour dtMinute dtSecond dtAmPm dtHolidayName
+# 0 1 10 1970 1 1 0 0 1 0 New Year's Day
+# 1 2 11 1970 1 1 0 0 2 0 New Year's Day
+# 2 3 12 1970 1 1 0 0 3 0 New Year's Day
+# 3 157161600 13 1974 12 25 0 0 0 0 Christmas Day
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/RobustScaler_df.py b/src/python/nimbusml/examples/examples_from_dataframe/RobustScaler_df.py
new file mode 100644
index 00000000..ff0ae793
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/RobustScaler_df.py
@@ -0,0 +1,20 @@
+###############################################################################
+# RobustScaler
+import pandas as pd
+from nimbusml import Pipeline
+from nimbusml.preprocessing.normalization import RobustScaler
+
+
+df = pd.DataFrame(data=dict(c0=[1, 3, 5, 7, 9]))
+
+xf = RobustScaler(columns='c0', center=True, scale=True)
+pipeline = Pipeline([xf])
+result = pipeline.fit_transform(df)
+
+print(result)
+# c0
+# 0 -1.0
+# 1 -0.5
+# 2 0.0
+# 3 0.5
+# 4 1.0
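Reviewer note: the toy output is easy to verify by hand. For `c0 = [1, 3, 5, 7, 9]` the median is 5 and the 25th/75th percentiles are 3 and 7, so `(c0 - 5) / (7 - 3)` gives exactly the column printed above:

```python
import pandas as pd

# Same arithmetic as the pipeline output, done directly in pandas:
# (c0 - median) / (q75 - q25) = [-1.0, -0.5, 0.0, 0.5, 1.0]
c0 = pd.Series([1, 3, 5, 7, 9])
print((c0 - c0.median()) / (c0.quantile(0.75) - c0.quantile(0.25)))
```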
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/TimeSeriesImputer_df.py b/src/python/nimbusml/examples/examples_from_dataframe/TimeSeriesImputer_df.py
new file mode 100644
index 00000000..38ec9073
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/TimeSeriesImputer_df.py
@@ -0,0 +1,29 @@
+###############################################################################
+# TimeSeriesImputer
+import pandas
+from nimbusml.timeseries import TimeSeriesImputer
+
+df = pandas.DataFrame(data=dict(
+ ts=[1, 2, 3, 5],
+ grain=[1970, 1970, 1970, 1970],
+ c3=[10, 13, 15, 20],
+ c4=[19, 12, 16, 19]
+))
+
+print(df)
+
+tsi = TimeSeriesImputer(time_series_column='ts',
+ grain_columns=['grain'],
+ filter_columns=['c3', 'c4'],
+ impute_mode='ForwardFill',
+ filter_mode='Include')
+result = tsi.fit_transform(df)
+
+print(result)
+# ts grain c3 c4 IsRowImputed
+# 0 0 0 0 0 False
+# 1 1 1970 10 19 False
+# 2 2 1970 13 12 False
+# 3 3 1970 15 16 False
+# 4 4 1970 15 16 True <== New row added
+# 5 5 1970 20 19 False
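Reviewer note: with `impute_mode='ForwardFill'` the synthesized `ts=4` row carries the `ts=3` values forward. A rough pandas equivalent for this toy frame (it ignores grain columns and does not emit `IsRowImputed`):

```python
import pandas as pd

# Close the 3 -> 5 gap on the time axis, then carry the last observation forward.
filled = (df.set_index('ts')
            .reindex(range(df['ts'].min(), df['ts'].max() + 1))
            .ffill()
            .reset_index())
print(filled)  # row ts=4 repeats c3=15, c4=16 from ts=3
```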
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/ToKeyImputer_df.py b/src/python/nimbusml/examples/examples_from_dataframe/ToKeyImputer_df.py
new file mode 100644
index 00000000..f613e3f4
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/ToKeyImputer_df.py
@@ -0,0 +1,34 @@
+###############################################################################
+# ToKeyImputer
+
+import pandas
+from nimbusml.preprocessing import ToKeyImputer
+
+# Create the data
+text_df = pandas.DataFrame(
+ data=dict(
+ text=[
+ "cat",
+ "dog",
+ "fish",
+ "orange",
+ "cat orange",
+ "dog",
+ "fish",
+ None,
+ "spider"]))
+
+tokey = ToKeyImputer() << 'text'
+y = tokey.fit_transform(text_df)
+print(y)
+
+# text
+# 0 cat
+# 1 dog
+# 2 fish
+# 3 orange
+# 4 cat orange
+# 5 dog
+# 6 fish
+# 7 dog <== Missing value has been replaced
+# 8 spider
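Reviewer note: 'dog' and 'fish' both appear twice in the training column, so the imputed 'dog' reflects the transform's internal tie-breaking rather than a unique mode; callers probably should not rely on which of the tied values wins. A quick check of the counts:

```python
from collections import Counter

# Two values tie for most frequent, so the replacement shown above comes
# down to tie-breaking inside the transform.
print(Counter(text_df['text'].dropna()).most_common(3))
# [('dog', 2), ('fish', 2), ('cat', 1)]
```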
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/ToString_df.py b/src/python/nimbusml/examples/examples_from_dataframe/ToString_df.py
new file mode 100644
index 00000000..b6c631fd
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/ToString_df.py
@@ -0,0 +1,43 @@
+###############################################################################
+# ToString
+
+import pandas
+from nimbusml.preprocessing import ToString, ToKey
+from pandas import Categorical
+
+# Create the data
+categorical_df = pandas.DataFrame(data=dict(
+ key=Categorical.from_codes([0, 1, 2, 1, 2, 0], categories=['a', 'b', 'c']),
+ text=['b', 'c', 'a', 'b', 'a', 'c']))
+
+print(categorical_df.dtypes)
+# key category
+# text object
+# dtype: object
+
+tostring = ToString(columns='key')
+y = tostring.fit_transform(categorical_df)
+print(y)
+# key text
+# 0 1 b
+# 1 2 c
+# 2 3 a
+# 3 2 b
+# 4 3 a
+# 5 1 c
+
+print(y.dtypes)
+# key object <== converted to string
+# text object
+# dtype: object
+
+tokey = ToKey(columns='text')
+y = tokey.fit_transform(categorical_df)
+y2 = tostring.clone().fit_transform(y)
+print(y2['text'] == categorical_df['text'])
+# 0 True
+# 1 True
+# 2 True
+# 3 True
+# 4 True
+# 5 True
diff --git a/src/python/nimbusml/internal/core/preprocessing/datetimesplitter.py b/src/python/nimbusml/internal/core/preprocessing/datetimesplitter.py
new file mode 100644
index 00000000..db2c39ef
--- /dev/null
+++ b/src/python/nimbusml/internal/core/preprocessing/datetimesplitter.py
@@ -0,0 +1,62 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+DateTimeSplitter
+"""
+
+__all__ = ["DateTimeSplitter"]
+
+
+from ...entrypoints.transforms_datetimesplitter import \
+ transforms_datetimesplitter
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class DateTimeSplitter(BasePipelineItem, DefaultSignature):
+ """
+ **Description**
+ Splits a datetime value into its individual components
+
+ :param prefix: Output column prefix.
+
+ :param columns_to_drop: Columns to drop after the DateTime Expansion.
+
+ :param country: Country to get holidays for. Defaults to none if not
+ passed.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ prefix,
+ columns_to_drop=None,
+ country='None',
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.prefix = prefix
+ self.columns_to_drop = columns_to_drop
+ self.country = country
+
+ @property
+ def _entrypoint(self):
+ return transforms_datetimesplitter
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ source=self.source,
+ prefix=self.prefix,
+ columns_to_drop=self.columns_to_drop,
+ country=self.country)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/internal/core/preprocessing/normalization/robustscaler.py b/src/python/nimbusml/internal/core/preprocessing/normalization/robustscaler.py
new file mode 100644
index 00000000..08845bae
--- /dev/null
+++ b/src/python/nimbusml/internal/core/preprocessing/normalization/robustscaler.py
@@ -0,0 +1,103 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+RobustScaler
+"""
+
+__all__ = ["RobustScaler"]
+
+
+from ....entrypoints.transforms_robustscaler import transforms_robustscaler
+from ....utils.utils import trace
+from ...base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class RobustScaler(BasePipelineItem, DefaultSignature):
+ """
+ **Description**
+ Removes the median and scales the data according to the quantile range.
+
+ :param center: If True, center the data before scaling.
+
+ :param scale: If True, scale the data to interquartile range.
+
+ :param quantile_min: Min for the quantile range used to calculate scale.
+
+ :param quantile_max: Max for the quantile range used to calculate scale.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ center=True,
+ scale=True,
+ quantile_min=25.0,
+ quantile_max=75.0,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.center = center
+ self.scale = scale
+ self.quantile_min = quantile_min
+ self.quantile_max = quantile_max
+
+ @property
+ def _entrypoint(self):
+ return transforms_robustscaler
+
+ @trace
+ def _get_node(self, **all_args):
+
+ input_columns = self.input
+ if input_columns is None and 'input' in all_args:
+ input_columns = all_args['input']
+ if 'input' in all_args:
+ all_args.pop('input')
+
+ output_columns = self.output
+ if output_columns is None and 'output' in all_args:
+ output_columns = all_args['output']
+ if 'output' in all_args:
+ all_args.pop('output')
+
+ # validate input
+ if input_columns is None:
+ raise ValueError(
+ "'None' input passed when it cannot be none.")
+
+ if not isinstance(input_columns, list):
+ raise ValueError(
+ "input has to be a list of strings, instead got %s" %
+ type(input_columns))
+
+ # validate output
+ if output_columns is None:
+ output_columns = input_columns
+
+ if not isinstance(output_columns, list):
+ raise ValueError(
+ "output has to be a list of strings, instead got %s" %
+ type(output_columns))
+
+ algo_args = dict(
+ column=[
+ dict(
+ Source=i,
+ Name=o) for i,
+ o in zip(
+ input_columns,
+ output_columns)] if input_columns else None,
+ center=self.center,
+ scale=self.scale,
+ quantile_min=self.quantile_min,
+ quantile_max=self.quantile_max)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/internal/core/preprocessing/tokeyimputer.py b/src/python/nimbusml/internal/core/preprocessing/tokeyimputer.py
new file mode 100644
index 00000000..e82498a3
--- /dev/null
+++ b/src/python/nimbusml/internal/core/preprocessing/tokeyimputer.py
@@ -0,0 +1,80 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+ToKeyImputer
+"""
+
+__all__ = ["ToKeyImputer"]
+
+
+from ...entrypoints.transforms_categoryimputer import \
+ transforms_categoryimputer
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class ToKeyImputer(BasePipelineItem, DefaultSignature):
+ """
+ **Description**
+ Fills in missing values in a column based on the most frequent value
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ @property
+ def _entrypoint(self):
+ return transforms_categoryimputer
+
+ @trace
+ def _get_node(self, **all_args):
+
+ input_columns = self.input
+ if input_columns is None and 'input' in all_args:
+ input_columns = all_args['input']
+ if 'input' in all_args:
+ all_args.pop('input')
+
+ output_columns = self.output
+ if output_columns is None and 'output' in all_args:
+ output_columns = all_args['output']
+ if 'output' in all_args:
+ all_args.pop('output')
+
+ # validate input
+ if input_columns is None:
+ raise ValueError(
+ "'None' input passed when it cannot be none.")
+
+ if not isinstance(input_columns, list):
+ raise ValueError(
+ "input has to be a list of strings, instead got %s" %
+ type(input_columns))
+
+ # validate output
+ if output_columns is None:
+ output_columns = input_columns
+
+ if not isinstance(output_columns, list):
+ raise ValueError(
+ "output has to be a list of strings, instead got %s" %
+ type(output_columns))
+
+ algo_args = dict(
+ column=[
+ dict(
+ Source=i, Name=o) for i, o in zip(
+ input_columns, output_columns)] if input_columns else None)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/internal/core/preprocessing/tostring.py b/src/python/nimbusml/internal/core/preprocessing/tostring.py
new file mode 100644
index 00000000..2294c715
--- /dev/null
+++ b/src/python/nimbusml/internal/core/preprocessing/tostring.py
@@ -0,0 +1,79 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+ToString
+"""
+
+__all__ = ["ToString"]
+
+
+from ...entrypoints.transforms_tostring import transforms_tostring
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class ToString(BasePipelineItem, DefaultSignature):
+ """
+ **Description**
+ Turns the given column into a column of its string representation
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ @property
+ def _entrypoint(self):
+ return transforms_tostring
+
+ @trace
+ def _get_node(self, **all_args):
+
+ input_columns = self.input
+ if input_columns is None and 'input' in all_args:
+ input_columns = all_args['input']
+ if 'input' in all_args:
+ all_args.pop('input')
+
+ output_columns = self.output
+ if output_columns is None and 'output' in all_args:
+ output_columns = all_args['output']
+ if 'output' in all_args:
+ all_args.pop('output')
+
+ # validate input
+ if input_columns is None:
+ raise ValueError(
+ "'None' input passed when it cannot be none.")
+
+ if not isinstance(input_columns, list):
+ raise ValueError(
+ "input has to be a list of strings, instead got %s" %
+ type(input_columns))
+
+ # validate output
+ if output_columns is None:
+ output_columns = input_columns
+
+ if not isinstance(output_columns, list):
+ raise ValueError(
+ "output has to be a list of strings, instead got %s" %
+ type(output_columns))
+
+ algo_args = dict(
+ column=[
+ dict(
+ Source=i, Name=o) for i, o in zip(
+ input_columns, output_columns)] if input_columns else None)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/internal/core/timeseries/timeseriesimputer.py b/src/python/nimbusml/internal/core/timeseries/timeseriesimputer.py
new file mode 100644
index 00000000..0a492127
--- /dev/null
+++ b/src/python/nimbusml/internal/core/timeseries/timeseriesimputer.py
@@ -0,0 +1,78 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+TimeSeriesImputer
+"""
+
+__all__ = ["TimeSeriesImputer"]
+
+
+from ...entrypoints.transforms_timeseriesimputer import \
+ transforms_timeseriesimputer
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class TimeSeriesImputer(BasePipelineItem, DefaultSignature):
+ """
+ **Description**
+ Fills in missing rows and values
+
+ :param time_series_column: Column representing the time.
+
+ :param grain_columns: List of grain columns.
+
+ :param filter_columns: Columns to filter.
+
+ :param filter_mode: Filter mode. Either include or exclude.
+
+ :param impute_mode: Mode for imputing, defaults to ForwardFill if not
+ provided.
+
+ :param supress_type_errors: Suppress the errors that would occur if a column
+ and impute mode are incompatible. If true, the column is skipped; if
+ false, an error is raised.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ time_series_column,
+ grain_columns,
+ filter_columns=None,
+ filter_mode='Exclude',
+ impute_mode='ForwardFill',
+ supress_type_errors=False,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.time_series_column = time_series_column
+ self.grain_columns = grain_columns
+ self.filter_columns = filter_columns
+ self.filter_mode = filter_mode
+ self.impute_mode = impute_mode
+ self.supress_type_errors = supress_type_errors
+
+ @property
+ def _entrypoint(self):
+ return transforms_timeseriesimputer
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ time_series_column=self.time_series_column,
+ grain_columns=self.grain_columns,
+ filter_columns=self.filter_columns,
+ filter_mode=self.filter_mode,
+ impute_mode=self.impute_mode,
+ supress_type_errors=self.supress_type_errors)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/internal/entrypoints/transforms_categoryimputer.py b/src/python/nimbusml/internal/entrypoints/transforms_categoryimputer.py
new file mode 100644
index 00000000..7f72261b
--- /dev/null
+++ b/src/python/nimbusml/internal/entrypoints/transforms_categoryimputer.py
@@ -0,0 +1,65 @@
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+Transforms.CategoryImputer
+"""
+
+
+from ..utils.entrypoints import EntryPoint
+from ..utils.utils import try_set, unlist
+
+
+def transforms_categoryimputer(
+ column,
+ data,
+ output_data=None,
+ model=None,
+ **params):
+ """
+ **Description**
+ Fills in missing values in a column based on the most frequent value
+
+ :param column: New column definition (optional form: name:src)
+ (inputs).
+ :param data: Input dataset (inputs).
+ :param output_data: Transformed dataset (outputs).
+ :param model: Transform model (outputs).
+ """
+
+ entrypoint_name = 'Transforms.CategoryImputer'
+ inputs = {}
+ outputs = {}
+
+ if column is not None:
+ inputs['Column'] = try_set(
+ obj=column,
+ none_acceptable=False,
+ is_of_type=list,
+ is_column=True)
+ if data is not None:
+ inputs['Data'] = try_set(
+ obj=data,
+ none_acceptable=False,
+ is_of_type=str)
+ if output_data is not None:
+ outputs['OutputData'] = try_set(
+ obj=output_data,
+ none_acceptable=False,
+ is_of_type=str)
+ if model is not None:
+ outputs['Model'] = try_set(
+ obj=model,
+ none_acceptable=False,
+ is_of_type=str)
+
+ input_variables = {
+ x for x in unlist(inputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+ output_variables = {
+ x for x in unlist(outputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+
+ entrypoint = EntryPoint(
+ name=entrypoint_name, inputs=inputs, outputs=outputs,
+ input_variables=input_variables,
+ output_variables=output_variables)
+ return entrypoint
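Reviewer note: like the other generated wrappers, this function distinguishes literal inputs from graph variables purely by the `$`-prefix scan at the end. A hedged illustration (the `$...` names are placeholders, not a required convention):

```python
# Strings starting with '$' are collected into input_variables /
# output_variables; everything else is passed through as a literal.
ep = transforms_categoryimputer(
    column=[dict(Source='text', Name='text_out')],
    data='$input_data',          # picked up as an input variable
    output_data='$output_data',  # picked up as an output variable
    model='$transform_model')    # picked up as an output variable
# ep.input_variables should come out as {'$input_data'}.
```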
diff --git a/src/python/nimbusml/internal/entrypoints/transforms_datetimesplitter.py b/src/python/nimbusml/internal/entrypoints/transforms_datetimesplitter.py
new file mode 100644
index 00000000..7afc028a
--- /dev/null
+++ b/src/python/nimbusml/internal/entrypoints/transforms_datetimesplitter.py
@@ -0,0 +1,128 @@
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+Transforms.DateTimeSplitter
+"""
+
+
+from ..utils.entrypoints import EntryPoint
+from ..utils.utils import try_set, unlist
+
+
+def transforms_datetimesplitter(
+ source,
+ data,
+ prefix,
+ output_data=None,
+ model=None,
+ columns_to_drop=None,
+ country='None',
+ **params):
+ """
+ **Description**
+ Splits a datetime value into its individual components
+
+ :param source: Input column (inputs).
+ :param data: Input dataset (inputs).
+ :param prefix: Output column prefix (inputs).
+ :param columns_to_drop: Columns to drop after the DateTime
+ Expansion (inputs).
+ :param country: Country to get holidays for. Defaults to none if
+ not passed (inputs).
+ :param output_data: Transformed dataset (outputs).
+ :param model: Transform model (outputs).
+ """
+
+ entrypoint_name = 'Transforms.DateTimeSplitter'
+ inputs = {}
+ outputs = {}
+
+ if source is not None:
+ inputs['Source'] = try_set(
+ obj=source,
+ none_acceptable=False,
+ is_of_type=str,
+ is_column=True)
+ if data is not None:
+ inputs['Data'] = try_set(
+ obj=data,
+ none_acceptable=False,
+ is_of_type=str)
+ if prefix is not None:
+ inputs['Prefix'] = try_set(
+ obj=prefix,
+ none_acceptable=False,
+ is_of_type=str)
+ if columns_to_drop is not None:
+ inputs['ColumnsToDrop'] = try_set(
+ obj=columns_to_drop,
+ none_acceptable=True,
+ is_of_type=list,
+ is_column=True)
+ if country is not None:
+ inputs['Country'] = try_set(
+ obj=country,
+ none_acceptable=True,
+ is_of_type=str,
+ values=[
+ 'None',
+ 'Argentina',
+ 'Australia',
+ 'Austria',
+ 'Belarus',
+ 'Belgium',
+ 'Brazil',
+ 'Canada',
+ 'Colombia',
+ 'Croatia',
+ 'Czech',
+ 'Denmark',
+ 'England',
+ 'Finland',
+ 'France',
+ 'Germany',
+ 'Hungary',
+ 'India',
+ 'Ireland',
+ 'IsleofMan',
+ 'Italy',
+ 'Japan',
+ 'Mexico',
+ 'Netherlands',
+ 'NewZealand',
+ 'NorthernIreland',
+ 'Norway',
+ 'Poland',
+ 'Portugal',
+ 'Scotland',
+ 'Slovenia',
+ 'SouthAfrica',
+ 'Spain',
+ 'Sweden',
+ 'Switzerland',
+ 'Ukraine',
+ 'UnitedKingdom',
+ 'UnitedStates',
+ 'Wales'])
+ if output_data is not None:
+ outputs['OutputData'] = try_set(
+ obj=output_data,
+ none_acceptable=False,
+ is_of_type=str)
+ if model is not None:
+ outputs['Model'] = try_set(
+ obj=model,
+ none_acceptable=False,
+ is_of_type=str)
+
+ input_variables = {
+ x for x in unlist(inputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+ output_variables = {
+ x for x in unlist(outputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+
+ entrypoint = EntryPoint(
+ name=entrypoint_name, inputs=inputs, outputs=outputs,
+ input_variables=input_variables,
+ output_variables=output_variables)
+ return entrypoint
diff --git a/src/python/nimbusml/internal/entrypoints/transforms_robustscaler.py b/src/python/nimbusml/internal/entrypoints/transforms_robustscaler.py
new file mode 100644
index 00000000..615af180
--- /dev/null
+++ b/src/python/nimbusml/internal/entrypoints/transforms_robustscaler.py
@@ -0,0 +1,98 @@
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+Transforms.RobustScaler
+"""
+
+import numbers
+
+from ..utils.entrypoints import EntryPoint
+from ..utils.utils import try_set, unlist
+
+
+def transforms_robustscaler(
+ column,
+ data,
+ output_data=None,
+ model=None,
+ center=True,
+ scale=True,
+ quantile_min=25.0,
+ quantile_max=75.0,
+ **params):
+ """
+ **Description**
+ Removes the median and scales the data according to the quantile
+ range.
+
+ :param column: New column definition (optional form: name:src)
+ (inputs).
+ :param data: Input dataset (inputs).
+ :param center: If True, center the data before scaling. (inputs).
+ :param scale: If True, scale the data to interquartile range.
+ (inputs).
+ :param quantile_min: Min for the quantile range used to calculate
+ scale. (inputs).
+ :param quantile_max: Max for the quantile range used to calculate
+ scale. (inputs).
+ :param output_data: Transformed dataset (outputs).
+ :param model: Transform model (outputs).
+ """
+
+ entrypoint_name = 'Transforms.RobustScaler'
+ inputs = {}
+ outputs = {}
+
+ if column is not None:
+ inputs['Column'] = try_set(
+ obj=column,
+ none_acceptable=False,
+ is_of_type=list,
+ is_column=True)
+ if data is not None:
+ inputs['Data'] = try_set(
+ obj=data,
+ none_acceptable=False,
+ is_of_type=str)
+ if center is not None:
+ inputs['Center'] = try_set(
+ obj=center,
+ none_acceptable=True,
+ is_of_type=bool)
+ if scale is not None:
+ inputs['Scale'] = try_set(
+ obj=scale,
+ none_acceptable=True,
+ is_of_type=bool)
+ if quantile_min is not None:
+ inputs['QuantileMin'] = try_set(
+ obj=quantile_min,
+ none_acceptable=True,
+ is_of_type=numbers.Real)
+ if quantile_max is not None:
+ inputs['QuantileMax'] = try_set(
+ obj=quantile_max,
+ none_acceptable=True,
+ is_of_type=numbers.Real)
+ if output_data is not None:
+ outputs['OutputData'] = try_set(
+ obj=output_data,
+ none_acceptable=False,
+ is_of_type=str)
+ if model is not None:
+ outputs['Model'] = try_set(
+ obj=model,
+ none_acceptable=False,
+ is_of_type=str)
+
+ input_variables = {
+ x for x in unlist(inputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+ output_variables = {
+ x for x in unlist(outputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+
+ entrypoint = EntryPoint(
+ name=entrypoint_name, inputs=inputs, outputs=outputs,
+ input_variables=input_variables,
+ output_variables=output_variables)
+ return entrypoint
diff --git a/src/python/nimbusml/internal/entrypoints/transforms_timeseriesimputer.py b/src/python/nimbusml/internal/entrypoints/transforms_timeseriesimputer.py
new file mode 100644
index 00000000..e19bd1f1
--- /dev/null
+++ b/src/python/nimbusml/internal/entrypoints/transforms_timeseriesimputer.py
@@ -0,0 +1,115 @@
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+Transforms.TimeSeriesImputer
+"""
+
+
+from ..utils.entrypoints import EntryPoint
+from ..utils.utils import try_set, unlist
+
+
+def transforms_timeseriesimputer(
+ time_series_column,
+ data,
+ grain_columns,
+ output_data=None,
+ model=None,
+ filter_columns=None,
+ filter_mode='Exclude',
+ impute_mode='ForwardFill',
+ supress_type_errors=False,
+ **params):
+ """
+ **Description**
+ Fills in missing rows and values
+
+ :param time_series_column: Column representing the time (inputs).
+ :param data: Input dataset (inputs).
+ :param grain_columns: List of grain columns (inputs).
+ :param filter_columns: Columns to filter (inputs).
+ :param filter_mode: Filter mode. Either include or exclude
+ (inputs).
+ :param impute_mode: Mode for imputing, defaults to ForwardFill if
+ not provided (inputs).
+ :param supress_type_errors: Suppress the errors that would occur
+ if a column and impute mode are incompatible. If true, the
+ column is skipped; if false, an error is raised.
+ (inputs).
+ :param output_data: Transformed dataset (outputs).
+ :param model: Transform model (outputs).
+ """
+
+ entrypoint_name = 'Transforms.TimeSeriesImputer'
+ inputs = {}
+ outputs = {}
+
+ if time_series_column is not None:
+ inputs['TimeSeriesColumn'] = try_set(
+ obj=time_series_column,
+ none_acceptable=False,
+ is_of_type=str,
+ is_column=True)
+ if data is not None:
+ inputs['Data'] = try_set(
+ obj=data,
+ none_acceptable=False,
+ is_of_type=str)
+ if grain_columns is not None:
+ inputs['GrainColumns'] = try_set(
+ obj=grain_columns,
+ none_acceptable=False,
+ is_of_type=list,
+ is_column=True)
+ if filter_columns is not None:
+ inputs['FilterColumns'] = try_set(
+ obj=filter_columns,
+ none_acceptable=True,
+ is_of_type=list,
+ is_column=True)
+ if filter_mode is not None:
+ inputs['FilterMode'] = try_set(
+ obj=filter_mode,
+ none_acceptable=True,
+ is_of_type=str,
+ values=[
+ 'NoFilter',
+ 'Include',
+ 'Exclude'])
+ if impute_mode is not None:
+ inputs['ImputeMode'] = try_set(
+ obj=impute_mode,
+ none_acceptable=True,
+ is_of_type=str,
+ values=[
+ 'ForwardFill',
+ 'BackFill',
+ 'Median',
+ 'Interpolate'])
+ if supress_type_errors is not None:
+ inputs['SupressTypeErrors'] = try_set(
+ obj=supress_type_errors,
+ none_acceptable=True,
+ is_of_type=bool)
+ if output_data is not None:
+ outputs['OutputData'] = try_set(
+ obj=output_data,
+ none_acceptable=False,
+ is_of_type=str)
+ if model is not None:
+ outputs['Model'] = try_set(
+ obj=model,
+ none_acceptable=False,
+ is_of_type=str)
+
+ input_variables = {
+ x for x in unlist(inputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+ output_variables = {
+ x for x in unlist(outputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+
+ entrypoint = EntryPoint(
+ name=entrypoint_name, inputs=inputs, outputs=outputs,
+ input_variables=input_variables,
+ output_variables=output_variables)
+ return entrypoint
diff --git a/src/python/nimbusml/internal/entrypoints/transforms_tostring.py b/src/python/nimbusml/internal/entrypoints/transforms_tostring.py
new file mode 100644
index 00000000..2f6d9782
--- /dev/null
+++ b/src/python/nimbusml/internal/entrypoints/transforms_tostring.py
@@ -0,0 +1,65 @@
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+Transforms.ToString
+"""
+
+
+from ..utils.entrypoints import EntryPoint
+from ..utils.utils import try_set, unlist
+
+
+def transforms_tostring(
+ column,
+ data,
+ output_data=None,
+ model=None,
+ **params):
+ """
+ **Description**
+ Turns the given column into a column of its string representation
+
+ :param column: New column definition (optional form: name:src)
+ (inputs).
+ :param data: Input dataset (inputs).
+ :param output_data: Transformed dataset (outputs).
+ :param model: Transform model (outputs).
+ """
+
+ entrypoint_name = 'Transforms.ToString'
+ inputs = {}
+ outputs = {}
+
+ if column is not None:
+ inputs['Column'] = try_set(
+ obj=column,
+ none_acceptable=False,
+ is_of_type=list,
+ is_column=True)
+ if data is not None:
+ inputs['Data'] = try_set(
+ obj=data,
+ none_acceptable=False,
+ is_of_type=str)
+ if output_data is not None:
+ outputs['OutputData'] = try_set(
+ obj=output_data,
+ none_acceptable=False,
+ is_of_type=str)
+ if model is not None:
+ outputs['Model'] = try_set(
+ obj=model,
+ none_acceptable=False,
+ is_of_type=str)
+
+ input_variables = {
+ x for x in unlist(inputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+ output_variables = {
+ x for x in unlist(outputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+
+ entrypoint = EntryPoint(
+ name=entrypoint_name, inputs=inputs, outputs=outputs,
+ input_variables=input_variables,
+ output_variables=output_variables)
+ return entrypoint
diff --git a/src/python/nimbusml/preprocessing/__init__.py b/src/python/nimbusml/preprocessing/__init__.py
index 26b41b8e..728327be 100644
--- a/src/python/nimbusml/preprocessing/__init__.py
+++ b/src/python/nimbusml/preprocessing/__init__.py
@@ -2,10 +2,16 @@
from .tokey import ToKey
from .tensorflowscorer import TensorFlowScorer
from .datasettransformer import DatasetTransformer
+from .datetimesplitter import DateTimeSplitter
+from .tokeyimputer import ToKeyImputer
+from .tostring import ToString
__all__ = [
+ 'DateTimeSplitter',
'FromKey',
'ToKey',
+ 'ToKeyImputer',
+ 'ToString',
'TensorFlowScorer',
'DatasetTransformer'
]
diff --git a/src/python/nimbusml/preprocessing/datetimesplitter.py b/src/python/nimbusml/preprocessing/datetimesplitter.py
new file mode 100644
index 00000000..fb33337b
--- /dev/null
+++ b/src/python/nimbusml/preprocessing/datetimesplitter.py
@@ -0,0 +1,63 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+DateTimeSplitter
+"""
+
+__all__ = ["DateTimeSplitter"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.preprocessing.datetimesplitter import \
+ DateTimeSplitter as core
+from ..internal.utils.utils import trace
+
+
+class DateTimeSplitter(core, BaseTransform, TransformerMixin):
+ """
+ **Description**
+ Splits a date time value into each individual component
+
+ :param columns: see `Columns `_.
+
+ :param prefix: Output column prefix.
+
+ :param columns_to_drop: Columns to drop after the DateTime Expansion.
+
+ :param country: Country to get holidays for. Defaults to none if not
+ passed.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ prefix,
+ columns_to_drop=None,
+ country='None',
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ prefix=prefix,
+ columns_to_drop=columns_to_drop,
+ country=country,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
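
Note for reviewers (usage sketch, not part of the patch; the column name 'dt' and the epoch-second inputs are illustrative):

    import pandas as pd
    from nimbusml.preprocessing import DateTimeSplitter

    # 'dt' holds seconds since the Unix epoch; the transform expands it
    # into components named with the given prefix (dt_Year, dt_Month, ...).
    df = pd.DataFrame(dict(dt=[0, 86400]))
    xf = DateTimeSplitter(prefix='dt_') << 'dt'
    print(xf.fit_transform(df))
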
diff --git a/src/python/nimbusml/preprocessing/normalization/__init__.py b/src/python/nimbusml/preprocessing/normalization/__init__.py
index f7d7647a..3928ac40 100644
--- a/src/python/nimbusml/preprocessing/normalization/__init__.py
+++ b/src/python/nimbusml/preprocessing/normalization/__init__.py
@@ -4,6 +4,7 @@
from .lpscaler import LpScaler
from .meanvariancescaler import MeanVarianceScaler
from .minmaxscaler import MinMaxScaler
+from .robustscaler import RobustScaler
__all__ = [
'Binner',
@@ -11,5 +12,6 @@
'LogMeanVarianceScaler',
'LpScaler',
'MeanVarianceScaler',
- 'MinMaxScaler'
+ 'MinMaxScaler',
+ 'RobustScaler'
]
diff --git a/src/python/nimbusml/preprocessing/normalization/robustscaler.py b/src/python/nimbusml/preprocessing/normalization/robustscaler.py
new file mode 100644
index 00000000..776d5609
--- /dev/null
+++ b/src/python/nimbusml/preprocessing/normalization/robustscaler.py
@@ -0,0 +1,66 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+RobustScaler
+"""
+
+__all__ = ["RobustScaler"]
+
+
+from sklearn.base import TransformerMixin
+
+from ...base_transform import BaseTransform
+from ...internal.core.preprocessing.normalization.robustscaler import \
+ RobustScaler as core
+from ...internal.utils.utils import trace
+
+
+class RobustScaler(core, BaseTransform, TransformerMixin):
+ """
+ **Description**
+ Removes the median and scales the data according to the quantile range.
+
+ :param columns: see `Columns `_.
+
+ :param center: If True, center the data before scaling.
+
+ :param scale: If True, scale the data to interquartile range.
+
+ :param quantile_min: Min for the quantile range used to calculate scale.
+
+ :param quantile_max: Max for the quantile range used to calculate scale.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ center=True,
+ scale=True,
+ quantile_min=25.0,
+ quantile_max=75.0,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ center=center,
+ scale=scale,
+ quantile_min=quantile_min,
+ quantile_max=quantile_max,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
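
Note for reviewers (sketch, not part of the patch): the semantics mirror scikit-learn's RobustScaler, i.e. x' = (x - median(x)) / (Q_max(x) - Q_min(x)) with the 25th/75th percentiles by default. A numpy restatement, which also predicts the expected values in the test added below:

    import numpy as np

    x = np.array([1.0, 3.0, 5.0, 7.0, 9.0])
    iqr = np.percentile(x, 75) - np.percentile(x, 25)  # 7 - 3 = 4
    print((x - np.median(x)) / iqr)                    # [-1. -0.5 0. 0.5 1.]
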
diff --git a/src/python/nimbusml/preprocessing/tokeyimputer.py b/src/python/nimbusml/preprocessing/tokeyimputer.py
new file mode 100644
index 00000000..000d6a2f
--- /dev/null
+++ b/src/python/nimbusml/preprocessing/tokeyimputer.py
@@ -0,0 +1,49 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+ToKeyImputer
+"""
+
+__all__ = ["ToKeyImputer"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.preprocessing.tokeyimputer import ToKeyImputer as core
+from ..internal.utils.utils import trace
+
+
+class ToKeyImputer(core, BaseTransform, TransformerMixin):
+ """
+ **Description**
+ Fills in missing values in a column based on the most frequent value
+
+ :param columns: see `Columns `_.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
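
Note for reviewers (usage sketch, not part of the patch; the 'text.out'/'text' column pair is illustrative): missing entries are replaced by the column's most frequent value.

    import pandas as pd
    from nimbusml.preprocessing import ToKeyImputer

    df = pd.DataFrame(dict(text=['a', 'b', 'b', None]))
    print(ToKeyImputer(columns={'text.out': 'text'}).fit_transform(df))
    # 'text.out' in the last row is expected to be 'b', the modal value
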
diff --git a/src/python/nimbusml/preprocessing/tostring.py b/src/python/nimbusml/preprocessing/tostring.py
new file mode 100644
index 00000000..2dd2826c
--- /dev/null
+++ b/src/python/nimbusml/preprocessing/tostring.py
@@ -0,0 +1,49 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+ToString
+"""
+
+__all__ = ["ToString"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.preprocessing.tostring import ToString as core
+from ..internal.utils.utils import trace
+
+
+class ToString(core, BaseTransform, TransformerMixin):
+ """
+ **Description**
+ Turns the given column into a column of its string representation
+
+ :param columns: see `Columns `_.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
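
Note for reviewers (usage sketch, not part of the patch; the 'f0.str' output name is illustrative):

    import numpy as np
    from pandas import DataFrame
    from nimbusml.preprocessing import ToString

    df = DataFrame({'f0': [4, -1]}).astype(np.int32)
    print(ToString(columns={'f0.str': 'f0'}).fit_transform(df))
    # integers render as '4' and '-1'; per the test below, floats render
    # with six decimal places (e.g. '3.100000') and missing values as 'NaN'
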
diff --git a/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py b/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py
index 0dc85f6e..99db58dd 100644
--- a/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py
+++ b/src/python/nimbusml/tests/preprocessing/missing_values/test_data_with_missing.py
@@ -9,6 +9,7 @@
from math import isnan
from nimbusml import Pipeline
from nimbusml.linear_model import FastLinearRegressor
+from nimbusml.preprocessing import ToKeyImputer
from nimbusml.preprocessing.missing_values import Filter, Handler, Indicator
from pandas import DataFrame
from sklearn.utils.testing import assert_equal, assert_true, \
@@ -160,6 +161,19 @@ def test_input_conversion_to_float_retains_other_column_types(self):
assert_equal(result.dtypes['f1'], np.object)
assert_equal(result.dtypes['f2.f2'], np.float32)
+ def test_category_imputation(self):
+ data = {'f0': [4, 4, np.nan, 9],
+ 'f1': [4, 4, np.nan, np.nan]}
+ data = DataFrame(data)
+
+ # Check ToKeyImputer
+ xf = ToKeyImputer(columns={'f0.out': 'f0', 'f1.out': 'f1'})
+ result = xf.fit_transform(data)
+
+ assert_equal(result['f0.out'][1], 4)
+ assert_equal(result['f0.out'][2], 4)
+ assert_equal(result['f1.out'][1], 4)
+ assert_equal(result['f1.out'][2], 4)
if __name__ == '__main__':
unittest.main()
diff --git a/src/python/nimbusml/tests/preprocessing/normalization/test_robustscaler.py b/src/python/nimbusml/tests/preprocessing/normalization/test_robustscaler.py
new file mode 100644
index 00000000..da854164
--- /dev/null
+++ b/src/python/nimbusml/tests/preprocessing/normalization/test_robustscaler.py
@@ -0,0 +1,27 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+import unittest
+
+import pandas
+from nimbusml import Pipeline
+from nimbusml.preprocessing.normalization import RobustScaler
+
+
+class TestRobustScaler(unittest.TestCase):
+
+ def test_with_integer_inputs(self):
+ df = pandas.DataFrame(data=dict(c0=[1, 3, 5, 7, 9]))
+
+ xf = RobustScaler(columns='c0', center=True, scale=True)
+ pipeline = Pipeline([xf])
+ result = pipeline.fit_transform(df)
+
+ expected_result = pandas.Series([-1.0, -0.5, 0.0, 0.5, 1.0])
+
+ self.assertTrue(result.loc[:, 'c0'].equals(expected_result))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/tests/preprocessing/test_datetimesplitter.py b/src/python/nimbusml/tests/preprocessing/test_datetimesplitter.py
new file mode 100644
index 00000000..0b9c8141
--- /dev/null
+++ b/src/python/nimbusml/tests/preprocessing/test_datetimesplitter.py
@@ -0,0 +1,43 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import pandas
+from nimbusml.preprocessing import DateTimeSplitter
+from sklearn.utils.testing import assert_equal
+
+
+class TestDateTimeSplitter(unittest.TestCase):
+
+ def test_check_estimator_DateTimeSplitter(self):
+ df = pandas.DataFrame(data=dict(dt=list(range(8))))
+ dt = DateTimeSplitter(prefix='dt_') << 'dt'
+ result = dt.fit_transform(df)
+ assert_equal(result['dt_Year'][0], 1970, "the year should be 1970")
+
+ def test_holidays(self):
+ df = pandas.DataFrame(data=dict(
+ tokens1=[1, 2, 3, 157161600],
+ tokens2=[10, 11, 12, 13]
+ ))
+
+ cols_to_drop = [
+ 'Hour12', 'DayOfWeek', 'DayOfQuarter',
+ 'DayOfYear', 'WeekOfMonth', 'QuarterOfYear',
+ 'HalfOfYear', 'WeekIso', 'YearIso', 'MonthLabel',
+ 'AmPmLabel', 'DayOfWeekLabel', 'IsPaidTimeOff'
+ ]
+
+ dts = DateTimeSplitter(prefix='dt',
+ country='Canada',
+ columns_to_drop=cols_to_drop) << 'tokens1'
+ y = dts.fit_transform(df)
+
+ self.assertEqual(y.loc[3, 'dtHolidayName'], 'Christmas Day')
+
+
+if __name__ == '__main__':
+ unittest.main()
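
Note (not part of the patch): the magic number 157161600 in test_holidays is seconds since the Unix epoch and falls on Christmas Day 1974, which is why 'Christmas Day' is the expected dtHolidayName for Canada. A quick standard-library check:

    from datetime import datetime, timezone

    print(datetime.fromtimestamp(157161600, tz=timezone.utc))
    # 1974-12-25 00:00:00+00:00
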
diff --git a/src/python/nimbusml/tests/preprocessing/test_tokeyimputer.py b/src/python/nimbusml/tests/preprocessing/test_tokeyimputer.py
new file mode 100644
index 00000000..85c501c7
--- /dev/null
+++ b/src/python/nimbusml/tests/preprocessing/test_tokeyimputer.py
@@ -0,0 +1,35 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import pandas as pd
+from nimbusml.preprocessing import ToKeyImputer
+
+
+class TestToKeyImputer(unittest.TestCase):
+
+ def test_tokeyimputer(self):
+ text_df = pd.DataFrame(
+ data=dict(
+ text=[
+ "cat",
+ "dog",
+ "fish",
+ "orange",
+ "cat orange",
+ "dog",
+ "fish",
+ None,
+ "spider"]))
+
+ tokey = ToKeyImputer() << 'text'
+ y = tokey.fit_transform(text_df)
+
+ self.assertEqual(y.loc[7, 'text'], 'dog')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/tests/preprocessing/test_tostring.py b/src/python/nimbusml/tests/preprocessing/test_tostring.py
new file mode 100644
index 00000000..edb11d63
--- /dev/null
+++ b/src/python/nimbusml/tests/preprocessing/test_tostring.py
@@ -0,0 +1,38 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+from pandas import DataFrame
+from nimbusml.preprocessing import ToString
+from sklearn.utils.testing import assert_equal
+
+
+class TestToString(unittest.TestCase):
+
+ def test_tostring(self):
+ data = {'f0': [4, 4, -1, 9],
+ 'f1': [5, 5, 3.1, -0.23],
+ 'f2': [6, 6.7, np.nan, np.nan]}
+ data = DataFrame(data).astype({'f0': np.int32,
+ 'f1': np.float32,
+ 'f2': np.float64})
+
+ xf = ToString(columns={'f0.out': 'f0',
+ 'f1.out': 'f1',
+ 'f2.out': 'f2'})
+ result = xf.fit_transform(data)
+
+ assert_equal(result['f0.out'][1], '4')
+ assert_equal(result['f0.out'][2], '-1')
+ assert_equal(result['f1.out'][1], '5.000000')
+ assert_equal(result['f1.out'][2], '3.100000')
+ assert_equal(result['f2.out'][1], '6.700000')
+ assert_equal(result['f2.out'][2], 'NaN')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/tests/timeseries/test_timeseriesimputer.py b/src/python/nimbusml/tests/timeseries/test_timeseriesimputer.py
new file mode 100644
index 00000000..98c9e21b
--- /dev/null
+++ b/src/python/nimbusml/tests/timeseries/test_timeseriesimputer.py
@@ -0,0 +1,38 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import pandas as pd
+from nimbusml.timeseries import TimeSeriesImputer
+
+
+class TestTimeSeriesImputer(unittest.TestCase):
+
+ def test_timeseriesimputer_adds_new_row(self):
+ df = pd.DataFrame(data=dict(
+ ts=[1, 2, 3, 5],
+ grain=[1970, 1970, 1970, 1970],
+ c3=[10, 13, 15, 20],
+ c4=[19, 12, 16, 19]
+ ))
+
+ tsi = TimeSeriesImputer(time_series_column='ts',
+ grain_columns=['grain'],
+ filter_columns=['c3', 'c4'],
+ impute_mode='ForwardFill',
+ filter_mode='Include')
+ result = tsi.fit_transform(df)
+
+ self.assertEqual(result.loc[0, 'ts'], 1)
+ self.assertEqual(result.loc[3, 'ts'], 4)
+ self.assertEqual(result.loc[3, 'grain'], 1970)
+ self.assertEqual(result.loc[3, 'c3'], 15)
+ self.assertEqual(result.loc[3, 'c4'], 16)
+ self.assertEqual(result.loc[3, 'IsRowImputed'], True)
+
+
+if __name__ == '__main__':
+ unittest.main()
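
Note (illustration, not part of the patch): in plain pandas terms the test expects the missing ts=4 row to be created and forward-filled from the ts=3 row; the transform additionally emits the IsRowImputed flag, which this sketch does not reproduce.

    import pandas as pd

    df = pd.DataFrame(dict(ts=[1, 2, 3, 5], grain=[1970] * 4,
                           c3=[10, 13, 15, 20], c4=[19, 12, 16, 19]))
    full = df.set_index('ts').reindex(range(1, 6)).ffill().reset_index()
    print(full)  # the new ts=4 row carries grain=1970, c3=15, c4=16
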
diff --git a/src/python/nimbusml/timeseries/__init__.py b/src/python/nimbusml/timeseries/__init__.py
index 64e66add..05dbfa3c 100644
--- a/src/python/nimbusml/timeseries/__init__.py
+++ b/src/python/nimbusml/timeseries/__init__.py
@@ -3,11 +3,13 @@
from .ssaspikedetector import SsaSpikeDetector
from .ssachangepointdetector import SsaChangePointDetector
from .ssaforecaster import SsaForecaster
+from .timeseriesimputer import TimeSeriesImputer
__all__ = [
'IidSpikeDetector',
'IidChangePointDetector',
'SsaSpikeDetector',
'SsaChangePointDetector',
- 'SsaForecaster'
+ 'SsaForecaster',
+ 'TimeSeriesImputer'
]
diff --git a/src/python/nimbusml/timeseries/timeseriesimputer.py b/src/python/nimbusml/timeseries/timeseriesimputer.py
new file mode 100644
index 00000000..150b9959
--- /dev/null
+++ b/src/python/nimbusml/timeseries/timeseriesimputer.py
@@ -0,0 +1,77 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+TimeSeriesImputer
+"""
+
+__all__ = ["TimeSeriesImputer"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.timeseries.timeseriesimputer import \
+ TimeSeriesImputer as core
+from ..internal.utils.utils import trace
+
+
+class TimeSeriesImputer(core, BaseTransform, TransformerMixin):
+ """
+ **Description**
+ Fills in missing rows and values
+
+ :param columns: see `Columns `_.
+
+ :param time_series_column: Column representing the time.
+
+ :param grain_columns: List of grain columns.
+
+ :param filter_columns: Columns to filter.
+
+ :param filter_mode: Filter mode. Either include or exclude.
+
+ :param impute_mode: Mode for imputing, defaults to ForwardFill if not
+ provided.
+
+ :param supress_type_errors: Suppress the errors that would occur if a column
+ and impute mode are incompatible. If true, will skip the column. If
+ false, will stop and throw an error.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ time_series_column,
+ grain_columns,
+ filter_columns=None,
+ filter_mode='Exclude',
+ impute_mode='ForwardFill',
+ supress_type_errors=False,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ time_series_column=time_series_column,
+ grain_columns=grain_columns,
+ filter_columns=filter_columns,
+ filter_mode=filter_mode,
+ impute_mode=impute_mode,
+ supress_type_errors=supress_type_errors,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
diff --git a/src/python/setup.py b/src/python/setup.py
index fc350275..5fc3fcba 100644
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -45,7 +45,7 @@
# Versions should comply with PEP440. For a discussion on
# single-sourcing the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
- version='1.5.0',
+ version='1.5.1',
description='NimbusML',
long_description=long_description,
diff --git a/src/python/tests/test_estimator_checks.py b/src/python/tests/test_estimator_checks.py
index 7dfd5eb8..1a835fbc 100644
--- a/src/python/tests/test_estimator_checks.py
+++ b/src/python/tests/test_estimator_checks.py
@@ -16,9 +16,10 @@
from nimbusml.ensemble import LightGbmRegressor
from nimbusml.feature_extraction.text import NGramFeaturizer
from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram
+from nimbusml.preprocessing import TensorFlowScorer, DateTimeSplitter
from nimbusml.linear_model import SgdBinaryClassifier
-from nimbusml.preprocessing import TensorFlowScorer
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter
+from nimbusml.preprocessing.normalization import RobustScaler
from nimbusml.timeseries import (IidSpikeDetector, IidChangePointDetector,
SsaSpikeDetector, SsaChangePointDetector,
SsaForecaster)
@@ -53,6 +54,12 @@
# I8 should not have NA values
'CountSelector':
'check_estimators_dtypes',
+ # DateTimeSplitter does not work with floating point types.
+ 'DateTimeSplitter':
+ 'check_transformer_general, check_pipeline_consistency, '
+ 'check_estimators_pickle, check_estimators_dtypes, '
+ 'check_dict_unchanged, check_dtype_object, check_fit_score_takes_y, '
+ 'check_transformer_data_not_an_array',
# by design returns smaller number of rows
'SkipFilter': 'check_transformer_general, '
'check_transformer_data_not_an_array',
@@ -154,6 +161,15 @@
'check_estimators_overwrite_params, \
check_estimator_sparse_data, check_estimators_pickle, '
'check_estimators_nan_inf',
+ # RobustScaler does not support vectorized types
+ 'RobustScaler': 'check_estimator_sparse_data',
+ 'ToKeyImputer': 'check_estimator_sparse_data',
+ # Most of these skipped tests are failing because the checks
+ # require numerical types. ToString returns object types.
+ # TypeError: ufunc 'isfinite' not supported for the input types
+ 'ToString': 'check_estimator_sparse_data, check_pipeline_consistency, '
+ 'check_transformer_data_not_an_array, check_estimators_pickle, '
+ 'check_transformer_general',
}
OMITTED_CHECKS_TUPLE = (
@@ -191,6 +207,7 @@
'check_classifiers_train']
INSTANCES = {
+ 'DateTimeSplitter': DateTimeSplitter(prefix='dt', columns=['F0']),
'EnsembleClassifier': EnsembleClassifier(num_models=3),
'EnsembleRegressor': EnsembleRegressor(num_models=3),
'LightGbmBinaryClassifier': LightGbmBinaryClassifier(
@@ -202,6 +219,7 @@
'LightGbmRanker': LightGbmRanker(
minimum_example_count_per_group=1, minimum_example_count_per_leaf=1),
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=n_gram()),
+ 'RobustScaler': RobustScaler(scale=False),
'SgdBinaryClassifier': SgdBinaryClassifier(number_of_threads=1, shuffle=False),
'SkipFilter': SkipFilter(count=5),
'TakeFilter': TakeFilter(count=100000),
@@ -266,7 +284,8 @@ def load_json(file_path):
'TreeFeaturizer',
# skip SymSgdBinaryClassifier for now, because of crashes.
'SymSgdBinaryClassifier',
- 'DatasetTransformer'
+ 'DatasetTransformer',
+ 'TimeSeriesImputer'
])
epoints = []
diff --git a/src/python/tools/entrypoint_compiler.py b/src/python/tools/entrypoint_compiler.py
index ed829533..57b1b8de 100644
--- a/src/python/tools/entrypoint_compiler.py
+++ b/src/python/tools/entrypoint_compiler.py
@@ -1560,7 +1560,6 @@ def __init__(self, argument, inout): # dict
self.default = argument.get('Default', Missing())
self.required = argument.get('Required', Missing())
self.aliases = argument.get('Aliases', Missing())
- self.pass_as = argument.get('PassAs', None)
self.name_converted = convert_name(self.name)
self.new_name_converted = convert_name(
@@ -1615,7 +1614,7 @@ def get_body(self):
"is_of_type=numbers.Real"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
if not isinstance(self.range, Missing):
@@ -1646,7 +1645,7 @@ def get_body(self):
"none_acceptable={none_acceptable}, is_of_type=bool"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
return body + ")"
@@ -1693,7 +1692,7 @@ def get_body(self):
template += ", is_column=True"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
return body + ")"
@@ -1717,7 +1716,7 @@ def get_body(self):
"none_acceptable={none_acceptable}, is_of_type=str"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
value_check = ", values={0}".format(str(self.type['Values']))
@@ -1748,7 +1747,7 @@ def get_body(self):
"none_acceptable={none_acceptable}, is_of_type=list"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
return body + ")"
@@ -1790,7 +1789,7 @@ def get_body(self):
template += ', is_column=True'
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
return body + ")"
@@ -1818,7 +1817,7 @@ def get_body(self):
template += ', is_column=True'
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
return body + ")"
@@ -1846,7 +1845,7 @@ def get_body(self):
"none_acceptable={none_acceptable}, is_of_type=dict"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
return body + ")"
@@ -1882,7 +1881,7 @@ def get_body(self):
template += ", is_column=True"
body = template.format(
inout=self.inout,
- name=self.pass_as or self.name,
+ name=self.name,
name_converted=self.name_converted,
none_acceptable=not self.required)
field_check = ", field_names={0}".format(
@@ -2041,6 +2040,7 @@ def generate_code(pkg_path, generate_entrypoints, generate_api):
script_args = arg_parser.parse_args()
pkg_path = os.path.join(my_dir, r'..\nimbusml')
+
if script_args.check_manual_changes:
verbose = False
if script_args.folder == 'temp':
diff --git a/src/python/tools/manifest.json b/src/python/tools/manifest.json
index c8e6d6e5..45eb1a38 100644
--- a/src/python/tools/manifest.json
+++ b/src/python/tools/manifest.json
@@ -17301,6 +17301,82 @@
"ITransformOutput"
]
},
+ {
+ "Name": "Transforms.CategoryImputer",
+ "Desc": "Fills in missing values in a column based on the most frequent value",
+ "FriendlyName": "CategoryImputer",
+ "ShortName": "CategoryImputer",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
{
"Name": "Transforms.CharacterTokenizer",
"Desc": "Character-oriented tokenizer where text is considered a sequence of characters.",
@@ -18077,6 +18153,157 @@
}
]
},
+ {
+ "Name": "Transforms.DateTimeSplitter",
+ "Desc": "Splits a date time value into each individual component",
+ "FriendlyName": "DateTime Transform",
+ "ShortName": "DateTimeTransform",
+ "Inputs": [
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Input column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Prefix",
+ "Type": "String",
+ "Desc": "Output column prefix",
+ "Aliases": [
+ "pre"
+ ],
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "ColumnsToDrop",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Enum",
+ "Values": [
+ "Year",
+ "Month",
+ "Day",
+ "Hour",
+ "Minute",
+ "Second",
+ "AmPm",
+ "Hour12",
+ "DayOfWeek",
+ "DayOfQuarter",
+ "DayOfYear",
+ "WeekOfMonth",
+ "QuarterOfYear",
+ "HalfOfYear",
+ "WeekIso",
+ "YearIso",
+ "MonthLabel",
+ "AmPmLabel",
+ "DayOfWeekLabel",
+ "HolidayName",
+ "IsPaidTimeOff"
+ ]
+ }
+ },
+ "Desc": "Columns to drop after the DateTime Expansion",
+ "Aliases": [
+ "drop"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Country",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "None",
+ "Argentina",
+ "Australia",
+ "Austria",
+ "Belarus",
+ "Belgium",
+ "Brazil",
+ "Canada",
+ "Colombia",
+ "Croatia",
+ "Czech",
+ "Denmark",
+ "England",
+ "Finland",
+ "France",
+ "Germany",
+ "Hungary",
+ "India",
+ "Ireland",
+ "IsleofMan",
+ "Italy",
+ "Japan",
+ "Mexico",
+ "Netherlands",
+ "NewZealand",
+ "NorthernIreland",
+ "Norway",
+ "Poland",
+ "Portugal",
+ "Scotland",
+ "Slovenia",
+ "SouthAfrica",
+ "Spain",
+ "Sweden",
+ "Switzerland",
+ "Ukraine",
+ "UnitedKingdom",
+ "UnitedStates",
+ "Wales"
+ ]
+ },
+ "Desc": "Country to get holidays for. Defaults to none if not passed",
+ "Aliases": [
+ "ctry"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": "None"
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
{
"Name": "Transforms.Dictionarizer",
"Desc": "Converts input values (words, numbers, etc.) to index in a dictionary.",
@@ -21931,6 +22158,130 @@
"ITransformOutput"
]
},
+ {
+ "Name": "Transforms.RobustScaler",
+ "Desc": "Removes the median and scales the data according to the quantile range.",
+ "FriendlyName": "RobustScalerTransformer",
+ "ShortName": "RobustScalerTransformer",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Center",
+ "Type": "Bool",
+ "Desc": "If True, center the data before scaling.",
+ "Aliases": [
+ "ctr"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "Scale",
+ "Type": "Bool",
+ "Desc": "If True, scale the data to interquartile range.",
+ "Aliases": [
+ "sc"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "QuantileMin",
+ "Type": "Float",
+ "Desc": "Min for the quantile range used to calculate scale.",
+ "Aliases": [
+ "min"
+ ],
+ "Required": false,
+ "SortOrder": 4.0,
+ "IsNullable": false,
+ "Default": 25.0
+ },
+ {
+ "Name": "QuantileMax",
+ "Type": "Float",
+ "Desc": "Max for the quantile range used to calculate scale.",
+ "Aliases": [
+ "max"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": 75.0
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
{
"Name": "Transforms.RowRangeFilter",
"Desc": "Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values.",
@@ -22941,6 +23292,207 @@
"ITransformOutput"
]
},
+ {
+ "Name": "Transforms.TimeSeriesImputer",
+ "Desc": "Fills in missing row and values",
+ "FriendlyName": "TimeSeriesImputer",
+ "ShortName": "TimeSeriesImputer",
+ "Inputs": [
+ {
+ "Name": "TimeSeriesColumn",
+ "Type": "String",
+ "Desc": "Column representing the time",
+ "Aliases": [
+ "time"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "GrainColumns",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "List of grain columns",
+ "Aliases": [
+ "grains"
+ ],
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "FilterColumns",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": "String"
+ },
+ "Desc": "Columns to filter",
+ "Aliases": [
+ "filters"
+ ],
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "FilterMode",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "NoFilter",
+ "Include",
+ "Exclude"
+ ]
+ },
+ "Desc": "Filter mode. Either include or exclude",
+ "Aliases": [
+ "fmode"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "Exclude"
+ },
+ {
+ "Name": "ImputeMode",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "ForwardFill",
+ "BackFill",
+ "Median",
+ "Interpolate"
+ ]
+ },
+ "Desc": "Mode for imputing, defaults to ForwardFill if not provided",
+ "Aliases": [
+ "mode"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "ForwardFill"
+ },
+ {
+ "Name": "SupressTypeErrors",
+ "Type": "Bool",
+ "Desc": "Supress the errors that would occur if a column and impute mode are imcompatible. If true, will skip the column. If false, will stop and throw an error.",
+ "Aliases": [
+ "error"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
+ {
+ "Name": "Transforms.ToString",
+ "Desc": "Turns the given column into a column of its string representation",
+ "FriendlyName": "ToString Transform",
+ "ShortName": "ToStringTransform",
+ "Inputs": [
+ {
+ "Name": "Column",
+ "Type": {
+ "Kind": "Array",
+ "ItemType": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "Name of the new column",
+ "Aliases": [
+ "name"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "Name of the source column",
+ "Aliases": [
+ "src"
+ ],
+ "Required": false,
+ "SortOrder": 150.0,
+ "IsNullable": false,
+ "Default": null
+ }
+ ]
+ }
+ },
+ "Desc": "New column definition (optional form: name:src)",
+ "Aliases": [
+ "col"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
{
"Name": "Transforms.TrainTestDatasetSplitter",
"Desc": "Split the dataset into train and test sets",
diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json
index cddfaf25..d5b0b3a3 100644
--- a/src/python/tools/manifest_diff.json
+++ b/src/python/tools/manifest_diff.json
@@ -294,6 +294,24 @@
"Module": "preprocessing",
"Type": "Transform"
},
+ {
+ "Name": "Transforms.CategoryImputer",
+ "NewName": "ToKeyImputer",
+ "Module": "preprocessing",
+ "Type": "Transform"
+ },
+ {
+ "Name": "Transforms.ToString",
+ "NewName": "ToString",
+ "Module": "preprocessing",
+ "Type": "Transform"
+ },
+ {
+ "Name": "Transforms.DateTimeSplitter",
+ "NewName": "DateTimeSplitter",
+ "Module": "preprocessing",
+ "Type": "Transform"
+ },
{
"Name": "Transforms.TensorFlowScorer",
"NewName": "TensorFlowScorer",
@@ -493,6 +511,12 @@
"Module": "preprocessing.normalization",
"Type": "Transform"
},
+ {
+ "Name": "Transforms.RobustScaler",
+ "NewName": "RobustScaler",
+ "Module": "preprocessing.normalization",
+ "Type": "Transform"
+ },
{
"Name": "Transforms.MissingValuesRowDropper",
"NewName": "Filter",
@@ -610,6 +634,12 @@
"Module": "timeseries",
"Type": "Transform"
},
+ {
+ "Name": "Transforms.TimeSeriesImputer",
+ "NewName": "TimeSeriesImputer",
+ "Module": "timeseries",
+ "Type": "Transform"
+ },
{
"Name": "Trainers.PoissonRegressor",
"NewName": "PoissonRegressionRegressor",
diff --git a/version.txt b/version.txt
index 3e1ad720..8e03717d 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.5.0
\ No newline at end of file
+1.5.1
\ No newline at end of file