diff --git a/build.cmd b/build.cmd index 8ed5005d..4edfd58e 100644 --- a/build.cmd +++ b/build.cmd @@ -26,6 +26,9 @@ set RunExtendedTests=False set BuildDotNetBridgeOnly=False set SkipDotNetBridge=False set AzureBuild=False +set BuildManifestGenerator=False +set UpdateManifest=False +set VerifyManifest=False :Arg_Loop if [%1] == [] goto :Build @@ -53,6 +56,10 @@ if /i [%1] == [--skipDotNetBridge] ( set SkipDotNetBridge=True shift && goto :Arg_Loop ) +if /i [%1] == [--updateManifest] ( + set UpdateManifest=True + shift && goto :Arg_Loop +) if /i [%1] == [--azureBuild] ( set AzureBuild=True shift && goto :Arg_Loop @@ -68,6 +75,7 @@ echo " --installPythonPackages Install python packages after build" echo " --includeExtendedTests Include the extended tests if the tests are run" echo " --buildDotNetBridgeOnly Build only DotNetBridge" echo " --skipDotNetBridge Build everything except DotNetBridge" +echo " --updateManifest Update manifest.json" echo " --azureBuild Building in azure devops (adds dotnet CLI to the path)" goto :Exit_Success @@ -189,6 +197,37 @@ if "%BuildDotNetBridgeOnly%" == "True" ( call "%_dotnet%" build -c %Configuration% --force "%__currentScriptDir%src\Platforms\build.csproj" call "%_dotnet%" publish "%__currentScriptDir%src\Platforms\build.csproj" --force --self-contained -r win-x64 -c %Configuration% + +if "%Configuration:~-5%" == "Py3.7" set VerifyManifest=True +if "%VerifyManifest%" == "True" set BuildManifestGenerator=True +if "%UpdateManifest%" == "True" set BuildManifestGenerator=True + +if "%BuildManifestGenerator%" == "True" ( + echo "" + echo "#################################" + echo "Building Manifest Generator... " + echo "#################################" + call "%_dotnet%" build -c %Configuration% -o "%BuildOutputDir%%Configuration%" --force "%__currentScriptDir%src\ManifestGenerator\ManifestGenerator.csproj" +) + +if "%UpdateManifest%" == "True" ( + echo Updating manifest.json ... 
+ call "%_dotnet%" "%BuildOutputDir%%Configuration%\ManifestGenerator.dll" create %__currentScriptDir%\src\python\tools\manifest.json + echo manifest.json updated. + echo Run entrypoint_compiler.py --generate_api --generate_entrypoints to generate entry points and api files. + goto :Exit_Success +) + +if "%VerifyManifest%" == "True" ( + echo Verifying manifest.json ... + call "%_dotnet%" "%BuildOutputDir%%Configuration%\ManifestGenerator.dll" verify %__currentScriptDir%\src\python\tools\manifest.json + if errorlevel 1 ( + echo manifest.json is invalid. + echo Run build --updateManifest to update manifest.json. + goto :Exit_Error + ) +) + echo "" echo "#################################" echo "Downloading Dependencies " @@ -392,10 +431,18 @@ if "%RunExtendedTests%" == "True" ( ) :Exit_Success +:: Shut down all dotnet persistent servers so that the +:: dotnet executable is not left open in the background. +:: As of dotnet 2.1.3 three servers are left running in +:: the background. This will shut them all down. 
+:: See here for more info: https://github.com/dotnet/cli/issues/9458 +call "%_dotnet%" build-server shutdown endlocal exit /b %ERRORLEVEL% :Exit_Error +:: See comment above +call "%_dotnet%" build-server shutdown endlocal echo Failed with error %ERRORLEVEL% exit /b %ERRORLEVEL% \ No newline at end of file diff --git a/nimbusml.sln b/nimbusml.sln index 546014a9..c87f94b8 100644 --- a/nimbusml.sln +++ b/nimbusml.sln @@ -20,6 +20,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution version.txt = version.txt EndProjectSection EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManifestGenerator", "src\ManifestGenerator\ManifestGenerator.csproj", "{D3AED287-722F-4243-966E-77AD0652B38E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution DbgLinPy2.7|x64 = DbgLinPy2.7|x64 @@ -65,36 +67,36 @@ Global {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.5|x64.ActiveCfg = DbgLinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.5|x64.Build.0 = DbgLinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.6|x64.ActiveCfg = DbgLinPy3.6|x64 - {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.7|x64.ActiveCfg = DbgLinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.6|x64.Build.0 = DbgLinPy3.6|x64 + {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.7|x64.ActiveCfg = DbgLinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgLinPy3.7|x64.Build.0 = DbgLinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy2.7|x64.ActiveCfg = DbgWinPy2.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy2.7|x64.Build.0 = DbgWinPy2.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.5|x64.ActiveCfg = DbgWinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.5|x64.Build.0 = DbgWinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.6|x64.ActiveCfg = DbgWinPy3.6|x64 - {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.7|x64.ActiveCfg = DbgWinPy3.7|x64 
{EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.6|x64.Build.0 = DbgWinPy3.6|x64 + {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.7|x64.ActiveCfg = DbgWinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.DbgWinPy3.7|x64.Build.0 = DbgWinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy2.7|x64.ActiveCfg = RlsLinPy2.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy2.7|x64.Build.0 = RlsLinPy2.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.5|x64.ActiveCfg = RlsLinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.5|x64.Build.0 = RlsLinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.6|x64.ActiveCfg = RlsLinPy3.6|x64 - {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.7|x64.ActiveCfg = RlsLinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.6|x64.Build.0 = RlsLinPy3.6|x64 + {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.7|x64.ActiveCfg = RlsLinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsLinPy3.7|x64.Build.0 = RlsLinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsMacPy3.6|x64.ActiveCfg = RlsMacPy3.6|x64 - {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsMacPy3.7|x64.ActiveCfg = RlsMacPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsMacPy3.6|x64.Build.0 = RlsMacPy3.6|x64 + {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsMacPy3.7|x64.ActiveCfg = RlsMacPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsMacPy3.7|x64.Build.0 = RlsMacPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy2.7|x64.ActiveCfg = RlsWinPy2.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy2.7|x64.Build.0 = RlsWinPy2.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.5|x64.ActiveCfg = RlsWinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.5|x64.Build.0 = RlsWinPy3.5|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.6|x64.ActiveCfg = RlsWinPy3.6|x64 - {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.7|x64.ActiveCfg = RlsWinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.6|x64.Build.0 = RlsWinPy3.6|x64 + 
{EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.7|x64.ActiveCfg = RlsWinPy3.7|x64 {EC58F2CF-A1D5-4E28-97F9-69B1E46F6F63}.RlsWinPy3.7|x64.Build.0 = RlsWinPy3.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgLinPy2.7|x64.ActiveCfg = DbgWinPy2.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgLinPy3.5|x64.ActiveCfg = DbgWinPy3.5|x64 @@ -105,8 +107,8 @@ Global {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.5|x64.ActiveCfg = DbgWinPy3.5|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.5|x64.Build.0 = DbgWinPy3.5|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.6|x64.ActiveCfg = DbgWinPy3.6|x64 - {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.7|x64.ActiveCfg = DbgWinPy3.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.6|x64.Build.0 = DbgWinPy3.6|x64 + {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.7|x64.ActiveCfg = DbgWinPy3.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.DbgWinPy3.7|x64.Build.0 = DbgWinPy3.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsLinPy2.7|x64.ActiveCfg = RlsWinPy2.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsLinPy3.5|x64.ActiveCfg = RlsWinPy3.5|x64 @@ -119,9 +121,45 @@ Global {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.5|x64.ActiveCfg = RlsWinPy3.5|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.5|x64.Build.0 = RlsWinPy3.5|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.6|x64.ActiveCfg = RlsWinPy3.6|x64 - {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.7|x64.ActiveCfg = RlsWinPy3.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.6|x64.Build.0 = RlsWinPy3.6|x64 + {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.7|x64.ActiveCfg = RlsWinPy3.7|x64 {3DA0AF32-A05B-4ECF-8010-83B14612FBB3}.RlsWinPy3.7|x64.Build.0 = RlsWinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy2.7|x64.ActiveCfg = DbgLinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy2.7|x64.Build.0 = DbgLinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy3.5|x64.ActiveCfg = DbgLinPy3.5|x64 + 
{D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy3.5|x64.Build.0 = DbgLinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy3.6|x64.ActiveCfg = DbgLinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy3.6|x64.Build.0 = DbgLinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy3.7|x64.ActiveCfg = DbgLinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgLinPy3.7|x64.Build.0 = DbgLinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy2.7|x64.ActiveCfg = DbgWinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy2.7|x64.Build.0 = DbgWinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy3.5|x64.ActiveCfg = DbgWinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy3.5|x64.Build.0 = DbgWinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy3.6|x64.ActiveCfg = DbgWinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy3.6|x64.Build.0 = DbgWinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy3.7|x64.ActiveCfg = DbgWinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.DbgWinPy3.7|x64.Build.0 = DbgWinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy2.7|x64.ActiveCfg = RlsLinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy2.7|x64.Build.0 = RlsLinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy3.5|x64.ActiveCfg = RlsLinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy3.5|x64.Build.0 = RlsLinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy3.6|x64.ActiveCfg = RlsLinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy3.6|x64.Build.0 = RlsLinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy3.7|x64.ActiveCfg = RlsLinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsLinPy3.7|x64.Build.0 = RlsLinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsMacPy3.6|x64.ActiveCfg = RlsMacPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsMacPy3.6|x64.Build.0 = RlsMacPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsMacPy3.7|x64.ActiveCfg = 
RlsMacPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsMacPy3.7|x64.Build.0 = RlsMacPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy2.7|x64.ActiveCfg = RlsWinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy2.7|x64.Build.0 = RlsWinPy2.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy3.5|x64.ActiveCfg = RlsWinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy3.5|x64.Build.0 = RlsWinPy3.5|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy3.6|x64.ActiveCfg = RlsWinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy3.6|x64.Build.0 = RlsWinPy3.6|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy3.7|x64.ActiveCfg = RlsWinPy3.7|x64 + {D3AED287-722F-4243-966E-77AD0652B38E}.RlsWinPy3.7|x64.Build.0 = RlsWinPy3.7|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/DotNetBridge/ManifestUtils.cs b/src/DotNetBridge/ManifestUtils.cs new file mode 100644 index 00000000..7d1c89a5 --- /dev/null +++ b/src/DotNetBridge/ManifestUtils.cs @@ -0,0 +1,110 @@ +//------------------------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+//------------------------------------------------------------------------------ + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Text.RegularExpressions; +using Microsoft.ML.Data; +using Microsoft.ML.EntryPoints; +using Microsoft.ML.Model.OnnxConverter; +using Microsoft.ML.Runtime; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.Ensemble; +using Microsoft.ML.Trainers.FastTree; +using Microsoft.ML.Trainers.LightGbm; +using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.TimeSeries; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + + +namespace Microsoft.ML.DotNetBridge +{ + public static class ManifestUtils + { + private static readonly Type[] _types = new Type[] + { + typeof(TextLoader), + typeof(LinearModelParameters), + typeof(OneHotEncodingTransformer), + typeof(FastTreeBinaryModelParameters), + typeof(EnsembleModelParameters), + typeof(KMeansModelParameters), + typeof(PcaModelParameters), + typeof(CVSplit), + typeof(LightGbmBinaryModelParameters), + typeof(TensorFlowTransformer), + typeof(ImageLoadingTransformer), + typeof(SymbolicSgdLogisticRegressionBinaryTrainer), + typeof(OnnxContext), + typeof(SsaForecastingTransformer), + typeof(VariableColumnTransform) + }; + + private static (IEnumerable epListContents, JObject manifest) BuildManifests() + { + ConsoleEnvironment env = new ConsoleEnvironment(); + + foreach (Type type in _types) + { + env.ComponentCatalog.RegisterAssembly(type.Assembly); + } + + var catalog = env.ComponentCatalog; + + var regex = new Regex(@"\r\n?|\n", RegexOptions.Compiled); + var epListContents = catalog.AllEntryPoints() + .Select(x => string.Join("\t", + x.Name, + regex.Replace(x.Description, ""), + x.Method.DeclaringType, + x.Method.Name, + x.InputType, + x.OutputType) + .Replace(Environment.NewLine, "", StringComparison.Ordinal)) + .OrderBy(x => x); + + var manifest = JsonManifestUtils.BuildAllManifests(env, catalog); + + 
// Strip newline characters from each entry point description + if (manifest[FieldNames.TopEntryPoints] != null && manifest[FieldNames.TopEntryPoints] is JArray) + { + foreach (JToken entry in manifest[FieldNames.TopEntryPoints].Children()) + if (entry[FieldNames.Desc] != null) + entry[FieldNames.Desc] = regex.Replace(entry[FieldNames.Desc].ToString(), ""); + } + + return (epListContents, manifest); + } + + public static void ShowAssemblyInfo() + { + foreach (Type type in _types) + { + Assembly assembly = type.Assembly; + Console.WriteLine(assembly.Location); + } + } + + public static void GenerateManifest(string filePath) + { + var (epListContents, jObj) = BuildManifests(); + + if (!string.IsNullOrWhiteSpace(filePath)) + File.Delete(filePath); + + using (var file = File.OpenWrite(filePath)) + using (var writer = new StreamWriter(file)) + using (var jw = new JsonTextWriter(writer)) + { + jw.Formatting = Formatting.Indented; + jObj.WriteTo(jw); + } + } + } +} diff --git a/src/ManifestGenerator/ManifestGenerator.cs b/src/ManifestGenerator/ManifestGenerator.cs index 985318f6..b872775d 100644 --- a/src/ManifestGenerator/ManifestGenerator.cs +++ b/src/ManifestGenerator/ManifestGenerator.cs @@ -3,56 +3,79 @@ // Licensed under the MIT License. 
//------------------------------------------------------------------------------ +using System; using System.IO; -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.EntryPoints.JsonUtils; -using Microsoft.ML.Runtime.ImageAnalytics; -using Microsoft.ML.Runtime.Learners; -using Microsoft.ML.Runtime.LightGBM; -using Microsoft.ML.Runtime.Model.Onnx; -using Microsoft.ML.Runtime.PipelineInference; -using Microsoft.ML.Trainers.FastTree; -using Microsoft.ML.Trainers.KMeans; -using Microsoft.ML.Trainers.PCA; -using Microsoft.ML.Trainers.SymSgd; -using Microsoft.ML.Transforms; -using Microsoft.ML.Transforms.Categorical; -using Newtonsoft.Json; - -namespace Microsoft.MachineLearning.ManifestGenerator +using System.Linq; +using Microsoft.ML.DotNetBridge; + + +namespace Microsoft.ML.ManifestGenerator { public static class ManifestGenerator { - public static void Main() + private const int ERROR_SUCCESS = 0; + private const int ERROR_BAD_ARGUMENTS = 1; + private const int ERROR_MANIFEST_INVALID = 2; + + public static void ShowUsage() { - using (var env = new ConsoleEnvironment()) + string usage = + "Usage:\n" + + " create MANIFEST_PATH Creates a new manifest given the\n" + + " current assemblies and stores it\n" + + " in the file MANIFEST_PATH.\n" + + " verify MANIFEST_PATH Checks if the manifest specified by\n" + + " MANIFEST_PATH is valid given the\n" + + " the current assemblies.\n" + + "\n"; + + Console.WriteLine(usage); + } + + public static int Main(string[] args) + { + int exitCode = ERROR_BAD_ARGUMENTS; + + if (args.Length == 2) { - env.ComponentCatalog.RegisterAssembly(typeof(TextLoader).Assembly); // ML.Data - env.ComponentCatalog.RegisterAssembly(typeof(LinearPredictor).Assembly); // ML.StandardLearners - env.ComponentCatalog.RegisterAssembly(typeof(CategoricalTransform).Assembly); // ML.Transforms - env.ComponentCatalog.RegisterAssembly(typeof(FastTreeBinaryPredictor).Assembly); // ML.FastTree - 
env.ComponentCatalog.RegisterAssembly(typeof(KMeansPredictor).Assembly); // ML.KMeansClustering - env.ComponentCatalog.RegisterAssembly(typeof(PcaPredictor).Assembly); // ML.PCA - env.ComponentCatalog.RegisterAssembly(typeof(Experiment).Assembly); // ML.Legacy - env.ComponentCatalog.RegisterAssembly(typeof(LightGbmBinaryPredictor).Assembly); - env.ComponentCatalog.RegisterAssembly(typeof(TensorFlowTransform).Assembly); - env.ComponentCatalog.RegisterAssembly(typeof(ImageLoaderTransform).Assembly); - env.ComponentCatalog.RegisterAssembly(typeof(SymSgdClassificationTrainer).Assembly); - env.ComponentCatalog.RegisterAssembly(typeof(AutoInference).Assembly); - env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly); - var catalog = env.ComponentCatalog; - var jObj = JsonManifestUtils.BuildAllManifests(env, catalog); - - var jPath = "manifest.json"; - using (var file = File.OpenWrite(jPath)) - using (var writer = new StreamWriter(file)) - using (var jw = new JsonTextWriter(writer)) + if (args[0].ToLower() == "create") { - jw.Formatting = Formatting.Indented; - jObj.WriteTo(jw); + ManifestUtils.ShowAssemblyInfo(); + ManifestUtils.GenerateManifest(args[1]); + + exitCode = ERROR_SUCCESS; } + else if (args[0].ToLower() == "verify") + { + string tmpFilePath = Path.GetTempFileName(); + ManifestUtils.GenerateManifest(tmpFilePath); + + exitCode = FilesMatch(args[1], tmpFilePath) ? 
+ exitCode = ERROR_SUCCESS : + exitCode = ERROR_MANIFEST_INVALID; + + File.Delete(tmpFilePath); + } + } + + if (exitCode == ERROR_BAD_ARGUMENTS) + { + Console.WriteLine("ManifestGenerator: Error - Invalid Arguments."); + ShowUsage(); } + + return exitCode; + } + + private static bool FilesMatch(string path1, string path2) + { + long fileLength1 = new FileInfo(path1).Length; + long fileLength2 = new FileInfo(path2).Length; + if (fileLength1 != fileLength2) return false; + + // TODO: read in only parts of the file at a time + bool bytesMatch = File.ReadAllBytes(path1).SequenceEqual(File.ReadAllBytes(path2)); + return bytesMatch; } } } diff --git a/src/ManifestGenerator/ManifestGenerator.csproj b/src/ManifestGenerator/ManifestGenerator.csproj index 4cd94610..13e69006 100644 --- a/src/ManifestGenerator/ManifestGenerator.csproj +++ b/src/ManifestGenerator/ManifestGenerator.csproj @@ -1,18 +1,24 @@  - {D3AED287-722F-4243-966E-77AD0652B38E} - Exe - Properties + netcoreapp2.1 true x64 + CORECLR ManifestGenerator ManifestGenerator false - $(OutputBase) - Debug;Release - Microsoft.MachineLearning.ManifestGenerator.ManifestGenerator + ..\$(Platform)\$(Configuration)\ + DbgWinPy3.7;DbgWinPy3.6;DbgWinPy3.5;DbgWinPy2.7;RlsWinPy3.7;RlsWinPy3.6;RlsWinPy3.5;RlsWinPy2.7;DbgLinPy3.7;DbgLinPy3.6;DbgLinPy3.5;DbgLinPy2.7;RlsLinPy3.7;RlsLinPy3.6;RlsLinPy3.5;RlsLinPy2.7;RlsMacPy3.7;RlsMacPy3.6 + Microsoft.ML.ManifestGenerator.ManifestGenerator + 0.1.0 + Microsoft Corporation + (c) Microsoft Corporation. All rights reserved. 
+ https://github.com/Microsoft/NimbusML + https://github.com/Microsoft/NimbusML + latest - netcoreapp2.0 + {D3AED287-722F-4243-966E-77AD0652B38E} + Exe @@ -24,13 +30,7 @@ - - - - - - - + diff --git a/src/ManifestGenerator/ManifestGenerator.sln b/src/ManifestGenerator/ManifestGenerator.sln deleted file mode 100644 index 56d26d1d..00000000 --- a/src/ManifestGenerator/ManifestGenerator.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.27428.2037 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ManifestGenerator", "ManifestGenerator.csproj", "{D3AED287-722F-4243-966E-77AD0652B38E}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x64 = Debug|x64 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {D3AED287-722F-4243-966E-77AD0652B38E}.Debug|x64.ActiveCfg = Debug|x64 - {D3AED287-722F-4243-966E-77AD0652B38E}.Debug|x64.Build.0 = Debug|x64 - {D3AED287-722F-4243-966E-77AD0652B38E}.Release|x64.ActiveCfg = Release|x64 - {D3AED287-722F-4243-966E-77AD0652B38E}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {620035F0-EA24-426B-BA6F-FF34BC8E14FA} - EndGlobalSection -EndGlobal diff --git a/src/ManifestGenerator/app.config b/src/ManifestGenerator/app.config index 5618aa07..7ded20c2 100644 --- a/src/ManifestGenerator/app.config +++ b/src/ManifestGenerator/app.config @@ -1,13 +1,5 @@ - - - - - - - - diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index 01e31597..af4cca04 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -342,7 +342,6 @@ - @@ -438,7 +437,6 @@ - @@ -459,6 +457,7 @@ + @@ -515,8 +514,6 @@ - - diff --git 
a/src/python/nimbusml/internal/entrypoints/_partitionedpathparser_parquetpathparser.py b/src/python/nimbusml/internal/entrypoints/_partitionedpathparser_parquetpathparser.py deleted file mode 100644 index a5c34acb..00000000 --- a/src/python/nimbusml/internal/entrypoints/_partitionedpathparser_parquetpathparser.py +++ /dev/null @@ -1,26 +0,0 @@ -# - Generated by tools/entrypoint_compiler.py: do not edit by hand -""" -ParquetPathParser -""" - - -from ..utils.entrypoints import Component - - -def parquet_path_parser( - **params): - """ - **Description** - Extract name/value pairs from Parquet formatted directory names. - Example path: Year=2018/Month=12/data1.parquet - - """ - - entrypoint_name = 'ParquetPathParser' - settings = {} - - component = Component( - name=entrypoint_name, - settings=settings, - kind='PartitionedPathParser') - return component diff --git a/src/python/nimbusml/internal/entrypoints/_partitionedpathparser_simplepathparser.py b/src/python/nimbusml/internal/entrypoints/_partitionedpathparser_simplepathparser.py deleted file mode 100644 index 3f63ac19..00000000 --- a/src/python/nimbusml/internal/entrypoints/_partitionedpathparser_simplepathparser.py +++ /dev/null @@ -1,71 +0,0 @@ -# - Generated by tools/entrypoint_compiler.py: do not edit by hand -""" -SimplePathParser -""" - - -from ..utils.entrypoints import Component -from ..utils.utils import try_set - - -def simple_path_parser( - columns=None, - type='TX', - **params): - """ - **Description** - A simple parser that extracts directory names as column values. - Column names are defined as arguments. - - :param columns: Column definitions used to override the - Partitioned Path Parser. Expected with the format - name:type:numeric-source, for example, col=MyFeature:R4:1 - (settings). - :param type: Data type of each column. (settings). 
- """ - - entrypoint_name = 'SimplePathParser' - settings = {} - - if columns is not None: - settings['Columns'] = try_set( - obj=columns, - none_acceptable=True, - is_of_type=list, - is_column=True) - if type is not None: - settings['Type'] = try_set( - obj=type, - none_acceptable=True, - is_of_type=str, - values=[ - 'I1', - 'U1', - 'I2', - 'U2', - 'I4', - 'U4', - 'I8', - 'U8', - 'R4', - 'Num', - 'R8', - 'TX', - 'Text', - 'TXT', - 'BL', - 'Bool', - 'TimeSpan', - 'TS', - 'DT', - 'DateTime', - 'DZ', - 'DateTimeZone', - 'UG', - 'U16']) - - component = Component( - name=entrypoint_name, - settings=settings, - kind='PartitionedPathParser') - return component diff --git a/src/python/nimbusml/internal/entrypoints/models_onnxconverter.py b/src/python/nimbusml/internal/entrypoints/models_onnxconverter.py deleted file mode 100644 index 3c080eb6..00000000 --- a/src/python/nimbusml/internal/entrypoints/models_onnxconverter.py +++ /dev/null @@ -1,116 +0,0 @@ -# - Generated by tools/entrypoint_compiler.py: do not edit by hand -""" -Models.OnnxConverter -""" - - -from ..utils.entrypoints import EntryPoint -from ..utils.utils import try_set, unlist - - -def models_onnxconverter( - onnx, - data_file=None, - json=None, - name=None, - domain=None, - inputs_to_drop=None, - outputs_to_drop=None, - model=None, - onnx_version='Stable', - predictive_model=None, - **params): - """ - **Description** - Converts the model to ONNX format. - - :param data_file: The data file (inputs). - :param onnx: The path to write the output ONNX to. (inputs). - :param json: The path to write the output JSON to. (inputs). - :param name: The 'name' property in the output ONNX. By default - this will be the ONNX extension-less name. (inputs). - :param domain: The 'domain' property in the output ONNX. - (inputs). - :param inputs_to_drop: Array of input column names to drop - (inputs). - :param outputs_to_drop: Array of output column names to drop - (inputs). 
- :param model: Model that needs to be converted to ONNX format. - (inputs). - :param onnx_version: The targeted ONNX version. It can be either - "Stable" or "Experimental". If "Experimental" is used, - produced model can contain components that is not officially - supported in ONNX standard. (inputs). - :param predictive_model: Predictor model that needs to be - converted to ONNX format. (inputs). - """ - - entrypoint_name = 'Models.OnnxConverter' - inputs = {} - outputs = {} - - if data_file is not None: - inputs['DataFile'] = try_set( - obj=data_file, - none_acceptable=True, - is_of_type=str) - if onnx is not None: - inputs['Onnx'] = try_set( - obj=onnx, - none_acceptable=False, - is_of_type=str) - if json is not None: - inputs['Json'] = try_set( - obj=json, - none_acceptable=True, - is_of_type=str) - if name is not None: - inputs['Name'] = try_set( - obj=name, - none_acceptable=True, - is_of_type=str, - is_column=True) - if domain is not None: - inputs['Domain'] = try_set( - obj=domain, - none_acceptable=True, - is_of_type=str) - if inputs_to_drop is not None: - inputs['InputsToDrop'] = try_set( - obj=inputs_to_drop, - none_acceptable=True, - is_of_type=list) - if outputs_to_drop is not None: - inputs['OutputsToDrop'] = try_set( - obj=outputs_to_drop, - none_acceptable=True, - is_of_type=list) - if model is not None: - inputs['Model'] = try_set( - obj=model, - none_acceptable=True, - is_of_type=str) - if onnx_version is not None: - inputs['OnnxVersion'] = try_set( - obj=onnx_version, - none_acceptable=True, - is_of_type=str, - values=[ - 'Stable', - 'Experimental']) - if predictive_model is not None: - inputs['PredictiveModel'] = try_set( - obj=predictive_model, none_acceptable=True, is_of_type=str) - - input_variables = { - x for x in unlist(inputs.values()) - if isinstance(x, str) and x.startswith("$")} - output_variables = { - x for x in unlist(outputs.values()) - if isinstance(x, str) and x.startswith("$")} - - entrypoint = EntryPoint( - 
name=entrypoint_name, inputs=inputs, outputs=outputs, - input_variables=input_variables, - output_variables=output_variables) - return entrypoint diff --git a/src/python/nimbusml/internal/entrypoints/models_schema.py b/src/python/nimbusml/internal/entrypoints/models_schema.py index 0b8b0056..096aa2e5 100644 --- a/src/python/nimbusml/internal/entrypoints/models_schema.py +++ b/src/python/nimbusml/internal/entrypoints/models_schema.py @@ -1,5 +1,6 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ -Models.Summarizer +Models.Schema """ @@ -8,23 +9,24 @@ def models_schema( - transform_model, + model, schema=None, **params): """ **Description** - Retreives input/output column schema for transform model. + Retrieve output model schema - :param transform_model: The transform model. + :param model: The transform model. (inputs). + :param schema: The model schema (outputs). """ entrypoint_name = 'Models.Schema' inputs = {} outputs = {} - if transform_model is not None: + if model is not None: inputs['Model'] = try_set( - obj=transform_model, + obj=model, none_acceptable=False, is_of_type=str) if schema is not None: @@ -32,7 +34,7 @@ def models_schema( obj=schema, none_acceptable=False, is_of_type=str) - + input_variables = { x for x in unlist(inputs.values()) if isinstance(x, str) and x.startswith("$")} diff --git a/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py b/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py index 7a5d8c71..addc2298 100644 --- a/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py +++ b/src/python/nimbusml/internal/entrypoints/transforms_datasetscorerex.py @@ -1,3 +1,4 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ Transforms.DatasetScorerEx """ diff --git a/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py b/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py index 
cfe672b7..301f1c2f 100644 --- a/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py +++ b/src/python/nimbusml/internal/entrypoints/transforms_prefixcolumnconcatenator.py @@ -1,3 +1,4 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ Transforms.PrefixColumnConcatenator """ @@ -15,10 +16,10 @@ def transforms_prefixcolumnconcatenator( **params): """ **Description** - Concatenates one or more columns of the same item type by prefix. + Concatenates one or more columns of the same item type. - :param column: New column definition(s) (optional form: - name:srcs) (inputs). + :param column: New column definition(s) (optional form: name:src) + (inputs). :param data: Input dataset (inputs). :param output_data: Transformed dataset (outputs). :param model: Transform model (outputs). diff --git a/src/python/nimbusml/internal/entrypoints/transforms_variablecolumn.py b/src/python/nimbusml/internal/entrypoints/transforms_variablecolumntransform.py similarity index 82% rename from src/python/nimbusml/internal/entrypoints/transforms_variablecolumn.py rename to src/python/nimbusml/internal/entrypoints/transforms_variablecolumntransform.py index 16fca0ad..febcffde 100644 --- a/src/python/nimbusml/internal/entrypoints/transforms_variablecolumn.py +++ b/src/python/nimbusml/internal/entrypoints/transforms_variablecolumntransform.py @@ -1,3 +1,4 @@ +# - Generated by tools/entrypoint_compiler.py: do not edit by hand """ Transforms.VariableColumnTransform """ @@ -7,7 +8,7 @@ from ..utils.utils import try_set, unlist -def transforms_variablecolumn( +def transforms_variablecolumntransform( data, output_data=None, model=None, @@ -16,10 +17,12 @@ def transforms_variablecolumn( **params): """ **Description** - Combines the specified input columns in to a - single variable length vectorized column. + Combines the specified input columns in to a single variable length + vectorized column. :param data: Input dataset (inputs). 
+ :param features: Features (inputs). + :param length_column_name: Length Column Name (inputs). :param output_data: Transformed dataset (outputs). :param model: Transform model (outputs). """ @@ -43,7 +46,8 @@ def transforms_variablecolumn( inputs['LengthColumnName'] = try_set( obj=length_column_name, none_acceptable=True, - is_of_type=str) + is_of_type=str, + is_column=True) if output_data is not None: outputs['OutputData'] = try_set( obj=output_data, diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 3e0dce27..704622a4 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -1834,7 +1834,7 @@ def get_output_columns(self, verbose=0, **params): inputs = dict([('transform_model', self.model)]) schema_node = models_schema( - transform_model="$transform_model", + model="$transform_model", schema="$output_data") all_nodes = [schema_node] diff --git a/src/python/nimbusml/tests/test_variable_column.py b/src/python/nimbusml/tests/test_variable_column.py index 6c1fc8bd..318094ff 100644 --- a/src/python/nimbusml/tests/test_variable_column.py +++ b/src/python/nimbusml/tests/test_variable_column.py @@ -8,17 +8,17 @@ import numpy as np import pandas as pd from nimbusml import Pipeline -from nimbusml.internal.entrypoints.transforms_variablecolumn import transforms_variablecolumn +from nimbusml.internal.entrypoints.transforms_variablecolumntransform import transforms_variablecolumntransform from nimbusml.internal.utils.entrypoints import Graph, DataOutputFormat class TestVariableColumn(unittest.TestCase): def to_variable_column(self, input, features=None, length_column_name=None): - node = transforms_variablecolumn(data='$data', - output_data='$output_data', - features=features, - length_column_name=length_column_name) + node = transforms_variablecolumntransform(data='$data', + output_data='$output_data', + features=features, + length_column_name=length_column_name) graph_nodes = [node] graph = Graph(dict(data=''), 
diff --git a/src/python/tools/manifest.json b/src/python/tools/manifest.json index c8e6d6e5..e54ff2c2 100644 --- a/src/python/tools/manifest.json +++ b/src/python/tools/manifest.json @@ -2194,119 +2194,6 @@ "ITrainerInput" ] }, - { - "Name": "Models.OnnxConverter", - "Desc": "Converts the model to ONNX format.", - "FriendlyName": "ONNX Converter.", - "ShortName": null, - "Inputs": [ - { - "Name": "DataFile", - "Type": "String", - "Desc": "The data file", - "Aliases": [ - "data" - ], - "Required": false, - "SortOrder": 0.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "Onnx", - "Type": "String", - "Desc": "The path to write the output ONNX to.", - "Required": true, - "SortOrder": 1.0, - "IsNullable": false - }, - { - "Name": "Json", - "Type": "String", - "Desc": "The path to write the output JSON to.", - "Required": false, - "SortOrder": 2.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "Name", - "Type": "String", - "Desc": "The 'name' property in the output ONNX. 
By default this will be the ONNX extension-less name.", - "Required": false, - "SortOrder": 3.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "Domain", - "Type": "String", - "Desc": "The 'domain' property in the output ONNX.", - "Required": false, - "SortOrder": 4.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "InputsToDrop", - "Type": { - "Kind": "Array", - "ItemType": "String" - }, - "Desc": "Array of input column names to drop", - "Required": false, - "SortOrder": 6.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "OutputsToDrop", - "Type": { - "Kind": "Array", - "ItemType": "String" - }, - "Desc": "Array of output column names to drop", - "Required": false, - "SortOrder": 8.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "Model", - "Type": "TransformModel", - "Desc": "Model that needs to be converted to ONNX format.", - "Required": false, - "SortOrder": 10.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "OnnxVersion", - "Type": { - "Kind": "Enum", - "Values": [ - "Stable", - "Experimental" - ] - }, - "Desc": "The targeted ONNX version. It can be either \"Stable\" or \"Experimental\". 
If \"Experimental\" is used, produced model can contain components that is not officially supported in ONNX standard.", - "Required": false, - "SortOrder": 11.0, - "IsNullable": false, - "Default": "Stable" - }, - { - "Name": "PredictiveModel", - "Type": "PredictorModel", - "Desc": "Predictor model that needs to be converted to ONNX format.", - "Required": false, - "SortOrder": 12.0, - "IsNullable": false, - "Default": null - } - ], - "Outputs": [] - }, { "Name": "Models.OvaModelCombiner", "Desc": "Combines a sequence of PredictorModels into a single model", @@ -3061,6 +2948,29 @@ "ITrainerOutput" ] }, + { + "Name": "Models.Schema", + "Desc": "Retrieve output model schema", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "The transform model.", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + } + ], + "Outputs": [ + { + "Name": "Schema", + "Type": "DataView", + "Desc": "The model schema" + } + ] + }, { "Name": "Models.Summarizer", "Desc": "Summarize a linear regression predictor.", @@ -18041,6 +17951,51 @@ } ] }, + { + "Name": "Transforms.DatasetScorerEx", + "Desc": "Score a dataset with a predictor model", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "The dataset to be scored", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "PredictorModel", + "Type": "PredictorModel", + "Desc": "The predictor model to apply to data", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false + }, + { + "Name": "Suffix", + "Type": "String", + "Desc": "Suffix to append to the score columns", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "ScoredData", + "Type": "DataView", + "Desc": "The scored dataset" + }, + { + "Name": "ScoringTransform", + "Type": "TransformModel", + "Desc": "The scoring transform" + } + ] + }, { 
"Name": "Transforms.DatasetTransformScorer", "Desc": "Score a dataset with a transform model", @@ -21825,6 +21780,82 @@ "ITransformOutput" ] }, + { + "Name": "Transforms.PrefixColumnConcatenator", + "Desc": "Concatenates one or more columns of the same item type.", + "FriendlyName": "Concat Transform", + "ShortName": "Concat", + "Inputs": [ + { + "Name": "Column", + "Type": { + "Kind": "Array", + "ItemType": { + "Kind": "Struct", + "Fields": [ + { + "Name": "Name", + "Type": "String", + "Desc": "Name of the new column", + "Aliases": [ + "name" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "Source", + "Type": "String", + "Desc": "Name of the source column", + "Aliases": [ + "src" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + } + ] + } + }, + "Desc": "New column definition(s) (optional form: name:src)", + "Aliases": [ + "col" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Data", + "Type": "DataView", + "Desc": "Input dataset", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + } + ], + "Outputs": [ + { + "Name": "OutputData", + "Type": "DataView", + "Desc": "Transformed dataset" + }, + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "Transform model" + } + ], + "InputKind": [ + "ITransformInput" + ], + "OutputKind": [ + "ITransformOutput" + ] + }, { "Name": "Transforms.RandomNumberGenerator", "Desc": "Adds a column with a generated number sequence.", @@ -23088,6 +23119,61 @@ } ] }, + { + "Name": "Transforms.VariableColumnTransform", + "Desc": "Combines the specified input columns in to a single variable length vectorized column.", + "FriendlyName": "Variable Column Creator", + "ShortName": "Variable Column Creator", + "Inputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "Input dataset", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Features", + 
"Type": { + "Kind": "Array", + "ItemType": "String" + }, + "Desc": "Features", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "LengthColumnName", + "Type": "String", + "Desc": "Length Column Name", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "OutputData", + "Type": "DataView", + "Desc": "Transformed dataset" + }, + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "Transform model" + } + ], + "InputKind": [ + "ITransformInput" + ], + "OutputKind": [ + "ITransformOutput" + ] + }, { "Name": "Transforms.VectorToImage", "Desc": "Converts vector array into image type.", @@ -28962,140 +29048,6 @@ } ] }, - { - "Kind": "PartitionedPathParser", - "Components": [ - { - "Name": "ParquetPathParser", - "Desc": "Extract name/value pairs from Parquet formatted directory names. Example path: Year=2018/Month=12/data1.parquet", - "FriendlyName": "Parquet Partitioned Path Parser", - "Aliases": [ - "ParqPP" - ], - "Settings": [] - }, - { - "Name": "SimplePathParser", - "Desc": "A simple parser that extracts directory names as column values. 
Column names are defined as arguments.", - "FriendlyName": "Simple Partitioned Path Parser", - "Aliases": [ - "SmplPP" - ], - "Settings": [ - { - "Name": "Columns", - "Type": { - "Kind": "Array", - "ItemType": { - "Kind": "Struct", - "Fields": [ - { - "Name": "Name", - "Type": "String", - "Desc": "Name of the column.", - "Required": true, - "SortOrder": 150.0, - "IsNullable": false - }, - { - "Name": "Type", - "Type": { - "Kind": "Enum", - "Values": [ - "I1", - "U1", - "I2", - "U2", - "I4", - "U4", - "I8", - "U8", - "R4", - "Num", - "R8", - "TX", - "Text", - "TXT", - "BL", - "Bool", - "TimeSpan", - "TS", - "DT", - "DateTime", - "DZ", - "DateTimeZone", - "UG", - "U16" - ] - }, - "Desc": "Data type of the column.", - "Required": false, - "SortOrder": 150.0, - "IsNullable": true, - "Default": null - }, - { - "Name": "Source", - "Type": "Int", - "Desc": "Index of the directory representing this column.", - "Required": true, - "SortOrder": 150.0, - "IsNullable": false, - "Default": 0 - } - ] - } - }, - "Desc": "Column definitions used to override the Partitioned Path Parser. Expected with the format name:type:numeric-source, for example, col=MyFeature:R4:1", - "Aliases": [ - "col" - ], - "Required": false, - "SortOrder": 1.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "Type", - "Type": { - "Kind": "Enum", - "Values": [ - "I1", - "U1", - "I2", - "U2", - "I4", - "U4", - "I8", - "U8", - "R4", - "Num", - "R8", - "TX", - "Text", - "TXT", - "BL", - "Bool", - "TimeSpan", - "TS", - "DT", - "DateTime", - "DZ", - "DateTimeZone", - "UG", - "U16" - ] - }, - "Desc": "Data type of each column.", - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": "TX" - } - ] - } - ] - }, { "Kind": "RegressionLossFunction", "Components": [