diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 53a2db738d..5af98034bf 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -39,7 +39,7 @@ public static void Example() FeatureColumnName = "Features", Booster = new DartBooster.Options { - DropRate = 0.15, + TreeDropFraction = 0.15, XgboostDartMode = false } })) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs index c3bd9d604e..2e616dfee1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs @@ -20,10 +20,10 @@ public static void Example() // Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations. var pipeline = mlContext.Ranking.Trainers.LightGbm( - numLeaves: 4, - minDataPerLeaf: 10, + numberOfLeaves: 4, + minimumExampleCountPerLeaf: 10, learningRate: 0.1, - numBoostRound: 2); + numberOfIterations: 2); // Fit this Pipeline to the Training Data. var model = pipeline.Fit(split.TrainSet); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs index dc898fb4d3..101d08ec13 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs @@ -23,10 +23,10 @@ public static void Example() var pipeline = mlContext.Ranking.Trainers.LightGbm( new Options { - NumLeaves = 4, - MinDataPerLeaf = 10, + NumberOfLeaves = 4, + MinimumExampleCountPerLeaf = 10, LearningRate = 0.1, - NumBoostRound = 2, + NumberOfIterations = 2, Booster = new TreeBooster.Options { FeatureFraction = 0.9 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs index ce9e27a0fc..d23aebf141 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs @@ -35,8 +35,8 @@ public static void Example() var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) .Append(mlContext.Regression.Trainers.LightGbm( labelColumnName: labelName, - numLeaves: 4, - minDataPerLeaf: 6, + numberOfLeaves: 4, + minimumExampleCountPerLeaf: 6, learningRate: 0.001)); // Fit this pipeline to the training data.
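
Reviewer note: a hedged usage sketch, not part of this patch, showing all four renamed arguments of the convenience overload together. It assumes an existing MLContext named mlContext and a loaded IDataView named trainData with "Label" and "Features" columns; the values mirror the samples above, and the numberOfIterations default of 100 comes from Options.Defaults in this patch.

    // Hedged sketch: mlContext and trainData are assumed to exist (not part of this patch).
    var trainer = mlContext.Regression.Trainers.LightGbm(
        labelColumnName: "Label",
        featureColumnName: "Features",
        numberOfLeaves: 4,               // was: numLeaves
        minimumExampleCountPerLeaf: 6,   // was: minDataPerLeaf
        learningRate: 0.001,
        numberOfIterations: 100);        // was: numBoostRound (default remains 100)
    var model = trainer.Fit(trainData);
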
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs index e93eeb3f96..260c546e7f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs @@ -39,8 +39,8 @@ public static void Example() .Append(mlContext.Regression.Trainers.LightGbm(new Options { LabelColumnName = labelName, - NumLeaves = 4, - MinDataPerLeaf = 6, + NumberOfLeaves = 4, + MinimumExampleCountPerLeaf = 6, LearningRate = 0.001, Booster = new GossBooster.Options { diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs index 97a30add47..5228c356dc 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs @@ -78,8 +78,8 @@ public static void LightGbmBinaryClassification() Score: mlContext.BinaryClassification.Trainers.LightGbm( row.Label, row.Features, - numLeaves: 4, - minDataPerLeaf: 6, + numberOfLeaves: 4, + minimumExampleCountPerLeaf: 6, learningRate: 0.001))) .Append(row => ( Label: row.Label, diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs index 61225fe1e9..cab1700636 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs @@ -38,8 +38,8 @@ public static void LightGbmRegression() .Append(r => (r.label, score: mlContext.Regression.Trainers.LightGbm( r.label, r.features, - numLeaves: 4, - minDataPerLeaf: 6, + numberOfLeaves: 4, + minimumExampleCountPerLeaf: 6, learningRate: 0.001, onFit: p => pred = p) ) diff --git a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs index 6e97ea3451..fb84489ff0 100644 --- a/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs +++ b/src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs @@ -21,10 +21,10 @@ public static class LightGbmStaticExtensions /// The label column. /// The features column. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. /// A delegate that is called every time the /// method is called on the /// instance created out of this. This delegate will receive @@ -39,19 +39,19 @@ public static class LightGbmStaticExtensions /// public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers catalog, Scalar label, Vector features, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? 
learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit); var rec = new TrainerEstimatorReconciler.Regression( (env, labelName, featuresName, weightsName) => { - var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, + minimumExampleCountPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); return trainer; @@ -104,10 +104,10 @@ public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers c /// The label column. /// The features column. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. /// A delegate that is called every time the /// method is called on the /// instance created out of this. This delegate will receive @@ -122,20 +122,22 @@ public static Scalar LightGbm(this RegressionCatalog.RegressionTrainers c /// ]]> /// public static (Scalar score, Scalar probability, Scalar predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, - Scalar label, Vector features, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + Scalar label, + Vector features, + Scalar weights = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action> onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit); var rec = new TrainerEstimatorReconciler.BinaryClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, + minimumExampleCountPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); @@ -194,10 +196,10 @@ public static (Scalar score, Scalar probability, Scalar pred /// The features column. /// The groupId column. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. 
/// A delegate that is called every time the /// method is called on the /// instance created out of this. This delegate will receive @@ -206,21 +208,24 @@ public static (Scalar score, Scalar probability, Scalar pred /// The set of output columns including in order the predicted binary classification score (which will range /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label. public static Scalar LightGbm(this RankingCatalog.RankingTrainers catalog, - Scalar label, Vector features, Key groupId, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + Scalar label, + Vector features, + Key groupId, + Scalar weights = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit); Contracts.CheckValue(groupId, nameof(groupId)); var rec = new TrainerEstimatorReconciler.Ranker( (env, labelName, featuresName, groupIdName, weightsName) => { - var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numberOfLeaves, + minimumExampleCountPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); @@ -279,10 +284,10 @@ public static Scalar LightGbm(this RankingCatalog.RankingTrainers c /// The label, or dependent variable. /// The features, or independent variables. /// The weights column. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. /// A delegate that is called every time the /// method is called on the /// instance created out of this. This delegate will receive @@ -301,19 +306,19 @@ public static (Vector score, Key predictedLabel) Key label, Vector features, Scalar weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? 
learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound, + int numberOfIterations = Options.Defaults.NumberOfIterations, Action onFit = null) { - CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit); + CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit); var rec = new TrainerEstimatorReconciler.MulticlassClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numLeaves, - minDataPerLeaf, learningRate, numBoostRound); + var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numberOfLeaves, + minimumExampleCountPerLeaf, learningRate, numberOfIterations); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); @@ -365,8 +370,8 @@ public static (Vector score, Key predictedLabel) } private static void CheckUserValues(PipelineColumn label, Vector features, Scalar weights, - int? numLeaves, - int? minDataPerLeaf, + int? numberOfLeaves, + int? minimumExampleCountPerLeaf, double? learningRate, int numBoostRound, Delegate onFit) @@ -374,8 +379,8 @@ private static void CheckUserValues(PipelineColumn label, Vector features Contracts.CheckValue(label, nameof(label)); Contracts.CheckValue(features, nameof(features)); Contracts.CheckValueOrNull(weights); - Contracts.CheckParam(!(numLeaves < 2), nameof(numLeaves), "Must be at least 2."); - Contracts.CheckParam(!(minDataPerLeaf <= 0), nameof(minDataPerLeaf), "Must be positive"); + Contracts.CheckParam(!(numberOfLeaves < 2), nameof(numberOfLeaves), "Must be at least 2."); + Contracts.CheckParam(!(minimumExampleCountPerLeaf <= 0), nameof(minimumExampleCountPerLeaf), "Must be positive"); Contracts.CheckParam(!(learningRate <= 0), nameof(learningRate), "Must be positive"); Contracts.CheckParam(numBoostRound > 0, nameof(numBoostRound), "Must be positive"); Contracts.CheckValueOrNull(onFit); diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs index 0b635bfaed..21dbb0753f 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs @@ -46,9 +46,9 @@ public sealed class Options : TrainerInputBaseWithGroupId public abstract class BoosterParameter : IBoosterParameter where TOptions : class, new() { - protected TOptions BoosterParameterOptions { get; } + private protected TOptions BoosterParameterOptions { get; } - protected BoosterParameter(TOptions options) + private protected BoosterParameter(TOptions options) { BoosterParameterOptions = options; } @@ -66,15 +66,19 @@ internal virtual void UpdateParameters(Dictionary res) if (attribute == null) continue; - res[GetArgName(field.Name)] = field.GetValue(BoosterParameterOptions); + res[GetOptionName(field.Name)] = field.GetValue(BoosterParameterOptions); } } void IBoosterParameter.UpdateParameters(Dictionary res) => UpdateParameters(res); } - private static string GetArgName(string name) + private static string GetOptionName(string name) { + if (_nameMapping.ContainsKey(name)) + return _nameMapping[name]; + + // Otherwise convert the name to the light gbm argument StringBuilder strBuf = new StringBuilder(); bool first = true; foreach (char c in name) @@ -93,10 +97,36 @@ private static string GetArgName(string name) return strBuf.ToString(); } + // Static override name map that maps friendly names to lightGBM arguments. 
+ // If an argument is not here, then its name is identical to a lightGBM argument + // and does not require a mapping, for example, Subsample. + private static Dictionary _nameMapping = new Dictionary() + { + {nameof(TreeBooster.Options.MinimumSplitGain), "min_split_gain" }, + {nameof(TreeBooster.Options.MaximumTreeDepth), "max_depth"}, + {nameof(TreeBooster.Options.MinimumChildWeight), "min_child_weight"}, + {nameof(TreeBooster.Options.SubsampleFraction), "subsample"}, + {nameof(TreeBooster.Options.SubsampleFrequency), "subsample_freq"}, + {nameof(TreeBooster.Options.L1Regularization), "reg_alpha"}, + {nameof(TreeBooster.Options.L2Regularization), "reg_lambda"}, + {nameof(TreeBooster.Options.WeightOfPositiveExamples), "scale_pos_weight"}, + {nameof(DartBooster.Options.TreeDropFraction), "drop_rate" }, + {nameof(DartBooster.Options.MaximumNumberOfDroppedTreesPerRound),"max_drop" }, + {nameof(DartBooster.Options.SkipDropFraction), "skip_drop" }, + {nameof(MinimumExampleCountPerLeaf), "min_data_per_leaf"}, + {nameof(NumberOfLeaves), "num_leaves"}, + {nameof(MaximumBinCountPerFeature), "max_bin" }, + {nameof(CustomGains), "label_gain" }, + {nameof(MinimumExampleCountPerGroup), "min_data_per_group" }, + {nameof(MaximumCategoricalSplitPointCount), "max_cat_threshold" }, + {nameof(CategoricalSmoothing), "cat_smooth" }, + {nameof(L2CategoricalRegularization), "cat_l2" } + }; + [BestFriend] internal static class Defaults { - public const int NumBoostRound = 100; + public const int NumberOfIterations = 100; } public sealed class TreeBooster : BoosterParameter @@ -107,38 +137,39 @@ public sealed class TreeBooster : BoosterParameter [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Traditional Gradient Boosting Decision Tree.")] public class Options : ISupportBoosterParameterFactory { - [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when classes are not balanced.", ShortName = "us")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Use for binary classification when training data is not balanced.", ShortName = "us")] public bool UnbalancedSets = false; [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, " + "the more conservative the algorithm will be.")] [TlcModule.Range(Min = 0.0)] - public double MinSplitGain = 0; + public double MinimumSplitGain = 0; [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.")] [TlcModule.Range(Min = 0, Max = int.MaxValue)] - public int MaxDepth = 0; + public int MaximumTreeDepth = 0; [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf " + "node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, " + "this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.")] [TlcModule.Range(Min = 0.0)] - public double MinChildWeight = 0.1; + public double MinimumChildWeight = 0.1; [Argument(ArgumentType.AtMostOnce, - HelpText = "Subsample frequency. 0 means no subsample. " - + "If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.")] + HelpText = "Subsample frequency for bagging. 0 means no subsample. 
" + + "Specifies the frequency at which the bagging occurs, where if this is set to N, the subsampling will happen at every N iterations." + + "This must be set with Subsample as this specifies the amount to subsample.")] [TlcModule.Range(Min = 0, Max = int.MaxValue)] - public int SubsampleFreq = 0; + public int SubsampleFrequency = 0; [Argument(ArgumentType.AtMostOnce, HelpText = "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected " + "half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] - public double Subsample = 1; + public double SubsampleFraction = 1; [Argument(ArgumentType.AtMostOnce, HelpText = "Subsample ratio of columns when constructing each tree. Range: (0,1].", @@ -152,7 +183,7 @@ public class Options : ISupportBoosterParameterFactory [TlcModule.Range(Min = 0.0)] [TGUI(Label = "Lambda(L2)", SuggestedSweeps = "0,0.5,1")] [TlcModule.SweepableDiscreteParam("RegLambda", new object[] { 0f, 0.5f, 1f })] - public double RegLambda = 0.01; + public double L2Regularization = 0.01; [Argument(ArgumentType.AtMostOnce, HelpText = "L1 regularization term on weights, increase this value will make model more conservative.", @@ -160,12 +191,13 @@ public class Options : ISupportBoosterParameterFactory [TlcModule.Range(Min = 0.0)] [TGUI(Label = "Alpha(L1)", SuggestedSweeps = "0,0.5,1")] [TlcModule.SweepableDiscreteParam("RegAlpha", new object[] { 0f, 0.5f, 1f })] - public double RegAlpha = 0; + public double L1Regularization = 0; [Argument(ArgumentType.AtMostOnce, HelpText = "Control the balance of positive and negative weights, useful for unbalanced classes." + - " A typical value to consider: sum(negative cases) / sum(positive cases).")] - public double ScalePosWeight = 1; + " A typical value to consider: sum(negative cases) / sum(positive cases).", + ShortName = "ScalePosWeight")] + public double WeightOfPositiveExamples = 1; internal virtual IBoosterParameter CreateComponent(IHostEnvironment env) => new TreeBooster(this); @@ -175,11 +207,13 @@ public class Options : ISupportBoosterParameterFactory internal TreeBooster(Options options) : base(options) { - Contracts.CheckUserArg(BoosterParameterOptions.MinSplitGain >= 0, nameof(BoosterParameterOptions.MinSplitGain), "must be >= 0."); - Contracts.CheckUserArg(BoosterParameterOptions.MinChildWeight >= 0, nameof(BoosterParameterOptions.MinChildWeight), "must be >= 0."); - Contracts.CheckUserArg(BoosterParameterOptions.Subsample > 0 && BoosterParameterOptions.Subsample <= 1, nameof(BoosterParameterOptions.Subsample), "must be in (0,1]."); + Contracts.CheckUserArg(BoosterParameterOptions.MinimumSplitGain >= 0, nameof(BoosterParameterOptions.MinimumSplitGain), "must be >= 0."); + Contracts.CheckUserArg(BoosterParameterOptions.MinimumChildWeight >= 0, nameof(BoosterParameterOptions.MinimumChildWeight), "must be >= 0."); + Contracts.CheckUserArg(BoosterParameterOptions.SubsampleFraction > 0 && BoosterParameterOptions.SubsampleFraction <= 1, nameof(BoosterParameterOptions.SubsampleFraction), "must be in (0,1]."); Contracts.CheckUserArg(BoosterParameterOptions.FeatureFraction > 0 && BoosterParameterOptions.FeatureFraction <= 1, nameof(BoosterParameterOptions.FeatureFraction), "must be in (0,1]."); - Contracts.CheckUserArg(BoosterParameterOptions.ScalePosWeight > 0 && BoosterParameterOptions.ScalePosWeight <= 1, nameof(BoosterParameterOptions.ScalePosWeight), "must be in (0,1]."); + 
Contracts.CheckUserArg(BoosterParameterOptions.L2Regularization >= 0, nameof(BoosterParameterOptions.L2Regularization), "must be >= 0."); + Contracts.CheckUserArg(BoosterParameterOptions.L1Regularization >= 0, nameof(BoosterParameterOptions.L1Regularization), "must be >= 0."); + Contracts.CheckUserArg(BoosterParameterOptions.WeightOfPositiveExamples > 0, nameof(BoosterParameterOptions.WeightOfPositiveExamples), "must be > 0."); } internal override void UpdateParameters(Dictionary res) @@ -197,17 +231,17 @@ public sealed class DartBooster : BoosterParameter [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Dropouts meet Multiple Additive Regression Trees. See https://arxiv.org/abs/1505.01866")] public sealed class Options : TreeBooster.Options { - [Argument(ArgumentType.AtMostOnce, HelpText = "Drop ratio for trees. Range:(0,1).")] + [Argument(ArgumentType.AtMostOnce, HelpText = "The drop ratio for trees. Range:(0,1).")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] - public double DropRate = 0.1; + public double TreeDropFraction = 0.1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of dropped tree in a boosting round.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of dropped trees in a boosting round.")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] - public int MaxDrop = 1; + public int MaximumNumberOfDroppedTreesPerRound = 1; - [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not perform dropping in a boosting round.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not dropping in a boosting round.")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] - public double SkipDrop = 0.5; + public double SkipDropFraction = 0.5; [Argument(ArgumentType.AtMostOnce, HelpText = "True will enable xgboost dart mode.")] public bool XgboostDartMode = false; @@ -221,9 +255,8 @@ public sealed class Options : TreeBooster.Options internal DartBooster(Options options) : base(options) { - Contracts.CheckUserArg(BoosterParameterOptions.DropRate > 0 && BoosterParameterOptions.DropRate < 1, nameof(BoosterParameterOptions.DropRate), "must be in (0,1)."); - Contracts.CheckUserArg(BoosterParameterOptions.MaxDrop > 0, nameof(BoosterParameterOptions.MaxDrop), "must be > 0."); - Contracts.CheckUserArg(BoosterParameterOptions.SkipDrop >= 0 && BoosterParameterOptions.SkipDrop < 1, nameof(BoosterParameterOptions.SkipDrop), "must be in [0,1)."); + Contracts.CheckUserArg(BoosterParameterOptions.TreeDropFraction > 0 && BoosterParameterOptions.TreeDropFraction < 1, nameof(BoosterParameterOptions.TreeDropFraction), "must be in (0,1)."); + Contracts.CheckUserArg(BoosterParameterOptions.SkipDropFraction >= 0 && BoosterParameterOptions.SkipDropFraction < 1, nameof(BoosterParameterOptions.SkipDropFraction), "must be in [0,1)."); } internal override void UpdateParameters(Dictionary res) @@ -241,14 +274,11 @@ public sealed class GossBooster : BoosterParameter [TlcModule.Component(Name = Name, FriendlyName = FriendlyName, Desc = "Gradient-based One-Side Sampling.")] public sealed class Options : TreeBooster.Options { - [Argument(ArgumentType.AtMostOnce, - HelpText = "Retain ratio for large gradient instances.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for large gradient instances.")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] public double TopRate = 0.2; - [Argument(ArgumentType.AtMostOnce, - HelpText = - "Retain ratio for small gradient instances.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Retain ratio for small gradient instances.")] 
[TlcModule.Range(Inf = 0.0, Max = 1.0)] public double OtherRate = 0.1; @@ -287,7 +317,7 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Number of iterations.", SortOrder = 1, ShortName = "iter")] [TGUI(Label = "Number of boosting iterations", SuggestedSweeps = "10,20,50,100,150,200")] [TlcModule.SweepableDiscreteParam("NumBoostRound", new object[] { 10, 20, 50, 100, 150, 200 })] - public int NumBoostRound = Defaults.NumBoostRound; + public int NumberOfIterations = Defaults.NumberOfIterations; [Argument(ArgumentType.AtMostOnce, HelpText = "Shrinkage rate for trees, used to prevent over-fitting. Range: (0,1].", @@ -300,33 +330,33 @@ public enum EvalMetricType SortOrder = 2, ShortName = "nl", NullName = "")] [TGUI(Description = "The maximum number of leaves per tree", SuggestedSweeps = "2-128;log;inc:4")] [TlcModule.SweepableLongParamAttribute("NumLeaves", 2, 128, isLogScale: true, stepSize: 4)] - public int? NumLeaves; + public int? NumberOfLeaves; [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances needed in a child.", SortOrder = 2, ShortName = "mil", NullName = "")] [TGUI(Label = "Min Documents In Leaves", SuggestedSweeps = "1,10,20,50 ")] [TlcModule.SweepableDiscreteParamAttribute("MinDataPerLeaf", new object[] { 1, 10, 20, 50 })] - public int? MinDataPerLeaf; + public int? MinimumExampleCountPerLeaf; - [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bucket bin for features.", ShortName = "mb")] - public int MaxBin = 255; + [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of bucket bin for features.", ShortName = "mb")] + public int MaximumBinCountPerFeature = 255; [Argument(ArgumentType.Multiple, HelpText = "Which booster to use, can be gbtree, gblinear or dart. gbtree and dart use tree based model while gblinear uses linear function.", SortOrder = 3)] public ISupportBoosterParameterFactory Booster = new TreeBooster.Options(); [Argument(ArgumentType.AtMostOnce, HelpText = "Verbose", ShortName = "v")] - public bool VerboseEval = false; + public bool Verbose = false; [Argument(ArgumentType.AtMostOnce, HelpText = "Printing running messages.")] public bool Silent = true; [Argument(ArgumentType.AtMostOnce, HelpText = "Number of parallel threads used to run LightGBM.", ShortName = "nt")] - public int? NThread; + public int? NumberOfThreads; [Argument(ArgumentType.AtMostOnce, HelpText = "Evaluation metrics.", ShortName = "em")] - public EvalMetricType EvalMetric = EvalMetricType.DefaultMetric; + public EvalMetricType EvaluationMetric = EvalMetricType.DefaultMetric; [Argument(ArgumentType.AtMostOnce, HelpText = "Use softmax loss for the multi classification.")] [TlcModule.SweepableDiscreteParam("UseSoftmax", new object[] { true, false })] @@ -350,31 +380,31 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Enable categorical split or not.", ShortName = "cat")] [TlcModule.SweepableDiscreteParam("UseCat", new object[] { true, false })] - public bool? UseCat; + public bool? 
UseCategoricalSplit; - [Argument(ArgumentType.AtMostOnce, HelpText = "Enable missing value auto infer or not.")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Enable special handling of missing values or not.")] [TlcModule.SweepableDiscreteParam("UseMissing", new object[] { true, false })] - public bool UseMissing = false; + public bool HandleMissingValue = false; - [Argument(ArgumentType.AtMostOnce, HelpText = "Min number of instances per categorical group.", ShortName = "mdpg")] + [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances per categorical group.", ShortName = "mdpg")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] [TlcModule.SweepableDiscreteParam("MinDataPerGroup", new object[] { 10, 50, 100, 200 })] - public int MinDataPerGroup = 100; + public int MinimumExampleCountPerGroup = 100; [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of categorical thresholds.", ShortName = "maxcat")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] [TlcModule.SweepableDiscreteParam("MaxCatThreshold", new object[] { 8, 16, 32, 64 })] - public int MaxCatThreshold = 32; + public int MaximumCategoricalSplitPointCount = 32; [Argument(ArgumentType.AtMostOnce, HelpText = "Laplace smoothing term in categorical feature split. Avoid the bias of small categories.")] [TlcModule.Range(Min = 0.0)] [TlcModule.SweepableDiscreteParam("CatSmooth", new object[] { 1, 10, 20 })] - public double CatSmooth = 10; + public double CategoricalSmoothing = 10; [Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization for categorical split.")] [TlcModule.Range(Min = 0.0)] [TlcModule.SweepableDiscreteParam("CatL2", new object[] { 0.1, 0.5, 1, 5, 10 })] - public double CatL2 = 10; + public double L2CategoricalRegularization = 10; [Argument(ArgumentType.AtMostOnce, HelpText = "Sets the random seed for LightGBM to use.")] public int? Seed; @@ -385,23 +415,23 @@ public enum EvalMetricType internal Dictionary ToDictionary(IHost host) { Contracts.CheckValue(host, nameof(host)); - Contracts.CheckUserArg(MaxBin > 0, nameof(MaxBin), "must be > 0."); + Contracts.CheckUserArg(MaximumBinCountPerFeature > 0, nameof(MaximumBinCountPerFeature), "must be > 0."); Contracts.CheckUserArg(Sigmoid > 0, nameof(Sigmoid), "must be > 0."); Dictionary res = new Dictionary(); var boosterParams = Booster.CreateComponent(host); boosterParams.UpdateParameters(res); - res[GetArgName(nameof(MaxBin))] = MaxBin; + res[GetOptionName(nameof(MaximumBinCountPerFeature))] = MaximumBinCountPerFeature; res["verbose"] = Silent ? "-1" : "1"; - if (NThread.HasValue) - res["nthread"] = NThread.Value; + if (NumberOfThreads.HasValue) + res["nthread"] = NumberOfThreads.Value; res["seed"] = (Seed.HasValue) ? 
Seed : host.Rand.Next(); string metric = null; - switch (EvalMetric) + switch (EvaluationMetric) { case EvalMetricType.DefaultMetric: break; @@ -424,18 +454,18 @@ internal Dictionary ToDictionary(IHost host) case EvalMetricType.Auc: case EvalMetricType.Ndcg: case EvalMetricType.Map: - metric = EvalMetric.ToString().ToLower(); + metric = EvaluationMetric.ToString().ToLower(); break; } if (!string.IsNullOrEmpty(metric)) - res["metric"] = metric; - res["sigmoid"] = Sigmoid; - res["label_gain"] = CustomGains; - res[GetArgName(nameof(UseMissing))] = UseMissing; - res[GetArgName(nameof(MinDataPerGroup))] = MinDataPerGroup; - res[GetArgName(nameof(MaxCatThreshold))] = MaxCatThreshold; - res[GetArgName(nameof(CatSmooth))] = CatSmooth; - res[GetArgName(nameof(CatL2))] = CatL2; + res[GetOptionName(nameof(metric))] = metric; + res[GetOptionName(nameof(Sigmoid))] = Sigmoid; + res[GetOptionName(nameof(CustomGains))] = CustomGains; + res[GetOptionName(nameof(HandleMissingValue))] = HandleMissingValue; + res[GetOptionName(nameof(MinimumExampleCountPerGroup))] = MinimumExampleCountPerGroup; + res[GetOptionName(nameof(MaximumCategoricalSplitPointCount))] = MaximumCategoricalSplitPointCount; + res[GetOptionName(nameof(CategoricalSmoothing))] = CategoricalSmoothing; + res[GetOptionName(nameof(L2CategoricalRegularization))] = L2CategoricalRegularization; return res; } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index 5df67201f8..66f28f98b1 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -8,7 +8,6 @@ using Microsoft.ML.Data; using Microsoft.ML.EntryPoints; using Microsoft.ML.LightGBM; -using Microsoft.ML.Model; using Microsoft.ML.Trainers; using Microsoft.ML.Trainers.FastTree; @@ -24,7 +23,6 @@ namespace Microsoft.ML.LightGBM { - /// public sealed class LightGbmBinaryModelParameters : TreeEnsembleModelParametersBasedOnRegressionTree { internal const string LoaderSignature = "LightGBMBinaryExec"; @@ -81,7 +79,13 @@ private static IPredictorProducing Create(IHostEnvironment env, ModelLoad } } - /// + /// + /// Trains a Light GBM Model. + /// + /// + /// Light GBM is an open source implementation of boosted trees. + /// GitHub: LightGBM + /// public sealed class LightGbmBinaryTrainer : LightGbmTrainerBase>, CalibratedModelParametersBase> @@ -102,22 +106,22 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, Options options) /// Initializes a new instance of /// /// The private instance of . - /// The name of The label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The name of the label column. + /// The name of the feature column. + /// The name of the example weight column (optional). + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. internal LightGbmBinaryTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, - string weights = null, - int? numLeaves = null, - int? 
minDataPerLeaf = null, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string exampleWeightColumnName = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations) { } @@ -138,7 +142,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); + $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be unsigned int, boolean or float."); } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs index 3e38fa248a..ce610af9b2 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs @@ -20,10 +20,10 @@ public static class LightGbmExtensions /// The name of the label column. /// The name of the feature column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. /// /// /// @@ -72,10 +72,10 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionCatalog.Regressio /// The name of the label column. /// The name of the feature column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. /// /// /// @@ -125,23 +125,23 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.Bi /// The name of the feature column. /// The name of the group column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainers catalog, string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string rowGroupColumnName = DefaultColumnNames.GroupId, string exampleWeightColumnName = null, - int? 
numLeaves = null, - int? minDataPerLeaf = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? learningRate = null, - int numBoostRound = Options.Defaults.NumBoostRound) + int numberOfIterations = Options.Defaults.NumberOfIterations) { Contracts.CheckValue(catalog, nameof(catalog)); var env = CatalogUtils.GetEnvironment(catalog); - return new LightGbmRankingTrainer(env, labelColumnName, featureColumnName, rowGroupColumnName, exampleWeightColumnName, numLeaves, minDataPerLeaf, learningRate, numBoostRound); + return new LightGbmRankingTrainer(env, labelColumnName, featureColumnName, rowGroupColumnName, exampleWeightColumnName, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations); } /// @@ -164,10 +164,10 @@ public static LightGbmRankingTrainer LightGbm(this RankingCatalog.RankingTrainer /// The name of the label column. /// The name of the feature column. /// The name of the example weight column (optional). - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. /// /// /// diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index d31cf72eb5..6e01c37c7d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -19,8 +19,6 @@ namespace Microsoft.ML.LightGBM { - - /// public sealed class LightGbmMulticlassTrainer : LightGbmTrainerBase, MulticlassPredictionTransformer, OneVersusAllModelParameters> { internal const string Summary = "LightGBM Multi Class Classifier"; @@ -43,22 +41,22 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options) /// Initializes a new instance of /// /// The private instance of . - /// The name of The label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The name of the label column. + /// The name of the feature column. + /// The name of the example weight column (optional). + /// The number of leaves to use. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// The number of iterations to use. internal LightGbmMulticlassTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, - string weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, double? 
learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations) { _numClass = -1; } @@ -110,7 +108,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); + $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be unsigned int, boolean or float."); } } @@ -132,9 +130,9 @@ private protected override void ConvertNaNLabels(IChannel ch, RoleMappedData dat maxLabel = Math.Max(maxLabel, labelColumn); } } - ch.CheckParam(minLabel >= 0, nameof(data), "min labelColumn cannot be negative"); + ch.CheckParam(minLabel >= 0, nameof(data), "Minimum value in label column cannot be negative"); if (maxLabel >= _maxNumClass) - throw ch.ExceptParam(nameof(data), $"max labelColumn cannot exceed {_maxNumClass}"); + throw ch.ExceptParam(nameof(data), $"Maximum value {maxLabel} in label column exceeds {_maxNumClass}"); if (data.Schema.Label.Value.Type is KeyType keyType) { @@ -159,20 +157,20 @@ private protected override void ConvertNaNLabels(IChannel ch, RoleMappedData dat labels[i] = defaultLabel; } - protected override void GetDefaultParameters(IChannel ch, int numRow, bool hasCategorical, int totalCats, bool hiddenMsg = false) + private protected override void GetDefaultParameters(IChannel ch, int numRow, bool hasCategorical, int totalCats, bool hiddenMsg = false) { base.GetDefaultParameters(ch, numRow, hasCategorical, totalCats, true); - int numLeaves = (int)Options["num_leaves"]; - int minDataPerLeaf = LightGbmTrainerOptions.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, _numClass); - Options["min_data_per_leaf"] = minDataPerLeaf; + int numberOfLeaves = (int)Options["num_leaves"]; + int minimumExampleCountPerLeaf = LightGbmTrainerOptions.MinimumExampleCountPerLeaf ?? 
DefaultMinDataPerLeaf(numRow, numberOfLeaves, _numClass); + Options["min_data_per_leaf"] = minimumExampleCountPerLeaf; if (!hiddenMsg) { if (!LightGbmTrainerOptions.LearningRate.HasValue) ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.LearningRate) + " = " + Options["learning_rate"]); - if (!LightGbmTrainerOptions.NumLeaves.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumLeaves) + " = " + numLeaves); - if (!LightGbmTrainerOptions.MinDataPerLeaf.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinDataPerLeaf) + " = " + minDataPerLeaf); + if (!LightGbmTrainerOptions.NumberOfLeaves.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumberOfLeaves) + " = " + numberOfLeaves); + if (!LightGbmTrainerOptions.MinimumExampleCountPerLeaf.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinimumExampleCountPerLeaf) + " = " + minimumExampleCountPerLeaf); } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 482a04f0d9..5e1cfe311d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -22,7 +22,6 @@ namespace Microsoft.ML.LightGBM { - public sealed class LightGbmRankingModelParameters : TreeEnsembleModelParametersBasedOnRegressionTree { internal const string LoaderSignature = "LightGBMRankerExec"; @@ -71,7 +70,6 @@ private static LightGbmRankingModelParameters Create(IHostEnvironment env, Model } } - /// public sealed class LightGbmRankingTrainer : LightGbmTrainerBase, LightGbmRankingModelParameters> { internal const string UserName = "LightGBM Ranking"; @@ -89,26 +87,28 @@ internal LightGbmRankingTrainer(IHostEnvironment env, Options options) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name of the column containing the group ID. - /// The name of the optional column containing the initial weights. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The name of the label column. + /// The name of the feature column. + /// The name of the column containing the group ID. + /// The name of the optional column containing the initial weights. + /// The number of leaves to use. /// The learning rate. + /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. + /// The number of iterations to use. internal LightGbmRankingTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, - string groupId = DefaultColumnNames.GroupId, - string weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string rowGroupColumnName = DefaultColumnNames.GroupId, + string weightsColumnName = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? 
learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), + featureColumnName, weightsColumnName, rowGroupColumnName, numberOfLeaves, + minimumExampleCountPerLeaf, learningRate, numberOfIterations) { - Host.CheckNonEmpty(groupId, nameof(groupId)); + Host.CheckNonEmpty(rowGroupColumnName, nameof(rowGroupColumnName)); } private protected override void CheckDataValid(IChannel ch, RoleMappedData data) @@ -121,7 +121,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{labelCol.Name}' is of type '{labelType}', but must be key or R4."); + $"Label column '{labelCol.Name}' is of type '{labelType.RawType}', but must be unsigned int or float."); } // Check group types. ch.CheckParam(data.Schema.Group.HasValue, nameof(data), "Need a group column."); @@ -130,7 +130,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(groupType == NumberDataViewType.UInt32 || groupType is KeyType)) { throw ch.ExceptParam(nameof(data), - $"Group column '{groupCol.Name}' is of type '{groupType}', but must be U4 or a Key."); + $"Group column '{groupCol.Name}' is of type '{groupType.RawType}', but must be unsigned int."); } } @@ -139,7 +139,7 @@ private protected override void CheckLabelCompatible(SchemaShape.Column labelCol Contracts.Assert(labelCol.IsValid); Action error = - () => throw Host.ExceptSchemaMismatch(nameof(labelCol), "label", labelCol.Name, "float or KeyType", labelCol.GetTypeString()); + () => throw Host.ExceptSchemaMismatch(nameof(labelCol), "label", labelCol.Name, "float or unsigned int", labelCol.GetTypeString()); if (labelCol.Kind != SchemaShape.Column.VectorKind.Scalar) error(); diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index 3729154cbd..b1981250e3 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -21,7 +21,6 @@ namespace Microsoft.ML.LightGBM { - /// public sealed class LightGbmRegressionModelParameters : TreeEnsembleModelParametersBasedOnRegressionTree { internal const string LoaderSignature = "LightGBMRegressionExec"; @@ -73,7 +72,6 @@ private static LightGbmRegressionModelParameters Create(IHostEnvironment env, Mo } } - /// public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase, LightGbmRegressionModelParameters> { internal const string Summary = "LightGBM Regression"; @@ -87,22 +85,22 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. - /// The number of leaves to use. - /// Number of iterations. - /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. + /// The name of the label column. + /// The name of the feature column. + /// The name of the example weight column (optional). + /// The number of leaves to use. 
+ /// The minimal number of data points allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. + /// Number of iterations. internal LightGbmRegressorTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, - string weights = null, - int? numLeaves = null, - int? minDataPerLeaf = null, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string exampleWeightColumnName = null, + int? numberOfLeaves = null, + int? minimumExampleCountPerLeaf = null, double? learningRate = null, - int numBoostRound = LightGBM.Options.Defaults.NumBoostRound) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound) + int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations) { } @@ -127,7 +125,7 @@ private protected override void CheckDataValid(IChannel ch, RoleMappedData data) if (!(labelType is BooleanDataViewType || labelType is KeyType || labelType == NumberDataViewType.Single)) { throw ch.ExceptParam(nameof(data), - $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType}', but must be key, boolean or R4."); + $"Label column '{data.Schema.Label.Value.Name}' is of type '{labelType.RawType}', but must be an unsigned int, boolean or float."); } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs index 614251c117..5d4504add7 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmTrainerBase.cs @@ -58,27 +58,28 @@ private sealed class CategoricalMetaData private protected LightGbmTrainerBase(IHostEnvironment env, string name, - SchemaShape.Column label, - string featureColumn, - string weightColumn, - string groupIdColumn, - int? numLeaves, - int? minDataPerLeaf, + SchemaShape.Column labelColumn, + string featureColumnName, + string exampleWeightColumnName, + string rowGroupColumnName, + int? numberOfLeaves, + int? minimumExampleCountPerLeaf, double? 
learningRate, - int numBoostRound) - : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), TrainerUtils.MakeU4ScalarColumn(groupIdColumn)) + int numberOfIterations) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumnName), + labelColumn, TrainerUtils.MakeR4ScalarWeightColumn(exampleWeightColumnName), TrainerUtils.MakeU4ScalarColumn(rowGroupColumnName)) { LightGbmTrainerOptions = new Options(); - LightGbmTrainerOptions.NumLeaves = numLeaves; - LightGbmTrainerOptions.MinDataPerLeaf = minDataPerLeaf; + LightGbmTrainerOptions.NumberOfLeaves = numberOfLeaves; + LightGbmTrainerOptions.MinimumExampleCountPerLeaf = minimumExampleCountPerLeaf; LightGbmTrainerOptions.LearningRate = learningRate; - LightGbmTrainerOptions.NumBoostRound = numBoostRound; + LightGbmTrainerOptions.NumberOfIterations = numberOfIterations; - LightGbmTrainerOptions.LabelColumnName = label.Name; - LightGbmTrainerOptions.FeatureColumnName = featureColumn; - LightGbmTrainerOptions.ExampleWeightColumnName = weightColumn; - LightGbmTrainerOptions.RowGroupColumnName = groupIdColumn; + LightGbmTrainerOptions.LabelColumnName = labelColumn.Name; + LightGbmTrainerOptions.FeatureColumnName = featureColumnName; + LightGbmTrainerOptions.ExampleWeightColumnName = exampleWeightColumnName; + LightGbmTrainerOptions.RowGroupColumnName = rowGroupColumnName; InitParallelTraining(); } @@ -164,22 +165,22 @@ private protected virtual void CheckDataValid(IChannel ch, RoleMappedData data) ch.CheckParam(data.Schema.Label.HasValue, nameof(data), "Need a label column"); } - protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCategarical, int totalCats, bool hiddenMsg = false) + private protected virtual void GetDefaultParameters(IChannel ch, int numRow, bool hasCategarical, int totalCats, bool hiddenMsg = false) { double learningRate = LightGbmTrainerOptions.LearningRate ?? DefaultLearningRate(numRow, hasCategarical, totalCats); - int numLeaves = LightGbmTrainerOptions.NumLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats); - int minDataPerLeaf = LightGbmTrainerOptions.MinDataPerLeaf ?? DefaultMinDataPerLeaf(numRow, numLeaves, 1); + int numberOfLeaves = LightGbmTrainerOptions.NumberOfLeaves ?? DefaultNumLeaves(numRow, hasCategarical, totalCats); + int minimumExampleCountPerLeaf = LightGbmTrainerOptions.MinimumExampleCountPerLeaf ?? 
DefaultMinDataPerLeaf(numRow, numberOfLeaves, 1); Options["learning_rate"] = learningRate; - Options["num_leaves"] = numLeaves; - Options["min_data_per_leaf"] = minDataPerLeaf; + Options["num_leaves"] = numberOfLeaves; + Options["min_data_per_leaf"] = minimumExampleCountPerLeaf; if (!hiddenMsg) { if (!LightGbmTrainerOptions.LearningRate.HasValue) ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.LearningRate) + " = " + learningRate); - if (!LightGbmTrainerOptions.NumLeaves.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumLeaves) + " = " + numLeaves); - if (!LightGbmTrainerOptions.MinDataPerLeaf.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinDataPerLeaf) + " = " + minDataPerLeaf); + if (!LightGbmTrainerOptions.NumberOfLeaves.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.NumberOfLeaves) + " = " + numberOfLeaves); + if (!LightGbmTrainerOptions.MinimumExampleCountPerLeaf.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.MinimumExampleCountPerLeaf) + " = " + minimumExampleCountPerLeaf); } } @@ -274,9 +275,9 @@ private CategoricalMetaData GetCategoricalMetaData(IChannel ch, RoleMappedData t int[] categoricalFeatures = null; const int useCatThreshold = 50000; // Disable cat when data is too small, reduce the overfitting. - bool useCat = LightGbmTrainerOptions.UseCat ?? numRow > useCatThreshold; - if (!LightGbmTrainerOptions.UseCat.HasValue) - ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCat) + " = " + useCat); + bool useCat = LightGbmTrainerOptions.UseCategoricalSplit ?? numRow > useCatThreshold; + if (!LightGbmTrainerOptions.UseCategoricalSplit.HasValue) + ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseCategoricalSplit) + " = " + useCat); if (useCat) { var featureCol = trainData.Schema.Schema[DefaultColumnNames.Features]; @@ -369,8 +370,8 @@ private void TrainCore(IChannel ch, IProgressChannel pch, Dataset dtrain, Catego { ch.Info("LightGBM objective={0}", Options["objective"]); using (Booster bst = WrappedLightGbmTraining.Train(ch, pch, Options, dtrain, - dvalid: dvalid, numIteration: LightGbmTrainerOptions.NumBoostRound, - verboseEval: LightGbmTrainerOptions.VerboseEval, earlyStoppingRound: LightGbmTrainerOptions.EarlyStoppingRound)) + dvalid: dvalid, numIteration: LightGbmTrainerOptions.NumberOfIterations, + verboseEval: LightGbmTrainerOptions.Verbose, earlyStoppingRound: LightGbmTrainerOptions.EarlyStoppingRound)) { TrainedEnsemble = bst.GetModel(catMetaData.CategoricalBoudaries); } @@ -872,11 +873,11 @@ private static int DefaultNumLeaves(int numRow, bool useCat, int totalCats) return 30; } - protected static int DefaultMinDataPerLeaf(int numRow, int numLeaves, int numClass) + private protected static int DefaultMinDataPerLeaf(int numRow, int numberOfLeaves, int numClass) { if (numClass > 1) { - int ret = numRow / numLeaves / numClass / 10; + int ret = numRow / numberOfLeaves / numClass / 10; ret = Math.Max(ret, 5); ret = Math.Min(ret, 50); return ret; diff --git a/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs b/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs index e5384a8c6c..21c3eb4a13 100644 --- a/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs +++ b/src/Microsoft.ML.LightGBM/WrappedLightGbmBooster.cs @@ -205,9 +205,9 @@ public InternalTreeEnsemble GetModel(int[] categoricalFeatureBoudaries) kvPairs[kv[0].Trim()] = kv[1].Trim(); ++i; } - int numLeaves = 
int.Parse(kvPairs["num_leaves"], CultureInfo.InvariantCulture); + int numberOfLeaves = int.Parse(kvPairs["num_leaves"], CultureInfo.InvariantCulture); int numCat = int.Parse(kvPairs["num_cat"], CultureInfo.InvariantCulture); - if (numLeaves > 1) + if (numberOfLeaves > 1) { var leftChild = Str2IntArray(kvPairs["left_child"], ' '); var rightChild = Str2IntArray(kvPairs["right_child"], ' '); @@ -217,12 +217,12 @@ public InternalTreeEnsemble GetModel(int[] categoricalFeatureBoudaries) var leafOutput = Str2DoubleArray(kvPairs["leaf_value"], ' '); var decisionType = Str2UIntArray(kvPairs["decision_type"], ' '); var defaultValue = GetDefalutValue(threshold, decisionType); - var categoricalSplitFeatures = new int[numLeaves - 1][]; - var categoricalSplit = new bool[numLeaves - 1]; + var categoricalSplitFeatures = new int[numberOfLeaves - 1][]; + var categoricalSplit = new bool[numberOfLeaves - 1]; if (categoricalFeatureBoudaries != null) { // Add offsets to split features. - for (int node = 0; node < numLeaves - 1; ++node) + for (int node = 0; node < numberOfLeaves - 1; ++node) splitFeature[node] = categoricalFeatureBoudaries[splitFeature[node]]; } @@ -230,7 +230,7 @@ public InternalTreeEnsemble GetModel(int[] categoricalFeatureBoudaries) { var catBoundaries = Str2IntArray(kvPairs["cat_boundaries"], ' '); var catThreshold = Str2UIntArray(kvPairs["cat_threshold"], ' '); - for (int node = 0; node < numLeaves - 1; ++node) + for (int node = 0; node < numberOfLeaves - 1; ++node) { if (GetIsCategoricalSplit(decisionType[node])) { @@ -254,7 +254,7 @@ public InternalTreeEnsemble GetModel(int[] categoricalFeatureBoudaries) } } } - InternalRegressionTree tree = InternalRegressionTree.Create(numLeaves, splitFeature, splitGain, + InternalRegressionTree tree = InternalRegressionTree.Create(numberOfLeaves, splitFeature, splitGain, threshold.Select(x => (float)(x)).ToArray(), defaultValue.Select(x => (float)(x)).ToArray(), leftChild, rightChild, leafOutput, categoricalSplitFeatures, categoricalSplit); res.AddTree(tree);
diff --git a/src/Microsoft.ML.LightGBM/doc.xml b/src/Microsoft.ML.LightGBM/doc.xml
deleted file mode 100644
index 1fcd38dd7a..0000000000
--- a/src/Microsoft.ML.LightGBM/doc.xml
+++ /dev/null
@@ -1,79 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<doc>
-  <members>
-
-    <member name="LightGBM">
-      <summary>
-        Trains a Light GBM Model.
-      </summary>
-      <remarks>
-        Light GBM is an open source implementation of boosted trees.
-        <a href="https://github.com/Microsoft/LightGBM">GitHub: LightGBM</a>
-      </remarks>
-    </member>
-
-    <example name="LightGbmBinaryClassifier">
-      <code language="csharp">
-        new LightGbmBinaryClassifier
-        {
-          NumBoostRound = 200,
-          LearningRate = 0.5f,
-          NumLeaves = 32,
-          MinDataPerLeaf = 20
-        }
-      </code>
-    </example>
-
-    <example name="LightGbmClassifier">
-      <code language="csharp">
-        new LightGbmClassifier
-        {
-          NumBoostRound = 200,
-          LearningRate = 0.5f,
-          NumLeaves = 32,
-          MinDataPerLeaf = 20
-        }
-      </code>
-    </example>
-
-    <example name="LightGbmRegressor">
-      <code language="csharp">
-        new LightGbmRegressor
-        {
-          NumBoostRound = 100,
-          LearningRate = 0.5f,
-          NumLeaves = 32,
-          MinDataPerLeaf = 20,
-          Booster = new DartBoosterParameterFunction
-          {
-            XgboostDartMode = true,
-            UniformDrop = true
-          }
-        }
-      </code>
-    </example>
-
-    <example name="LightGbmRanker">
-      <code language="csharp">
-        new LightGbmRanker
-        {
-          NumBoostRound = 100,
-          LearningRate = 0.5f,
-          NumLeaves = 32,
-          MinDataPerLeaf = 20,
-          Booster = new GbdtBoosterParameterFunction
-          {
-            MinSplitGain = 3,
-            MaxDepth = 200,
-            Subsample = 0.5
-          }
-        }
-      </code>
-    </example>
-
-  </members>
-</doc>
\ No newline at end of file
diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index c91fae0244..3fe119d98a 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -121,15 +121,15 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options) /// </summary> /// <param name="env">The local instance of the <see cref="IHostEnvironment"/>.</param> /// <param name="lossFunction">The classification loss function.</param> - /// <param name="labelColumn">The name of the label column.</param> - /// <param name="featureColumn">The name of the feature column.</param> + /// <param name="labelColumnName">The name of the label column.</param> + /// <param name="featureColumnName">The name of the feature column.</param> /// <param name="learningRate">The learning rate.</param> /// <param name="decreaseLearningRate">Whether to decrease learning rate as iterations progress.</param> /// <param name="l2RegularizerWeight">L2 Regularization Weight.</param> /// <param name="numIterations">The number of training iterations.</param> internal AveragedPerceptronTrainer(IHostEnvironment env, - string labelColumn = DefaultColumnNames.Label, - string featureColumn = DefaultColumnNames.Features, + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, IClassificationLoss lossFunction = null, float learningRate = Options.AveragedDefault.LearningRate, bool decreaseLearningRate = Options.AveragedDefault.DecreaseLearningRate, @@ -137,8 +137,8 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, int numIterations = Options.AveragedDefault.NumIterations) : this(env, new Options { - LabelColumnName = labelColumn, - FeatureColumnName = featureColumn, + LabelColumnName = labelColumnName, + FeatureColumnName = featureColumnName, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index d61f007216..8368b448e2 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -11129,7 +11129,7 @@ "ShortName": "LightGBM", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -11181,7 +11181,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -11200,7 +11200,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumExampleCountPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -11322,9 +11322,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBinCountPerFeature", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ 
-11334,7 +11334,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -11355,7 +11355,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -11367,7 +11367,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -11454,7 +11454,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -11473,9 +11473,9 @@ } }, { - "Name": "UseMissing", + "Name": "HandleMissingValue", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -11489,9 +11489,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumExampleCountPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -11514,7 +11514,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalSplitPointCount", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -11539,7 +11539,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.", "Required": false, @@ -11559,7 +11559,7 @@ } }, { - "Name": "CatL2", + "Name": "L2CategoricalRegularization", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -11632,7 +11632,7 @@ "ShortName": "LightGBMMC", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -11684,7 +11684,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -11703,7 +11703,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumExampleCountPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -11825,9 +11825,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBinCountPerFeature", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -11837,7 +11837,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -11858,7 +11858,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -11870,7 +11870,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -11957,7 +11957,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -11976,9 +11976,9 @@ } }, { - "Name": "UseMissing", + "Name": "HandleMissingValue", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -11992,9 +11992,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumExampleCountPerGroup", "Type": "Int", - 
"Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -12017,7 +12017,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalSplitPointCount", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -12042,7 +12042,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.", "Required": false, @@ -12062,7 +12062,7 @@ } }, { - "Name": "CatL2", + "Name": "L2CategoricalRegularization", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -12135,7 +12135,7 @@ "ShortName": "LightGBMRank", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -12187,7 +12187,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -12206,7 +12206,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumExampleCountPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -12328,9 +12328,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBinCountPerFeature", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -12340,7 +12340,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -12361,7 +12361,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -12373,7 +12373,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -12460,7 +12460,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -12479,9 +12479,9 @@ } }, { - "Name": "UseMissing", + "Name": "HandleMissingValue", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -12495,9 +12495,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumExampleCountPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -12520,7 +12520,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalSplitPointCount", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -12545,7 +12545,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. 
Avoid the bias of small categories.", "Required": false, @@ -12565,7 +12565,7 @@ } }, { - "Name": "CatL2", + "Name": "L2CategoricalRegularization", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -12638,7 +12638,7 @@ "ShortName": "LightGBMR", "Inputs": [ { - "Name": "NumBoostRound", + "Name": "NumberOfIterations", "Type": "Int", "Desc": "Number of iterations.", "Aliases": [ @@ -12690,7 +12690,7 @@ } }, { - "Name": "NumLeaves", + "Name": "NumberOfLeaves", "Type": "Int", "Desc": "Maximum leaves for trees.", "Aliases": [ @@ -12709,7 +12709,7 @@ } }, { - "Name": "MinDataPerLeaf", + "Name": "MinimumExampleCountPerLeaf", "Type": "Int", "Desc": "Minimum number of instances needed in a child.", "Aliases": [ @@ -12831,9 +12831,9 @@ "Default": "Auto" }, { - "Name": "MaxBin", + "Name": "MaximumBinCountPerFeature", "Type": "Int", - "Desc": "Max number of bucket bin for features.", + "Desc": "Maximum number of bucket bin for features.", "Aliases": [ "mb" ], @@ -12843,7 +12843,7 @@ "Default": 255 }, { - "Name": "VerboseEval", + "Name": "Verbose", "Type": "Bool", "Desc": "Verbose", "Aliases": [ @@ -12864,7 +12864,7 @@ "Default": true }, { - "Name": "NThread", + "Name": "NumberOfThreads", "Type": "Int", "Desc": "Number of parallel threads used to run LightGBM.", "Aliases": [ @@ -12876,7 +12876,7 @@ "Default": null }, { - "Name": "EvalMetric", + "Name": "EvaluationMetric", "Type": { "Kind": "Enum", "Values": [ @@ -12963,7 +12963,7 @@ "Default": 1048576 }, { - "Name": "UseCat", + "Name": "UseCategoricalSplit", "Type": "Bool", "Desc": "Enable categorical split or not.", "Aliases": [ @@ -12982,9 +12982,9 @@ } }, { - "Name": "UseMissing", + "Name": "HandleMissingValue", "Type": "Bool", - "Desc": "Enable missing value auto infer or not.", + "Desc": "Enable special handling of missing value or not.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -12998,9 +12998,9 @@ } }, { - "Name": "MinDataPerGroup", + "Name": "MinimumExampleCountPerGroup", "Type": "Int", - "Desc": "Min number of instances per categorical group.", + "Desc": "Minimum number of instances per categorical group.", "Aliases": [ "mdpg" ], @@ -13023,7 +13023,7 @@ } }, { - "Name": "MaxCatThreshold", + "Name": "MaximumCategoricalSplitPointCount", "Type": "Int", "Desc": "Max number of categorical thresholds.", "Aliases": [ @@ -13048,7 +13048,7 @@ } }, { - "Name": "CatSmooth", + "Name": "CategoricalSmoothing", "Type": "Float", "Desc": "Lapalace smooth term in categorical feature spilt. Avoid the bias of small categories.", "Required": false, @@ -13068,7 +13068,7 @@ } }, { - "Name": "CatL2", + "Name": "L2CategoricalRegularization", "Type": "Float", "Desc": "L2 Regularization for categorical split.", "Required": false, @@ -23562,9 +23562,9 @@ "FriendlyName": "Tree Dropout Tree Booster", "Settings": [ { - "Name": "DropRate", + "Name": "TreeDropFraction", "Type": "Float", - "Desc": "Drop ratio for trees. Range:(0,1).", + "Desc": "The drop ratio for trees. 
Range:(0,1).", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23575,9 +23575,9 @@ } }, { - "Name": "MaxDrop", + "Name": "MaximumNumberOfDroppedTreesPerRound", "Type": "Int", - "Desc": "Max number of dropped tree in a boosting round.", + "Desc": "Maximum number of dropped trees in a boosting round.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23588,9 +23588,9 @@ } }, { - "Name": "SkipDrop", + "Name": "SkipDropFraction", "Type": "Float", - "Desc": "Probability for not perform dropping in a boosting round.", + "Desc": "Probability for not dropping in a boosting round.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23621,7 +23621,7 @@ { "Name": "UnbalancedSets", "Type": "Bool", - "Desc": "Use for binary classification when classes are not balanced.", + "Desc": "Use for binary classification when training data is not balanced.", "Aliases": [ "us" ], @@ -23631,7 +23631,7 @@ "Default": false }, { - "Name": "MinSplitGain", + "Name": "MinimumSplitGain", "Type": "Float", "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.", "Required": false, @@ -23643,7 +23643,7 @@ } }, { - "Name": "MaxDepth", + "Name": "MaximumTreeDepth", "Type": "Int", "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.", "Required": false, @@ -23656,7 +23656,7 @@ } }, { - "Name": "MinChildWeight", + "Name": "MinimumChildWeight", "Type": "Float", "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.", "Required": false, @@ -23668,9 +23668,9 @@ } }, { - "Name": "SubsampleFreq", + "Name": "SubsampleFrequency", "Type": "Int", - "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", + "Desc": "Subsample frequency for bagging. 0 means no subsample. Specifies the frequency at which the bagging occurs, where if this is set to N, the subsampling will happen at every N iterations.This must be set with Subsample as this specifies the amount to subsample.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23681,7 +23681,7 @@ } }, { - "Name": "Subsample", + "Name": "SubsampleFraction", "Type": "Float", "Desc": "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].", "Required": false, @@ -23710,7 +23710,7 @@ } }, { - "Name": "RegLambda", + "Name": "L2Regularization", "Type": "Float", "Desc": "L2 regularization term on weights, increasing this value will make model more conservative.", "Aliases": [ @@ -23733,7 +23733,7 @@ } }, { - "Name": "RegAlpha", + "Name": "L1Regularization", "Type": "Float", "Desc": "L1 regularization term on weights, increase this value will make model more conservative.", "Aliases": [ @@ -23756,9 +23756,12 @@ } }, { - "Name": "ScalePosWeight", + "Name": "WeightOfPositiveExamples", "Type": "Float", "Desc": "Control the balance of positive and negative weights, useful for unbalanced classes. 
A typical value to consider: sum(negative cases) / sum(positive cases).", + "Aliases": [ + "ScalePosWeight" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23774,7 +23777,7 @@ { "Name": "UnbalancedSets", "Type": "Bool", - "Desc": "Use for binary classification when classes are not balanced.", + "Desc": "Use for binary classification when training data is not balanced.", "Aliases": [ "us" ], @@ -23784,7 +23787,7 @@ "Default": false }, { - "Name": "MinSplitGain", + "Name": "MinimumSplitGain", "Type": "Float", "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.", "Required": false, @@ -23796,7 +23799,7 @@ } }, { - "Name": "MaxDepth", + "Name": "MaximumTreeDepth", "Type": "Int", "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.", "Required": false, @@ -23809,7 +23812,7 @@ } }, { - "Name": "MinChildWeight", + "Name": "MinimumChildWeight", "Type": "Float", "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.", "Required": false, @@ -23821,9 +23824,9 @@ } }, { - "Name": "SubsampleFreq", + "Name": "SubsampleFrequency", "Type": "Int", - "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", + "Desc": "Subsample frequency for bagging. 0 means no subsample. Specifies the frequency at which the bagging occurs, where if this is set to N, the subsampling will happen at every N iterations.This must be set with Subsample as this specifies the amount to subsample.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23834,7 +23837,7 @@ } }, { - "Name": "Subsample", + "Name": "SubsampleFraction", "Type": "Float", "Desc": "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].", "Required": false, @@ -23863,7 +23866,7 @@ } }, { - "Name": "RegLambda", + "Name": "L2Regularization", "Type": "Float", "Desc": "L2 regularization term on weights, increasing this value will make model more conservative.", "Aliases": [ @@ -23886,7 +23889,7 @@ } }, { - "Name": "RegAlpha", + "Name": "L1Regularization", "Type": "Float", "Desc": "L1 regularization term on weights, increase this value will make model more conservative.", "Aliases": [ @@ -23909,9 +23912,12 @@ } }, { - "Name": "ScalePosWeight", + "Name": "WeightOfPositiveExamples", "Type": "Float", "Desc": "Control the balance of positive and negative weights, useful for unbalanced classes. 
A typical value to consider: sum(negative cases) / sum(positive cases).", + "Aliases": [ + "ScalePosWeight" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -23953,7 +23959,7 @@ { "Name": "UnbalancedSets", "Type": "Bool", - "Desc": "Use for binary classification when classes are not balanced.", + "Desc": "Use for binary classification when training data is not balanced.", "Aliases": [ "us" ], @@ -23963,7 +23969,7 @@ "Default": false }, { - "Name": "MinSplitGain", + "Name": "MinimumSplitGain", "Type": "Float", "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.", "Required": false, @@ -23975,7 +23981,7 @@ } }, { - "Name": "MaxDepth", + "Name": "MaximumTreeDepth", "Type": "Int", "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.", "Required": false, @@ -23988,7 +23994,7 @@ } }, { - "Name": "MinChildWeight", + "Name": "MinimumChildWeight", "Type": "Float", "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.", "Required": false, @@ -24000,9 +24006,9 @@ } }, { - "Name": "SubsampleFreq", + "Name": "SubsampleFrequency", "Type": "Int", - "Desc": "Subsample frequency. 0 means no subsample. If subsampleFreq > 0, it will use a subset(ratio=subsample) to train. And the subset will be updated on every Subsample iteratinos.", + "Desc": "Subsample frequency for bagging. 0 means no subsample. Specifies the frequency at which the bagging occurs, where if this is set to N, the subsampling will happen at every N iterations.This must be set with Subsample as this specifies the amount to subsample.", "Required": false, "SortOrder": 150.0, "IsNullable": false, @@ -24013,7 +24019,7 @@ } }, { - "Name": "Subsample", + "Name": "SubsampleFraction", "Type": "Float", "Desc": "Subsample ratio of the training instance. Setting it to 0.5 means that LightGBM randomly collected half of the data instances to grow trees and this will prevent overfitting. Range: (0,1].", "Required": false, @@ -24042,7 +24048,7 @@ } }, { - "Name": "RegLambda", + "Name": "L2Regularization", "Type": "Float", "Desc": "L2 regularization term on weights, increasing this value will make model more conservative.", "Aliases": [ @@ -24065,7 +24071,7 @@ } }, { - "Name": "RegAlpha", + "Name": "L1Regularization", "Type": "Float", "Desc": "L1 regularization term on weights, increase this value will make model more conservative.", "Aliases": [ @@ -24088,9 +24094,12 @@ } }, { - "Name": "ScalePosWeight", + "Name": "WeightOfPositiveExamples", "Type": "Float", "Desc": "Control the balance of positive and negative weights, useful for unbalanced classes. 
A typical value to consider: sum(negative cases) / sum(positive cases).", + "Aliases": [ + "ScalePosWeight" + ], "Required": false, "SortOrder": 150.0, "IsNullable": false, diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt index 44635aa70a..e79b7a61e7 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMDart-CV-breast-cancer.dart-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBM{nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20} threads=- cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0)) diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt index 232f6326d4..bfa0bf3f97 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMDart-TrainTest-breast-cancer.dart-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBM{nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt index 8bd89002c1..a331a81b7e 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-CV-breast-cancer.goss-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBM{nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20} threads=- cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. 
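For orientation: the maml switches in these DART baselines (nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20) exercise exactly the options renamed in this change. Below is a minimal C# sketch of that configuration under the new names; the MLContext mlContext and IDataView trainData are assumed for illustration and do not appear in this diff, and the TreeDropFraction value is likewise illustrative, since the baseline runs DART with its defaults.

    using Microsoft.ML;
    using Microsoft.ML.LightGBM;

    // nt=1 iter=10 booster=dart lr=0.2 mil=10 nl=20, expressed with the post-rename names.
    var trainer = mlContext.BinaryClassification.Trainers.LightGbm(new Options
    {
        NumberOfThreads = 1,              // was NThread (nt)
        NumberOfIterations = 10,          // was NumBoostRound (iter)
        LearningRate = 0.2,               // lr
        MinimumExampleCountPerLeaf = 10,  // was MinDataPerLeaf (mil)
        NumberOfLeaves = 20,              // was NumLeaves (nl)
        Booster = new DartBooster.Options
        {
            TreeDropFraction = 0.1        // was DropRate; value chosen for illustration
        }
    });
    var model = trainer.Fit(trainData);
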
TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0)) diff --git a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt index ba50420a1d..d249f34e1b 100644 --- a/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt +++ b/test/BaselineOutput/Common/LightGBM/LightGBMGoss-TrainTest-breast-cancer.goss-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBM{nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt index 2496917e04..391a8665ce 100644 --- a/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt +++ b/test/BaselineOutput/Common/LightGBMBinary/LightGBM-TrainTest-breast-cancer-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMBinary{nt=1 nl=5 mil=5 lr=0.25 iter=20 mb=255} cache=- dout=%Output% loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=binary Not training a calibrator because it is not needed. TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt index dcd0b08107..5d529e5e17 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.key-out.txt @@ -1,11 +1,11 @@ maml.exe CV tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:TX:0 col=Features:1-*} data=%Data% seed=1 xf=Term{col=Label} Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False Auto-tuning parameters: UseSoftmax = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False Auto-tuning parameters: UseSoftmax = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt index db69b4b0d8..0902395bec 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-CV-iris.keyU404-out.txt @@ -1,11 +1,11 @@ maml.exe CV tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:U4[0-2]:0 col=Features:1-4} data=%Data% seed=1 Not adding a normalizer. 
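The GOSS baselines (v=+ booster=goss) additionally cover the VerboseEval-to-Verbose rename. A sketch under the same assumptions as above (mlContext, trainData, and the shared Microsoft.ML.LightGBM Options type):

    // nt=1 iter=10 v=+ booster=goss lr=0.2 mil=10 nl=20 under the new names.
    var trainer = mlContext.BinaryClassification.Trainers.LightGbm(new Options
    {
        NumberOfThreads = 1,
        NumberOfIterations = 10,
        Verbose = true,                     // was VerboseEval (the v=+ switch)
        LearningRate = 0.2,
        MinimumExampleCountPerLeaf = 10,
        NumberOfLeaves = 20,
        Booster = new GossBooster.Options() // gradient-based one-side sampling, defaults
    });
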
-Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False Auto-tuning parameters: UseSoftmax = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False Auto-tuning parameters: UseSoftmax = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt index 1c4cb95912..5fba9409c5 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.key-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:TX:0 col=Features:1-*} data=%Data% out=%Output% seed=1 xf=Term{col=Label} Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False Auto-tuning parameters: UseSoftmax = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt index 1de8c3d919..ff43c349eb 100644 --- a/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt +++ b/test/BaselineOutput/Common/LightGBMMC/LightGBMMC-TrainTest-iris.keyU404-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMMC{nt=1 iter=10 v=- lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:U4[0-2]:0 col=Features:1-4} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False Auto-tuning parameters: UseSoftmax = False LightGBM objective=multiclassova Not training a calibrator because it is not needed. diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt index afa867d488..1fc6084997 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-CV-generatedRegressionDataset-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBMR{nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. 
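The regression baselines above pass booster=gbdt{l1=0.2 l2=0.2}; after this rename those booster switches surface as L1Regularization and L2Regularization (formerly RegAlpha and RegLambda) on TreeBooster.Options. A sketch, in the same assumed context as the earlier examples:

    // nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20.
    var trainer = mlContext.Regression.Trainers.LightGbm(new Options
    {
        NumberOfThreads = 1,
        NumberOfIterations = 50,
        Verbose = true,
        LearningRate = 0.2,
        MinimumExampleCountPerLeaf = 10,
        NumberOfLeaves = 20,
        Booster = new TreeBooster.Options
        {
            L1Regularization = 0.2,         // was RegAlpha (l1)
            L2Regularization = 0.2          // was RegLambda (l2)
        }
    });
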
L1(avg): 27.477977 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt index f15a4bb020..909d9f0012 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMReg-TrainTest-generatedRegressionDataset-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 v=+ booster=gbdt{l1=0.2 l2=0.2} lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 3.472291 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt index c2530555e1..4550a80d3c 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-CV-generatedRegressionDataset.MAE-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBMR{nt=1 iter=50 em=mae v=+ lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 27.482854 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt index aaad5d20e5..59d2ceaa05 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegMae-TrainTest-generatedRegressionDataset.MAE-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 em=mae v=+ lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 3.428896 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt index 483c724038..71d131bb5a 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-CV-generatedRegressionDataset.RMSE-out.txt @@ -1,10 +1,10 @@ maml.exe CV tr=LightGBMR{nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20} threads=- dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1 Not adding a normalizer. 
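The MAE and RMSE baselines use the em switch, which corresponds to the renamed EvaluationMetric option (formerly EvalMetric). A sketch in the same assumed context; the enum member spelling (Options.EvalMetricType.Rmse) is an assumption here, since the manifest excerpt above truncates the enum's values:

    // nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20.
    var trainer = mlContext.Regression.Trainers.LightGbm(new Options
    {
        NumberOfThreads = 1,
        NumberOfIterations = 50,
        Verbose = true,
        LearningRate = 0.2,
        MinimumExampleCountPerLeaf = 10,
        NumberOfLeaves = 20,
        // Was EvalMetric (em=rmse) before this rename; enum spelling assumed.
        EvaluationMetric = Options.EvalMetricType.Rmse
    });
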
-Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 27.482854 diff --git a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt index 1ed592dd87..c919475347 100644 --- a/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt +++ b/test/BaselineOutput/Common/LightGBMR/LightGBMRegRmse-TrainTest-generatedRegressionDataset.RMSE-out.txt @@ -1,6 +1,6 @@ maml.exe TrainTest test=%Data% tr=LightGBMR{nt=1 iter=50 em=rmse v=+ lr=0.2 mil=10 nl=20} dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% out=%Output% seed=1 Not adding a normalizer. -Auto-tuning parameters: UseCat = False +Auto-tuning parameters: UseCategoricalSplit = False LightGBM objective=regression Not training a calibrator because it is not needed. L1(avg): 3.428896 diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index 97e229965d..f59eb72092 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -776,8 +776,8 @@ public void TestMultiClassEnsembleCombiner() LightGbm.TrainMultiClass(Env, new Options { FeatureColumnName = "Features", - NumBoostRound = 5, - NumLeaves = 4, + NumberOfIterations = 5, + NumberOfLeaves = 4, LabelColumnName = DefaultColumnNames.Label, TrainingData = dataView }).PredictorModel, diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs index 8776fe6f62..a62355e94a 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs @@ -534,8 +534,8 @@ public void LightGbmBinaryClassification() var est = reader.MakeNewEstimator() .Append(r => (r.label, preds: catalog.Trainers.LightGbm(r.label, r.features, - numBoostRound: 10, - numLeaves: 5, + numberOfIterations: 10, + numberOfLeaves: 5, learningRate: 0.01, onFit: (p) => { pred = p; }))); @@ -576,8 +576,8 @@ public void LightGbmRegression() var est = reader.MakeNewEstimator() .Append(r => (r.label, score: catalog.Trainers.LightGbm(r.label, r.features, - numBoostRound: 10, - numLeaves: 5, + numberOfIterations: 10, + numberOfLeaves: 5, onFit: (p) => { pred = p; }))); var pipe = reader.Append(est); diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 361a2f596d..7bc26635ad 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -345,7 +345,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest() var dynamicPipeline = mlContext.Transforms.Normalize("FeatureVector") .AppendCacheCheckpoint(mlContext) - .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numBoostRound: 3, numLeaves: 16, minDataPerLeaf: 100)); + .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numberOfIterations: 3, numberOfLeaves: 16, 
minimumExampleCountPerLeaf: 100)); var model = dynamicPipeline.Fit(data); // Step 2: Convert ML.NET model to ONNX format and save it as a file. diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index bb74e76a4c..218a9670bd 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -677,8 +677,8 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS LabelColumnName = "Label", FeatureColumnName = "Features", Seed = 1, - NThread = 1, - NumBoostRound = 1 + NumberOfThreads = 1, + NumberOfIterations = 1 })); var trainedModel = pipe.Fit(preprocessedTrainData); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs index 444db573da..88c8e46e64 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs @@ -50,9 +50,9 @@ public void LightGBMBinaryEstimator() var trainer = ML.BinaryClassification.Trainers.LightGbm(new Options { - NumLeaves = 10, - NThread = 1, - MinDataPerLeaf = 2, + NumberOfLeaves = 10, + NumberOfThreads = 1, + MinimumExampleCountPerLeaf = 2, }); var pipeWithTrainer = pipe.Append(trainer); @@ -169,9 +169,9 @@ public void LightGBMRegressorEstimator() var dataView = GetRegressionPipeline(); var trainer = ML.Regression.Trainers.LightGbm(new Options { - NThread = 1, + NumberOfThreads = 1, NormalizeFeatures = NormalizeOption.Warn, - CatL2 = 5, + L2CategoricalRegularization = 5, }); TestEstimatorCore(trainer, dataView); @@ -295,9 +295,9 @@ private void LightGbmHelper(bool useSoftmax, out string modelString, out List