From b47c4d440665a0c711c14a792f92a27cbf95b2fb Mon Sep 17 00:00:00 2001 From: "jeremie.desgagne.bouchard" Date: Fri, 5 Apr 2024 00:46:58 -0400 Subject: [PATCH 1/6] test --- src/loss.jl | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/metrics.jl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/src/loss.jl b/src/loss.jl index 30a716a..67e5e2c 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -31,6 +31,49 @@ function logloss(m, x, y, w, offset) sum(w .* ((1 .- y) .* p .- logσ.(p))) / sum(w) end +function tweedie(m, x, y) + rho = eltype(x)(1.5) + p = m(x) + mean(2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + + p .^ (2 - rho) / (2 - rho)) + ) +end +function tweedie(m, x, y, w) + rho = eltype(x)(1.5) + p = m(x) + sum(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + + p .^ (2 - rho) / (2 - rho)) + ) / sum(w) +end +function tweedie(m, x, y, w, offset) + rho = eltype(x)(1.5) + p = m(x) .+ offset + sum(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + + p .^ (2 - rho) / (2 - rho)) + ) / sum(w) +end + + +function tweedie( + p::AbstractMatrix{T}, + y::AbstractVector, + w::AbstractVector, + eval::AbstractVector; + kwargs... +) where {T} + @threads for i in eachindex(y) + pred = exp(p[1, i]) + eval[i] = + w[i] * + 2 * + ( + y[i]^(2 - rho) / (1 - rho) / (2 - rho) - y[i] * pred^(1 - rho) / (1 - rho) + + pred^(2 - rho) / (2 - rho) + ) + end + return sum(eval) / sum(w) +end + function mlogloss(m, x, y) p = logsoftmax(m(x); dims=1) k = size(p, 1) @@ -69,6 +112,7 @@ const _loss_fn_dict = Dict( :mse => mse, :mae => mae, :logloss => logloss, + :tweedie => tweedie, :mlogloss => mlogloss, :gaussian_mle => gaussian_mle, ) diff --git a/src/metrics.jl b/src/metrics.jl index 4fecba1..6eb5150 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -64,6 +64,50 @@ function logloss(m, x, y, w, offset; agg=mean) end +""" + logloss(x, y; agg=mean) + logloss(x, y, w; agg=mean) + logloss(x, y, w, offset; agg=mean) +""" +function logloss(m, x, y; agg=mean) + p = m(x) + metric = agg((1 .- y) .* p .- logσ.(p)) + return metric +end +function logloss(m, x, y, w; agg=mean) + p = m(x) + metric = agg(((1 .- y) .* p .- logσ.(p)) .* w) + return metric +end +function logloss(m, x, y, w, offset; agg=mean) + p = m(x) .+ offset + metric = agg(((1 .- y) .* p .- logσ.(p)) .* w) + return metric +end + +function tweedie(m, x, y; agg=mean) + rho = eltype(x)(1.5) + p = m(x) + agg(2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + + p .^ (2 - rho) / (2 - rho)) + ) +end +function tweedie(m, x, y, w) + agg = mean + rho = eltype(x)(1.5) + p = m(x) + agg(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + + p .^ (2 - rho) / (2 - rho)) + ) +end +function tweedie(m, x, y, w, offset; agg=mean) + rho = eltype(x)(1.5) + p = m(x) .+ offset + agg(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + + p .^ (2 - rho) / (2 - rho)) + ) +end + """ mlogloss(x, y; agg=mean) mlogloss(x, y, w; agg=mean) @@ -141,6 +185,7 @@ const metric_dict = Dict( :mse => mse, :mae => mae, :logloss => logloss, + :tweedie => tweedie, :mlogloss => mlogloss, :gaussian_mle => gaussian_mle, ) @@ -148,6 +193,7 @@ const metric_dict = Dict( is_maximise(::typeof(mse)) = false is_maximise(::typeof(mae)) = false is_maximise(::typeof(logloss)) = false +is_maximise(::typeof(tweedie)) = false is_maximise(::typeof(mlogloss)) = false 
is_maximise(::typeof(gaussian_mle)) = true From ce584b33c9c699578bb709e37c3191b456c571cc Mon Sep 17 00:00:00 2001 From: "jeremie.desgagne.bouchard" Date: Fri, 19 Apr 2024 15:18:02 -0400 Subject: [PATCH 2/6] tweedie --- benchmarks/MSRank-tweedie.jl | 89 ++++++++++++++++++++++++++++++++++++ src/loss.jl | 35 +++----------- src/metrics.jl | 38 +++++---------- src/model.jl | 2 + 4 files changed, 109 insertions(+), 55 deletions(-) create mode 100644 benchmarks/MSRank-tweedie.jl diff --git a/benchmarks/MSRank-tweedie.jl b/benchmarks/MSRank-tweedie.jl new file mode 100644 index 0000000..18dd3fd --- /dev/null +++ b/benchmarks/MSRank-tweedie.jl @@ -0,0 +1,89 @@ +using Revise +using Random +using CSV +using DataFrames +using StatsBase +using Statistics: mean, std +using NeuroTreeModels +using Solage: Connectors +using ReadLIBSVM +using AWS: AWSCredentials, AWSConfig, @service + +# https://www.microsoft.com/en-us/research/project/mslr/ + +@service S3 +aws_creds = AWSCredentials(ENV["AWS_ACCESS_KEY_ID_JDB"], ENV["AWS_SECRET_ACCESS_KEY_JDB"]) +aws_config = AWSConfig(; creds=aws_creds, region="ca-central-1") +bucket = "jeremiedb" + +# initial prep +function read_libsvm_aws(file::String; has_query=false, aws_config=AWSConfig()) + raw = S3.get_object("jeremiedb", file, Dict("response-content-type" => "application/octet-stream"); aws_config) + return read_libsvm(raw; has_query) +end + +@time train_raw = read_libsvm_aws("share/data/msrank/train.txt"; has_query=true, aws_config); +@time eval_raw = read_libsvm_aws("share/data/msrank/vali.txt"; has_query=true, aws_config); +@time test_raw = read_libsvm_aws("share/data/msrank/test.txt"; has_query=true, aws_config); + +dtrain = DataFrame(train_raw[:x], :auto) +dtrain.y_raw = train_raw[:y] +dtrain.y = dtrain.y_raw ./ 4 +dtrain.q = train_raw[:q] + +deval = DataFrame(eval_raw[:x], :auto) +deval.y_raw = eval_raw[:y] +deval.y = deval.y_raw ./ 4 +deval.q = eval_raw[:q] + +dtest = DataFrame(test_raw[:x], :auto) +dtest.y_raw = test_raw[:y] +dtest.y = dtest.y_raw ./ 4 +dtest.q = test_raw[:q] + +feature_names = setdiff(names(dtrain), ["y", "y_raw", "q"]) +target_name = "y_raw" + +function percent_rank(x::AbstractVector{T}) where {T} + return tiedrank(x) / (length(x) + 1) +end + +transform!(dtrain, feature_names .=> percent_rank .=> feature_names) +transform!(deval, feature_names .=> percent_rank .=> feature_names) +transform!(dtest, feature_names .=> percent_rank .=> feature_names) + +config = NeuroTreeRegressor( + device=:gpu, + loss=:tweedie_deviance, + nrounds=2, + actA=:tanh, + outsize=1, + depth=4, + ntrees=64, + stack_size=2, + hidden_size=16, + batchsize=4096, + lr=3e-4, +) + +@time m, logger = NeuroTreeModels.fit( + config, + dtrain; + deval, + target_name, + feature_names, + print_every_n=1, + early_stopping_rounds=3, + metric=:tweedie_deviance, + return_logger=true +); + +dinfer_eval = NeuroTreeModels.get_df_loader_infer(deval; feature_names, batchsize=config.batchsize, device=config.device); +p_eval = m(dinfer_eval); +mse_eval = mean((p_eval .- deval.y_raw) .^ 2) +@info "MSE - deval" mse_eval + +dinfer_test = NeuroTreeModels.get_df_loader_infer(dtest; feature_names, batchsize=config.batchsize, device=config.device); +p_test = m(dinfer_test); +mse_test = mean((p_test .- dtest.y_raw) .^ 2) +@info "MSE - dtest" mse_test diff --git a/src/loss.jl b/src/loss.jl index 67e5e2c..f87c1e6 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -31,49 +31,28 @@ function logloss(m, x, y, w, offset) sum(w .* ((1 .- y) .* p .- logσ.(p))) / sum(w) end -function tweedie(m, x, y) +function 
tweedie_deviance(m, x, y) rho = eltype(x)(1.5) - p = m(x) + p = exp.(m(x)) mean(2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + p .^ (2 - rho) / (2 - rho)) ) end -function tweedie(m, x, y, w) +function tweedie_deviance(m, x, y, w) rho = eltype(x)(1.5) - p = m(x) + p = exp.(m(x)) sum(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + p .^ (2 - rho) / (2 - rho)) ) / sum(w) end -function tweedie(m, x, y, w, offset) +function tweedie_deviance(m, x, y, w, offset) rho = eltype(x)(1.5) - p = m(x) .+ offset + p = exp.(m(x) .+ offset) sum(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + p .^ (2 - rho) / (2 - rho)) ) / sum(w) end - -function tweedie( - p::AbstractMatrix{T}, - y::AbstractVector, - w::AbstractVector, - eval::AbstractVector; - kwargs... -) where {T} - @threads for i in eachindex(y) - pred = exp(p[1, i]) - eval[i] = - w[i] * - 2 * - ( - y[i]^(2 - rho) / (1 - rho) / (2 - rho) - y[i] * pred^(1 - rho) / (1 - rho) + - pred^(2 - rho) / (2 - rho) - ) - end - return sum(eval) / sum(w) -end - function mlogloss(m, x, y) p = logsoftmax(m(x); dims=1) k = size(p, 1) @@ -112,9 +91,9 @@ const _loss_fn_dict = Dict( :mse => mse, :mae => mae, :logloss => logloss, - :tweedie => tweedie, :mlogloss => mlogloss, :gaussian_mle => gaussian_mle, + :tweedie_deviance => tweedie_deviance, ) get_loss_fn(config::NeuroTreeRegressor) = _loss_fn_dict[config.loss] diff --git a/src/metrics.jl b/src/metrics.jl index 6eb5150..60c59d8 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -65,44 +65,28 @@ end """ - logloss(x, y; agg=mean) - logloss(x, y, w; agg=mean) - logloss(x, y, w, offset; agg=mean) + tweedie_deviance(x, y; agg=mean) + tweedie_deviance(x, y, w; agg=mean) + tweedie_deviance(x, y, w, offset; agg=mean) """ -function logloss(m, x, y; agg=mean) - p = m(x) - metric = agg((1 .- y) .* p .- logσ.(p)) - return metric -end -function logloss(m, x, y, w; agg=mean) - p = m(x) - metric = agg(((1 .- y) .* p .- logσ.(p)) .* w) - return metric -end -function logloss(m, x, y, w, offset; agg=mean) - p = m(x) .+ offset - metric = agg(((1 .- y) .* p .- logσ.(p)) .* w) - return metric -end - -function tweedie(m, x, y; agg=mean) +function tweedie_deviance(m, x, y; agg=mean) rho = eltype(x)(1.5) - p = m(x) + p = exp.(m(x)) agg(2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + p .^ (2 - rho) / (2 - rho)) ) end -function tweedie(m, x, y, w) +function tweedie_deviance(m, x, y, w) agg = mean rho = eltype(x)(1.5) - p = m(x) + p = exp.(m(x)) agg(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + p .^ (2 - rho) / (2 - rho)) ) end -function tweedie(m, x, y, w, offset; agg=mean) +function tweedie_deviance(m, x, y, w, offset; agg=mean) rho = eltype(x)(1.5) - p = m(x) .+ offset + p = exp.(m(x) .+ offset) agg(w .* 2 .* (y .^ (2 - rho) / (1 - rho) / (2 - rho) - y .* p .^ (1 - rho) / (1 - rho) + p .^ (2 - rho) / (2 - rho)) ) @@ -185,16 +169,16 @@ const metric_dict = Dict( :mse => mse, :mae => mae, :logloss => logloss, - :tweedie => tweedie, :mlogloss => mlogloss, :gaussian_mle => gaussian_mle, + :tweedie_deviance => tweedie_deviance, ) is_maximise(::typeof(mse)) = false is_maximise(::typeof(mae)) = false is_maximise(::typeof(logloss)) = false -is_maximise(::typeof(tweedie)) = false is_maximise(::typeof(mlogloss)) = false is_maximise(::typeof(gaussian_mle)) = true +is_maximise(::typeof(tweedie_deviance)) = false end \ No newline at end of file diff --git a/src/model.jl 
b/src/model.jl index 27e0948..d225922 100644 --- a/src/model.jl +++ b/src/model.jl @@ -4,6 +4,7 @@ abstract type MAE <: LossType end abstract type LogLoss <: LossType end abstract type MLogLoss <: LossType end abstract type GaussianMLE <: LossType end +abstract type TweedieDeviance <: LossType end const _loss_type_dict = Dict( :mse => MSE, @@ -11,6 +12,7 @@ const _loss_type_dict = Dict( :logloss => LogLoss, :mlogloss => MLogLoss, :gaussian_mle => GaussianMLE, + :tweedie_deviance => TweedieDeviance ) mutable struct NeuroTreeRegressor <: MMI.Deterministic From 73471da49e36f2526394a03dc80cf9a625b24d35 Mon Sep 17 00:00:00 2001 From: jeremie Date: Sun, 21 Apr 2024 12:45:40 -0400 Subject: [PATCH 3/6] classif fixes api cleanup --- Project.toml | 5 +- benchmarks/titanic-logloss.jl | 58 +++++ benchmarks/titanic-mlogloss.jl | 64 ++++++ experiments/dataloader.jl | 52 ++++- src/MLJ.jl | 31 ++- src/NeuroTreeModels.jl | 5 +- src/callback.jl | 42 +--- src/data.jl | 143 ++++++------ src/fit.jl | 88 ++++---- src/learners.jl | 384 +++++++++++++++++++++++++++++++++ src/loss.jl | 3 +- src/metrics.jl | 2 +- src/model.jl | 225 +------------------ test/MLJ.jl | 58 ++++- test/core.jl | 72 +++++-- 15 files changed, 835 insertions(+), 397 deletions(-) create mode 100644 benchmarks/titanic-logloss.jl create mode 100644 benchmarks/titanic-mlogloss.jl create mode 100644 src/learners.jl diff --git a/Project.toml b/Project.toml index 9f24418..f404ca7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,10 +1,11 @@ -authors = ["jeremiedb "] name = "NeuroTreeModels" uuid = "1db4e0a5-a364-4b0c-897c-2bd5a4a3a1f2" -version = "1.2.0" +authors = ["jeremiedb "] +version = "1.3.0" [deps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" diff --git a/benchmarks/titanic-logloss.jl b/benchmarks/titanic-logloss.jl new file mode 100644 index 0000000..ded0388 --- /dev/null +++ b/benchmarks/titanic-logloss.jl @@ -0,0 +1,58 @@ +using NeuroTreeModels +using MLDatasets +using DataFrames +using Statistics: mean +using StatsBase: median +using CategoricalArrays +using Random +using CUDA +using CategoricalArrays + +Random.seed!(123) + +df = MLDatasets.Titanic().dataframe + +# convert string feature to Categorical +transform!(df, :Sex => categorical => :Sex) +transform!(df, :Sex => ByRow(levelcode) => :Sex) + +# treat string feature and missing values +transform!(df, :Age => ByRow(ismissing) => :Age_ismissing) +transform!(df, :Age => (x -> coalesce.(x, median(skipmissing(x)))) => :Age); + +# remove unneeded variables +df = df[:, Not([:PassengerId, :Name, :Embarked, :Cabin, :Ticket])] + +train_ratio = 0.8 +train_indices = randperm(nrow(df))[1:Int(round(train_ratio * nrow(df)))] + +dtrain = df[train_indices, :] +deval = df[setdiff(1:nrow(df), train_indices), :] + +target_name = "Survived" +feature_names = setdiff(names(df), ["Survived"]) + +config = NeuroTreeRegressor( + loss=:logloss, + nrounds=400, + depth=4, + lr=3e-2, +) + +m = NeuroTreeModels.fit( + config, + dtrain; + deval, + target_name, + feature_names, + metric=:logloss, + print_every_n=10, + early_stopping_rounds=3, + device=:cpu +) + +p_train = m(dtrain) +p_eval = m(deval) + +@info mean((p_train .> 0.5) .== (dtrain[!, target_name] .> 0.5)) +@info mean((p_eval .> 0.5) .== (deval[!, target_name] .> 0.5)) diff --git a/benchmarks/titanic-mlogloss.jl b/benchmarks/titanic-mlogloss.jl 
new file mode 100644 index 0000000..ba78b48 --- /dev/null +++ b/benchmarks/titanic-mlogloss.jl @@ -0,0 +1,64 @@ +using NeuroTreeModels +using MLDatasets +using DataFrames +using Statistics: mean +using StatsBase: median +using CategoricalArrays +using Random +using CUDA +using CategoricalArrays + +Random.seed!(123) + +df = MLDatasets.Titanic().dataframe + +# convert target variable to a categorical +transform!(df, :Survived => categorical => :y_cat) + +# convert string feature to Categorical +transform!(df, :Sex => categorical => :Sex) +transform!(df, :Sex => ByRow(levelcode) => :Sex) + +# treat string feature and missing values +transform!(df, :Age => ByRow(ismissing) => :Age_ismissing) +transform!(df, :Age => (x -> coalesce.(x, median(skipmissing(x)))) => :Age); + +# remove unneeded variables +df = df[:, Not([:PassengerId, :Name, :Embarked, :Cabin, :Ticket])] + +train_ratio = 0.8 +train_indices = randperm(nrow(df))[1:Int(round(train_ratio * nrow(df)))] + +dtrain = df[train_indices, :] +deval = df[setdiff(1:nrow(df), train_indices), :] + +target_name = "y_cat" +feature_names = setdiff(names(df), ["y_cat", "Survived"]) + +eltype(dtrain[:, "y_cat"]) +config = NeuroTreeClassifier( + nrounds=400, + depth=4, + lr=3e-2, +) + +m = NeuroTreeModels.fit( + config, + dtrain; + deval, + target_name, + feature_names, + metric=:mlogloss, + print_every_n=10, + early_stopping_rounds=3, + device=:cpu +) + +p_train = m(dtrain) +p_train_idx = [argmax(p) for p in eachrow(p_train)] + +p_eval = m(deval) +p_eval_idx = [argmax(p) for p in eachrow(p_eval)] + +@info mean(p_train_idx .== levelcode.(dtrain[!, target_name])) +@info mean(p_eval_idx .== levelcode.(deval[!, target_name])) diff --git a/experiments/dataloader.jl b/experiments/dataloader.jl index 7370bf5..feedeb8 100644 --- a/experiments/dataloader.jl +++ b/experiments/dataloader.jl @@ -1,25 +1,67 @@ using NeuroTreeModels using DataFrames +using CategoricalArrays ################################# # vanilla DataFrame ################################# -nobs=100 -nfeats=10 +nobs = 100 +nfeats = 10 x = rand(nobs, nfeats); df = DataFrame(x, :auto); df.y = rand(nobs); target_name = "y" -feature_names = setdiff(names(df), [target_name]) +feature_names = Symbol.(setdiff(names(df), [target_name])) +batchsize = 32 + +################################### +# CPU +################################### +device = :cpu +dtrain = NeuroTreeModels.get_df_loader_train(df; feature_names, target_name, batchsize, device) + +for d in dtrain + @info length(d) + @info size(d[1]) +end + +deval = NeuroTreeModels.get_df_loader_infer(df; feature_names, batchsize=32) +for d in deval + @info size(d) +end + +################################### +# GPU +################################### +device = :gpu +dtrain = NeuroTreeModels.get_df_loader_train(df; feature_names, target_name, batchsize, device) -dtrain = NeuroTrees.get_df_loader_train(df; feature_names, target_name, batchsize=32) for d in dtrain @info length(d) @info size(d[1]) end -deval = NeuroTrees.get_df_loader_infer(df; feature_names, batchsize=32) +deval = NeuroTreeModels.get_df_loader_infer(df; feature_names, batchsize=32) for d in deval @info size(d) end + +################################### +# Categorical +################################### +target_name = "y" +feature_names = Symbol.(setdiff(names(df), [target_name])) +batchsize = 32 +device = :gpu + +x = rand(nobs, nfeats); +df = DataFrame(x, :auto); +df.y = categorical(rand(1:2, nobs)); + +dtrain = NeuroTreeModels.get_df_loader_train(df; feature_names, target_name, batchsize, 
device) +for d in dtrain + @info length(d) + @info size(d[1]) + @info typeof(d[2]) +end diff --git a/src/MLJ.jl b/src/MLJ.jl index 78da31a..9f34806 100644 --- a/src/MLJ.jl +++ b/src/MLJ.jl @@ -1,5 +1,5 @@ function MMI.fit( - model::NeuroTreeRegressor, + model::NeuroTypes, verbosity::Int, A, y, @@ -8,7 +8,6 @@ function MMI.fit( Tables.istable(A) ? dtrain = DataFrame(A) : error("`A` must be a Table") nobs = Tables.DataAPI.nrow(dtrain) feature_names = string.(collect(Tables.schema(dtrain).names)) - @info feature_names @assert "_target" ∉ feature_names dtrain._target = y target_name = "_target" @@ -22,7 +21,7 @@ function MMI.fit( end offset_name = nothing - fitresult, cache = init(model, dtrain; feature_names, target_name, weight_name, offset_name) + fitresult, cache = init(model, dtrain; feature_names, target_name, weight_name, offset_name, device=:cpu) while fitresult.info[:nrounds] < model.nrounds fit_iter!(fitresult, cache) @@ -37,10 +36,10 @@ function okay_to_continue(model, fitresult, cache) end # For EarlyStopping.jl support -MMI.iteration_parameter(::Type{<:NeuroTreeRegressor}) = :nrounds +MMI.iteration_parameter(::Type{<:NeuroTypes}) = :nrounds function MMI.update( - model::NeuroTreeRegressor, + model::NeuroTypes, verbosity::Integer, fitresult, cache, @@ -68,9 +67,17 @@ function MMI.predict(::NeuroTreeRegressor, fitresult, A) return pred end +function predict(::NeuroTreeClassifier, fitresult, A) + df = DataFrame(A) + Tables.istable(A) ? df = DataFrame(A) : error("`A` must be a Table") + dinfer = get_df_loader_infer(df; feature_names=fitresult.info[:feature_names], batchsize=2048, device=:cpu) + pred = infer(fitresult, dinfer) + return MMI.UnivariateFinite(fitresult.info[:target_levels], pred, pool=missing, ordered=fitresult.info[:target_isordered]) +end + # Metadata MMI.metadata_pkg.( - (NeuroTreeRegressor), + (NeuroTreeRegressor, NeuroTreeClassifier), name="NeuroTreeModels", uuid="1db4e0a5-a364-4b0c-897c-2bd5a4a3a1f2", url="https://github.com/Evovest/NeuroTreeModels.jl", @@ -81,10 +88,16 @@ MMI.metadata_pkg.( MMI.metadata_model( NeuroTreeRegressor, - input_scitype=Union{ - MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), - }, + input_scitype=MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), target_scitype=AbstractVector{<:MMI.Continuous}, weights=true, path="NeuroTreeModels.NeuroTreeRegressor", ) + +MMI.metadata_model( + NeuroTreeClassifier, + input_scitype=MMI.Table(MMI.Continuous, MMI.Count, MMI.OrderedFactor), + target_scitype=AbstractVector{<:MMI.Finite}, + weights=true, + path="NeuroTreeModels.NeuroTreeClassifier", +) diff --git a/src/NeuroTreeModels.jl b/src/NeuroTreeModels.jl index 2be8638..3e4a998 100644 --- a/src/NeuroTreeModels.jl +++ b/src/NeuroTreeModels.jl @@ -3,6 +3,7 @@ module NeuroTreeModels using Base.Threads: @threads, nthreads import Tables using DataFrames +using CategoricalArrays using Statistics: mean, std using Random @@ -19,11 +20,13 @@ using ChainRulesCore import ChainRulesCore: rrule import MLJModelInterface as MMI +import MLJModelInterface: fit, update, predict, schema -export NeuroTreeRegressor, NeuroTreeModel, NeuroTree +export NeuroTreeRegressor, NeuroTreeClassifier, NeuroTreeModel, NeuroTree include("data.jl") include("utils.jl") +include("learners.jl") include("model.jl") include("loss.jl") include("metrics.jl") diff --git a/src/callback.jl b/src/callback.jl index 4d70acf..ff940ba 100644 --- a/src/callback.jl +++ b/src/callback.jl @@ -4,7 +4,7 @@ using DataFrames using Statistics: mean, median using Flux: cpu, gpu using CUDA: CuIterator 
-using ..NeuroTreeModels: NeuroTreeRegressor +using ..NeuroTreeModels: NeuroTypes using ..NeuroTreeModels: get_df_loader_train using ..NeuroTreeModels.Metrics @@ -21,43 +21,19 @@ function (cb::CallBack)(logger, iter, m) return nothing end -# function CallBack(config::NeuroTreeRegressor; metric, x_eval, y_eval, w_eval=nothing, offset_eval=nothing) -# feval = metric_dict[metric] - -# y_eval = ndims(y_eval) == 1 ? y_eval : y_eval' -# w_eval = isnothing(w_eval) ? ones(Float32, size(y_eval)[end]) : Vector{Float32}(w_eval) -# offset_eval = isnothing(offset_eval) ? zeros(Float32, size(y_eval)[end]) : Vector{Float32}(offset_eval) - -# deval = DataLoader( -# ( -# x=Matrix{Float32}(x_eval'), -# y=Float32.(y_eval), -# w=Float32.(w_eval), -# offset = Float32.(offset_eval), -# ), -# batchsize=config.batchsize, -# partial=true, -# shuffle=false, -# parallel=true, -# buffer=false, -# ) -# (config.device == :gpu) && (deval = CuIterator(deval)) -# return CallBack(feval, deval) -# end - function CallBack( - config::NeuroTreeRegressor, + config::NeuroTypes, deval::AbstractDataFrame; metric, feature_names, target_name, weight_name=nothing, - offset_name=nothing) + offset_name=nothing, + device=:cpu) batchsize = config.batchsize feval = metric_dict[metric] - deval = get_df_loader_train(deval; feature_names, target_name, weight_name, offset_name, batchsize) - (config.device == :gpu) && (deval = CuIterator(deval)) + deval = get_df_loader_train(deval; feature_names, target_name, weight_name, offset_name, batchsize, device) return CallBack(feval, deval) end @@ -96,11 +72,11 @@ end function agg_logger(logger_raw::Vector{Dict}) _l1 = first(logger_raw) - best_iters = [d[:best_iter] for d in logger_raw] - best_iter = ceil(Int, median(best_iters)) + best_iters = [d[:best_iter] for d in logger_raw] + best_iter = ceil(Int, median(best_iters)) - best_metrics = [d[:best_metric] for d in logger_raw] - best_metric = last(best_metrics) + best_metrics = [d[:best_metric] for d in logger_raw] + best_metric = last(best_metrics) metrics = (layer=Int[], iter=Int[], metric=Float64[]) for i in eachindex(logger_raw) diff --git a/src/data.jl b/src/data.jl index 04c6d50..247554d 100644 --- a/src/data.jl +++ b/src/data.jl @@ -4,56 +4,45 @@ import Base: length, getindex ContainerTrain """ -struct ContainerTrain{D<:AbstractDataFrame} - df::D - feature_names::Vector{Symbol} - target_name::String - weight_name::Union{Symbol,Nothing} - offset_name::Union{Symbol,Vector{Symbol},Nothing} +struct ContainerTrain{A<:AbstractMatrix,B<:AbstractVector,C,D} + x::A + y::B + w::C + offset::D end -function ContainerTrain( - df; - feature_names::Vector{Symbol}, - target_name, - weight_name=nothing, - offset_name=nothing) +length(data::ContainerTrain) = size(data.x, 2) - container = ContainerTrain( - df, - feature_names, - target_name, - weight_name, - offset_name) - - return container +function getindex(data::ContainerTrain{A,B,C,D}, idx::AbstractVector) where {A,B,C<:Nothing,D<:Nothing} + x = data.x[:, idx] + y = data.y[idx] + return (x, y) end - -length(data::ContainerTrain{<:AbstractDataFrame}) = nrow(data.df) - -function getindex(data::ContainerTrain{<:AbstractDataFrame}, idx::AbstractVector) - df = view(data.df, idx, :) - x = Matrix{Float32}(Matrix{Float32}(select(df, data.feature_names))') - y = Float32.(df[!, data.target_name]) - if isnothing(data.weight_name) && isnothing(data.offset_name) - return (x, y) - elseif isnothing(data.offset_name) - w = Float32.(df[!, data.weight_name]) - return (x, y, w) - elseif isnothing(data.weight_name) - w = 
ones(Float32, length(y)) - isa(data.offset_name, String) ? offset = Float32.(df[!, data.offset_name]) : offset = Matrix{Float32}(Matrix{Float32}(df[!, data.offset_name])') - return (x, y, w, offset) - else - w = Float32.(df[!, data.weight_name]) - isa(data.offset_name, String) ? offset = Float32.(df[!, data.offset_name]) : offset = Matrix{Float32}(Matrix{Float32}(df[!, data.offset_name])') - return (x, y, w, offset) - end +function getindex(data::ContainerTrain{A,B,C,D}, idx::AbstractVector) where {A,B,C<:AbstractVector,D<:Nothing} + x = data.x[:, idx] + y = data.y[idx] + w = data.w[idx] + return (x, y, w) +end +function getindex(data::ContainerTrain{A,B,C,D}, idx::AbstractVector) where {A,B,C<:AbstractVector,D<:AbstractVector} + x = data.x[:, idx] + y = data.y[idx] + w = data.w[idx] + offset = data.offset[idx] + return (x, y, w, offset) +end +function getindex(data::ContainerTrain{A,B,C,D}, idx::AbstractVector) where {A,B,C<:AbstractVector,D<:AbstractMatrix} + x = data.x[:, idx] + y = data.y[idx] + w = data.w[idx] + offset = data.offset[:, idx] + return (x, y, w, offset) end + function get_df_loader_train( df::AbstractDataFrame; - feature_names::Vector{Symbol}, + feature_names, target_name, weight_name=nothing, offset_name=nothing, @@ -61,10 +50,27 @@ function get_df_loader_train( shuffle=true, device=:cpu) - container = ContainerTrain(df; feature_names, target_name, weight_name, offset_name) + feature_names = Symbol.(feature_names) + x = Matrix{Float32}(Matrix{Float32}(select(df, feature_names))') + + if eltype(df[!, target_name]) <: CategoricalValue + y = UInt32.(CategoricalArrays.levelcode.(df[!, target_name])) + else + y = Float32.(df[!, target_name]) + end + + w = isnothing(weight_name) ? nothing : Float32.(df[!, weight_name]) + + offset = if isnothing(offset_name) + nothing + else + isa(offset_name, String) ? 
Float32.(df[!, offset_name]) : offset = Matrix{Float32}(Matrix{Float32}(df[!, data.offset_name])') + end + + container = ContainerTrain(x, y, w, offset) batchsize = min(batchsize, length(container)) dtrain = DataLoader(container; shuffle, batchsize, partial=true, parallel=false) - if Symbol(device) == :gpu + if device == :gpu return CuIterator(dtrain) else return dtrain @@ -76,32 +82,27 @@ end ContainerInfer """ -struct ContainerInfer{D<:AbstractDataFrame} - df::D - feature_names::Vector{Symbol} - offset_name::Union{Symbol,Nothing} -end - -function ContainerInfer( - df; - feature_names::Vector{Symbol}, - offset_name=nothing) - - container = ContainerInfer( - df, - feature_names, - offset_name) - - return container +struct ContainerInfer{A<:AbstractMatrix,D} + x::A + offset::D end -length(data::ContainerInfer{<:AbstractDataFrame}) = nrow(data.df) +length(data::ContainerInfer) = size(data.x, 2) -function getindex(data::ContainerInfer{<:AbstractDataFrame}, idx::AbstractVector) - df = view(data.df, idx, :) - x = Matrix{Float32}(Matrix{Float32}(select(df, data.feature_names))') +function getindex(data::ContainerInfer{A,D}, idx::AbstractVector) where {A,D<:Nothing} + x = data.x[:, idx] return x end +function getindex(data::ContainerTrain{A,D}, idx::AbstractVector) where {A,D<:AbstractVector} + x = data.x[:, idx] + offset = data.offset[idx] + return (x, offset) +end +function getindex(data::ContainerTrain{A,D}, idx::AbstractVector) where {A,D<:AbstractMatrix} + x = data.x[:, idx] + offset = data.offset[:, idx] + return (x, offset) +end function get_df_loader_infer( df::AbstractDataFrame; @@ -111,10 +112,18 @@ function get_df_loader_infer( device=:cpu) feature_names = Symbol.(feature_names) - container = ContainerInfer(df; feature_names, offset_name) + x = Matrix{Float32}(Matrix{Float32}(select(df, feature_names))') + + offset = if isnothing(offset_name) + nothing + else + isa(offset_name, String) ? 
Float32.(df[!, offset_name]) : offset = Matrix{Float32}(Matrix{Float32}(df[!, data.offset_name])') + end + + container = ContainerInfer(x, offset) batchsize = min(batchsize, length(container)) dinfer = DataLoader(container; shuffle=false, batchsize, partial=true, parallel=false) - if Symbol(device) == :gpu + if device == :gpu return CuIterator(dinfer) else return dinfer diff --git a/src/fit.jl b/src/fit.jl index 9f5d5b3..aa67770 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -1,48 +1,52 @@ function init( - config::NeuroTreeRegressor, + config::NeuroTypes, df::AbstractDataFrame; feature_names, target_name, weight_name=nothing, - offset_name=nothing) + offset_name=nothing, + device=:cpu, +) batchsize = config.batchsize - feature_names = Symbol.(feature_names) - if config.device == :gpu - device = Flux.gpu - CUDA.device!(config.gpuID) - else - device = Flux.cpu - end - - dtrain = NeuroTreeModels.get_df_loader_train(df; feature_names, target_name, weight_name, offset_name, batchsize) - (config.device == :gpu) && (dtrain = CuIterator(dtrain)) - nfeats = length(feature_names) loss = get_loss_fn(config) L = get_loss_type(config) - chain = get_model_chain(L; config, nfeats) + + target_levels = nothing + target_isordered = false + outsize = 1 + if L <: MLogLoss + eltype(df[!, target_name]) <: CategoricalValue || error("Target variable `$target_name` must have its elements `<: CategoricalValue`") + target_levels = CategoricalArrays.levels(df[!, target_name]) + target_isordered = isordered(df[!, target_name]) + outsize = length(target_levels) + end + dtrain = NeuroTreeModels.get_df_loader_train(df; feature_names, target_name, weight_name, offset_name, batchsize, device) + + chain = get_model_chain(L; config, nfeats, outsize) info = Dict( - :device => config.device, + :device => device, :nrounds => 0, - :feature_names => feature_names - ) + :feature_names => feature_names, + :target_levels => target_levels, + :target_isordered => target_isordered) m = NeuroTreeModel(L, chain, info) - if config.device == :gpu + if device == :gpu m = m |> gpu end optim = OptimiserChain(NAdam(config.lr), WeightDecay(config.wd)) opts = Optimisers.setup(optim, m) - cache = (dtrain=dtrain, loss=loss, opts=opts, device=device, info=info) + cache = (dtrain=dtrain, loss=loss, opts=opts, info=info) return m, cache end """ function fit( - config::NeuroTreeRegressor, + config::NeuroTypes, dtrain; feature_names, target_name, @@ -53,15 +57,16 @@ end print_every_n=9999, early_stopping_rounds=9999, verbosity=1, - return_logger=false + device=:cpu, + gpuID=0, ) Training function of NeuroTreeModels' internal API. # Arguments -- `config::NeuroTreeRegressor` -- `dtrain`: Must be a `AbstractDataFrame` +- `config::NeuroTypes` +- `dtrain`: Must be `<:AbstractDataFrame` # Keyword arguments @@ -79,11 +84,12 @@ Training function of NeuroTreeModels' internal API. 
- `print_every_n=9999` - `early_stopping_rounds=9999` - `verbosity=1` -- `return_logger=false` +- `device=:cpu`: device on which to perform the computation, either `:cpu` or `:gpu` +- `gpuID=0`: gpu device to use, only relveant if `device = :gpu` """ function fit( - config::NeuroTreeRegressor, + config::NeuroTypes, dtrain; feature_names, target_name, @@ -94,28 +100,33 @@ function fit( print_every_n=9999, early_stopping_rounds=9999, verbosity=1, - return_logger=false + device=:cpu, + gpuID=0, ) - feature_names = Symbol.(feature_names) - if config.device == :gpu - CUDA.device!(config.gpuID) + device = Symbol(device) + if device == :gpu + CUDA.device!(gpuID) end - # initialize callback and logger if tracking eval data + feature_names = Symbol.(feature_names) + target_name = Symbol(target_name) + weight_name = isnothing(weight_name) ? nothing : Symbol(weight_name) + offset_name = isnothing(offset_name) ? nothing : Symbol(offset_name) metric = isnothing(metric) ? nothing : Symbol(metric) + + m, cache = init(config, dtrain; feature_names, target_name, weight_name, offset_name, device) + + # initialize callback and logger if tracking eval data logging_flag = !isnothing(metric) && !isnothing(deval) any_flag = !isnothing(metric) || !isnothing(deval) if !logging_flag && any_flag @warn "For logger and eval metric to be tracked, `metric` and `deval` must both be provided." end - logger = Dict[] - logger = nothing - - m, cache = init(config, dtrain; feature_names, target_name, weight_name, offset_name) + logger = nothing if logging_flag - cb = CallBack(config, deval; metric, feature_names, target_name, weight_name, offset_name) + cb = CallBack(config, deval; metric, feature_names, target_name, weight_name, offset_name, device) logger = init_logger(; metric, early_stopping_rounds) cb(logger, 0, m) (verbosity > 0) && @info "Init training" metric = logger[:metrics][end] @@ -136,11 +147,8 @@ function fit( end end - if return_logger - return (m, logger) - else - return m - end + m.info[:logger] = logger + return m end function fit_iter!(m, cache) diff --git a/src/learners.jl b/src/learners.jl new file mode 100644 index 0000000..4a5a75f --- /dev/null +++ b/src/learners.jl @@ -0,0 +1,384 @@ +abstract type LossType end +abstract type MSE <: LossType end +abstract type MAE <: LossType end +abstract type LogLoss <: LossType end +abstract type MLogLoss <: LossType end +abstract type GaussianMLE <: LossType end + +const _loss_type_dict = Dict( + :mse => MSE, + :mae => MAE, + :logloss => LogLoss, + :gaussian_mle => GaussianMLE, + :mlogloss => MLogLoss +) + +mutable struct NeuroTreeRegressor <: MMI.Deterministic + loss::Symbol + nrounds::Int + lr::Float32 + wd::Float32 + batchsize::Int + actA::Symbol + depth::Int + ntrees::Int + hidden_size::Int + stack_size::Int + init_scale::Float32 + MLE_tree_split::Bool + rng::Any +end + +""" + NeuroTreeRegressor(; kwargs...) + +A model type for constructing a NeuroTreeRegressor, based on [NeuroTreeModels.jl](https://github.com/Evovest/NeuroTreeModels.jl), and implementing both an internal API and the MLJ model interface. + +# Hyper-parameters + +- `loss=:mse`: Loss to be be minimized during training. One of: + - `:mse` + - `:mae` + - `:logloss` + - `:mlogloss` + - `:gaussian_mle` +- `nrounds=10`: Max number of rounds (epochs). +- `lr=1.0f-2`: Learning rate. Must be > 0. A lower `eta` results in slower learning, typically requiring a higher `nrounds`. +- `wd=0.f0`: Weight decay applied to the gradients by the optimizer. +- `batchsize=2048`: Batch size. 
+- `actA=:tanh`: Activation function applied to each input variable for determination of split node weight. Can be one of: + - `:tanh` + - `:identity` +- `depth=6`: Depth of a tree. Must be >= 1. A tree of depth 1 has 2 prediction leaf nodes. A complete tree of depth N contains `2^N` terminal leaves and `2^N - 1` split nodes. + Compute cost is proportional to `2^depth`. Typical optimal values are in the 3 to 5 range. +- `ntrees=64`: Number of trees (per stack). +- `hidden_size=16`: Size of hidden layers. Applicable only when `stack_size` > 1. +- `stack_size=1`: Number of stacked NeuroTree blocks. +- `init_scale=1.0`: Scaling factor applied to the predictions weights. Values in the `]0, 1]` range result in best performance. +- `MLE_tree_split=false`: Whether independent models are built for each of the 2 parameters (mu, sigma) of the `gaussian_mle` loss. +- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`). + +# Internal API + +Do `config = NeuroTreeRegressor()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in NeuroTreeRegressor(loss=...). + +## Training model + +A model is trained using [`fit`](@ref): + +```julia +m = fit(config, dtrain; feature_names, target_name, kwargs...) +``` + +## Inference + +Models act as a functor, returning predictions when called as a function with features as argument: + +```julia +m(data) +``` + +# MLJ Interface + +From MLJ, the type can be imported using: + +```julia +NeuroTreeRegressor = @load NeuroTreeRegressor pkg=NeuroTreeModels +``` + +Do `model = NeuroTreeRegressor()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in `NeuroTreeRegressor(loss=...)`. + +## Training model + +In MLJ or MLJBase, bind an instance `model` to data with + `mach = machine(model, X, y)` where +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have one of the following element scitypes: `Continuous`, + `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)` +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:Continuous`; check the scitype + with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + +## Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + +## Fitted parameters + +The fields of `fitted_params(mach)` are: + - `:fitresult`: The `NeuroTreeModel` object. + +## Report + +The fields of `report(mach)` are: + - `:features`: The names of the features encountered in training. + +# Examples + +## Internal API + +```julia +using NeuroTreeModels, DataFrames +config = NeuroTreeRegressor(depth=5, nrounds=10) +nobs, nfeats = 1_000, 5 +dtrain = DataFrame(randn(nobs, nfeats), :auto) +dtrain.y = rand(nobs) +feature_names, target_name = names(dtrain, r"x"), "y" +m = fit(config, dtrain; feature_names, target_name) +p = m(dtrain) +``` + +## MLJ Interface + +```julia +using MLJBase, NeuroTreeModels +m = NeuroTreeRegressor(depth=5, nrounds=10) +X, y = @load_boston +mach = machine(m, X, y) |> fit! +p = predict(mach, X) +``` +""" +function NeuroTreeRegressor(; kwargs...)
+ + # defaults arguments + args = Dict{Symbol,Any}( + :loss => :mse, + :nrounds => 10, + :lr => 1.0f-2, + :wd => 0.0f0, + :batchsize => 2048, + :actA => :tanh, + :depth => 4, + :ntrees => 64, + :hidden_size => 1, + :stack_size => 1, + :init_scale => 0.1, + :MLE_tree_split => false, + :rng => 123, + ) + + args_ignored = setdiff(keys(kwargs), keys(args)) + args_ignored_str = join(args_ignored, ", ") + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + + args_default = setdiff(keys(args), keys(kwargs)) + args_default_str = join(args_default, ", ") + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + + args_override = intersect(keys(args), keys(kwargs)) + for arg in args_override + args[arg] = kwargs[arg] + end + + loss = Symbol(args[:loss]) + loss ∉ [:mse, :mae, :logloss, :gaussian_mle] && error("The provided kwarg `loss`: $loss is not supported.") + + args[:rng] = mk_rng(args[:rng]) + + config = NeuroTreeRegressor( + args[:loss], + args[:nrounds], + Float32(args[:lr]), + Float32(args[:wd]), + args[:batchsize], + Symbol(args[:actA]), + args[:depth], + args[:ntrees], + args[:hidden_size], + args[:stack_size], + args[:init_scale], + args[:MLE_tree_split], + args[:rng] + ) + + return config +end + + +mutable struct NeuroTreeClassifier <: MMI.Probabilistic + loss::Symbol + nrounds::Int + lr::Float32 + wd::Float32 + batchsize::Int + actA::Symbol + depth::Int + ntrees::Int + hidden_size::Int + stack_size::Int + init_scale::Float32 + MLE_tree_split::Bool + rng::Any +end + +""" + NeuroTreeClassifier(; kwargs...) + +A model type for constructing a NeuroTreeClassifier, based on [NeuroTreeModels.jl](https://github.com/Evovest/NeuroTreeModels.jl), and implementing both an internal API and the MLJ model interface. + +# Hyper-parameters + +- `nrounds=10`: Max number of rounds (epochs). +- `lr=1.0f-2`: Learning rate. Must be > 0. A lower `lr` results in slower learning, typically requiring a higher `nrounds`. +- `wd=0.f0`: Weight decay applied to the gradients by the optimizer. +- `batchsize=2048`: Batch size. +- `actA=:tanh`: Activation function applied to each input variable for determination of split node weight. Can be one of: + - `:tanh` + - `:identity` +- `depth=6`: Depth of a tree. Must be >= 1. A tree of depth 1 has 2 prediction leaf nodes. A complete tree of depth N contains `2^N` terminal leaves and `2^N - 1` split nodes. + Compute cost is proportional to `2^depth`. Typical optimal values are in the 3 to 5 range. +- `ntrees=64`: Number of trees (per stack). +- `hidden_size=16`: Size of hidden layers. Applicable only when `stack_size` > 1. +- `stack_size=1`: Number of stacked NeuroTree blocks. +- `init_scale=1.0`: Scaling factor applied to the predictions weights. Values in the `]0, 1]` range result in best performance. +- `MLE_tree_split=false`: Whether independent models are built for each of the 2 parameters (mu, sigma) of the `gaussian_mle` loss. +- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`). + +# Internal API + +Do `config = NeuroTreeClassifier()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in NeuroTreeClassifier(depth=...).
+ +## Training model + +A model is trained using [`fit`](@ref): + +```julia +m = fit(config, dtrain; feature_names, target_name, kwargs...) +``` + +## Inference + +Models act as a functor, returning predictions when called as a function with features as argument: + +```julia +m(data) +``` + +# MLJ Interface + +From MLJ, the type can be imported using: + +```julia +NeuroTreeClassifier = @load NeuroTreeClassifier pkg=NeuroTreeModels +``` + +Do `model = NeuroTreeClassifier()` to construct an instance with default hyper-parameters. +Provide keyword arguments to override hyper-parameter defaults, as in `NeuroTreeClassifier(depth=...)`. + +## Training model + +In MLJ or MLJBase, bind an instance `model` to data with + `mach = machine(model, X, y)` where +- `X`: any table of input features (eg, a `DataFrame`) whose columns + each have one of the following element scitypes: `Continuous`, + `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)` +- `y`: is the target, which can be any `AbstractVector` whose element + scitype is `<:Finite`; check the scitype + with `scitype(y)` + +Train the machine using `fit!(mach, rows=...)`. + +## Operations + +- `predict(mach, Xnew)`: return predictions of the target given + features `Xnew` having the same scitype as `X` above. + +## Fitted parameters + +The fields of `fitted_params(mach)` are: + - `:fitresult`: The `NeuroTreeModel` object. + +## Report + +The fields of `report(mach)` are: + - `:features`: The names of the features encountered in training. + +# Examples + +## Internal API + +```julia +using NeuroTreeModels, DataFrames, CategoricalArrays, Random +config = NeuroTreeClassifier(depth=5, nrounds=10) +nobs, nfeats = 1_000, 5 +dtrain = DataFrame(randn(nobs, nfeats), :auto) +dtrain.y = categorical(rand(1:2, nobs)) +feature_names, target_name = names(dtrain, r"x"), "y" +m = fit(config, dtrain; feature_names, target_name) +p = m(dtrain) +``` + +## MLJ Interface + +```julia +using MLJBase, NeuroTreeModels +m = NeuroTreeClassifier(depth=5, nrounds=10) +X, y = @load_crabs +mach = machine(m, X, y) |> fit! +p = predict(mach, X) +``` +""" +function NeuroTreeClassifier(; kwargs...) + + # defaults arguments + args = Dict{Symbol,Any}( + :nrounds => 10, + :lr => 1.0f-2, + :wd => 0.0f0, + :batchsize => 2048, + :actA => :tanh, + :depth => 4, + :ntrees => 64, + :hidden_size => 1, + :stack_size => 1, + :init_scale => 0.1, + :MLE_tree_split => false, + :rng => 123, + ) + + args_ignored = setdiff(keys(kwargs), keys(args)) + args_ignored_str = join(args_ignored, ", ") + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + + args_default = setdiff(keys(args), keys(kwargs)) + args_default_str = join(args_default, ", ") + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)."
+ + args_override = intersect(keys(args), keys(kwargs)) + for arg in args_override + args[arg] = kwargs[arg] + end + + args[:rng] = mk_rng(args[:rng]) + + config = NeuroTreeClassifier( + :mlogloss, + args[:nrounds], + Float32(args[:lr]), + Float32(args[:wd]), + args[:batchsize], + Symbol(args[:actA]), + args[:depth], + args[:ntrees], + args[:hidden_size], + args[:stack_size], + args[:init_scale], + args[:MLE_tree_split], + args[:rng], + ) + + return config +end + +const NeuroTypes = Union{NeuroTreeRegressor,NeuroTreeClassifier} +get_loss_type(config::NeuroTypes) = _loss_type_dict[config.loss] diff --git a/src/loss.jl b/src/loss.jl index 30a716a..24defdd 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -73,5 +73,4 @@ const _loss_fn_dict = Dict( :gaussian_mle => gaussian_mle, ) -get_loss_fn(config::NeuroTreeRegressor) = _loss_fn_dict[config.loss] - +get_loss_fn(config::NeuroTypes) = _loss_fn_dict[config.loss] diff --git a/src/metrics.jl b/src/metrics.jl index 4fecba1..d3aab24 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -151,4 +151,4 @@ is_maximise(::typeof(logloss)) = false is_maximise(::typeof(mlogloss)) = false is_maximise(::typeof(gaussian_mle)) = true -end \ No newline at end of file +end diff --git a/src/model.jl b/src/model.jl index 27e0948..9894f63 100644 --- a/src/model.jl +++ b/src/model.jl @@ -1,216 +1,3 @@ -abstract type LossType end -abstract type MSE <: LossType end -abstract type MAE <: LossType end -abstract type LogLoss <: LossType end -abstract type MLogLoss <: LossType end -abstract type GaussianMLE <: LossType end - -const _loss_type_dict = Dict( - :mse => MSE, - :mae => MAE, - :logloss => LogLoss, - :mlogloss => MLogLoss, - :gaussian_mle => GaussianMLE, -) - -mutable struct NeuroTreeRegressor <: MMI.Deterministic - loss::Symbol - nrounds::Int - lr::Float32 - wd::Float32 - batchsize::Int - actA::Symbol - outsize::Int - depth::Int - ntrees::Int - hidden_size::Int - stack_size::Int - init_scale::Float32 - MLE_tree_split::Bool - rng::Any - device::Symbol - gpuID::Int -end - -""" - NeuroTreeRegressor(;kwargs...) - -A model type for constructing a NeuroTreeRegressor, based on [NeuroTreeModels.jl](https://github.com/Evovest/NeuroTreeModels.jl), and implementing both an internal API and the MLJ model interface. - -# Hyper-parameters - -- `loss=:mse`: Loss to be be minimized during training. One of: - - `:mse` - - `:mae` - - `:logloss` - - `:mlogloss` - - `:gaussian_mle` -- `nrounds=10`: Max number of rounds (epochs). -- `lr=1.0f-2`: Learning rate. Must be > 0. A lower `eta` results in slower learning, typically requiring a higher `nrounds`. -- `wd=0.f0`: Weight decay applied to the gradients by the optimizer. -- `batchsize=2048`: Batch size. -- `actA=:tanh`: Activation function applied to each of input variable for determination of split node weight. Can be one of: - - `:tanh` - - `:identity` -- `outsize=1`: Number of predictions returned by the model. Typically only used for classification tasks and set to the number of target levels / classes. -- `depth=6`: Depth of a tree. Must be >= 1. A tree of depth 1 has 2 prediction leaf nodes. A complete tree of depth N contains `2^N` terminal leaves and `2^N - 1` split nodes. - Compute cost is proportional to `2^depth`. Typical optimal values are in the 3 to 5 range. -- `ntrees=64`: Number of trees (per stack). -- `hidden_size=16`: Size of hidden layers. Applicable only when `stack_size` > 1. -- `stack_size=1`: Number of stacked NeuroTree blocks. -- `init_scale=1.0`: Scaling factor applied to the predictions weights. 
Values in the `]0, 1]` short result in best performance. -- `MLE_tree_split=false`: Whether independent models are buillt for each of the 2 parameters (mu, sigma) of the the `gaussian_mle` loss. -- `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`). -- `device=:cpu`: Device to use. Either `:cpu` or `:gpu` (recommended as it improves significantly the training speed). -- `gpuID=0`: ID of the GPU to use for training. - -# Internal API - -Do `config = NeuroTreeRegressor()` to construct an instance with default hyper-parameters. -Provide keyword arguments to override hyper-parameter defaults, as in NeuroTreeRegressor(loss=...). - -## Training model - -A model is trained using [`fit`](@ref): - -```julia -m = fit(config, dtrain; feature_names, target_name, kwargs...) -``` - -## Inference - -Models act as a functor. returning predictions when called as a function with features as argument: - -```julia -m(data) -``` - -# MLJ Interface - -From MLJ, the type can be imported using: - -```julia -NeuroTreeRegressor = @load NeuroTreeRegressor pkg=NeuroTreeModels -``` - -Do `model = NeuroTreeRegressor()` to construct an instance with default hyper-parameters. -Provide keyword arguments to override hyper-parameter defaults, as in `NeuroTreeRegressor(loss=...)`. - -## Training model - -In MLJ or MLJBase, bind an instance `model` to data with - `mach = machine(model, X, y)` where -- `X`: any table of input features (eg, a `DataFrame`) whose columns - each have one of the following element scitypes: `Continuous`, - `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)` -- `y`: is the target, which can be any `AbstractVector` whose element - scitype is `<:Continuous`; check the scitype - with `scitype(y)` - -Train the machine using `fit!(mach, rows=...)`. - -## Operations - -- `predict(mach, Xnew)`: return predictions of the target given - features `Xnew` having the same scitype as `X` above. - -## Fitted parameters - -The fields of `fitted_params(mach)` are: - - `:fitresult`: The `NeuroTreeModel` object. - -## Report - -The fields of `report(mach)` are: - - `:features`: The names of the features encountered in training. - -# Examples - -## Internal API - -```julia -using NeuroTreeModels, DataFrames -config = NeuroTreeRegressor(depth=5, nrounds=10) -nobs, nfeats = 1_000, 5 -dtrain = DataFrame(randn(nobs, nfeats), :auto) -dtrain.y = rand(nobs) -feature_names, target_name = names(dtrain, r"x"), "y" -m = fit(config, dtrain; feature_names, target_name) -p = m(dtrain) -``` - -## MLJ Interface - -```julia -using MLJBase, NeuroTreeModels -m = NeuroTreeRegressor(depth=5, nrounds=10) -X, y = @load_boston -mach = machine(m, X, y) |> fit! -p = predict(mach, X) -``` -""" -function NeuroTreeRegressor(; kwargs...) - - # defaults arguments - args = Dict{Symbol,Any}( - :loss => :mse, - :nrounds => 10, - :lr => 1.0f-2, - :wd => 0.0f0, - :batchsize => 2048, - :actA => :tanh, - :outsize => 1, - :depth => 4, - :ntrees => 64, - :hidden_size => 1, - :stack_size => 1, - :init_scale => 0.1, - :MLE_tree_split => false, - :rng => 123, - :device => :cpu, - :gpuID => 0, - ) - - args_ignored = setdiff(keys(kwargs), keys(args)) - args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && - @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." 
- - args_default = setdiff(keys(args), keys(kwargs)) - args_default_str = join(args_default, ", ") - length(args_default) > 0 && - @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." - - args_override = intersect(keys(args), keys(kwargs)) - for arg in args_override - args[arg] = kwargs[arg] - end - - args[:rng] = mk_rng(args[:rng]) - - config = NeuroTreeRegressor( - Symbol(args[:loss]), - args[:nrounds], - Float32(args[:lr]), - Float32(args[:wd]), - args[:batchsize], - Symbol(args[:actA]), - args[:outsize], - args[:depth], - args[:ntrees], - args[:hidden_size], - args[:stack_size], - args[:init_scale], - args[:MLE_tree_split], - args[:rng], - Symbol(args[:device]), - args[:gpuID], - ) - - return config -end - -get_loss_type(config::NeuroTreeRegressor) = _loss_type_dict[config.loss] struct NeuroTree{W,B,P} w::W @@ -245,7 +32,8 @@ end dot_prod_agg(lw, p) = dropdims(sum(reshape(lw, 1, size(lw)...) .* p, dims=(2, 3)), dims=(2, 3)) """ - NeuroTree + NeuroTree(; ins, outs, depth=4, ntrees=64, actA=identity, init_scale=1.0) + NeuroTree((ins, outs)::Pair{<:Integer,<:Integer}; depth=4, ntrees=64, actA=identity, init_scale=1.0) Initialization of a NeuroTree. """ @@ -327,7 +115,6 @@ end # return p # end - """ NeuroTreeModel A NeuroTreeModel is made of a collection of Tree, either regular `NeuroTree` or `StackTree`. @@ -367,21 +154,21 @@ const _act_dict = Dict( :hardsigmoid => hardsigmoid ) -function get_model_chain(L; config, nfeats) +function get_model_chain(L; config, nfeats, outsize) if L <: GaussianMLE && config.MLE_tree_split chain = Chain( BatchNorm(nfeats), Parallel( vcat, - StackTree(nfeats => config.outsize; + StackTree(nfeats => outsize; depth=config.depth, ntrees=config.ntrees, stack_size=config.stack_size, hidden_size=config.hidden_size, actA=_act_dict[config.actA], init_scale=config.init_scale), - StackTree(nfeats => config.outsize; + StackTree(nfeats => outsize; depth=config.depth, ntrees=config.ntrees, stack_size=config.stack_size, @@ -391,7 +178,7 @@ function get_model_chain(L; config, nfeats) ) ) else - outsize = L <: GaussianMLE ? 2 * config.outsize : config.outsize + outsize = L <: GaussianMLE ? 
2 * outsize : outsize chain = Chain( BatchNorm(nfeats), StackTree(nfeats => outsize; diff --git a/test/MLJ.jl b/test/MLJ.jl index 3567ebb..78d01ca 100644 --- a/test/MLJ.jl +++ b/test/MLJ.jl @@ -14,6 +14,27 @@ sigmoid(x::AbstractVector) = sigmoid.(x) ) @test isempty(failures) end + @testset "NeuroTreeClassifier" begin + + failures, summary = MLJTestInterface.test( + [NeuroTreeClassifier], + MLJTestInterface.make_binary()...; + mod=@__MODULE__, + verbosity=0, # bump to debug + throw=true # set to true to debug + ) + @test isempty(failures) + + failures, summary = MLJTestInterface.test( + [NeuroTreeClassifier], + MLJTestInterface.make_multiclass()...; + mod=@__MODULE__, + verbosity=0, # bump to debug + throw=true # set to true to debug + ) + @test isempty(failures) + + end end ################################################## @@ -70,4 +91,39 @@ end predict(mach, X) end -MLJTestInterface.make_regression() +@testset "MLJ - classification" begin + X, y = @load_crabs + + tree_model = NeuroTreeClassifier( + depth=4, + lr=0.1, + nrounds=20, + batchsize=64 + ) + + # @load EvoTreeRegressor + mach = machine(tree_model, X, y) + train, test = partition(eachindex(y), 0.7, shuffle=true) # 70:30 split + fit!(mach, rows=train, verbosity=1) + + mach.model.nrounds += 50 + fit!(mach, rows=train, verbosity=1) + + pred_train = predict(mach, selectrows(X, train)) + pred_train_mode = predict_mode(mach, selectrows(X, train)) + sum(pred_train_mode .== y[train]) / length(y[train]) + + pred_test = predict(mach, selectrows(X, test)) + pred_test_mode = predict_mode(mach, selectrows(X, test)) + pred_test_mode = predict_mode(mach, selectrows(X, test)) + sum(pred_test_mode .== y[test]) / length(y[test]) +end + +@testset "MLJ - support for ordered factor predictions" begin + X = (; x=rand(10)) + y = coerce(rand("ab", 10), OrderedFactor) + model = NeuroTreeClassifier() + mach = machine(model, X, y) |> fit! 
+ yhat = predict(mach, X) + @assert isordered(yhat) +end diff --git a/test/core.jl b/test/core.jl index 9f8bf9f..34ded19 100644 --- a/test/core.jl +++ b/test/core.jl @@ -1,7 +1,6 @@ -@testset "Regression test" begin +@testset "Core - internals test" begin config = NeuroTreeRegressor( - device=:cpu, loss=:mse, actA=:identity, init_scale=1.0, @@ -10,7 +9,6 @@ ntrees=32, stack_size=1, hidden_size=1, - outsize=1, batchsize=2048, lr=1e-3, ) @@ -21,11 +19,12 @@ x = rand(Float32, nfeats, nobs) feature_names = "var_" .* string.(1:nobs) + outsize = 1 loss = NeuroTreeModels.get_loss_fn(config) L = NeuroTreeModels.get_loss_type(config) - chain = NeuroTreeModels.get_model_chain(L; config, nfeats) + chain = NeuroTreeModels.get_model_chain(L; config, nfeats, outsize) info = Dict( - :device => config.device, + :device => :cpu, :nrounds => 0, :feature_names => feature_names ) @@ -33,12 +32,52 @@ end +@testset "Core - Regression" begin + + Random.seed!(123) + X, y = rand(1000, 10), randn(1000) + df = DataFrame(X, :auto) + df[!, :y] = y + target_name = "y" + feature_names = setdiff(names(df), [target_name]) + + train_ratio = 0.8 + train_indices = randperm(nrow(df))[1:Int(train_ratio * nrow(df))] + + dtrain = df[train_indices, :] + deval = df[setdiff(1:nrow(df), train_indices), :] + + config = NeuroTreeRegressor( + loss=:mse, + nrounds=20, + depth=3, + lr=1e-1, + ) + + m = NeuroTreeModels.fit( + config, + dtrain; + target_name, + feature_names + ) + + m = NeuroTreeModels.fit( + config, + dtrain; + target_name, + feature_names, + deval, + metric=:mse + ) + +end + @testset "Classification test" begin Random.seed!(123) X, y = @load_crabs df = DataFrame(X) - df[!, :class] .= levelcode.(y) + df[!, :class] = y target_name = "class" feature_names = setdiff(names(df), [target_name]) @@ -48,13 +87,11 @@ end dtrain = df[train_indices, :] deval = df[setdiff(1:nrow(df), train_indices), :] - config = NeuroTreeRegressor( - device=:cpu, - loss=:mlogloss, + config = NeuroTreeClassifier( nrounds=100, - outsize=3, - depth=3, - lr=1e-1, + depth=4, + lr=3e-2, + batchsize=64 ) m = NeuroTreeModels.fit( @@ -64,14 +101,15 @@ end target_name, feature_names, metric=:mlogloss, - print_every_n=10, - early_stopping_rounds=2, + early_stopping_rounds=10, + # print_every_n=10, + device=:cpu ) # Predictions depend on the number of samples in the dataset ptrain = [argmax(x) for x in eachrow(m(dtrain))] peval = [argmax(x) for x in eachrow(m(deval))] - @test mean(ptrain .== dtrain.class) > 0.95 - @test mean(peval .== deval.class) > 0.95 + @test mean(ptrain .== levelcode.(dtrain.class)) > 0.95 + @test mean(peval .== levelcode.(deval.class)) > 0.95 -end \ No newline at end of file +end From e73cc6c1617f3c65cf3d8231af34e68de8f95fed Mon Sep 17 00:00:00 2001 From: jeremie Date: Sun, 21 Apr 2024 13:02:21 -0400 Subject: [PATCH 4/6] clean docs --- docs/src/design.md | 2 +- docs/src/tutorials-classification-iris.md | 21 +++++++++++---------- docs/src/tutorials-logistic-titanic.md | 2 +- docs/src/tutorials-regression-boston.md | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/docs/src/design.md b/docs/src/design.md index 9a025a6..3c3a5d0 100644 --- a/docs/src/design.md +++ b/docs/src/design.md @@ -42,7 +42,7 @@ The following illustrate how a basic decision tree is represented as a single di To illustrate how a NeuroTree derives the soft decision probability (referred to `NW1 - NW3` in the above figure), we first break down how a traditional tree split condition is derived from 2 underlying decisions: -1. 
*Selection of the feature on which to perform the condition*. +1. *Selection of the feature on which to apply the condition*. Such selection can be represented as the application of a binary mask where all elements are set to `false` except for that single selected feature where it's set to `true`. 2. *Selection of the condition's threshold value*. diff --git a/docs/src/tutorials-classification-iris.md b/docs/src/tutorials-classification-iris.md index 474c5b3..fc06cd4 100644 --- a/docs/src/tutorials-classification-iris.md +++ b/docs/src/tutorials-classification-iris.md @@ -19,12 +19,12 @@ Random.seed!(123) ## Preprocessing Before we can train our model, we need to preprocess the dataset. We will convert the class variable, which specifies the type of iris flower, into a categorical variable. +For classification tasks, it's a requirement that `eltype(target_var)<:CategoricalValue`. ```julia df = MLDatasets.Iris().dataframe df[!, :class] = categorical(df[!, :class]) -df[!, :class] .= levelcode.(df[!, :class]) target_name = "class" feature_names = setdiff(names(df), [target_name]) @@ -37,17 +37,15 @@ deval = df[setdiff(1:nrow(df), train_indices), :] ## Training -Now we are ready to train our model. We first define a model configuration using the [`NeuroTreeRegressor`](@ref) model constructor. +Now we are ready to train our model. We first define a model configuration using the [`NeuroTreeClassifier`](@ref) model constructor. Then, we use [`NeuroTreeModels.fit`](@ref) to train a boosted tree model. We pass the optional `deval` argument to enable the usage of early stopping. ```julia -config = NeuroTreeRegressor( - device=:cpu, - loss=:mlogloss, +config = NeuroTreeClassifier( nrounds=400, - outsize=3, depth=4, - lr=2e-2, + lr=5e-2, + batchsize=60, ) m = NeuroTreeModels.fit( @@ -71,10 +69,13 @@ p_train = m(dtrain) p_eval = m(deval) ``` +Note that the raw predictions for a classification task a `Matrix` where each row is the vector of probability for each of the target levels. +It can be converted into a predicted class index using `NeuroTreeModels.onecold` (imported from Flux), or `[argmax(p) for p in eachrow(p_train)]`. + ```julia-repl -julia> mean(dtrain[!, target_name] .== NeuroTreeModels.onecold(p_train')) -0.9833333333333333 +julia> mean(levelcode.(dtrain[!, target_name]) .== NeuroTreeModels.onecold(p_train')) +0.975 -julia> mean(deval[!, target_name] .== NeuroTreeModels.onecold(p_eval')) +julia> mean(levelcode.(deval[!, target_name]) .== NeuroTreeModels.onecold(p_eval')) 1.0 ``` \ No newline at end of file diff --git a/docs/src/tutorials-logistic-titanic.md b/docs/src/tutorials-logistic-titanic.md index 1b5d7f3..4e83c79 100644 --- a/docs/src/tutorials-logistic-titanic.md +++ b/docs/src/tutorials-logistic-titanic.md @@ -64,7 +64,6 @@ Then, we use [`NeuroTreeModels.fit`](@ref) to train a boosted tree model. We pas ```julia config = NeuroTreeRegressor( - device=:cpu, loss=:logloss, nrounds=400, depth=4, @@ -80,6 +79,7 @@ m = NeuroTreeModels.fit( metric=:logloss, print_every_n=10, early_stopping_rounds=2, + device=:cpu ) ``` diff --git a/docs/src/tutorials-regression-boston.md b/docs/src/tutorials-regression-boston.md index 2a9a2d0..5275c68 100644 --- a/docs/src/tutorials-regression-boston.md +++ b/docs/src/tutorials-regression-boston.md @@ -44,7 +44,6 @@ Then, we use [`NeuroTreeModels.fit`](@ref) to train a boosted tree model. 
We pas ```julia config = NeuroTreeRegressor( - device=:cpu, loss=:mse, nrounds=400, depth=5, @@ -60,6 +59,7 @@ m = NeuroTreeModels.fit( metric=:mse, print_every_n=10, early_stopping_rounds=2, + device=:cpu ) ``` From 4c87703565b52acf536c648a807c40ac495ad2c9 Mon Sep 17 00:00:00 2001 From: "jeremie.desgagne.bouchard" Date: Sun, 21 Apr 2024 13:06:46 -0400 Subject: [PATCH 5/6] tweedie --- README.md | 2 +- benchmarks/MSRank-tweedie.jl | 89 ------------------------------------ benchmarks/MSRank.jl | 6 +-- benchmarks/YEAR-tweedie.jl | 82 +++++++++++++++++++++++++++++++++ src/infer.jl | 10 ++++ src/loss.jl | 6 +-- src/metrics.jl | 6 +-- 7 files changed, 101 insertions(+), 100 deletions(-) delete mode 100644 benchmarks/MSRank-tweedie.jl create mode 100644 benchmarks/YEAR-tweedie.jl diff --git a/README.md b/README.md index 8a6234e..08d007b 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ ## Installation -```julia-repl +```julia ] add NeuroTreeModels ``` diff --git a/benchmarks/MSRank-tweedie.jl b/benchmarks/MSRank-tweedie.jl deleted file mode 100644 index 18dd3fd..0000000 --- a/benchmarks/MSRank-tweedie.jl +++ /dev/null @@ -1,89 +0,0 @@ -using Revise -using Random -using CSV -using DataFrames -using StatsBase -using Statistics: mean, std -using NeuroTreeModels -using Solage: Connectors -using ReadLIBSVM -using AWS: AWSCredentials, AWSConfig, @service - -# https://www.microsoft.com/en-us/research/project/mslr/ - -@service S3 -aws_creds = AWSCredentials(ENV["AWS_ACCESS_KEY_ID_JDB"], ENV["AWS_SECRET_ACCESS_KEY_JDB"]) -aws_config = AWSConfig(; creds=aws_creds, region="ca-central-1") -bucket = "jeremiedb" - -# initial prep -function read_libsvm_aws(file::String; has_query=false, aws_config=AWSConfig()) - raw = S3.get_object("jeremiedb", file, Dict("response-content-type" => "application/octet-stream"); aws_config) - return read_libsvm(raw; has_query) -end - -@time train_raw = read_libsvm_aws("share/data/msrank/train.txt"; has_query=true, aws_config); -@time eval_raw = read_libsvm_aws("share/data/msrank/vali.txt"; has_query=true, aws_config); -@time test_raw = read_libsvm_aws("share/data/msrank/test.txt"; has_query=true, aws_config); - -dtrain = DataFrame(train_raw[:x], :auto) -dtrain.y_raw = train_raw[:y] -dtrain.y = dtrain.y_raw ./ 4 -dtrain.q = train_raw[:q] - -deval = DataFrame(eval_raw[:x], :auto) -deval.y_raw = eval_raw[:y] -deval.y = deval.y_raw ./ 4 -deval.q = eval_raw[:q] - -dtest = DataFrame(test_raw[:x], :auto) -dtest.y_raw = test_raw[:y] -dtest.y = dtest.y_raw ./ 4 -dtest.q = test_raw[:q] - -feature_names = setdiff(names(dtrain), ["y", "y_raw", "q"]) -target_name = "y_raw" - -function percent_rank(x::AbstractVector{T}) where {T} - return tiedrank(x) / (length(x) + 1) -end - -transform!(dtrain, feature_names .=> percent_rank .=> feature_names) -transform!(deval, feature_names .=> percent_rank .=> feature_names) -transform!(dtest, feature_names .=> percent_rank .=> feature_names) - -config = NeuroTreeRegressor( - device=:gpu, - loss=:tweedie_deviance, - nrounds=2, - actA=:tanh, - outsize=1, - depth=4, - ntrees=64, - stack_size=2, - hidden_size=16, - batchsize=4096, - lr=3e-4, -) - -@time m, logger = NeuroTreeModels.fit( - config, - dtrain; - deval, - target_name, - feature_names, - print_every_n=1, - early_stopping_rounds=3, - metric=:tweedie_deviance, - return_logger=true -); - -dinfer_eval = NeuroTreeModels.get_df_loader_infer(deval; feature_names, batchsize=config.batchsize, device=config.device); -p_eval = m(dinfer_eval); -mse_eval = mean((p_eval .- deval.y_raw) .^ 2) -@info "MSE - 
deval" mse_eval - -dinfer_test = NeuroTreeModels.get_df_loader_infer(dtest; feature_names, batchsize=config.batchsize, device=config.device); -p_test = m(dinfer_test); -mse_test = mean((p_test .- dtest.y_raw) .^ 2) -@info "MSE - dtest" mse_test diff --git a/benchmarks/MSRank.jl b/benchmarks/MSRank.jl index 58fa386..572cae1 100644 --- a/benchmarks/MSRank.jl +++ b/benchmarks/MSRank.jl @@ -78,12 +78,10 @@ config = NeuroTreeRegressor( return_logger=true ); -dinfer_eval = NeuroTreeModels.get_df_loader_infer(deval; feature_names, batchsize=config.batchsize, device=config.device); -p_eval = m(dinfer_eval); +p_eval = m(deval); mse_eval = mean((p_eval .- deval.y_raw) .^ 2) @info "MSE - deval" mse_eval -dinfer_test = NeuroTreeModels.get_df_loader_infer(dtest; feature_names, batchsize=config.batchsize, device=config.device); -p_test = m(dinfer_test); +p_test = m(dtest); mse_test = mean((p_test .- dtest.y_raw) .^ 2) @info "MSE - dtest" mse_test diff --git a/benchmarks/YEAR-tweedie.jl b/benchmarks/YEAR-tweedie.jl new file mode 100644 index 0000000..61c8009 --- /dev/null +++ b/benchmarks/YEAR-tweedie.jl @@ -0,0 +1,82 @@ +##################################################################### +# WIP: need to adapt the fit! function to support normal DataFrame (not just GroupedOne) +# Have dataloader adapted to DF vs GDF (both at fit init and callback init) +##################################################################### + +using Revise +using Random +using CSV +using DataFrames +using StatsBase +using Statistics: mean, std +using NeuroTreeModels + +using AWS: AWSCredentials, AWSConfig, @service +@service S3 +aws_creds = AWSCredentials(ENV["AWS_ACCESS_KEY_ID_JDB"], ENV["AWS_SECRET_ACCESS_KEY_JDB"]) +aws_config = AWSConfig(; creds=aws_creds, region="ca-central-1") + +path = "share/data/year/year.csv" +raw = S3.get_object("jeremiedb", path, Dict("response-content-type" => "application/octet-stream"); aws_config) +df = DataFrame(CSV.File(raw, header=false)) +df_tot = copy(df) + +path = "share/data/year/year-train-idx.txt" +raw = S3.get_object("jeremiedb", path, Dict("response-content-type" => "application/octet-stream"); aws_config) +train_idx = DataFrame(CSV.File(raw, header=false))[:, 1] .+ 1 + +path = "share/data/year/year-eval-idx.txt" +raw = S3.get_object("jeremiedb", path, Dict("response-content-type" => "application/octet-stream"); aws_config) +eval_idx = DataFrame(CSV.File(raw, header=false))[:, 1] .+ 1 + +transform!(df_tot, "Column1" => identity => "y_raw") +transform!(df_tot, "y_raw" => (x -> (x .- minimum(x)) ./ std(x)) => "y_norm") +select!(df_tot, Not("Column1")) +feature_names = setdiff(names(df_tot), ["y_raw", "y_norm"]) +target_name = "y_norm" + +function percent_rank(x::AbstractVector{T}) where {T} + return tiedrank(x) / (length(x) + 1) +end + +transform!(df_tot, feature_names .=> percent_rank .=> feature_names) + +dtrain = df_tot[train_idx, :]; +deval = df_tot[eval_idx, :]; +dtest = df_tot[(end-51630+1):end, :]; + +config = NeuroTreeRegressor( + device=:gpu, + loss=:tweedie_deviance, + actA=:tanh, + nrounds=200, + outsize=1, + depth=4, + ntrees=64, + hidden_size=8, + stack_size=1, + init_scale=1.0, + MLE_tree_split=true, + batchsize=2048, + lr=1e-3, +) + +@time m, logger = NeuroTreeModels.fit( + config, + dtrain; + deval, + target_name, + feature_names, + print_every_n=5, + early_stopping_rounds=2, + metric=:tweedie_deviance, + return_logger=true +); + +p_eval = m(deval); +mse_eval = mean((p_eval .- deval.y_norm) .^ 2) +@info "MSE raw - deval" mse_eval + +p_test = m(dtest); +mse_test = 
mean((p_test .- dtest.y_norm) .^ 2) * std(df_tot.y_raw)^2 +@info "MSE - dtest" mse_test diff --git a/src/infer.jl b/src/infer.jl index 06d2e29..72a3b73 100644 --- a/src/infer.jl +++ b/src/infer.jl @@ -54,3 +54,13 @@ function infer(m::NeuroTreeModel{<:GaussianMLE}, data::DL) p[:, 2] .= exp.(p[:, 2]) # reproject log(σ) into σ return p end + +function infer(m::NeuroTreeModel{L}, data::DL) where {L<:Union{TweedieDeviance}} + preds = Vector{Float32}[] + for x in data + push!(preds, Vector(m(x))) + end + p = vcat(preds...) + p .= exp.(p) + return p +end diff --git a/src/loss.jl b/src/loss.jl index f87c1e6..13bcc7a 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -56,17 +56,17 @@ end function mlogloss(m, x, y) p = logsoftmax(m(x); dims=1) k = size(p, 1) - mean(-sum(onehotbatch(y, 1:k) .* p; dims=1)) + mean(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1)) end function mlogloss(m, x, y, w) p = logsoftmax(m(x); dims=1) k = size(p, 1) - sum(-sum(onehotbatch(y, 1:k) .* p; dims=1) .* w) / sum(w) + sum(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1) .* w) / sum(w) end function mlogloss(m, x, y, w, offset) p = logsoftmax(m(x) .+ offset; dims=1) k = size(p, 1) - sum(-sum(onehotbatch(y, 1:k) .* p; dims=1) .* w) / sum(w) + sum(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1) .* w) / sum(w) end gaussian_mle_loss(μ::AbstractVector{T}, σ::AbstractVector{T}, y::AbstractVector{T}) where {T} = diff --git a/src/metrics.jl b/src/metrics.jl index 60c59d8..cad5786 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -100,21 +100,21 @@ end function mlogloss(m, x, y; agg=mean) p = logsoftmax(m(x); dims=1) k = size(p, 1) - raw = dropdims(-sum(onehotbatch(y, 1:k) .* p; dims=1); dims=1) + raw = dropdims(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1); dims=1) metric = agg(raw) return metric end function mlogloss(m, x, y, w; agg=mean) p = logsoftmax(m(x); dims=1) k = size(p, 1) - raw = dropdims(-sum(onehotbatch(y, 1:k) .* p; dims=1); dims=1) + raw = dropdims(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1); dims=1) metric = agg(raw .* w) return metric end function mlogloss(m, x, y, w, offset; agg=mean) p = logsoftmax(m(x) .+ offset; dims=1) k = size(p, 1) - raw = dropdims(-sum(onehotbatch(y, 1:k) .* p; dims=1); dims=1) + raw = dropdims(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1); dims=1) metric = agg(raw .* w) return metric end From f89699872159a57ad3d769b4d2921d22178947d1 Mon Sep 17 00:00:00 2001 From: "jeremie.desgagne.bouchard" Date: Sun, 21 Apr 2024 13:39:15 -0400 Subject: [PATCH 6/6] cleanup --- benchmarks/Higgs-logloss.jl | 5 +- benchmarks/MSRank.jl | 6 +- benchmarks/YEAR-gaussian.jl | 6 +- benchmarks/YEAR-mse.jl | 6 +- benchmarks/YEAR-tweedie.jl | 14 +- benchmarks/Yahoo-LTRC.jl | 6 +- benchmarks/aicrowd-test.jl | 4 +- src/learners.jl | 272 ++++++++++++++++++------------------ src/loss.jl | 6 +- src/metrics.jl | 6 +- 10 files changed, 158 insertions(+), 173 deletions(-) diff --git a/benchmarks/Higgs-logloss.jl b/benchmarks/Higgs-logloss.jl index 40aa13c..7690807 100644 --- a/benchmarks/Higgs-logloss.jl +++ b/benchmarks/Higgs-logloss.jl @@ -31,7 +31,6 @@ deval = df_tot[end-1_000_000+1:end-500_000, :]; dtest = df_tot[end-500_000+1:end, :]; config = NeuroTreeRegressor( - device=:gpu, loss=:logloss, nrounds=200, scaler=true, @@ -44,7 +43,7 @@ config = NeuroTreeRegressor( batchsize=8092, ) -@time m, logger = NeuroTreeModels.fit( +@time m = NeuroTreeModels.fit( config, dtrain; deval, @@ -53,7 +52,7 @@ config = NeuroTreeRegressor( print_every_n=1, early_stopping_rounds=2, metric=:logloss, - return_logger=true + device=:gpu, 
); dinfer_eval = NeuroTreeModels.get_df_loader_infer(deval; feature_names, batchsize=config.batchsize, device=config.device); diff --git a/benchmarks/MSRank.jl b/benchmarks/MSRank.jl index 572cae1..dc5248f 100644 --- a/benchmarks/MSRank.jl +++ b/benchmarks/MSRank.jl @@ -53,11 +53,9 @@ transform!(deval, feature_names .=> percent_rank .=> feature_names) transform!(dtest, feature_names .=> percent_rank .=> feature_names) config = NeuroTreeRegressor( - device=:gpu, loss=:mse, nrounds=2, actA=:tanh, - outsize=1, depth=4, ntrees=64, stack_size=2, @@ -66,7 +64,7 @@ config = NeuroTreeRegressor( lr=3e-4, ) -@time m, logger = NeuroTreeModels.fit( +@time m = NeuroTreeModels.fit( config, dtrain; deval, @@ -75,7 +73,7 @@ config = NeuroTreeRegressor( print_every_n=1, early_stopping_rounds=3, metric=:mse, - return_logger=true + device=:gpu, ); p_eval = m(deval); diff --git a/benchmarks/YEAR-gaussian.jl b/benchmarks/YEAR-gaussian.jl index 6f7a55e..6276faa 100644 --- a/benchmarks/YEAR-gaussian.jl +++ b/benchmarks/YEAR-gaussian.jl @@ -47,11 +47,9 @@ deval = df_tot[eval_idx, :]; dtest = df_tot[(end-51630+1):end, :]; config = NeuroTreeRegressor( - device=:gpu, loss=:gaussian_mle, actA=:identity, nrounds=200, - outsize=1, depth=4, ntrees=32, hidden_size=8, @@ -62,7 +60,7 @@ config = NeuroTreeRegressor( lr=1e-3, ) -@time m, logger = NeuroTreeModels.fit( +@time m = NeuroTreeModels.fit( config, dtrain; deval, @@ -71,7 +69,7 @@ config = NeuroTreeRegressor( print_every_n=5, early_stopping_rounds=2, metric=:gaussian_mle, - return_logger=true + device=:gpu ); # dinfer_eval = NeuroTrees.get_df_loader_infer(deval; feature_names, batchsize=config.batchsize, device=config.device); diff --git a/benchmarks/YEAR-mse.jl b/benchmarks/YEAR-mse.jl index a7d9d67..76e5e83 100644 --- a/benchmarks/YEAR-mse.jl +++ b/benchmarks/YEAR-mse.jl @@ -42,7 +42,6 @@ deval = df_tot[eval_idx, :]; dtest = df_tot[(end-51630+1):end, :]; config = NeuroTreeRegressor( - device=:gpu, loss=:mse, actA=:identity, init_scale=1.0, @@ -51,12 +50,11 @@ config = NeuroTreeRegressor( ntrees=32, stack_size=1, hidden_size=1, - outsize=1, batchsize=2048, lr=3e-4, ) -@time m, logger = NeuroTreeModels.fit( +@time m = NeuroTreeModels.fit( config, dtrain; deval, @@ -65,7 +63,7 @@ config = NeuroTreeRegressor( print_every_n=5, early_stopping_rounds=2, metric=:mse, - return_logger=true + device=:gpu ); # nfeats = length(feature_names) diff --git a/benchmarks/YEAR-tweedie.jl b/benchmarks/YEAR-tweedie.jl index 61c8009..f928a40 100644 --- a/benchmarks/YEAR-tweedie.jl +++ b/benchmarks/YEAR-tweedie.jl @@ -46,22 +46,16 @@ deval = df_tot[eval_idx, :]; dtest = df_tot[(end-51630+1):end, :]; config = NeuroTreeRegressor( - device=:gpu, loss=:tweedie_deviance, - actA=:tanh, + actA=:identity, nrounds=200, - outsize=1, depth=4, - ntrees=64, - hidden_size=8, - stack_size=1, - init_scale=1.0, - MLE_tree_split=true, + ntrees=32, batchsize=2048, lr=1e-3, ) -@time m, logger = NeuroTreeModels.fit( +@time m = NeuroTreeModels.fit( config, dtrain; deval, @@ -70,7 +64,7 @@ config = NeuroTreeRegressor( print_every_n=5, early_stopping_rounds=2, metric=:tweedie_deviance, - return_logger=true + device=:gpu ); p_eval = m(deval); diff --git a/benchmarks/Yahoo-LTRC.jl b/benchmarks/Yahoo-LTRC.jl index b9111c2..02e68a8 100644 --- a/benchmarks/Yahoo-LTRC.jl +++ b/benchmarks/Yahoo-LTRC.jl @@ -94,7 +94,6 @@ target_name = "y" # training ##################################### config = NeuroTreeRegressor( - device=:gpu, loss=:logloss, nrounds=400, actA=:identity, @@ -102,7 +101,6 @@ config = 
NeuroTreeRegressor( scaler=true, depth=4, ntrees=256, - outsize=1, hidden_size=1, stack_size=1, batchsize=1024, @@ -110,7 +108,7 @@ config = NeuroTreeRegressor( lr=3e-4, ) -@time m, logger = NeuroTreeModels.fit( +@time m = NeuroTreeModels.fit( config, dtrain; deval, @@ -119,7 +117,7 @@ config = NeuroTreeRegressor( print_every_n=5, early_stopping_rounds=3, metric=:logloss, - return_logger=true + device=:gpu, ); dinfer = NeuroTreeModels.get_df_loader_infer(dtest; feature_names, batchsize=config.batchsize, device=config.device); diff --git a/benchmarks/aicrowd-test.jl b/benchmarks/aicrowd-test.jl index cc2269c..d9155d8 100644 --- a/benchmarks/aicrowd-test.jl +++ b/benchmarks/aicrowd-test.jl @@ -51,15 +51,12 @@ y_train = Vector{Float32}(df_train[:, target]) y_eval = Vector{Float32}(df_eval[:, target]) config = NeuroTreeRegressor( - device = :gpu, loss = :logloss, nrounds = 100, actA = :tanh, scaler = false, - outsize = 1, depth = 4, ntrees = 32, - masks = nothing, batchsize = 4096, rng = 123, opt = Dict("type" => "nadam", "lr" => 3e-2, "rho" => 0.9), @@ -75,6 +72,7 @@ config = NeuroTreeRegressor( early_stopping_rounds = 5, print_every_n = 1, metric = :logloss, + device = :gpu, ); using CUDA diff --git a/src/learners.jl b/src/learners.jl index 4a5a75f..5c95eee 100644 --- a/src/learners.jl +++ b/src/learners.jl @@ -4,29 +4,31 @@ abstract type MAE <: LossType end abstract type LogLoss <: LossType end abstract type MLogLoss <: LossType end abstract type GaussianMLE <: LossType end +abstract type TweedieDeviance <: LossType end const _loss_type_dict = Dict( - :mse => MSE, - :mae => MAE, - :logloss => LogLoss, - :gaussian_mle => GaussianMLE, - :mlogloss => MLogLoss + :mse => MSE, + :mae => MAE, + :logloss => LogLoss, + :tweedie_deviance => TweedieDeviance, + :gaussian_mle => GaussianMLE, + :mlogloss => MLogLoss ) mutable struct NeuroTreeRegressor <: MMI.Deterministic - loss::Symbol - nrounds::Int - lr::Float32 - wd::Float32 - batchsize::Int - actA::Symbol - depth::Int - ntrees::Int - hidden_size::Int - stack_size::Int - init_scale::Float32 - MLE_tree_split::Bool - rng::Any + loss::Symbol + nrounds::Int + lr::Float32 + wd::Float32 + batchsize::Int + actA::Symbol + depth::Int + ntrees::Int + hidden_size::Int + stack_size::Int + init_scale::Float32 + MLE_tree_split::Bool + rng::Any end """ @@ -145,77 +147,77 @@ p = predict(mach, X) """ function NeuroTreeRegressor(; kwargs...) - # defaults arguments - args = Dict{Symbol,Any}( - :loss => :mse, - :nrounds => 10, - :lr => 1.0f-2, - :wd => 0.0f0, - :batchsize => 2048, - :actA => :tanh, - :depth => 4, - :ntrees => 64, - :hidden_size => 1, - :stack_size => 1, - :init_scale => 0.1, - :MLE_tree_split => false, - :rng => 123, - ) - - args_ignored = setdiff(keys(kwargs), keys(args)) - args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && - @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." - - args_default = setdiff(keys(args), keys(kwargs)) - args_default_str = join(args_default, ", ") - length(args_default) > 0 && - @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." 
- - args_override = intersect(keys(args), keys(kwargs)) - for arg in args_override - args[arg] = kwargs[arg] - end - - loss = Symbol(args[:loss]) - loss ∉ [:mse, :mae, :logloss, :gaussian_mle] && error("The provided kwarg `loss`: $loss is not supported.") - - args[:rng] = mk_rng(args[:rng]) - - config = NeuroTreeRegressor( - args[:loss], - args[:nrounds], - Float32(args[:lr]), - Float32(args[:wd]), - args[:batchsize], - Symbol(args[:actA]), - args[:depth], - args[:ntrees], - args[:hidden_size], - args[:stack_size], - args[:init_scale], - args[:MLE_tree_split], - args[:rng] - ) - - return config + # defaults arguments + args = Dict{Symbol,Any}( + :loss => :mse, + :nrounds => 10, + :lr => 1.0f-2, + :wd => 0.0f0, + :batchsize => 2048, + :actA => :tanh, + :depth => 4, + :ntrees => 64, + :hidden_size => 1, + :stack_size => 1, + :init_scale => 0.1, + :MLE_tree_split => false, + :rng => 123, + ) + + args_ignored = setdiff(keys(kwargs), keys(args)) + args_ignored_str = join(args_ignored, ", ") + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + + args_default = setdiff(keys(args), keys(kwargs)) + args_default_str = join(args_default, ", ") + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + + args_override = intersect(keys(args), keys(kwargs)) + for arg in args_override + args[arg] = kwargs[arg] + end + + loss = Symbol(args[:loss]) + loss ∉ [:mse, :mae, :logloss, :gaussian_mle, :tweedie_deviance] && error("The provided kwarg `loss`: $loss is not supported.") + + args[:rng] = mk_rng(args[:rng]) + + config = NeuroTreeRegressor( + args[:loss], + args[:nrounds], + Float32(args[:lr]), + Float32(args[:wd]), + args[:batchsize], + Symbol(args[:actA]), + args[:depth], + args[:ntrees], + args[:hidden_size], + args[:stack_size], + args[:init_scale], + args[:MLE_tree_split], + args[:rng] + ) + + return config end mutable struct NeuroTreeClassifier <: MMI.Probabilistic - loss::Symbol - nrounds::Int - lr::Float32 - wd::Float32 - batchsize::Int - actA::Symbol - depth::Int - ntrees::Int - hidden_size::Int - stack_size::Int - init_scale::Float32 - MLE_tree_split::Bool - rng::Any + loss::Symbol + nrounds::Int + lr::Float32 + wd::Float32 + batchsize::Int + actA::Symbol + depth::Int + ntrees::Int + hidden_size::Int + stack_size::Int + init_scale::Float32 + MLE_tree_split::Bool + rng::Any end """ @@ -328,56 +330,56 @@ p = predict(mach, X) """ function NeuroTreeClassifier(; kwargs...) - # defaults arguments - args = Dict{Symbol,Any}( - :nrounds => 10, - :lr => 1.0f-2, - :wd => 0.0f0, - :batchsize => 2048, - :actA => :tanh, - :depth => 4, - :ntrees => 64, - :hidden_size => 1, - :stack_size => 1, - :init_scale => 0.1, - :MLE_tree_split => false, - :rng => 123, - ) - - args_ignored = setdiff(keys(kwargs), keys(args)) - args_ignored_str = join(args_ignored, ", ") - length(args_ignored) > 0 && - @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." - - args_default = setdiff(keys(args), keys(kwargs)) - args_default_str = join(args_default, ", ") - length(args_default) > 0 && - @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." 
- - args_override = intersect(keys(args), keys(kwargs)) - for arg in args_override - args[arg] = kwargs[arg] - end - - args[:rng] = mk_rng(args[:rng]) - - config = NeuroTreeClassifier( - :mlogloss, - args[:nrounds], - Float32(args[:lr]), - Float32(args[:wd]), - args[:batchsize], - Symbol(args[:actA]), - args[:depth], - args[:ntrees], - args[:hidden_size], - args[:stack_size], - args[:init_scale], - args[:MLE_tree_split], - args[:rng], - ) - - return config + # defaults arguments + args = Dict{Symbol,Any}( + :nrounds => 10, + :lr => 1.0f-2, + :wd => 0.0f0, + :batchsize => 2048, + :actA => :tanh, + :depth => 4, + :ntrees => 64, + :hidden_size => 1, + :stack_size => 1, + :init_scale => 0.1, + :MLE_tree_split => false, + :rng => 123, + ) + + args_ignored = setdiff(keys(kwargs), keys(args)) + args_ignored_str = join(args_ignored, ", ") + length(args_ignored) > 0 && + @info "Following $(length(args_ignored)) provided arguments will be ignored: $(args_ignored_str)." + + args_default = setdiff(keys(args), keys(kwargs)) + args_default_str = join(args_default, ", ") + length(args_default) > 0 && + @info "Following $(length(args_default)) arguments were not provided and will be set to default: $(args_default_str)." + + args_override = intersect(keys(args), keys(kwargs)) + for arg in args_override + args[arg] = kwargs[arg] + end + + args[:rng] = mk_rng(args[:rng]) + + config = NeuroTreeClassifier( + :mlogloss, + args[:nrounds], + Float32(args[:lr]), + Float32(args[:wd]), + args[:batchsize], + Symbol(args[:actA]), + args[:depth], + args[:ntrees], + args[:hidden_size], + args[:stack_size], + args[:init_scale], + args[:MLE_tree_split], + args[:rng], + ) + + return config end const NeuroTypes = Union{NeuroTreeRegressor,NeuroTreeClassifier} diff --git a/src/loss.jl b/src/loss.jl index ead9dd2..dc4bc6a 100644 --- a/src/loss.jl +++ b/src/loss.jl @@ -56,17 +56,17 @@ end function mlogloss(m, x, y) p = logsoftmax(m(x); dims=1) k = size(p, 1) - mean(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1)) + mean(-sum(onehotbatch(y, 1:k) .* p; dims=1)) end function mlogloss(m, x, y, w) p = logsoftmax(m(x); dims=1) k = size(p, 1) - sum(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1) .* w) / sum(w) + sum(-sum(onehotbatch(y, 1:k) .* p; dims=1) .* w) / sum(w) end function mlogloss(m, x, y, w, offset) p = logsoftmax(m(x) .+ offset; dims=1) k = size(p, 1) - sum(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1) .* w) / sum(w) + sum(-sum(onehotbatch(y, 1:k) .* p; dims=1) .* w) / sum(w) end gaussian_mle_loss(μ::AbstractVector{T}, σ::AbstractVector{T}, y::AbstractVector{T}) where {T} = diff --git a/src/metrics.jl b/src/metrics.jl index d4b7c23..b0bbcd4 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -100,21 +100,21 @@ end function mlogloss(m, x, y; agg=mean) p = logsoftmax(m(x); dims=1) k = size(p, 1) - raw = dropdims(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1); dims=1) + raw = dropdims(-sum(onehotbatch(y, 1:k) .* p; dims=1); dims=1) metric = agg(raw) return metric end function mlogloss(m, x, y, w; agg=mean) p = logsoftmax(m(x); dims=1) k = size(p, 1) - raw = dropdims(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1); dims=1) + raw = dropdims(-sum(onehotbatch(y, 1:k) .* p; dims=1); dims=1) metric = agg(raw .* w) return metric end function mlogloss(m, x, y, w, offset; agg=mean) p = logsoftmax(m(x) .+ offset; dims=1) k = size(p, 1) - raw = dropdims(-sum(onehotbatch(UInt32.(y), 1:k) .* p; dims=1); dims=1) + raw = dropdims(-sum(onehotbatch(y, 1:k) .* p; dims=1); dims=1) metric = agg(raw .* w) return metric end
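
Note on the new loss: the `tweedie` loss functions and the `:tweedie_deviance` metric added in this patch series all evaluate the Tweedie unit deviance with the power parameter hard-coded to `rho = 1.5`, the same expression repeated in `src/loss.jl` and `src/metrics.jl`. A minimal scalar sketch of that expression follows; it is illustrative only, and `tweedie_deviance_unit` is not part of the package API.

```julia
# Tweedie unit deviance for a single observation, with the power parameter
# fixed at rho = 1.5 as in the patched loss and metric code. `y` is the
# observed target and `mu` is the predicted mean on the response scale.
function tweedie_deviance_unit(y::Real, mu::Real; rho::Real=1.5)
    return 2 * (y^(2 - rho) / ((1 - rho) * (2 - rho)) -
                y * mu^(1 - rho) / (1 - rho) +
                mu^(2 - rho) / (2 - rho))
end

tweedie_deviance_unit(2.0, 2.0)  # ≈ 0.0: the deviance vanishes when mu == y
tweedie_deviance_unit(2.0, 1.0)  # ≈ 0.69: positive whenever mu != y
```

With `rho = 1.5` the deviance sits between the Poisson (`rho = 1`) and Gamma (`rho = 2`) cases, which is why it is a common default for non-negative, zero-inflated targets.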