Skip to content

Commit

Permalink
Merge pull request #242 from Evovest/oblivious
Browse files Browse the repository at this point in the history
Add support for Oblivious Trees
  • Loading branch information
jeremiedb authored Jul 28, 2023
2 parents ad016a6 + bae8ba7 commit 5eabb3a
Show file tree
Hide file tree
Showing 39 changed files with 641 additions and 92 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "EvoTrees"
uuid = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
authors = ["jeremiedb <[email protected]>"]
version = "0.15.0"
version = "0.15.1"

[deps]
BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
Expand Down
8 changes: 5 additions & 3 deletions benchmarks/aicrowd-bench.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,13 @@ config = EvoTreeRegressor(T=Float32,
max_depth=5,
rowsample=0.5,
colsample=0.8,
nrounds=400, eta=0.05)
nrounds=400,
tree_type="oblivious",
eta=0.05)

# @time m = fit_evotree(config; x_train, y_train, print_every_n=25);
@time m = fit_evotree(config; x_train, y_train, x_eval, y_eval, early_stopping_rounds = 50, print_every_n=25, metric=:logloss);
pred_eval_evo = m(x_eval) |> vec
@time m = fit_evotree(config; x_train, y_train, x_eval, y_eval, early_stopping_rounds=50, print_every_n=25, metric=:logloss);
pred_eval_evo = m(x_eval) |> vec;

params_xgb = [
"objective" => "reg:logistic",
Expand Down
13 changes: 9 additions & 4 deletions benchmarks/regressor.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import CUDA
nobs = Int(1e6)
num_feat = Int(100)
nrounds = 200
tree_type = "binary"
T = Float64
nthread = Base.Threads.nthreads()
@info "testing with: $nobs observations | $num_feat features. nthread: $nthread"
@info "testing with: $nobs observations | $num_feat features. nthread: $nthread | tree_type : $tree_type"
seed!(123)
x_train = rand(T, nobs, num_feat)
y_train = rand(T, size(x_train, 1))
Expand Down Expand Up @@ -48,7 +49,7 @@ params_xgb = Dict(

dtrain = DMatrix(x_train, y_train)
watchlist = Dict("train" => DMatrix(x_train, y_train));
@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@time m_xgb = xgboost(dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric=metric_xgb, params_xgb...);
# @btime m_xgb = xgboost($dtrain; watchlist, nthread=nthread, verbosity=0, eval_metric = metric_xgb, params_xgb...);
@info "predict"
@time pred_xgb = XGBoost.predict(m_xgb, x_train);
Expand Down Expand Up @@ -90,7 +91,7 @@ watchlist = Dict("train" => DMatrix(x_train, y_train));

@info "EvoTrees"
verbosity = 1
params_evo = EvoTreeRegressor(
params_evo = EvoTreeRegressor(;
loss=loss_evo,
nrounds=nrounds,
alpha=0.5,
Expand All @@ -102,7 +103,8 @@ params_evo = EvoTreeRegressor(
rowsample=0.5,
colsample=0.5,
nbins=64,
rng=123,
tree_type,
rng=123
)

@info "EvoTrees CPU"
Expand All @@ -115,6 +117,9 @@ device = "cpu"
# @time m_evo = fit_evotree(params_evo; x_train, y_train, device, verbosity, print_every_n=100);
@info "train - eval"
@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
# using Plots
# plot(m_evo, 2)

@time m_evo = fit_evotree(params_evo; x_train, y_train, x_eval=x_train, y_eval=y_train, metric=metric_evo, device, verbosity, print_every_n=100);
@info "predict"
@time pred_evo = m_evo(x_train);
Expand Down
File renamed without changes
Binary file removed docs/src/assets/gaussian_sinus.png
Binary file not shown.
Binary file added docs/src/assets/quantiles-sinus-binary.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed docs/src/assets/quantiles_sinus.png
Binary file not shown.
Binary file added docs/src/assets/regression-sinus-binary.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed docs/src/assets/regression_sinus.png
Binary file not shown.
Binary file removed docs/src/assets/regression_sinus2.png
Binary file not shown.
6 changes: 3 additions & 3 deletions docs/src/tutorials/examples-API.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ The following provides minimal examples of usage of the various loss functions a

Minimal example of fitting a noisy sine wave.

![](../assets/regression_sinus.png)
![](../assets/regression-sinus-binary.png)

```julia
using EvoTrees
Expand Down Expand Up @@ -80,7 +80,7 @@ pred_eval_poisson = model(x_eval)

## Quantile Regression

![](../assets/quantiles_sinus.png)
![](../assets/quantiles-sinus-binary.png)

```julia
# q50
Expand Down Expand Up @@ -119,7 +119,7 @@ pred_train_q80 = model(x_train)

## Gaussian Max Likelihood

![](../assets/gaussian_sinus.png)
![](../assets/gaussian-sinus-binary.png)

```julia
config = EvoTreeMLE(
Expand Down
60 changes: 38 additions & 22 deletions experiments/readme_plots_cpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ using EvoTrees
using EvoTrees: predict, sigmoid, logit
# using ProfileView

tree_type = "binary"

# prepare a dataset
Random.seed!(123)
features = rand(10_000) .* 5
Expand All @@ -28,8 +30,9 @@ x_train, x_eval = X[i_train, :], X[i_eval, :]
y_train, y_eval = Y[i_train], Y[i_eval]

# linear
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:mse,
alpha=1,
nrounds=200,
nbins=64,
lambda=0.01,
Expand All @@ -39,7 +42,8 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
rng=122,
tree_type,
rng=122
)

@time model = fit_evotree(
Expand All @@ -52,6 +56,8 @@ params1 = EvoTreeRegressor(
print_every_n=25,
early_stopping_rounds=20
);

# plot(model, 2)
# laptop: 51.651 ms (237548 allocations: 23.94 MiB)
# @btime model = fit_evotree(params1; x_train, y_train, x_eval = x_eval, y_eval = y_eval, metric = :mse, print_every_n = 999, verbosity=0);
# Profile.clear() # in case we have any previous profiling data
Expand All @@ -68,7 +74,7 @@ model, logger = fit_evotree(
print_every_n=10,
return_logger=true
);
plot(logger[:metrics])
# plot(logger[:metrics])

# @btime model = grow_gbtree($X_train, $Y_train, $params1, X_eval = $X_eval, Y_eval = $Y_eval, print_every_n = 25, metric=:mae)
@time pred_train_linear = model(x_train);
Expand All @@ -77,7 +83,7 @@ mean((pred_train_linear .- y_train) .^ 2)
mean((pred_eval_linear .- y_eval) .^ 2)

# linear weighted
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
T=Float64,
loss=:linear,
nrounds=500,
Expand All @@ -89,7 +95,8 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
rng=123,
tree_type,
rng=123
)

# W_train = ones(eltype(Y_train), size(Y_train)) .* 5
Expand Down Expand Up @@ -119,7 +126,7 @@ mean(abs.(pred_train_linear_w .- y_train))
sqrt(mean((pred_train_linear_w .- y_train) .^ 2))

# logistic / cross-entropy
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:logistic,
nrounds=200,
nbins=64,
Expand All @@ -130,6 +137,7 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type
)

@time model = fit_evotree(
Expand All @@ -149,7 +157,7 @@ params1 = EvoTreeRegressor(
sqrt(mean((pred_train_logistic .- y_train) .^ 2))

# L1
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:l1,
alpha=0.5,
nrounds=500,
Expand All @@ -161,6 +169,7 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(
params1;
Expand Down Expand Up @@ -218,10 +227,10 @@ plot!(
linewidth=1.5,
label="L1",
)
savefig("figures/regression_sinus.png")
savefig("figures/regression-sinus-$tree_type.png")

# Poisson
params1 = EvoTreeCount(
params1 = EvoTreeCount(;
loss=:poisson,
nrounds=500,
nbins=64,
Expand All @@ -232,6 +241,7 @@ params1 = EvoTreeCount(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(
params1;
Expand All @@ -247,17 +257,18 @@ params1 = EvoTreeCount(
sqrt(mean((pred_train_poisson .- y_train) .^ 2))

# Gamma
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:gamma,
nrounds=500,
nbins=64,
lambda=0.1,
gamma=0.1,
eta=0.02,
eta=0.1,
max_depth=6,
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(
params1;
Expand All @@ -273,7 +284,7 @@ params1 = EvoTreeRegressor(
sqrt(mean((pred_train_gamma .- y_train) .^ 2))

# Tweedie
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:tweedie,
nrounds=500,
nbins=64,
Expand All @@ -284,6 +295,7 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(
params1;
Expand Down Expand Up @@ -333,14 +345,14 @@ plot!(
linewidth=1.5,
label="Tweedie",
)
savefig("figures/regression_sinus2.png")
savefig("figures/regression-sinus2-$tree_type.png")


###############################
## Quantiles
###############################
# q50
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:quantile,
alpha=0.5,
nrounds=500,
Expand All @@ -352,6 +364,7 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(
params1;
Expand All @@ -369,7 +382,7 @@ params1 = EvoTreeRegressor(
sum(pred_train_q50 .< y_train) / length(y_train)

# q20
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:quantile,
alpha=0.2,
nrounds=300,
Expand All @@ -381,13 +394,14 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25);
@time pred_train_q20 = model(x_train)
sum(pred_train_q20 .< y_train) / length(y_train)

# q80
params1 = EvoTreeRegressor(
params1 = EvoTreeRegressor(;
loss=:quantile,
alpha=0.8,
nrounds=300,
Expand All @@ -399,6 +413,7 @@ params1 = EvoTreeRegressor(
min_weight=1.0,
rowsample=0.5,
colsample=1.0,
tree_type,
)
@time model = fit_evotree(params1; x_train, y_train, x_eval, y_eval, print_every_n=25)
@time pred_train_q80 = model(x_train)
Expand Down Expand Up @@ -439,13 +454,12 @@ plot!(
linewidth=1.5,
label="Q80",
)
savefig("figures/quantiles_sinus.png")

savefig("figures/quantiles-sinus-$tree_type.png")

###############################
## gaussian
###############################
params1 = EvoTreeMLE(
params1 = EvoTreeMLE(;
T=Float64,
loss=:gaussian,
nrounds=500,
Expand All @@ -458,6 +472,7 @@ params1 = EvoTreeMLE(
rowsample=1.0,
colsample=1.0,
rng=123,
tree_type,
)

@time model = fit_evotree(
Expand Down Expand Up @@ -524,13 +539,13 @@ plot!(
linewidth=1.5,
label="q80",
)
savefig("figures/gaussian-sinus.png")
savefig("figures/gaussian-sinus-$tree_type.png")


###############################
## Logistic
###############################
params1 = EvoTrees.EvoTreeMLE(
params1 = EvoTrees.EvoTreeMLE(;
loss=:logistic,
nrounds=500,
nbins=64,
Expand All @@ -541,6 +556,7 @@ params1 = EvoTrees.EvoTreeMLE(
min_weight=1.0,
rowsample=1.0,
colsample=1.0,
tree_type,
rng=123,
)

Expand Down Expand Up @@ -609,4 +625,4 @@ plot!(
linewidth=1.5,
label="q80",
)
savefig("figures/logistic-sinus.png")
savefig("figures/logistic-sinus-$tree_type.png")
Loading

2 comments on commit 5eabb3a

@jeremiedb
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/88556

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed; otherwise it can be done manually through the GitHub interface, or via:

git tag -a v0.15.1 -m "<description of version>" 5eabb3a3f3dc73e5a8f5393b2600361e5cb13fb4
git push origin v0.15.1

Please sign in to comment.