diff --git a/experiments/Project.toml b/experiments/Project.toml
new file mode 100644
index 0000000..0fc9a79
--- /dev/null
+++ b/experiments/Project.toml
@@ -0,0 +1,7 @@
+[deps]
+CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+NeuroTreeModels = "1db4e0a5-a364-4b0c-897c-2bd5a4a3a1f2"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/experiments/classification.jl b/experiments/classification.jl
new file mode 100644
index 0000000..00ef1f5
--- /dev/null
+++ b/experiments/classification.jl
@@ -0,0 +1,43 @@
+using NeuroTreeModels
+using MLDatasets
+using DataFrames
+using Statistics: mean
+using CategoricalArrays
+using Random
+Random.seed!(123)
+
+df = MLDatasets.Iris().dataframe
+
+df[!, :class] = categorical(df[!, :class])
+df[!, :class] .= levelcode.(df[!, :class])
+target_name = "class"
+feature_names = setdiff(names(df), [target_name])
+
+train_ratio = 0.8
+train_indices = randperm(nrow(df))[1:Int(train_ratio * nrow(df))]
+
+dtrain = df[train_indices, :]
+deval = df[setdiff(1:nrow(df), train_indices), :]
+
+config = NeuroTreeRegressor(
+    device=:cpu,
+    loss=:mlogloss,
+    nrounds=400,
+    outsize=3,
+    depth=4,
+    lr=2e-2,
+)
+
+m = NeuroTreeModels.fit(
+    config,
+    dtrain;
+    deval,
+    target_name,
+    feature_names,
+    metric=:mlogloss,
+    print_every_n=10,
+    early_stopping_rounds=2,
+)
+
+m(dtrain[1:5, :])[1:1,:]
+m(dtrain[1:1, :])[1:1,:]
\ No newline at end of file
diff --git a/src/fit.jl b/src/fit.jl
index 104caa0..9f5d5b3 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -146,7 +146,9 @@ end
 function fit_iter!(m, cache)
     loss, opts, data = cache[:loss], cache[:opts], cache[:dtrain]
     GC.gc(true)
-    CUDA.reclaim()
+    if m.info[:device] == :gpu
+        CUDA.reclaim()
+    end
     for d in data
         grads = gradient(model -> loss(model, d...), m)[1]
         Optimisers.update!(opts, m, grads)
diff --git a/src/infer.jl b/src/infer.jl
index 86cb3c7..06d2e29 100644
--- a/src/infer.jl
+++ b/src/infer.jl
@@ -41,7 +41,7 @@ function infer(m::NeuroTreeModel{<:MLogLoss}, data::DL)
         push!(preds, Matrix(m(x)'))
     end
     p = vcat(preds...)
-    softmax!(p; dims=1)
+    softmax!(p; dims=2)
     return p
 end
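
To try the new experiment locally, something like the following should work from the repository root (a sketch assuming this checkout layout; Pkg.instantiate pulls the dependencies pinned in experiments/Project.toml):

    using Pkg
    Pkg.activate("experiments")    # use the experiments/Project.toml added above
    Pkg.instantiate()              # resolve and install the pinned dependencies
    include(joinpath("experiments", "classification.jl"))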
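
On the src/fit.jl change: CUDA.reclaim() releases cached GPU memory back to the driver but assumes a functional CUDA setup, so guarding it on m.info[:device] == :gpu lets CPU-only training (like the Iris script above) run on machines without a GPU. A stricter variant, sketched below rather than taken from the patch, would additionally check CUDA.functional():

    # Sketch of a more defensive guard (not what the patch does).
    # CUDA.functional() reports whether a usable GPU and driver are present.
    if m.info[:device] == :gpu && CUDA.functional()
        CUDA.reclaim()   # release cached device memory back to the driver
    end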
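
On the src/infer.jl change: each batch prediction is stored transposed (Matrix(m(x)')), so after vcat the matrix p is observations x classes, and the softmax must normalize within each row (dims=2), not down each stacked column. A minimal illustration of the orientation, assuming NNlib's softmax and made-up logits:

    using NNlib: softmax

    # Rows are observations, columns are class logits,
    # matching the layout of p = vcat(Matrix(m(x)')...).
    logits = [2.0 1.0 0.1;
              0.5 2.5 0.2]
    probs = softmax(logits; dims=2)                  # normalize across classes, per row
    @assert all(isapprox.(sum(probs; dims=2), 1.0))  # each row sums to 1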
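
The two trailing calls in experiments/classification.jl act as a smoke test for that fix: with per-row normalization, the first row of a five-row batch prediction should match the prediction for that row alone (assuming inference is deterministic across batch sizes). Roughly:

    # Hypothetical check; the tolerance is an assumption, not from the patch.
    p_batch  = m(dtrain[1:5, :])[1:1, :]   # first row of a batched prediction
    p_single = m(dtrain[1:1, :])[1:1, :]   # same row predicted on its own
    @assert isapprox(p_batch, p_single; atol=1e-6)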