Skip to content

Commit

Permalink
Merge branch 'branch-24.10' into cuvs
Browse files Browse the repository at this point in the history
  • Loading branch information
cjnolet authored Oct 3, 2024
2 parents 3e88b8e + 8ed7bda commit 064cced
Show file tree
Hide file tree
Showing 8 changed files with 0 additions and 347 deletions.
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ dependencies:
- numpydoc
- nvcc_linux-64=11.8
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.10.*,>=0.0.0a0
- pynndescent
Expand Down Expand Up @@ -79,6 +78,4 @@ dependencies:
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-118_arch-x86_64
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.10.*,>=0.0.0a0
- pynndescent
Expand Down Expand Up @@ -75,6 +74,4 @@ dependencies:
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-125_arch-x86_64
8 changes: 0 additions & 8 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -527,14 +527,6 @@ dependencies:
- umap-learn==0.5.6
- pynndescent
- setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
- output_types: conda
packages:
- pip
- pip:
- dask-glm==0.3.0
- output_types: pyproject
packages:
- dask-glm==0.3.0
test_notebooks:
common:
- output_types: [conda, requirements]
Expand Down
Empty file.
27 changes: 0 additions & 27 deletions python/cuml/cuml/dask/extended/linear_model/__init__.py

This file was deleted.

219 changes: 0 additions & 219 deletions python/cuml/cuml/dask/extended/linear_model/logistic_regression.py

This file was deleted.

86 changes: 0 additions & 86 deletions python/cuml/cuml/tests/dask/test_dask_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,92 +103,6 @@ def make_classification_dataset(
return X, y


def select_sk_solver(cuml_solver):
    """Return the scikit-learn solver name to compare against *cuml_solver*.

    ``"newton"`` maps to ``"newton-cg"``; ``"admm"`` and ``"lbfgs"`` both
    map to ``"lbfgs"``. For any other solver name ``pytest.xfail`` is
    called with the message "No matched sklearn solver".
    """
    if cuml_solver == "newton":
        return "newton-cg"
    if cuml_solver in ("admm", "lbfgs"):
        return "lbfgs"
    # No comparable scikit-learn solver for this name.
    pytest.xfail("No matched sklearn solver")


@pytest.mark.mg
@pytest.mark.parametrize("nrows", [1e5])
@pytest.mark.parametrize("ncols", [20])
@pytest.mark.parametrize("n_parts", [2, 6])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize("gpu_array_input", [False, True])
@pytest.mark.parametrize(
    "solver", ["admm", "gradient_descent", "newton", "lbfgs", "proximal_grad"]
)
def test_lr_fit_predict_score(
    nrows,
    ncols,
    n_parts,
    fit_intercept,
    datatype,
    gpu_array_input,
    solver,
    client,
):
    """Check fit/predict/score/predict_proba against scikit-learn.

    Fits ``cuml.dask.extended.linear_model.LogisticRegression`` and the
    scikit-learn reference model (``skLR``) on the same generated dataset
    and asserts that:

    * prediction accuracy is at least the reference accuracy, or within
      1e-3 of it;
    * ``score`` satisfies the same bound;
    * predicted probabilities differ from the reference by at most 0.05.

    Solvers for which ``select_sk_solver`` finds no scikit-learn match are
    xfailed before any data is generated.

    ``client`` is presumably a dask distributed client fixture -- TODO
    confirm against the test suite's conftest.
    """
    # Resolve the reference solver first so unmatched solvers xfail early.
    sk_solver = select_sk_solver(cuml_solver=solver)

    def imp():
        import cuml.comm.serialize  # NOQA

    # Run the import through the client so the serialization module is
    # loaded remotely as well (presumably on every worker -- TODO confirm).
    client.run(imp)

    from cuml.dask.extended.linear_model import (
        LogisticRegression as cumlLR_dask,
    )

    n_info = 5
    # nrows is parametrized as the float 1e5, so cast sizes to int.
    nrows = int(nrows)
    ncols = int(ncols)
    X, y = make_classification_dataset(datatype, nrows, ncols, n_info)

    gX, gy = _prep_training_data(client, X, y, n_parts)

    if gpu_array_input:
        # Exercise the array input path: take the underlying values and
        # convert each collection's ``_meta`` with ``cp.asarray``.
        gX = gX.values
        gX._meta = cp.asarray(gX._meta)
        gy = gy.values
        gy._meta = cp.asarray(gy._meta)

    cuml_model = cumlLR_dask(
        fit_intercept=fit_intercept, solver=solver, max_iter=10
    )

    # test fit and predict
    cuml_model.fit(gX, gy)
    cu_preds = cuml_model.predict(gX)
    accuracy_cuml = accuracy_score(y, cu_preds.compute().get())

    sk_model = skLR(fit_intercept=fit_intercept, solver=sk_solver, max_iter=10)
    sk_model.fit(X, y)
    sk_preds = sk_model.predict(X)
    accuracy_sk = accuracy_score(y, sk_preds)

    # Logical ``or`` rather than bitwise ``|``: both operands are plain
    # boolean comparisons.
    assert (accuracy_cuml >= accuracy_sk) or (
        np.abs(accuracy_cuml - accuracy_sk) < 1e-3
    )

    # score
    accuracy_cuml = cuml_model.score(gX, gy).compute().item()
    accuracy_sk = sk_model.score(X, y)

    assert (accuracy_cuml >= accuracy_sk) or (
        np.abs(accuracy_cuml - accuracy_sk) < 1e-3
    )

    # predicted probabilities should differ by <= 5%
    # even with different solvers (arbitrary)
    probs_cuml = cuml_model.predict_proba(gX).compute()
    # Column 1 of the scikit-learn output is compared with the cuML result.
    probs_sk = sk_model.predict_proba(X)[:, 1]
    assert np.abs(probs_sk - probs_cuml.get()).max() <= 0.05


@pytest.mark.mg
@pytest.mark.parametrize("n_parts", [2])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
Expand Down
1 change: 0 additions & 1 deletion python/cuml/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ classifiers = [
[project.optional-dependencies]
test = [
"cython>=3.0.0",
"dask-glm==0.3.0",
"dask-ml",
"hdbscan>=0.8.38,<0.8.39",
"hypothesis>=6.0,<7",
Expand Down

0 comments on commit 064cced

Please sign in to comment.