Skip to content

Commit

Permalink
[MNT] Small NumPy 2 related fixes (#5954)
Browse files Browse the repository at this point in the history
This applies some smaller NumPy 2 related fixes.  With the (in-progress) CuPy 13.2 fixups, the single-GPU test suite seems to be doing mostly fine.  There is a single test remaining:
```
test_simpl_set.py::test_simplicial_set_embedding
```
is failing with:
```
(Pdb) cp.asarray(cu_embedding)
array([[23067.518, 23067.518],
       [17334.559, 17334.559],
       [22713.598, 22713.598],
       ...,
       [23238.438, 23238.438],
       [25416.912, 25416.912],
       [19748.943, 19748.943]], dtype=float32)
```
being completely different from the reference:
```
array([[5.330462 , 4.3419437],
       [4.1822557, 5.6225405],
       [5.200859 , 4.530094 ],
       ...,
       [4.852359 , 5.0026293],
       [5.361374 , 4.1475334],
       [4.0259256, 5.7187223]], dtype=float32)
```
I am not sure why that might be. I will prod it a bit more, but it may need someone who knows the methods to have a look.

One wrinkle is that hdbscan is not yet released for NumPy 2, but I guess that is still required even though sklearn has a version?
(Probably not a big issue, but my fixups in scikit-learn-contrib/hdbscan#644 run into some issue, even though it doesn't seem to be NumPy 2 related.)

xref: rapidsai/build-planning#38

Authors:
  - Sebastian Berg (https://github.com/seberg)
  - https://github.com/jakirkham
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: #5954
  • Loading branch information
seberg authored Jul 28, 2024
1 parent d4535d2 commit 4338268
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 20 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ repos:
setup[.]cfg$
exclude: |
(?x)
cpp/src/tsne/cannylab/bh[.]cu$
cpp/src/tsne/cannylab/bh[.]cu$|
python/cuml/cuml/_thirdparty
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def _sparse_min_or_max(X, axis, min_or_max):
if np.isnan(m):
if 'nan' in min_or_max:
m = 0
elif X.nnz != cpu_np.product(X.shape):
elif X.nnz != cpu_np.prod(X.shape):
if 'min' in min_or_max:
m = m if m <= 0 else 0
else:
Expand Down
16 changes: 10 additions & 6 deletions python/cuml/cuml/internals/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,12 +1163,16 @@ def from_input(
if (
not fail_on_order and order != arr.order and order != "K"
) or make_copy:
arr = cls(
arr.mem_type.xpy.array(
arr.to_output("array"), order=order, copy=make_copy
),
index=index,
)
if make_copy:
data = arr.mem_type.xpy.array(
arr.to_output("array"), order=order
)
else:
data = arr.mem_type.xpy.asarray(
arr.to_output("array"), order=order
)

arr = cls(data, index=index)

n_rows = arr.shape[0]

Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/tests/test_make_classification.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -115,7 +115,7 @@ def test_make_classification_informative_features():

# Cluster by sign, viewed as strings to allow uniquing
signs = np.sign(cp.asnumpy(X))
signs = signs.view(dtype="|S{0}".format(signs.strides[0]))
signs = signs.view(dtype="|S{0}".format(signs.strides[0])).ravel()
unique_signs, cluster_index = np.unique(signs, return_inverse=True)

assert (
Expand Down
12 changes: 6 additions & 6 deletions python/cuml/cuml/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,7 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

# Change precision of one parameter
Y = np.asfarray(Y, dtype=np.float32)
Y = np.asarray(Y, dtype=np.float32)
S = pairwise_distances(X, Y, metric=metric)
S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
Expand All @@ -1074,8 +1074,8 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
compare_precision = 2

# Change precision of both parameters to float
X = np.asfarray(X, dtype=np.float32)
Y = np.asfarray(Y, dtype=np.float32)
X = np.asarray(X, dtype=np.float32)
Y = np.asarray(Y, dtype=np.float32)
S = pairwise_distances(X, Y, metric=metric)
S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
Expand Down Expand Up @@ -1132,8 +1132,8 @@ def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size):
# For fp32, compare at 4 decimals, (3 places less than the ~7 max)
compare_precision = 4

X = np.asfarray(X, dtype=np.float32)
Y = np.asfarray(Y, dtype=np.float32)
X = np.asarray(X, dtype=np.float32)
Y = np.asarray(Y, dtype=np.float32)

# Compare to sklearn, fp32
S = pairwise_distances(X, Y, metric=metric)
Expand Down Expand Up @@ -1228,7 +1228,7 @@ def test_pairwise_distances_exceptions():

X_int = rng.randint(10, size=(5, 4))
X_double = rng.random_sample((5, 4))
X_float = np.asfarray(X_double, dtype=np.float32)
X_float = np.asarray(X_double, dtype=np.float32)
X_bool = rng.choice([True, False], size=(5, 4))

# Test int inputs (only float/double accepted at this time)
Expand Down
8 changes: 4 additions & 4 deletions python/cuml/cuml/tests/test_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,9 @@ def get_embedding(n_components, random_state):
)
return reducer.fit_transform(data, convert_dtype=True)

state = copy.copy(random_state)
state = copy.deepcopy(random_state)
cuml_embedding1 = get_embedding(n_components, state)
state = copy.copy(random_state)
state = copy.deepcopy(random_state)
cuml_embedding2 = get_embedding(n_components, state)

assert not np.isnan(cuml_embedding1).any()
Expand Down Expand Up @@ -475,9 +475,9 @@ def get_embedding(n_components, random_state):
reducer.fit(fit_data, convert_dtype=True)
return reducer.transform(transform_data, convert_dtype=True)

state = copy.copy(random_state)
state = copy.deepcopy(random_state)
cuml_embedding1 = get_embedding(n_components, state)
state = copy.copy(random_state)
state = copy.deepcopy(random_state)
cuml_embedding2 = get_embedding(n_components, state)

assert not np.isnan(cuml_embedding1).any()
Expand Down

0 comments on commit 4338268

Please sign in to comment.