Skip to content

Commit f7d1366

Browse files
committed
fixed types and device in new nplets dataset
1 parent 7b54360 commit f7d1366

10 files changed

+59
-49
lines changed

README.md

+11-11
Original file line numberDiff line numberDiff line change
@@ -60,28 +60,28 @@ import numpy as np
6060
6161
X = np.random.normal(0,1, (1000, 10))
6262
63-
# Computation of O information for the entire system
63+
# Computation of O information for the nplet that considers all the variables of X
6464
measures = nplets_measures(X)
6565
66-
# Computation of O info for the sub-system composed by 0, 1 and 3
67-
measures = nplets_measures(X, [0,1,3])
66+
# Computation of O info for a single nplet (it must be a list of nplets even if it is a single nplet)
67+
measures = nplets_measures(X, [[0,1,3]])
6868
69-
# Computation of O info for the sub-system composed by 0, 1 and 3
69+
# Computation of O info for multiple nplets
7070
measures = nplets_measures(X, [[0,1,3],[3,7,4],[2,6,3]])
7171
72-
# Extensive computation of O information measures over all combinations of X
72+
# Extensive computation of O information measures over all combinations of features in X
7373
measures = multi_order_measures(X)
7474
75-
# compute the best 10 combinations using greedy, starting by exaustive search in
75+
# Compute the best 10 combinations of features (nplets) using greedy, starting by exhaustive search in
7676
# lower order and building from there. Result shows best O information for
7777
# each built optimal orders
78-
best_partitions, best_scores = greedy(X, 3, 5, repeat=10)
78+
best_nplets, best_scores = greedy(X, 3, 5, repeat=10)
7979
80-
# compute the best 10 combinations using simulated annealing: There are two initialization options
81-
# 1. Starting by exaustive search in lower order, then building with gready.
82-
# 2. Selection random sample of initial solutions.
80+
# Compute the best 10 combinations of features (nplet) using simulated annealing: There are two initialization options
81+
# 1. Starting from a custom initial solution with shape (repeat, order) explicitly provided by the user.
82+
# 2. Selecting random samples from the order.
8383
# Result shows best O information for each built optimal orders
84-
best_partitions, best_scores = simulated_annealing(X, 5, repeat=10)
84+
best_nplets, best_scores = simulated_annealing(X, 5, repeat=10)
8585
```
8686

8787
For detailed usage and examples, please refer to the [documentation](https://github.com/Laouen/THOI).

tests/test_multiorder_measures.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def test_multiorder_measures_precomputed_hot_encoded(self):
103103
T, N = self.X.shape
104104
covmat = gaussian_copula_covmat(self.X)
105105

106-
df_res = multi_order_measures_hot_encoded(covmat, batch_size=10000, use_cpu=True)
106+
df_res = multi_order_measures_hot_encoded(covmat, batch_size=200000, use_cpu=True)
107107

108108
dfs = []
109109
for order in sorted(df_res['order'].unique()):
@@ -122,7 +122,7 @@ def test_multiorder_measures_precomputed_hot_encoded(self):
122122
df_desc_order = df_desc_order.sort_index()
123123
df_stats_order = df_stats_order.sort_index()
124124

125-
self.assertTrue(np.allclose(df_desc_order.values, df_stats_order.values, atol=1e-6, equal_nan=True))
125+
self.assertTrue(np.allclose(df_desc_order.values, df_stats_order.values, atol=1e-4, equal_nan=True))
126126

127127
def test_multiple_times_same_datasets(self):
128128
# TODO: implement

thoi/commons.py

+13-21
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import scipy as sp
44
import torch
55

6+
from thoi.typing import TensorLikeArray
7+
68

79
def _get_string_metric(batched_res: np.ndarray, metric:str):
810
'''
@@ -60,16 +62,15 @@ def _to_numpy(X):
6062
return X.detach().cpu().numpy()
6163
elif isinstance(X, np.ndarray):
6264
return X
63-
else:
64-
raise TypeError(f"Unsupported type: {type(X)}")
65+
return np.array(X)
6566

6667
def _get_device(use_cpu:bool=False):
6768
"""Set the use of GPU if available"""
6869
using_GPU = torch.cuda.is_available() and not use_cpu
6970
device = torch.device('cuda' if using_GPU else 'cpu')
7071
return device
7172

72-
def _normalize_input_data(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
73+
def _normalize_input_data(X: TensorLikeArray,
7374
covmat_precomputed: bool=False,
7475
T: Optional[Union[int, List[int]]]=None,
7576
use_cpu: bool=False):
@@ -88,30 +89,21 @@ def _normalize_input_data(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], L
8889

8990
# Handle different options for X parameter. Accept multivariate data or covariance matrix
9091
if covmat_precomputed:
91-
92-
if isinstance(X, (np.ndarray, torch.Tensor)):
93-
assert X.shape[-2] == X.shape[-1], 'Covariance matrix should be square'
94-
assert len(X.shape) in [2, 3], 'Covariance matrix should have dimensions (N, N) or (D, N, N)'
95-
covmats = torch.as_tensor(X)
96-
covmats = covmats.unsqueeze(0) if len(covmats.shape) == 2 else covmats
97-
else:
98-
assert all([len(x.shape) == 2 for x in X]), 'All covariance matrices should have dimensions (N, N)'
99-
assert all([x.shape[0] == x.shape[1] == X[0].shape[0] for x in X]), 'All covariance matrices should have same dimensions (N, N)'
100-
covmats = torch.stack([torch.as_tensor(x) for x in X])
92+
covmats = torch.as_tensor(X)
93+
covmats = covmats.unsqueeze(0) if len(covmats.shape) == 2 else covmats
94+
assert X.shape[-2] == X.shape[-1], 'Covariance matrix should be square'
95+
assert len(X.shape) == 3, 'Covariance matrix should have dimensions (N, N) or (D, N, N)'
10196
else:
10297

103-
if isinstance(X, (np.ndarray, torch.Tensor)):
98+
try:
10499
X = _to_numpy(X)
105100
assert len(X.shape) in [2, 3], 'Covariance matrix should have dimensions (T, N) or (D, T, N)'
106-
if len(X.shape) == 2:
107-
X = [X]
108-
else:
109-
X = [X[i] for i in range(X.shape[0])]
110-
else:
101+
X = [X] if len(X.shape) == 2 else [X[i] for i in range(X.shape[0])]
102+
except:
103+
X = [_to_numpy(x) for x in X]
111104
assert all([len(x.shape) == 2 for x in X]), 'All multivariate series should have dimensions (T, N) where T my vary and N be constant across all series'
112105
assert all([x.shape[1] == X[0].shape[1] for x in X]), 'All multivariate series should have dimensions (T, N) where T my vary and N be constant across all series'
113-
X = [_to_numpy(x) for x in X]
114-
106+
115107
covmats = torch.stack([torch.from_numpy(gaussian_copula_covmat(x)) for x in X])
116108
T = [x.shape[0] for x in X]
117109

thoi/heuristics/greedy.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@
1212

1313
@torch.no_grad()
1414
def greedy(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
15-
covmat_precomputed: bool=False,
16-
T: Optional[Union[int, List[int]]]=None,
1715
initial_order: int=3,
1816
order: Optional[int]=None,
17+
*,
18+
covmat_precomputed: bool=False,
19+
T: Optional[Union[int, List[int]]]=None,
1920
repeat: int=10,
2021
use_cpu: bool=False,
2122
batch_size: int=1000000,

thoi/heuristics/simulated_annealing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@ def random_sampler(N:int, order:int, repeat:int, device:Optional[torch.device]=N
1818

1919
@torch.no_grad()
2020
def simulated_annealing(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
21+
order: Optional[int]=None,
22+
*,
2123
covmat_precomputed: bool=False,
2224
T: Optional[Union[int, List[int]]]=None,
2325
initial_solution: Optional[torch.Tensor] = None,
24-
order: Optional[int]=None,
2526
repeat: int = 10,
2627
use_cpu: bool = False,
2728
max_iterations: int = 1000,

thoi/heuristics/simulated_annealing_multi_order.py

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def hot_encode_to_indexes(nplets):
2828

2929
@torch.no_grad()
3030
def simulated_annealing_multi_order(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
31+
*,
3132
covmat_precomputed: bool=False,
3233
T: Optional[Union[int, List[int]]]=None,
3334
initial_solution: Optional[torch.Tensor] = None,

thoi/measures/gaussian_copula.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import torch
88
from torch.utils.data import DataLoader
99

10+
from thoi.typing import TensorLikeArray
1011
from thoi.commons import _normalize_input_data, _get_device
1112
from thoi.dataset import CovarianceDataset
1213
from thoi.collectors import batch_to_csv, concat_and_sort_csv
@@ -98,8 +99,8 @@ def _get_tc_dtc_from_batched_covmat(covmats: torch.Tensor, allmin1: torch.Tensor
9899
return nplet_tc, nplet_dtc, nplet_o, nplet_s
99100

100101
@torch.no_grad()
101-
def nplets_measures(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
102-
nplets: Optional[Union[np.ndarray,torch.Tensor]] = None,
102+
def nplets_measures(X: Union[TensorLikeArray],
103+
nplets: Optional[TensorLikeArray] = None,
103104
covmat_precomputed: bool = False,
104105
T: Optional[Union[int, List[int]]] = None,
105106
use_cpu: bool = False):
@@ -164,7 +165,7 @@ def nplets_measures(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[to
164165
nplets_s.view(batch_size, D)], dim=-1)
165166

166167
@torch.no_grad()
167-
def multi_order_measures(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
168+
def multi_order_measures(X: TensorLikeArray,
168169
covmat_precomputed: bool=False,
169170
T: Optional[Union[int, List[int]]]=None,
170171
min_order: int=3,
@@ -235,11 +236,14 @@ def multi_order_measures(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], Li
235236
# calculate measurements for each batch
236237
for bn, nplets in enumerate(tqdm(dataloader, total=len(dataloader), leave=False, desc='Batch')):
237238
curr_batch_size = nplets.shape[0]
238-
239+
240+
# Send nplets to the device in case it is not there
241+
nplets = nplets.to(device)
242+
239243
# Create the covariance matrices for each nplet in the batch
240244
# |curr_batch_size| x |D| x |N| x |N|
241245
nplets_covmats = _generate_nplets_covmants(covmats, nplets)
242-
246+
243247
# Pack covmats in a single batch
244248
# |curr_batch_size x D| x |N| x |N|
245249
nplets_covmats = nplets_covmats.view(curr_batch_size*D, order, order)

thoi/measures/gaussian_copula_hot_encoded.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import torch
88
from torch.utils.data import DataLoader
99

10+
from thoi.typing import TensorLikeArray
1011
from thoi.dataset import HotEncodedMultiOrderDataset
1112
from thoi.collectors import batch_to_csv, concat_and_sort_csv
1213
from thoi.measures.utils import _all_min_1_ids, _gaussian_entropy_bias_correction, _gaussian_entropy_estimation, _get_single_exclusion_covmats
@@ -177,8 +178,8 @@ def _compute_nplets_measures_hot_encoded(covmats: torch.Tensor,
177178
)
178179

179180
@torch.no_grad()
180-
def nplets_measures_hot_encoded(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
181-
nplets: Optional[Union[np.ndarray,torch.Tensor]] = None,
181+
def nplets_measures_hot_encoded(X: TensorLikeArray,
182+
nplets: Optional[TensorLikeArray] = None,
182183
covmat_precomputed: bool = False,
183184
T: Optional[int] = None,
184185
use_cpu: bool = False):
@@ -204,7 +205,7 @@ def nplets_measures_hot_encoded(X: Union[np.ndarray, torch.Tensor, List[np.ndarr
204205

205206

206207
@torch.no_grad()
207-
def multi_order_measures_hot_encoded(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
208+
def multi_order_measures_hot_encoded(X: TensorLikeArray,
208209
covmat_precomputed: bool=False,
209210
T: Optional[int]=None,
210211
min_order: int=3,

thoi/measures/utils.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from thoi.measures.constants import TWOPIE
66

77

8-
def _all_min_1_ids(N, device=torch.device('cpu')):
8+
def _all_min_1_ids(N: torch.device, device: torch.device=torch.device('cpu')):
99
base_tensor = torch.arange(N, device=device).unsqueeze(0).repeat(N, 1) # Shape: (N, N)
1010
mask = base_tensor != torch.arange(N, device=device).unsqueeze(1) # Shape: (N, N)
1111
result = base_tensor[mask].view(N, N - 1) # Shape: (N, N-1)
@@ -28,20 +28,21 @@ def _get_single_exclusion_covmats(covmats: torch.Tensor, allmin1: torch.Tensor):
2828
batch_size, N, _ = covmats.shape
2929

3030
# Step 1: Expand allmin1 to match the batch size
31-
# Shape: (batch_size, N, N-1)
31+
# |batch_size| |N| |N-1|
3232
allmin1_expanded = allmin1.unsqueeze(0).expand(batch_size, -1, -1)
3333

3434
# Step 2: Expand covmats to include the N dimension for variable exclusion
35-
# Shape: (batch_size, N, N, N)
35+
# |batch_size| |N| |N| |N|
3636
covmats_expanded = covmats.unsqueeze(1).expand(-1, N, -1, -1)
3737

3838
# Step 3: Gather the rows corresponding to the indices in allmin1
39-
# Shape of indices_row: (batch_size, N, N-1, N)
39+
# |batch_size| |N| |N-1| |N|
4040
indices_row = allmin1_expanded.unsqueeze(-1).expand(-1, -1, -1, N)
4141
gathered_rows = torch.gather(covmats_expanded, 2, indices_row)
4242

4343
# Step 4: Gather the columns corresponding to the indices in allmin1
4444
# Shape of indices_col: (batch_size, N, N-1, N-1)
45+
# |batch_size| |N| |N-1| |N-1|
4546
indices_col = allmin1_expanded.unsqueeze(-2).expand(-1, -1, N-1, -1)
4647
covmats_sub = torch.gather(gathered_rows, 3, indices_col)
4748

thoi/typing.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from typing import Union, Sequence, Any
2+
import torch
3+
import numpy as np
4+
5+
TensorLikeArray = Union[
6+
torch.Tensor,
7+
np.ndarray,
8+
Sequence[Union[np.ndarray, Sequence[Any]]],
9+
]

0 commit comments

Comments
 (0)