Skip to content

Commit c9a6b53

Browse files
committed
implemented nplets_measures for automatic hot encoding if multi order
nplets
1 parent 1e0da55 commit c9a6b53

10 files changed

+176
-113
lines changed

tests/test_multi_order_measures.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from thoi.measures.gaussian_copula_hot_encoded import multi_order_measures_hot_encoded
1010
from thoi.commons import gaussian_copula_covmat
1111

12-
# TODO: make this test for all combinations of use_cpu in [True, False] use_cpu_dataset in [True, False] and dataset_device in ['cpu', 'gpu']
12+
# TODO: make this test for all combinations of device in [cpu, cuda] and different input types
1313
class TestMultiOrderMeasures(unittest.TestCase):
1414

1515
# make the constructor
@@ -54,37 +54,37 @@ def _validate_same_results_for_repeated_datasets(self, df_res, rtol, atol):
5454
self._compare_with_ground_truth(df_dataset, rtol, atol)
5555

5656
def test_multiorder_measures_timeseries(self):
57-
df_res = multi_order_measures(self.X, use_cpu=True)
57+
df_res = multi_order_measures(self.X)
5858
self._compare_with_ground_truth(df_res, rtol=1e-16, atol=1e-12)
5959

6060
def test_multiorder_measures_precomputed_covmat(self):
61-
df_res = multi_order_measures(self.covmat, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
61+
df_res = multi_order_measures(self.covmat, covmat_precomputed=True, T=self.X.shape[0])
6262
self._compare_with_ground_truth(df_res, rtol=1e-16, atol=1e-12)
6363

6464
def test_multiorder_measures_timeseries_hot_encoded(self):
65-
df_res = multi_order_measures_hot_encoded(self.X, batch_size=200000, use_cpu=True)
65+
df_res = multi_order_measures_hot_encoded(self.X, batch_size=200000)
6666
self._compare_with_ground_truth(df_res, rtol=1e-8, atol=1e-4)
6767

6868
def test_multiorder_measures_precomputed_hot_encoded(self):
69-
df_res = multi_order_measures_hot_encoded(self.covmat, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
69+
df_res = multi_order_measures_hot_encoded(self.covmat, covmat_precomputed=True, T=self.X.shape[0])
7070
self._compare_with_ground_truth(df_res, rtol=1e-8, atol=1e-4)
7171

7272
def test_multiple_times_same_datasets_timeseries(self):
73-
df_res = multi_order_measures([self.X, self.X], use_cpu=True)
73+
df_res = multi_order_measures([self.X, self.X])
7474
self._validate_same_results_for_repeated_datasets(df_res, rtol=1e-16, atol=1e-7)
7575

7676
def test_multiple_times_same_datasets_precomputed(self):
7777
covmats = [self.covmat, self.covmat]
78-
df_res = multi_order_measures(covmats, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
78+
df_res = multi_order_measures(covmats, covmat_precomputed=True, T=self.X.shape[0])
7979
self._validate_same_results_for_repeated_datasets(df_res, rtol=1e-16, atol=1e-7)
8080

8181
def test_multiple_times_same_datasets_timeseries_hot_encoded(self):
82-
df_res = multi_order_measures_hot_encoded([self.X, self.X], use_cpu=True)
82+
df_res = multi_order_measures_hot_encoded([self.X, self.X])
8383
self._validate_same_results_for_repeated_datasets(df_res, rtol=1e-8, atol=1e-4)
8484

8585
def test_multiple_times_same_datasets_precomputed_hot_encoded(self):
8686
covmats = [self.covmat, self.covmat]
87-
df_res = multi_order_measures_hot_encoded(covmats, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
87+
df_res = multi_order_measures_hot_encoded(covmats, covmat_precomputed=True, T=self.X.shape[0])
8888
self._validate_same_results_for_repeated_datasets(df_res, rtol=1e-8, atol=1e-4)
8989

9090

tests/test_nplet_measures.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from thoi.measures.gaussian_copula_hot_encoded import nplets_measures_hot_encoded
1414
from thoi.commons import gaussian_copula_covmat
1515

16-
# TODO: make this test for all combinations of use_cpu in [True, False] use_cpu_dataset in [True, False] and dataset_device in ['cpu', 'gpu']
16+
# TODO: make this test for all combinations of devices in [cpu, gpu] and different input types
1717
class TestNpletsMeasures(unittest.TestCase):
1818

1919
# make the constructor
@@ -67,31 +67,31 @@ def test_nplets_measures_timeseries(self):
6767
for order in range(3,11):
6868
with self.subTest(order=order):
6969
nplets = torch.tensor(list(combinations(full_nplet, order)))
70-
res = nplets_measures(self.X, nplets, use_cpu=True)
70+
res = nplets_measures(self.X, nplets)
7171
self._compare_with_ground_truth(res, nplets, rtol=1e-16, atol=1e-12)
7272

7373
def test_nplets_measures_precomputed(self):
7474
full_nplet = range(self.X.shape[1])
7575
for order in range(3,11):
7676
with self.subTest(order=order):
7777
nplets = torch.tensor(list(combinations(full_nplet, order)))
78-
res = nplets_measures(self.covmat, nplets, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
78+
res = nplets_measures(self.covmat, nplets, covmat_precomputed=True, T=self.X.shape[0])
7979
self._compare_with_ground_truth(res, nplets, rtol=1e-16, atol=1e-12)
8080

8181
def test_multiple_times_same_datasets_timeseries(self):
8282
full_nplet = range(self.X.shape[1])
8383
for order in range(3,11):
8484
with self.subTest(order=order):
8585
nplets = torch.tensor(list(combinations(full_nplet, order)))
86-
res = nplets_measures([self.X, self.X], nplets, use_cpu=True)
86+
res = nplets_measures([self.X, self.X], nplets)
8787
self._validate_same_results_for_repeated_datasets(res, nplets, rtol=1e-16, atol=1e-7)
8888

8989
def test_multiple_times_same_datasets_precomputed(self):
9090
full_nplet = range(self.X.shape[1])
9191
for order in range(3,11):
9292
with self.subTest(order=order):
9393
nplets = torch.tensor(list(combinations(full_nplet, order)))
94-
res = nplets_measures([self.covmat, self.covmat], nplets, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
94+
res = nplets_measures([self.covmat, self.covmat], nplets, covmat_precomputed=True, T=self.X.shape[0])
9595
self._validate_same_results_for_repeated_datasets(res, nplets, rtol=1e-16, atol=1e-7)
9696

9797
def test_nplets_measures_timeseries_hot_encoded(self):
@@ -103,7 +103,7 @@ def test_nplets_measures_timeseries_hot_encoded(self):
103103
batch_size = nplets.shape[0]
104104
nplets_hot_encoded = torch.zeros((batch_size, N), dtype=torch.int)
105105
nplets_hot_encoded[torch.arange(0,batch_size, dtype=int).view(-1,1), nplets] = 1
106-
res = nplets_measures_hot_encoded(self.X, nplets_hot_encoded, use_cpu=True)
106+
res = nplets_measures_hot_encoded(self.X, nplets_hot_encoded)
107107
self._compare_with_ground_truth(res, nplets, rtol=1e-8, atol=1e-4)
108108

109109
def test_nplets_measures_precomputed_hot_encoded(self):
@@ -115,7 +115,7 @@ def test_nplets_measures_precomputed_hot_encoded(self):
115115
batch_size = nplets.shape[0]
116116
nplets_hot_encoded = torch.zeros((batch_size, N), dtype=torch.int)
117117
nplets_hot_encoded[torch.arange(0,batch_size, dtype=int).view(-1,1), nplets] = 1
118-
res = nplets_measures_hot_encoded(self.covmat, nplets_hot_encoded, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
118+
res = nplets_measures_hot_encoded(self.covmat, nplets_hot_encoded, covmat_precomputed=True, T=self.X.shape[0])
119119
self._compare_with_ground_truth(res, nplets, rtol=1e-8, atol=1e-4)
120120

121121
def test_multiple_times_same_dataset_timeseries_hot_encoded(self):
@@ -127,7 +127,7 @@ def test_multiple_times_same_dataset_timeseries_hot_encoded(self):
127127
batch_size = nplets.shape[0]
128128
nplets_hot_encoded = torch.zeros((batch_size, N), dtype=torch.int)
129129
nplets_hot_encoded[torch.arange(0,batch_size, dtype=int).view(-1,1), nplets] = 1
130-
res = nplets_measures_hot_encoded([self.X, self.X], nplets_hot_encoded, use_cpu=True)
130+
res = nplets_measures_hot_encoded([self.X, self.X], nplets_hot_encoded)
131131
self._validate_same_results_for_repeated_datasets(res, nplets, rtol=1e-8, atol=1e-4)
132132

133133
def test_multiple_times_same_dataset_precomputed_hot_encoded(self):
@@ -139,7 +139,7 @@ def test_multiple_times_same_dataset_precomputed_hot_encoded(self):
139139
batch_size = nplets.shape[0]
140140
nplets_hot_encoded = torch.zeros((batch_size, N), dtype=torch.int)
141141
nplets_hot_encoded[torch.arange(0,batch_size, dtype=int).view(-1,1), nplets] = 1
142-
res = nplets_measures_hot_encoded([self.covmat, self.covmat], nplets_hot_encoded, covmat_precomputed=True, T=self.X.shape[0], use_cpu=True)
142+
res = nplets_measures_hot_encoded([self.covmat, self.covmat], nplets_hot_encoded, covmat_precomputed=True, T=self.X.shape[0])
143143
self._validate_same_results_for_repeated_datasets(res, nplets, rtol=1e-8, atol=1e-4)
144144

145145

thoi/commons.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -73,20 +73,17 @@ def _get_device(use_cpu:bool=False):
7373
def _normalize_input_data(X: TensorLikeArray,
7474
covmat_precomputed: bool=False,
7575
T: Optional[Union[int, List[int]]]=None,
76-
use_cpu: bool=False):
76+
device: torch.device=torch.device('cpu')):
7777
'''
7878
brief: Normalize the input data to be a list of covariance matrices with shape (D, N, N) where D is the length of the list and N is the number of variables in the system.
7979
8080
Parameters:
8181
- X: A list of 2D numpy arrays or tensors of shape: 1. (T, N) where T is the number of samples if X are multivariate series. 2. a list of 2D covariance matrices with shape (N, N).
8282
- covmat_precomputed: A boolean flag to indicate if the input data is a list of covariance matrices or multivariate series.
8383
- T (optional): A list of integers indicating the number of samples for each multivariate series.
84-
- use_cpu: A boolean flag to indicate if the computation should be done on the CPU.
84+
- device: The device to use for the computation. Default is 'cpu'.
8585
'''
8686

87-
# Get device to use
88-
device = _get_device(use_cpu)
89-
9087
# Handle different options for X parameter. Accept multivariate data or covariance matrix
9188
if covmat_precomputed:
9289
covmats = torch.as_tensor(X)
@@ -116,4 +113,4 @@ def _normalize_input_data(X: TensorLikeArray,
116113
# Send covmat to device
117114
covmats = covmats.to(device).contiguous()
118115

119-
return covmats, D, N, T, device
116+
return covmats, D, N, T

thoi/heuristics/greedy.py

+25-14
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,22 @@
55
import torch
66
from functools import partial
77

8+
from thoi.typing import TensorLikeArray
89
from thoi.measures.gaussian_copula import multi_order_measures
910
from thoi.collectors import batch_to_tensor, concat_batched_tensors
1011
from thoi.heuristics.commons import _get_valid_candidates
1112
from thoi.heuristics.scoring import _evaluate_nplets
1213
from thoi.commons import _normalize_input_data
1314

1415
@torch.no_grad()
15-
def greedy(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
16+
def greedy(X: TensorLikeArray,
1617
initial_order: int=3,
1718
order: Optional[int]=None,
1819
*,
1920
covmat_precomputed: bool=False,
2021
T: Optional[Union[int, List[int]]]=None,
2122
repeat: int=10,
22-
use_cpu: bool=False,
23+
device: torch.device=torch.device('cpu'),
2324
batch_size: int=1000000,
2425
metric: Union[str,Callable]='o',
2526
largest: bool=False):
@@ -28,12 +29,23 @@ def greedy(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tenso
2829
Brief: Greedy algorithm to find the best order of nplets to maximize the metric for a given multivariate series or covariance matrices
2930
3031
Parameters:
31-
- X (Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]]): The input data to compute the nplets. It can be a list of 2D numpy arrays or tensors of shape: 1. (T, N) where T is the number of samples if X are multivariate series. 2. a list of 2D covariance matrices with shape (N, N).
32-
- covmat_precomputed (bool): A boolean flag to indicate if the input data is a list of covariance matrices or multivariate series.
33-
- T (Optional[Union[int, List[int]]]): A list of integers indicating the number of samples for each multivariate series.
32+
- X (TensorLikeArray): The input data to compute the nplets. It can be a list of 2D numpy arrays or tensors of shape: 1. (T, N) where T is the number of samples if X are multivariate series. 2. a list of 2D covariance matrices with shape (N, N).
33+
- initial_order (int): The initial order to start the greedy algorithm. Default is 3.
34+
- order (Optional[int]): The final order to stop the greedy algorithm. If None, it will be set to N.
35+
- covmat_precomputed (bool): A boolean flag to indicate if the input data is a list of covariance matrices or multivariate series.
36+
- T (Optional[Union[int, List[int]]]): A list of integers indicating the number of samples for each multivariate series.
37+
- repeat (int): The number of repetitions to perform to obtain different solutions starting from less optimal initial solutions.
38+
- device (torch.device): The device to use for the computation. Default is 'cpu'.
39+
- batch_size (int): The batch size to use for the computation. Default is 1000000.
40+
- metric (Union[str,Callable]): The metric to evaluate. One of tc, dtc, o, s or a callable function
41+
- largest (bool): A flag to indicate if the metric is to be maximized or minimized
42+
43+
Returns:
44+
- best_nplets (torch.Tensor): The nplets with the best score found with shape (repeat, order)
45+
- best_scores (torch.Tensor): The best scores for the best nplets with shape (repeat,)
3446
'''
3547

36-
covmats, D, N, T, device = _normalize_input_data(X, covmat_precomputed, T, use_cpu)
48+
covmats, D, N, T = _normalize_input_data(X, covmat_precomputed, T, device)
3749

3850
# Compute initial solutions
3951
batch_data_collector = partial(batch_to_tensor, top_k=repeat, metric=metric, largest=largest)
@@ -46,7 +58,7 @@ def greedy(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tenso
4658
min_order=initial_order,
4759
max_order=initial_order,
4860
batch_size=batch_size,
49-
use_cpu=use_cpu,
61+
device=device,
5062
batch_data_collector=batch_data_collector,
5163
batch_aggregation=batch_aggregation)
5264

@@ -62,7 +74,7 @@ def greedy(X: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tenso
6274
best_candidate, best_score = _next_order_greedy(covmats, T, current_solution,
6375
metric=metric,
6476
largest=largest,
65-
use_cpu=use_cpu)
77+
device=device)
6678
best_scores.append(best_score)
6779

6880
current_solution = torch.cat((current_solution, best_candidate.unsqueeze(1)) , dim=1)
@@ -101,9 +113,9 @@ def _create_all_solutions(initial_solution: torch.Tensor, valid_candidates: torc
101113
def _next_order_greedy(covmats: torch.Tensor,
102114
T: Optional[List[int]],
103115
initial_solution: torch.Tensor,
104-
metric:Union[str,Callable],
105-
largest:bool,
106-
use_cpu:bool):
116+
metric: Union[str,Callable],
117+
largest: bool,
118+
device: torch.device=torch.device('cpu')):
107119

108120
'''
109121
Brief: Greedy algorithm to find the best candidate to add to the current solution
@@ -114,15 +126,14 @@ def _next_order_greedy(covmats: torch.Tensor,
114126
- initial_solution (torch.Tensor): The initial solution with shape (batch_size, order)
115127
- metric (Union[str,Callable]): The metric to evaluate. One of tc, dtc, o, s or a callable function
116128
- largest (bool): A flag to indicate if the metric is to be maximized or minimized
117-
- use_cpu (bool): A flag to indicate if the computation should be done on the CPU
129+
- device (torch.device): The device to use for the computation. Default is 'cpu'
118130
119131
Returns:
120132
- best_candidates (torch.Tensor): The best candidates to add to the current solution with shape (batch_size)
121133
- best_score (torch.Tensor): The best score for the best candidates with shape (batch_size)
122134
'''
123135

124136
# Get parameters attributes
125-
device = covmats.device
126137
N = covmats.shape[1]
127138
batch_size, order = initial_solution.shape
128139

@@ -137,7 +148,7 @@ def _next_order_greedy(covmats: torch.Tensor,
137148
all_solutions = all_solutions.view(batch_size*(N-order), order+1)
138149

139150
# |batch_size x N-order|
140-
best_score = _evaluate_nplets(covmats, T, all_solutions, metric, use_cpu=use_cpu)
151+
best_score = _evaluate_nplets(covmats, T, all_solutions, metric, device=device)
141152

142153
# |batch_size| x |N-order|
143154
best_score = best_score.view(batch_size, N-order)

thoi/heuristics/scoring.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ def _evaluate_nplets(covmats: torch.Tensor,
1212
T: Optional[List[int]],
1313
batched_nplets: torch.Tensor,
1414
metric: Union[str, Callable],
15-
use_cpu:bool):
15+
device: torch.device):
1616
"""
17-
covmats (torch.Tensor): The covariance matrix or matrixes with shape (N, N) or (D, N, N)
18-
T (Optional[List[int]]): The number of samples for each multivariate series or None
19-
batched_nplets (torch.Tensor): The nplets to calculate the inverse of the oinformation with shape (batch_size, order)
20-
metric (str): The metric to evaluate. One of tc, dtc, o, s or Callable
17+
- covmats (torch.Tensor): The covariance matrix or matrices with shape (N, N) or (D, N, N)
18+
- T (Optional[List[int]]): The number of samples for each multivariate series or None
19+
- batched_nplets (torch.Tensor): The nplets to calculate the inverse of the oinformation with shape (batch_size, order)
20+
- metric (str): The metric to evaluate. One of tc, dtc, o, s or Callable
21+
- device (torch.device): The device to use
2122
"""
2223

2324
if len(covmats.shape) == 2:
@@ -30,17 +31,17 @@ def _evaluate_nplets(covmats: torch.Tensor,
3031
nplets=batched_nplets,
3132
T=T,
3233
covmat_precomputed=True,
33-
use_cpu=use_cpu)
34+
device=device)
3435

3536
# |batch_size|
36-
return metric_func(batched_measures).to(covmats.device)
37+
return metric_func(batched_measures).to(device)
3738

3839

3940
def _evaluate_nplet_hot_encoded(covmats: torch.Tensor,
40-
T:int,
41+
T: int,
4142
batched_nplets: torch.Tensor,
42-
metric:str,
43-
use_cpu:bool):
43+
metric: str,
44+
device: torch.device):
4445

4546
"""
4647
covmats (torch.Tensor): The covariance matrix or matrices with shape (N, N) or (D, N, N)
@@ -59,7 +60,7 @@ def _evaluate_nplet_hot_encoded(covmats: torch.Tensor,
5960
nplets=batched_nplets,
6061
T=T,
6162
covmat_precomputed=True,
62-
use_cpu=use_cpu)
63+
device=device)
6364

6465
# |batch_size|
65-
return metric_func(batched_measures).to(covmats.device)
66+
return metric_func(batched_measures).to(device)

0 commit comments

Comments
 (0)