diff --git a/README.md b/README.md index 3dff693..1921780 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ The following parameters can be configured in the `config/config_hydra.yaml` fil - **General Parameters:** - `output_dir`: Directory where output plots and MP4s will be saved (default: `./`). - - `dataset`: Name of the dataset to use (default: `ItalyPowerDemand`) loaded from aeon-toolkit, see the [classification dataset list](https://github.com/aeon-toolkit/aeon/blob/main/aeon/datasets/tsc_datasets.py), the [regression datasets list](https://github.com/aeon-toolkit/aeon/blob/main/aeon/datasets/tser_datasets.py) and the [forecasting datasets lists](https://github.com/aeon-toolkit/aeon/blob/main/aeon/datasets/tsf_datasets.py). + - `dataset`: Name of the dataset to use (default: `ItalyPowerDemand`), loaded from aeon-toolkit; see the [classification dataset list](https://github.com/aeon-toolkit/aeon/blob/main/aeon/datasets/tsc_datasets.py) and the [regression datasets list](https://github.com/aeon-toolkit/aeon/blob/main/aeon/datasets/tser_datasets.py). Works with both univariate and multivariate time series datasets. - `split`: Dataset split (default: `None`). - `znormalize`: Whether to Z-normalize the time series (default: `True`). - `class_x`: Class label for the first time series (default: `0`). 
@@ -74,10 +74,15 @@ The following parameters can be configured in the `config/config_hydra.yaml` fil ## Example -Using DTW on ItalyPowerDemand dataset +Using DTW on the univariate [ECGFiveDays](http://timeseriesclassification.com/description.php?Dataset=ECGFiveDays) dataset -- In video format [download-here](exps/dtw-vis/ItalyPowerDemand/dtw.mp4): ![dtw-italy](exps/dtw-vis/ItalyPowerDemand/dtw.gif) -- In pdf format: [dtw-italy-pdf](exps/dtw-vis/ItalyPowerDemand/dtw.pdf) +- In video format [download-here](exps/dtw-vis/ECGFiveDays/dtw.mp4): ![dtw-ecgfivedays](exps/dtw-vis/ECGFiveDays/dtw.gif) +- In PDF format: [dtw-ecgfivedays-pdf](exps/dtw-vis/ECGFiveDays/dtw.pdf) + +Using DTW on the multivariate [ERing](http://timeseriesclassification.com/description.php?Dataset=ERing) dataset + +- In video format [download-here](exps/dtw-vis/ERing/dtw.mp4): ![dtw-ering](exps/dtw-vis/ERing/dtw.gif) +- In PDF format: [dtw-ering-pdf](exps/dtw-vis/ERing/dtw.pdf) ## Citation @@ -96,5 +101,5 @@ If you use this code in your research, please cite this repository: ## Acknowledgments -We would like to thank the authors of the UCR, UEA and Monash archives for making the Time Series Classification/Regression/Forecasting datasets publicly available. +We would like to thank the authors of the UCR, UEA and Monash archives for making the Time Series Classification/Regression datasets publicly available. We would also like to thank the Aeon time series machine learning python toolkit for their fast implementation of elastic similarity measures. 
diff --git a/config/config_hydra.yaml b/config/config_hydra.yaml index b329529..3bf8af0 100644 --- a/config/config_hydra.yaml +++ b/config/config_hydra.yaml @@ -5,7 +5,7 @@ hydra : dir : exps/${hydra.job.name} output_dir: './' # output directory -dataset : "ItalyPowerDemand" # dataset name to load from aeon +dataset : "ERing" # dataset name to load from aeon split: Null # either use train/test or both splits znormalize: True # znormalize each time series channel independent @@ -22,7 +22,7 @@ metric_params : # dictionary with measure parameters w : null # for the minkowski distance itakura_max_slope: null # for all warping based distances descriptor : "identity" # for shape_dtw - reach : 5 # for shape_dtw + reach : 15 # for shape_dtw g : 0.05 # for wdtw epsilon : 1.0 # for lcss, edr g_arr : null # for erp diff --git a/draw_functions.py b/draw_functions.py index b780f07..f613f49 100644 --- a/draw_functions.py +++ b/draw_functions.py @@ -19,7 +19,6 @@ def draw_elastic( x: np.ndarray, y: np.ndarray, - channel_used: int = 0, output_dir: str = "./", figsize: Tuple[int, int] = None, metric: str = "dtw", @@ -36,8 +35,6 @@ def draw_elastic( The first time series to compare. Expected to be a 2D array with shape (channels, length). y : np.ndarray The second time series to compare. Expected to be a 2D array with shape (channels, length). - channel_used : int, optional - The index of the channel to be used for comparison. Default is 0. output_dir : str, optional The directory where the output plot will be saved. Default is "./". figsize : Tuple[int, int], optional @@ -59,8 +56,10 @@ def draw_elastic( ------ A plot comparing the two time series with elastic alignment. 
""" - if int(x.shape[0]) == 1: - channel_used = 0 + n_channels = int(x.shape[0]) + + blue_shades = [(0, 0, i / n_channels) for i in range(1, 1 + n_channels)] + red_shades = [(i / n_channels, 0, 0) for i in range(1, 1 + n_channels)] figsize = (10, 10) if figsize is not None else figsize @@ -83,47 +82,69 @@ def draw_elastic( lines_for_legend = [] - lines_for_legend.append( - ax_x.plot( - -x[channel_used, 0 : len(x[channel_used])][::-1], - np.arange(0, len(x[channel_used])), - lw=4, - color="blue", - label="Time Series 1", - )[0] - ) + for c in range(n_channels): + if c == n_channels - 1: + lines_for_legend.append( + ax_x.plot( + -x[c, 0 : len(x[c])][::-1], + np.arange(0, len(x[c])), + lw=4, + color=blue_shades[c], + label="Time Series 1", + )[0] + ) + else: + lines_for_legend.append( + ax_x.plot( + -x[c, 0 : len(x[c])][::-1], + np.arange(0, len(x[c])), + lw=4, + color=blue_shades[c], + )[0] + ) - lines_for_legend.append( - ax_y.plot( - np.arange(0, len(y[channel_used])), - y[channel_used, 0 : len(y[channel_used])], - lw=4, - color="red", - label="Time Series 2", - )[0] - ) + for c in range(n_channels): + if c == n_channels - 1: + lines_for_legend.append( + ax_y.plot( + np.arange(0, len(y[c])), + y[c, 0 : len(y[c])], + lw=4, + color=red_shades[c], + label="Time Series 2", + )[0] + ) + else: + lines_for_legend.append( + ax_y.plot( + np.arange(0, len(y[c])), + y[c, 0 : len(y[c])], + lw=4, + color=red_shades[c], + )[0] + ) ax_x.arrow( - x=-np.max(x[channel_used]) - 0.5, - y=len(x[channel_used]) - 1, + x=-np.max(x) - 0.5, + y=len(x[0]) - 1, dx=0, - dy=-len(x[channel_used]) + 1, + dy=-len(x[0]) + 1, head_width=0.1, color="gray", ) - ax_x.text(x=-np.max(x[channel_used]) - 0.7, y=0, s="time", rotation="vertical") + ax_x.text(x=-np.max(x) - 0.7, y=0, s="time", rotation="vertical") ax_y.arrow( x=0, - y=np.max(y[channel_used]) + 0.5, - dx=len(y[channel_used]) - 1, + y=np.max(y) + 0.5, + dx=len(y[0]) - 1, dy=0, head_width=0.1, color="gray", ) ax_y.text( - x=len(y[channel_used]) 
- 1, - y=np.max(y[channel_used]) + 0.7, + x=len(y[0]) - 1, + y=np.max(y) + 0.7, s="time", rotation="horizontal", ) @@ -142,8 +163,8 @@ def draw_elastic( con_x = ConnectionPatch( ( - -x[channel_used, ::-1][len(x[channel_used]) - 1 - i_mid], - len(x[channel_used]) - 1 - i_mid, + -x[0, ::-1][len(x[0]) - 1 - i_mid], + len(x[0]) - 1 - i_mid, ), (j, i), "data", @@ -155,7 +176,7 @@ def draw_elastic( ) con_y = ConnectionPatch( - (j_mid, y[channel_used, j_mid]), + (j_mid, y[0, j_mid]), (j, i), "data", "data", @@ -193,7 +214,6 @@ def draw_elastic_gif( x: np.ndarray, y: np.ndarray, output_dir: str = "./", - channel_used: int = 0, figsize: Tuple[int, int] = None, metric: str = "dtw", fontsize: int = 10, @@ -211,8 +231,6 @@ def draw_elastic_gif( The second time series to compare. Expected to be a 2D array with shape (channels, length). output_dir : str, optional The directory where the output GIF will be saved. Default is './'. - channel_used : int, optional - The index of the channel to be used for comparison. Default is 0. figsize : Tuple[int, int], optional The size of the figure in inches (width, height). Default is None, which uses a default size of (10, 10). 
metric : str, optional @@ -235,6 +253,11 @@ def draw_elastic_gif( if figsize is None: figsize = (10, 10) + n_channels = int(x.shape[0]) + + blue_shades = [(0, 0, i / n_channels) for i in range(1, 1 + n_channels)] + red_shades = [(i / n_channels, 0, 0) for i in range(1, 1 + n_channels)] + _x = np.copy(x) _y = np.copy(y) @@ -254,47 +277,69 @@ def draw_elastic_gif( lines_for_legend = [] - lines_for_legend.append( - ax_x.plot( - -x[channel_used, 0 : len(x[channel_used])][::-1], - np.arange(0, len(x[channel_used])), - lw=4, - color="blue", - label="Time Series 1", - )[0] - ) + for c in range(n_channels): + if c == n_channels - 1: + lines_for_legend.append( + ax_x.plot( + -x[c, 0 : len(x[c])][::-1], + np.arange(0, len(x[c])), + lw=4, + color=blue_shades[c], + label="Time Series 1", + )[0] + ) + else: + lines_for_legend.append( + ax_x.plot( + -x[c, 0 : len(x[c])][::-1], + np.arange(0, len(x[c])), + lw=4, + color=blue_shades[c], + )[0] + ) - lines_for_legend.append( - ax_y.plot( - np.arange(0, len(y[channel_used])), - y[channel_used, 0 : len(y[channel_used])], - lw=4, - color="red", - label="Time Series 2", - )[0] - ) + for c in range(n_channels): + if c == n_channels - 1: + lines_for_legend.append( + ax_y.plot( + np.arange(0, len(y[c])), + y[c, 0 : len(y[c])], + lw=4, + color=red_shades[c], + label="Time Series 2", + )[0] + ) + else: + lines_for_legend.append( + ax_y.plot( + np.arange(0, len(y[c])), + y[c, 0 : len(y[c])], + lw=4, + color=red_shades[c], + )[0] + ) ax_x.arrow( - x=-np.max(x[channel_used]) - 0.5, - y=len(x[channel_used]) - 1, + x=-np.max(x) - 0.5, + y=len(x[0]) - 1, dx=0, - dy=-len(x[channel_used]) + 1, + dy=-len(x[0]) + 1, head_width=0.1, color="gray", ) - ax_x.text(x=-np.max(x[channel_used]) - 0.7, y=0, s="time", rotation="vertical") + ax_x.text(x=-np.max(x) - 0.7, y=0, s="time", rotation="vertical") ax_y.arrow( x=0, - y=np.max(y[channel_used]) + 0.5, - dx=len(y[channel_used]) - 1, + y=np.max(y) + 0.5, + dx=len(y[0]) - 1, dy=0, head_width=0.1, 
color="gray", ) ax_y.text( - x=len(y[channel_used]) - 1, - y=np.max(y[channel_used]) + 0.7, + x=len(y[0]) - 1, + y=np.max(y) + 0.7, s="time", rotation="horizontal", ) @@ -306,10 +351,46 @@ def draw_elastic_gif( dtw_plot = ax_matrix.plot(path_dtw_y, path_dtw_x, color="black", lw=4)[0] + if n_channels > 1: + vert_x = ax_x.plot( + [ + -x[:, ::-1][:, len(x[0]) - 1 - optimal_path[0][0]].max(), + -x[:, ::-1][:, len(x[0]) - 1 - optimal_path[0][0]].min(), + ], + [len(x[0]) - 1 - optimal_path[0][0], len(x[0]) - 1 - optimal_path[0][0]], + lw=4, + color="orange", + zorder=2, + )[0] + + vert_y = ax_y.plot( + [optimal_path[0][1], optimal_path[0][1]], + [y[:, optimal_path[0][1]].min(), y[:, optimal_path[0][1]].max()], + lw=4, + color="orange", + zorder=2, + )[0] + else: + vert_x = ax_x.scatter( + -x[0, ::-1][len(x[0]) - 1 - optimal_path[0][0]], + len(x[0]) - 1 - optimal_path[0][0], + s=100, + color="orange", + zorder=2, + ) + + vert_y = ax_y.scatter( + optimal_path[0][1], + y[0, optimal_path[0][1]], + s=100, + color="orange", + zorder=2, + ) + con_x = ConnectionPatch( ( - -x[channel_used, ::-1][len(x[channel_used]) - 1 - optimal_path[0][0]], - len(x[channel_used]) - 1 - optimal_path[0][0], + -x[:, ::-1][:, len(x[0]) - 1 - optimal_path[0][0]].max(), + len(x[0]) - 1 - optimal_path[0][0], ), (optimal_path[0][1], optimal_path[0][0]), "data", @@ -321,7 +402,7 @@ def draw_elastic_gif( ) con_y = ConnectionPatch( - (optimal_path[0][1], y[channel_used, optimal_path[0][1]]), + (optimal_path[0][1], y[:, optimal_path[0][1]].min()), (optimal_path[0][1], optimal_path[0][0]), "data", "data", @@ -347,13 +428,33 @@ def animate(i): dtw_plot.set_data(path_dtw_y[time_mesh], path_dtw_x[time_mesh]) + if n_channels > 1: + vert_x.set_data( + [ + -x[:, ::-1][:, len(x[0]) - 1 - i_x_mid].max(), + -x[:, ::-1][:, len(x[0]) - 1 - i_x_mid].min(), + ], + [len(x[0]) - 1 - i_x_mid, len(x[0]) - 1 - i_x_mid], + ) + + vert_y.set_data( + [i_y_mid, i_y_mid], + [y[:, i_y_mid].min(), y[:, i_y_mid].max()], + ) + else: + 
vert_x.set_offsets( + [-x[0, ::-1][len(x[0]) - 1 - i_x_mid], len(x[0]) - 1 - i_x_mid] + ) + + vert_y.set_offsets([i_y_mid, y[0, i_y_mid]]) + con_x.xy1 = ( - -x[channel_used, ::-1][len(x[channel_used]) - 1 - i_x_mid], - len(x[channel_used]) - 1 - i_x_mid, + -x[:, ::-1][:, len(x[0]) - 1 - i_x_mid].max(), + len(x[0]) - 1 - i_x_mid, ) con_x.xy2 = path_dtw_y[i], path_dtw_x[i] - con_y.xy1 = i_y_mid, y[channel_used, i_y_mid] + con_y.xy1 = i_y_mid, y[:, i_y_mid].min() con_y.xy2 = path_dtw_y[i], path_dtw_x[i] return dtw_plot, con_x, con_y diff --git a/exps/dtw-vis/ECGFiveDays/dtw.gif b/exps/dtw-vis/ECGFiveDays/dtw.gif new file mode 100644 index 0000000..b884045 Binary files /dev/null and b/exps/dtw-vis/ECGFiveDays/dtw.gif differ diff --git a/exps/dtw-vis/ECGFiveDays/dtw.mp4 b/exps/dtw-vis/ECGFiveDays/dtw.mp4 new file mode 100644 index 0000000..59de6d4 Binary files /dev/null and b/exps/dtw-vis/ECGFiveDays/dtw.mp4 differ diff --git a/exps/dtw-vis/ECGFiveDays/dtw.pdf b/exps/dtw-vis/ECGFiveDays/dtw.pdf new file mode 100644 index 0000000..cf518c6 Binary files /dev/null and b/exps/dtw-vis/ECGFiveDays/dtw.pdf differ diff --git a/exps/dtw-vis/ERing/dtw.gif b/exps/dtw-vis/ERing/dtw.gif new file mode 100644 index 0000000..5817a2e Binary files /dev/null and b/exps/dtw-vis/ERing/dtw.gif differ diff --git a/exps/dtw-vis/ERing/dtw.mp4 b/exps/dtw-vis/ERing/dtw.mp4 new file mode 100644 index 0000000..5a25175 Binary files /dev/null and b/exps/dtw-vis/ERing/dtw.mp4 differ diff --git a/exps/dtw-vis/ERing/dtw.pdf b/exps/dtw-vis/ERing/dtw.pdf new file mode 100644 index 0000000..5d85770 Binary files /dev/null and b/exps/dtw-vis/ERing/dtw.pdf differ diff --git a/exps/dtw-vis/ItalyPowerDemand/dtw.mp4 b/exps/dtw-vis/ItalyPowerDemand/dtw.mp4 index dfd5282..7a46d52 100644 Binary files a/exps/dtw-vis/ItalyPowerDemand/dtw.mp4 and b/exps/dtw-vis/ItalyPowerDemand/dtw.mp4 differ diff --git a/exps/dtw-vis/ItalyPowerDemand/dtw.pdf b/exps/dtw-vis/ItalyPowerDemand/dtw.pdf index cce76f9..87cc22f 100644 Binary 
files a/exps/dtw-vis/ItalyPowerDemand/dtw.pdf and b/exps/dtw-vis/ItalyPowerDemand/dtw.pdf differ diff --git a/main.py b/main.py index 21a5aeb..27e99dc 100644 --- a/main.py +++ b/main.py @@ -21,19 +21,24 @@ def main(args: DictConfig): output_dir_dataset = os.path.join(output_dir, dataset) create_directory(output_dir_dataset) - X, y = load_data(dataset_name=dataset, split=args.split, znormalize=args.znormalize) + X, y, is_classif = load_data( + dataset_name=dataset, split=args.split, znormalize=args.znormalize + ) - ts1 = X[y == args.class_x][ - np.random.randint(low=0, high=len(X[y == args.class_x]), size=1)[0] - ] - ts2 = X[y == args.class_y][ - np.random.randint(low=0, high=len(X[y == args.class_y]), size=1)[0] - ] + if is_classif: + ts1 = X[y == args.class_x][ + np.random.randint(low=0, high=len(X[y == args.class_x]), size=1)[0] + ] + ts2 = X[y == args.class_y][ + np.random.randint(low=0, high=len(X[y == args.class_y]), size=1)[0] + ] + else: + ts1 = X[np.random.randint(low=0, high=len(X), size=1)[0]] + ts2 = X[np.random.randint(low=0, high=len(X), size=1)[0]] draw_elastic( x=ts1, y=ts2, - channel_used=0, output_dir=output_dir_dataset, figsize=args.figsize, metric=args.metric, @@ -47,7 +52,6 @@ def main(args: DictConfig): y=ts2, figsize=args.figsize, fontsize=10, - channel_used=0, metric_params=args.metric_params, metric=args.metric, ) diff --git a/utils.py b/utils.py index e3f7549..73adb48 100644 --- a/utils.py +++ b/utils.py @@ -7,8 +7,7 @@ from aeon.datasets.tsc_datasets import univariate, multivariate from aeon.datasets.tser_datasets import tser_soton -from aeon.datasets.tsf_datasets import tsf_all -from aeon.datasets import load_classification, load_regression, load_forecasting +from aeon.datasets import load_classification, load_regression def load_data(dataset_name: str, split: str, znormalize: bool): @@ -28,23 +27,22 @@ def load_data(dataset_name: str, split: str, znormalize: bool): Tuple[np.ndarray, np.ndarray] The loaded data and labels. 
""" - y = None + is_classif = True if dataset_name in univariate or dataset_name in multivariate: X, y = load_classification(name=dataset_name, split=split) elif dataset_name in tser_soton: X, y = load_regression(name=dataset_name, split=split) - elif dataset_name in tsf_all: - X = load_forecasting(name=dataset_name) + is_classif = False else: raise ValueError("The dataset " + dataset_name + " does not exist in aeon.") if znormalize: X = znormalisation(x=X) - if y is not None: + if is_classif: y = encode_labels(y) - return X, y + return X, y, is_classif def create_directory(directory_path):