Fix dim and copy_dim methods in SampleForecast (#366)

* fix copy_dim method, adjust docstrings
* update tests accordingly
* Raise tolerance in sampling test
* simplify test code
awslabs · Oct 16, 2019 · 3663066 · 3663066
1 parent 618d4af
commit 3663066
Show file tree

Hide file tree

Showing 2 changed files with 36 additions and 43 deletions.
diff --git a/src/gluonts/model/forecast.py b/src/gluonts/model/forecast.py
@@ -258,8 +258,7 @@ def copy_dim(self, dim: int):
         Parameters
         ----------
         dim
-            The returned forecast object will only have samples of this
-            dimension.
+            The returned forecast object will only represent this dimension.
         """
         raise NotImplementedError()
 
@@ -381,10 +380,12 @@ def copy_dim(self, dim: int):
         if len(self.samples.shape) == 2:
             samples = self.samples
         else:
-            assert (
-                dim < self.samples.shape[1]
-            ), f"dim should be target_dim - 1, but got dim={dim}, target_dim={self.samples.shape[1]}"
-            samples = self.samples[:, dim]
+            target_dim = self.samples.shape[2]
+            assert dim < target_dim, (
+                f"must set 0 <= dim < target_dim, but got dim={dim},"
+                f" target_dim={target_dim}"
+            )
+            samples = self.samples[:, :, dim]
 
         return SampleForecast(
             samples=samples,
@@ -398,14 +399,14 @@ def dim(self) -> int:
         if self._dim is not None:
             return self._dim
         else:
-            if (
-                len(self.samples.shape) == 2
-            ):  # 1D target. shape: (num_samples, prediction_length)
+            if len(self.samples.shape) == 2:
+                # univariate target
+                # shape: (num_samples, prediction_length)
                 return 1
             else:
-                return self.samples.shape[
-                    1
-                ]  # 2D target. shape: (num_samples, target_dim, prediction_length)
+                # multivariate target
+                # shape: (num_samples, prediction_length, target_dim)
+                return self.samples.shape[2]
 
     def as_json_dict(self, config: "Config") -> dict:
         result = super().as_json_dict(config)
@@ -523,15 +524,16 @@ def __repr__(self):
 class DistributionForecast(Forecast):
     """
     A `Forecast` object that uses a GluonTS distribution directly.
-    This can for instance be used to represent marginal probability distributions for each time
-    point -- although joint distributions are also possible, e.g. when using MultiVariateGaussian).
+    This can for instance be used to represent marginal probability
+    distributions for each time point -- although joint distributions are
+    also possible, e.g. when using MultiVariateGaussian).
 
     Parameters
     ----------
     distribution
-        GluonTS distribution or list of distributions.
-        The distribution should represent the entire prediction length, i.e., if we draw `num_samples` samples
-        from the distribution, the sample shape should be
+        Distribution object. This should represent the entire prediction
+        length, i.e., if we draw `num_samples` samples from the distribution,
+        the sample shape should be
 
            samples = trans_dist.sample(num_samples)
            samples.shape -> (num_samples, prediction_length)

diff --git a/test/evaluation/test_evaluator.py b/test/evaluation/test_evaluator.py
@@ -65,33 +65,26 @@ def iterable(it):
     return list(it)
 
 
-def naive_forecaster(ts, prediction_length, num_samples=100):
+def naive_forecaster(ts, prediction_length, num_samples=100, target_dim=0):
     """
     :param ts: pandas.Series
     :param prediction_length:
     :param num_samples: number of sample paths
+    :param target_dim: number of axes of target (0: scalar, 1: array, ...)
     :return: np.array with dimension (num_samples, prediction_length)
     """
 
     # naive prediction: last observed value
     naive_pred = ts.values[-prediction_length - 1]
-    return naive_pred + np.zeros((num_samples, prediction_length))
+    assert len(naive_pred.shape) == target_dim
+    return np.tile(
+        naive_pred,
+        (num_samples, prediction_length) + tuple(1 for _ in range(target_dim)),
+    )
 
 
-def naive_multivariate_forecaster(
-    ts, prediction_length, num_samples=100, target_dim=2
-):
-    """
-    :param ts: pandas.DataFrame
-    :param prediction_length:
-    :param num_samples: number of sample paths
-    :param target_dim: dimensionality of multivariate target
-    :return: np.array with dimension (num_samples, target_dim, prediction_length)
-    """
-    naive_pred = np.expand_dims(
-        ts.values.transpose()[:, -prediction_length - 1], axis=1
-    )
-    return naive_pred + np.zeros((num_samples, target_dim, prediction_length))
+def naive_multivariate_forecaster(ts, prediction_length, num_samples=100):
+    return naive_forecaster(ts, prediction_length, num_samples, target_dim=1)
 
 
 def calculate_metrics(
@@ -103,7 +96,7 @@ def calculate_metrics(
     input_type=iterator,
 ):
     num_timeseries = timeseries.shape[0]
-    num_timestamps = timeseries.shape[-1]
+    num_timestamps = timeseries.shape[1]
 
     if has_nans:
         timeseries[0, 1] = np.nan
@@ -125,9 +118,7 @@ def calculate_metrics(
             ts_start_dates[i], periods=num_timestamps, freq=freq
         )
 
-        pd_timeseries.append(
-            ts_datastructure(timeseries[i].transpose(), index=index)
-        )
+        pd_timeseries.append(ts_datastructure(timeseries[i], index=index))
         samples.append(
             forecaster(pd_timeseries[i], prediction_length, num_samples)
         )
@@ -433,29 +424,29 @@ def test_metrics(timeseries, res, has_nans, input_type):
 
 
 TIMESERIES_MULTIVARIATE = [
-    np.ones((5, 2, 10), dtype=np.float64),
-    np.ones((5, 2, 10), dtype=np.float64),
-    np.ones((5, 2, 10), dtype=np.float64),
+    np.ones((5, 10, 2), dtype=np.float64),
+    np.ones((5, 10, 2), dtype=np.float64),
+    np.ones((5, 10, 2), dtype=np.float64),
     np.stack(
         (
             np.arange(0, 50, dtype=np.float64).reshape(5, 10),
             np.arange(50, 100, dtype=np.float64).reshape(5, 10),
         ),
-        axis=1,
+        axis=2,
     ),
     np.stack(
         (
             np.arange(0, 50, dtype=np.float64).reshape(5, 10),
             np.arange(50, 100, dtype=np.float64).reshape(5, 10),
         ),
-        axis=1,
+        axis=2,
     ),
     np.stack(
         (
             np.arange(0, 50, dtype=np.float64).reshape(5, 10),
             np.arange(50, 100, dtype=np.float64).reshape(5, 10),
         ),
-        axis=1,
+        axis=2,
     ),
 ]