aai-institute · mdbenito · Jun 29, 2023 · Jun 8, 2023 · Jun 18, 2023 · Jun 28, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 - Fix adding valuation results with overlapping indices and different lengths
   [PR #370](https://github.com/appliedAI-Initiative/pyDVL/pull/370)
+- Fixed bugs in conjugate gradient and `solve_linear`
+  [PR #358](https://github.com/appliedAI-Initiative/pyDVL/pull/358)
 - Major changes to IF interface and functionality
   [PR #278](https://github.com/appliedAI-Initiative/pyDVL/pull/278)
 

diff --git a/src/pydvl/influence/frameworks/torch_differentiable.py b/src/pydvl/influence/frameworks/torch_differentiable.py
@@ -66,7 +66,7 @@ def solve_linear(
     all_y = cat(all_y)
     matrix = model.hessian(
         all_x, all_y, progress=progress
-    ) + hessian_perturbation * identity_tensor(model.num_params)
+    ) + hessian_perturbation * identity_tensor(model.num_params, device=model.device)
     return torch.linalg.solve(matrix, b.T).T
 
 
@@ -149,6 +149,9 @@ def solve_cg(
     optimal = False
 
     for k in range(maxiter):
+        if gamma < stopping_val:
+            optimal = True
+            break
         Ap = hvp(p).squeeze()
         alpha = gamma / torch.sum(torch.matmul(p, Ap)).item()
         x += alpha * p
@@ -158,10 +161,6 @@ def solve_cg(
         gamma = gamma_
         p = r + beta * p
 
-        if gamma < stopping_val:
-            optimal = True
-            break
-
     info = {"niter": k, "optimal": optimal}
     return x, info
 
@@ -269,8 +268,8 @@ def einsum(equation, *operands) -> torch.Tensor:
     return torch.einsum(equation, *operands)
 
 
-def identity_tensor(dim: int) -> torch.Tensor:
-    return torch.eye(dim, dim)
+def identity_tensor(dim: int, **kwargs) -> torch.Tensor:
+    return torch.eye(dim, dim, **kwargs)
 
 
 def mvp(
@@ -312,7 +311,9 @@ def mvp(
     return mvp.detach()  # type: ignore
 
 
-class TorchTwiceDifferentiable(TwiceDifferentiable[torch.Tensor, nn.Module]):
+class TorchTwiceDifferentiable(
+    TwiceDifferentiable[torch.Tensor, nn.Module, torch.device]
+):
     def __init__(
         self,
         model: nn.Module,

diff --git a/src/pydvl/influence/frameworks/twice_differentiable.py b/src/pydvl/influence/frameworks/twice_differentiable.py
@@ -3,17 +3,22 @@
 
 TensorType = TypeVar("TensorType", bound=Sequence)
 ModelType = TypeVar("ModelType")
+DeviceType = TypeVar("DeviceType")
 
 
-class TwiceDifferentiable(ABC, Generic[TensorType, ModelType]):
+class TwiceDifferentiable(ABC, Generic[TensorType, ModelType, DeviceType]):
     """
     Wraps a differentiable model and loss and provides methods to compute the
     second derivative of the loss wrt. the model parameters.
     """
 
     def __init__(
-        self, model: ModelType, loss: Callable[[TensorType, TensorType], TensorType]
+        self,
+        model: ModelType,
+        loss: Callable[[TensorType, TensorType], TensorType],
+        device: DeviceType,
     ):
+        self.device = device
         pass
 
     @property

diff --git a/src/pydvl/influence/general.py b/src/pydvl/influence/general.py
@@ -32,7 +32,7 @@ class InfluenceType(str, Enum):
 
 
 def compute_influence_factors(
-    model: TwiceDifferentiable[TensorType, ModelType],
+    model: TwiceDifferentiable,
     training_data: DataLoaderType,
     test_data: DataLoaderType,
     inversion_method: InversionMethod,
@@ -82,7 +82,7 @@ def compute_influence_factors(
 
 
 def compute_influences_up(
-    model: TwiceDifferentiable[TensorType, ModelType],
+    model: TwiceDifferentiable,
     input_data: DataLoaderType,
     influence_factors: TensorType,
     *,
@@ -115,7 +115,7 @@ def compute_influences_up(
 
 
 def compute_influences_pert(
-    model: TwiceDifferentiable[TensorType, ModelType],
+    model: TwiceDifferentiable,
     input_data: DataLoaderType,
     influence_factors: TensorType,
     *,
@@ -165,7 +165,7 @@ def compute_influences_pert(
 
 
 def compute_influences(
-    differentiable_model: TwiceDifferentiable[TensorType, ModelType],
+    differentiable_model: TwiceDifferentiable,
     training_data: DataLoaderType,
     *,
     test_data: Optional[DataLoaderType] = None,

diff --git a/src/pydvl/influence/inversion.py b/src/pydvl/influence/inversion.py
@@ -32,7 +32,7 @@ class InversionMethod(str, Enum):
 
 def solve_hvp(
     inversion_method: InversionMethod,
-    model: TwiceDifferentiable[TensorType, ModelType],
+    model: TwiceDifferentiable,
     training_data: DataLoaderType,
     b: TensorType,
     *,