103 changes: 103 additions & 0 deletions src/ATen/native/sparse/xpu/SparseTensorMath.cpp
@@ -1,5 +1,15 @@
#include <ATen/native/sparse/xpu/sycl/SparseTensorMathKernels.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/addmm.h>
#include <ATen/ops/matmul.h>
#endif

#include <ATen/ExpandUtils.h>

namespace at::native {

using namespace at::sparse;
@@ -26,4 +36,97 @@ Tensor _sparse_sum_backward_xpu(
return xpu::_sparse_sum_backward_kernel(grad_, input_, dims_to_sum);
}

Tensor& s_addmm_out_sparse_dense_xpu(Tensor& r_, const Tensor& t, const SparseTensor& sparse_, const Tensor& dense, const Scalar& beta, const Scalar& alpha) {
TORCH_CHECK(t.is_xpu(), "Expected all tensors to be on the same device. addmm: expected 'self' to be XPU, but got CPU");
TORCH_CHECK(r_.is_xpu(), "Expected all tensors to be on the same device. addmm: expected 'out' to be XPU, but got CPU");
TORCH_CHECK(sparse_.is_xpu(), "Expected all tensors to be on the same device. addmm: expected 'mat1' to be XPU, but got CPU");
TORCH_CHECK(dense.is_xpu(), "Expected all tensors to be on the same device. addmm: expected 'mat2' to be XPU, but got CPU");

// TORCH_CHECK(xpu::check_device({sparse_, r_, t, dense}));
Contributor:
xpu::check_device can be found in sparse/xpu/sycl/SparseTensorMathKernels.cpp.

Copilot AI (Nov 26, 2025):
This commented-out device check should either be implemented or removed. If device validation is needed beyond the individual tensor checks above, uncomment it and ensure the function exists; otherwise, remove the dead code.

Suggested change:
-    // TORCH_CHECK(xpu::check_device({sparse_, r_, t, dense}));
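For reference, a minimal sketch of a device-consistency helper in the spirit of the commented-out check; the actual xpu::check_device in sparse/xpu/sycl/SparseTensorMathKernels.cpp may differ in name, signature, and behavior, and the helper name below is hypothetical:

#include <ATen/core/Tensor.h>
#include <c10/util/ArrayRef.h>

namespace {
// Returns true only if every tensor is an XPU tensor and all share one device index.
bool all_same_xpu_device(at::ArrayRef<at::Tensor> tensors) {
  if (tensors.empty()) {
    return true;
  }
  const auto device_index = tensors.front().get_device();
  for (const auto& t : tensors) {
    if (!t.is_xpu() || t.get_device() != device_index) {
      return false;
    }
  }
  return true;
}
} // namespace

// Usage mirroring the commented-out line above (helper name is hypothetical):
// TORCH_CHECK(all_same_xpu_device({sparse_, r_, t, dense}),
//             "addmm: expected all tensors to be on the same XPU device");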

TORCH_CHECK(dense.dim() == 2, "addmm: 2D tensor expected, got ", dense.dim(), "D tensor");
TORCH_CHECK(sparse_.sparse_dim() == 2, "addmm: expected first two dims to be sparse (indices has size 2 at first dim), but got ", sparse_.sparse_dim(), " sparse dims");
// no need to check dense_dim because dense_dim + sparse_dim = dim

Tensor mat1_dense = sparse_._to_dense(std::nullopt, std::nullopt);
Contributor:
Will Tensor mat1_dense = sparse_._to_dense() cause undefined behavior? I noticed that dtype and masked are optional arguments for sparse_to_dense.
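If I read the public API correctly, to_dense() takes optional dtype and masked_grad arguments that default to nullopt, so the explicit-nullopt call above should behave the same as the defaulted form; a minimal sketch under that assumption:

// Assumption: both calls resolve to the same conversion; the dtype stays the sparse
// tensor's dtype and masked_grad keeps its default.
at::Tensor a = sparse_.to_dense();                             // defaulted optionals
at::Tensor b = sparse_.to_dense(std::nullopt, std::nullopt);   // explicit optionals
// Under the stated assumption, a and b agree in dtype, shape, and values.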

at::addmm_out(r_, t, mat1_dense, dense, beta, alpha);
Comment on lines +51 to +52

Copilot AI (Nov 26, 2025):
Converting the sparse tensor to dense format defeats the purpose of sparse operations and may cause significant memory overhead for large sparse tensors. Consider implementing a proper sparse matrix multiplication kernel instead of this fallback approach.

Suggested change:
-    Tensor mat1_dense = sparse_._to_dense(std::nullopt, std::nullopt);
-    at::addmm_out(r_, t, mat1_dense, dense, beta, alpha);
+    // Use a proper sparse matrix multiplication kernel for XPU
+    xpu::addmm_out_sparse_dense_kernel(r_, t, sparse_, dense, beta, alpha);
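For a sense of what a dedicated kernel would compute without densifying mat1, here is a hedged, standalone reference loop over COO triples (plain C++ over flat arrays, not the torch-xpu-ops kernel; the function name and row-major layout are assumptions): each stored (row, col, val) entry contributes alpha * val * dense[col, :] to out[row, :], on top of beta * t.

#include <cstdint>
#include <vector>

// Reference semantics of addmm with a COO sparse mat1 (m x k) and dense mat2 (k x n):
//   out = beta * t + alpha * (mat1 @ mat2), touching only the nnz stored entries of mat1.
void coo_addmm_reference(
    int64_t m, int64_t n,
    const std::vector<int64_t>& rows,   // COO row indices, length nnz
    const std::vector<int64_t>& cols,   // COO column indices, length nnz
    const std::vector<float>& vals,     // COO values, length nnz
    const std::vector<float>& t,        // m x n, row-major ("self")
    const std::vector<float>& dense,    // k x n, row-major ("mat2")
    float beta, float alpha,
    std::vector<float>& out) {          // m x n, row-major
  out.resize(m * n);
  for (int64_t i = 0; i < m * n; ++i) {
    out[i] = beta * t[i];
  }
  for (size_t e = 0; e < vals.size(); ++e) {
    const int64_t r = rows[e];
    const int64_t c = cols[e];
    for (int64_t j = 0; j < n; ++j) {
      out[r * n + j] += alpha * vals[e] * dense[c * n + j];
    }
  }
}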

return r_;
}

Tensor s_addmm_sparse_dense_xpu(
const Tensor& t,
const SparseTensor& sparse,
const Tensor& dense,
const Scalar& beta,
const Scalar& alpha
) {
Tensor r = at::empty({0}, t.options());
s_addmm_out_sparse_dense_xpu(r, t, sparse, dense, beta, alpha);
return r;
}


Tensor& addmm_out_sparse_dense_xpu(
const Tensor& self,
const SparseTensor& mat1,
const Tensor& mat2,
const Scalar& beta,
const Scalar& alpha,
Tensor& result
) {
c10::MaybeOwned<Tensor> b_self = expand_size(self, {mat1.size(0), mat2.size(1)}, "addmm_out");
return s_addmm_out_sparse_dense_xpu(result, *b_self, mat1, mat2, beta, alpha);
}

Tensor addmm_sparse_dense_xpu(
const Tensor& self,
const SparseTensor& mat1,
const Tensor& mat2,
const Scalar& beta,
const Scalar& alpha
) {
c10::MaybeOwned<Tensor> b_self = expand_size(self, {mat1.size(0), mat2.size(1)}, "addmm_out");
return s_addmm_sparse_dense_xpu(*b_self, mat1, mat2, beta, alpha);
}

Tensor& s_addmm_sparse_dense_xpu_(
Tensor& t,
const SparseTensor& sparse,
const Tensor& dense,
const Scalar& beta,
const Scalar& alpha
) {
return s_addmm_out_sparse_dense_xpu(t, t, sparse, dense, beta, alpha);
}

Tensor sparse_sparse_matmul_xpu(const Tensor& mat1_, const Tensor& mat2_) {
TORCH_INTERNAL_ASSERT(mat1_.is_sparse());
TORCH_INTERNAL_ASSERT(mat2_.is_sparse());
TORCH_CHECK(mat1_.dim() == 2);
TORCH_CHECK(mat2_.dim() == 2);
TORCH_CHECK(mat1_.dense_dim() == 0, "sparse_mm: scalar values expected, mat1 got ", mat1_.dense_dim(), "D values");
TORCH_CHECK(mat2_.dense_dim() == 0, "sparse_mm: scalar values expected, mat2 got ", mat2_.dense_dim(), "D values");

TORCH_CHECK(
mat1_.size(1) == mat2_.size(0), "mat1 and mat2 shapes cannot be multiplied (",
mat1_.size(0), "x", mat1_.size(1), " and ", mat2_.size(0), "x", mat2_.size(1), ")");

TORCH_CHECK(mat1_.scalar_type() == mat2_.scalar_type(),
"mat1 dtype ", mat1_.scalar_type(), " does not match mat2 dtype ", mat2_.scalar_type());

// convert to dense
Tensor mat1_dense = mat1_._to_dense(std::nullopt, std::nullopt);
Tensor mat2_dense = mat2_._to_dense(std::nullopt, std::nullopt);

Tensor output_dense = at::matmul(mat1_dense, mat2_dense);
// convert back to sparse
Tensor output_sparse = output_dense._to_sparse(mat1_.layout());

return output_sparse;

// auto output = at::native::empty_like(mat1_);
// output.sparse_resize_and_clear_({mat1_.size(0), mat2_.size(1)}, mat1_.sparse_dim(), 0);
}

} // namespace at::native
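The sparse_sparse_matmul_xpu fallback above follows the same densify/multiply/re-sparsify pattern; below is a hedged standalone sketch of that round trip written against the public ATen API (to_dense/to_sparse rather than the internal _to_dense/_to_sparse, which may differ in arguments):

#include <ATen/ATen.h>

// Round-trip fallback: densify both COO operands, multiply, convert the result back to COO.
// This trades memory (two dense temporaries plus a dense product) for implementation simplicity.
at::Tensor sparse_mm_via_dense(const at::Tensor& a, const at::Tensor& b) {
  at::Tensor a_dense = a.to_dense();
  at::Tensor b_dense = b.to_dense();
  at::Tensor out_dense = at::matmul(a_dense, b_dense);
  return out_dense.to_sparse();  // defaults to the sparse COO layout
}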
5 changes: 1 addition & 4 deletions test/xpu/test_sparse_xpu.py
@@ -2073,10 +2073,7 @@ def test_shape(di, dj, dk, nnz):
@precisionOverride({torch.bfloat16: 5e-2, torch.float16: 5e-2})
@dtypes(torch.double, torch.cdouble, torch.bfloat16, torch.float16)
@dtypesIfMPS(torch.float32, torch.complex64, torch.bfloat16, torch.float16)
@skipXPUIf(
True,
"addmm sprase xpu not supported yet, see https://github.com/intel/torch-xpu-ops/issues/2211",
)
@skipXPUIf(False, "https://github.com/intel/torch-xpu-ops/issues/2211")
def test_sparse_addmm(self, device, dtype, coalesced):
if (dtype is torch.bfloat16 or dtype is torch.float16) and device.startswith(
"cuda"
36 changes: 36 additions & 0 deletions yaml/native/native_functions.yaml
@@ -9463,3 +9463,39 @@
variants: function, method

- func: inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)

- func: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
structured_delegate: addmm.out
variants: function, method
dispatch:
SparseXPU: addmm_sparse_dense_xpu

- func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
SparseXPU: addmm_out_sparse_dense_xpu

- func: addmm_(Tensor(a!) self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
structured_delegate: addmm.out
variants: method
dispatch:
# Warning! For whatever reason, the inplace sparse addmm is NON
# broadcasting
SparseXPU: s_addmm_sparse_dense_xpu_

- func: mm(Tensor self, Tensor mat2) -> Tensor
structured_delegate: mm.out
variants: function, method
dispatch:
SparseXPU: _sparse_mm
tags: core

- func: mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
structured: True
dispatch:
SparseXPU: _sparse_mm_out

- func: _sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor
dispatch:
SparseXPU: sparse_sparse_matmul_xpu
autogen: _sparse_sparse_matmul.out
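With these SparseXPU dispatch entries, a call such as the following should route to the new addmm_sparse_dense_xpu path; a hedged usage sketch (assumes an XPU build and device are available and that mat1 is a COO sparse tensor on that device):

#include <ATen/ATen.h>

at::Tensor sparse_addmm_on_xpu() {
  auto opts = at::TensorOptions().device(at::kXPU).dtype(at::kFloat);
  at::Tensor self = at::zeros({3, 4}, opts);
  at::Tensor mat1 = at::eye(3, opts).to_sparse();   // SparseXPU COO operand
  at::Tensor mat2 = at::rand({3, 4}, opts);
  // Dispatch keys contributed by mat1 select the SparseXPU kernel registered above.
  return at::addmm(self, mat1, mat2, /*beta=*/1, /*alpha=*/1);
}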