Typo in the addition kernel in muladd.cu #100

Open
ferrarioa5 opened this issue Sep 7, 2024 · 5 comments

Comments

ferrarioa5 commented Sep 7, 2024

There is a typo in the add_kernel routine in the CUDA file muladd.cu. This kernel should compute the element-wise sum of two tensors, but it actually computes their element-wise product:

__global__ void add_kernel(int numel, const float* a, const float* b, float* result) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < numel) result[idx] = a[idx] * b[idx];
}
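
Presumably the intended kernel just uses + instead of *; a minimal sketch of the fix:

__global__ void add_kernel(int numel, const float* a, const float* b, float* result) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < numel) result[idx] = a[idx] + b[idx];  // element-wise sum, not product
}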

The bug can be reproduced by running the following Python script:

import extension_cpp as ext
import torch

device = 'cuda'
n = 100
a = torch.rand(n).to(device)
b = torch.rand(n).to(device)
add2 = torch.zeros(n).to(device)

add1 = a + b                     # reference result: plain tensor addition
ext.ops.myadd_out(a, b, add2)    # custom op under test
print(torch.equal(add1, add2))   # prints False on CUDA because of the typo

The CPU implementation gives the correct result (run the script above with device = 'cpu').

cyk2018 commented Oct 1, 2024

Yes, I also think this is an error.

jklincn commented Nov 5, 2024

Hi! I'm currently learning the new version of custom operators (PyTorch 2.4 or later).

Tutorial link: PyTorch Custom Operators Tutorial

The following question isn’t directly related to the issue, but I wanted to seek some advice here.

In the code below, everything runs smoothly in my environment. However, based on my understanding, it seems that I should use torch.ops.extension_cpp.myadd_out() instead of extension_cpp.ops.myadd_out(), correct?

import torch
import extension_cpp

a = torch.rand(10, 10)
b = torch.rand(10, 10)
c = extension_cpp.ops.mymuladd(a, b, 0)
print(c)

However, I can't find extension_cpp under torch.ops. Does anyone know the reason?

cyk2018 commented Nov 5, 2024

Maybe this interface has been generated, so you can call it directly?

jklincn commented Nov 6, 2024

@cyk2018 Did you mean extension_cpp.ops.mymuladd? That one is correct and works fine. But I want to know why torch.ops.extension_cpp.myadd_out doesn't work.

If I use JIT compilation (load_inline), I can call the op directly from torch.ops:

import torch
from torch.utils.cpp_extension import load_inline

cpp_source = """
#include <torch/script.h>

at::Tensor mymuladd_cpu(at::Tensor a, const at::Tensor& b, double c) {
    TORCH_CHECK(a.sizes() == b.sizes());
    TORCH_CHECK(a.dtype() == at::kFloat);
    TORCH_CHECK(b.dtype() == at::kFloat);
    TORCH_INTERNAL_ASSERT(a.device().type() == at::DeviceType::CPU);
    TORCH_INTERNAL_ASSERT(b.device().type() == at::DeviceType::CPU);
    
    at::Tensor a_contig = a.contiguous();
    at::Tensor b_contig = b.contiguous();
    at::Tensor result = torch::empty(a_contig.sizes(), a_contig.options());
    
    const float* a_ptr = a_contig.data_ptr<float>();
    const float* b_ptr = b_contig.data_ptr<float>();
    float* result_ptr = result.data_ptr<float>();
    
    for (int64_t i = 0; i < result.numel(); i++) {
        result_ptr[i] = a_ptr[i] * b_ptr[i] + c;
    }
    return result;
}

// Define the operator schema in the "extension_cpp" namespace;
// this is what makes it reachable as torch.ops.extension_cpp.mymuladd.
TORCH_LIBRARY(extension_cpp, m) {
    m.def("mymuladd(Tensor a, Tensor b, float c) -> Tensor");
}

// Register the CPU implementation for that schema.
TORCH_LIBRARY_IMPL(extension_cpp, CPU, m) {
    m.impl("mymuladd", &mymuladd_cpu);
}
"""

# is_python_module=False: just load the compiled library so the
# TORCH_LIBRARY registration runs; no Python module object is returned.
load_inline(
    name='extension_cpp',
    cpp_sources=[cpp_source],
    is_python_module=False,
    verbose=True
)

a = torch.randn(3, dtype=torch.float32).rename(None)
b = torch.randn(3, dtype=torch.float32).rename(None)

result = torch.ops.extension_cpp.mymuladd(a, b, 1.0)
print("Input a:", a)
print("Input b:", b)
print("Result:", result)

jklincn commented Nov 6, 2024

I have solved it! I was missing packages=find_packages() in setup.py.

Now I can successfully run:

import torch
import my_ops

a = torch.ones(3)
result = torch.ops.my_ops.ops1(a)
print("Result:", result)
