Could run with CPU but not with CUDA #7

Open
nsjiow opened this issue Mar 20, 2019 · 2 comments

nsjiow commented Mar 20, 2019

Thanks for your great work. I could run it with both CPU and GPU on my PC, but when I ported the code from the PC to an NVIDIA TX2, errors appeared: it runs with CPU but not with CUDA. The details are:
terminate called after throwing an instance of 'std::runtime_error'
what():
Tensor for argument #4 'running_mean' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm) (checkSameGPU at /home/nvidia/workspace/wangh/libraries/pytorch/aten/src/ATen/TensorUtils.cpp:122)
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x7c (0x7f7dc40374 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libc10.so)
frame #1: at::checkSameGPU(char const*, at::TensorArg const&, at::TensorArg const&) + 0xbec (0x7f904c771c in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #2: at::checkAllSameGPU(char const*, c10::ArrayRef<at::TensorArg>) + 0x44 (0x7f904c8374 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #3: at::native::cudnn_batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double) + 0x3b8 (0x7f7e9ef918 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2_gpu.so)
frame #4: at::CUDAFloatType::cudnn_batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double) const + 0xd8 (0x7f7eadd968 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2_gpu.so)
frame #5: torch::autograd::VariableType::cudnn_batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double) const + 0x568 (0x7f93da2160 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #6: at::native::_batch_norm_impl_index(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) + 0x3a0 (0x7f905463e0 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #7: at::TypeDefault::_batch_norm_impl_index(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0xe4 (0x7f90976f74 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #8: torch::autograd::VariableType::_batch_norm_impl_index(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0x2a4 (0x7f93d1d1ac in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #9: at::native::batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) + 0x8c (0x7f90544dac in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #10: at::TypeDefault::batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0xe4 (0x7f90976e04 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #11: torch::autograd::VariableType::batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0x21c (0x7f93d1ecdc in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #12: + 0x7cc8ac (0x7f93e6d8ac in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #13: + 0x88a660 (0x7f93f2b660 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #14: torch::jit::InterpreterState::run(std::vector<c10::IValue, std::allocator<c10::IValue> >&) + 0x38 (0x7f93f26838 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #15: torch::jit::GraphExecutor::run(std::vector<c10::IValue, std::allocator<c10::IValue> >&) + 0x188 (0x7f93f06cd0 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #16: torch::jit::script::Method::run(std::vector<c10::IValue, std::allocator<c10::IValue> >&) + 0x98 (0x4dfe64 in ./yolo-app)
frame #17: torch::jit::script::Method::operator()(std::vector<c10::IValue, std::allocator<c10::IValue> >) + 0x30 (0x4dfec0 in ./yolo-app)
frame #18: torch::jit::script::Module::forward(std::vector<c10::IValue, std::allocator<c10::IValue> >) + 0x84 (0x4e0fa4 in ./yolo-app)
frame #19: classifier(std::shared_ptr<torch::jit::script::Module>, at::Tensor) + 0xfc (0x4d6760 in ./yolo-app)
frame #20: main + 0xc88 (0x4d8954 in ./yolo-app)
frame #21: __libc_start_main + 0xe0 (0x7f7cf718a0 in /lib/aarch64-linux-gnu/libc.so.6)
:
operation failed in interpreter:
op_version_set = 0
def forward(self,
input_1: Tensor) -> Tensor:
input_2 = torch._convolution(input_1, getattr(self.layer1, "0").weight, None, [1, 3], [0, 0], [1, 1], False, [0, 0], 1, False, False, True)
_0 = torch.add(getattr(self.layer1, "1").num_batches_tracked, CONSTANTS.c0, alpha=1)
input_3 = torch.batch_norm(input_2, getattr(self.layer1, "1").weight, getattr(self.layer1, "1").bias, getattr(self.layer1, "1").running_mean, getattr(self.layer1, "1").running_var, True, 0.10000000000000001, 1.0000000000000001e-05, True)
~~~~~~~~~~~~~~~~ <--- HERE
input_4 = torch.threshold_(input_3, 0., 0.)
input_5, _1 = torch.max_pool2d_with_indices(input_4, [2, 2], [2, 2], [0, 0], [1, 1], False)
input_6 = torch._convolution(input_5, getattr(self.layer2, "0").weight, None, [1, 1], [2, 0], [1, 1], False, [0, 0], 2, False, False, True)
_2 = torch.add(getattr(self.layer2, "1").num_batches_tracked, CONSTANTS.c0, alpha=1)
input_7 = torch.batch_norm(input_6, getattr(self.layer2, "1").weight, getattr(self.layer2, "1").bias, getattr(self.layer2, "1").running_mean, getattr(self.layer2, "1").running_var, True, 0.10000000000000001, 1.0000000000000001e-05, True)
input_8 = torch.threshold_(input_7, 0., 0.)
input_9, _3 = torch.max_pool2d_with_indices(input_8, [3, 3], [2, 2], [1, 0], [1, 1], False)
input_10 = torch._convolution(input_9, getattr(self.layer3, "0").weight, None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1, False, False, True)
_4 = torch.add(getattr(self.layer3, "1").num_batches_tracked, CONSTANTS.c0, alpha=1)
Aborted (core dumped)
Why does this happen?
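
For context, here is a minimal sketch (hedged: this is not the poster's actual yolo-app code; the model path and input shape are placeholders) of the pattern that typically produces this error with the libtorch 1.0-era API: the module is loaded without a device, so its batch-norm buffers (running_mean, running_var) stay on the CPU, while the input tensor is moved to CUDA.

```cpp
#include <torch/script.h>
#include <memory>
#include <vector>

int main(int argc, char* argv[]) {
  // Loading with no device argument leaves all parameters and buffers on the CPU.
  std::shared_ptr<torch::jit::script::Module> module = torch::jit::load(argv[1]);

  // The input is sent to the GPU...
  at::Tensor input = torch::rand({1, 3, 224, 224}).to(at::kCUDA);  // example shape

  // ...so cudnn_batch_norm sees a CUDA input but a CPU running_mean and aborts,
  // which is exactly the checkSameGPU failure in the trace above.
  std::vector<torch::jit::IValue> inputs{input};
  at::Tensor out = module->forward(inputs).toTensor();
  return 0;
}
```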


asa008 commented Apr 18, 2019

I encountered almost the same issue. Have you solved it? If so, how? @nsjiow Thanks very much.


asa008 commented Apr 19, 2019

You can try loading the model directly onto the GPU, like this:

std::shared_ptr<torch::jit::script::Module> module = torch::jit::load(argv[1], torch::kCUDA);
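
A fuller sketch of that fix (hedged: not the original yolo-app code; the input shape is only an example), using the same ~1.0-era libtorch API quoted above. Loading with torch::kCUDA places the module's parameters and batch-norm buffers on the GPU so they match a CUDA input; if your libtorch build has script::Module::to, moving the module after a plain load should also work.

```cpp
#include <torch/script.h>
#include <memory>
#include <vector>

int main(int argc, char* argv[]) {
  // Load the TorchScript model straight onto the GPU so running_mean/running_var
  // already live on CUDA.
  std::shared_ptr<torch::jit::script::Module> module =
      torch::jit::load(argv[1], torch::kCUDA);

  // Alternative (if your libtorch version provides script::Module::to):
  // module->to(at::kCUDA);

  // Keep the input on the same device as the module.
  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(torch::rand({1, 3, 224, 224}).to(at::kCUDA));  // example shape

  at::Tensor out = module->forward(inputs).toTensor();
  return 0;
}
```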
