Thanks for your great work. I could run it with CPU and GPU on my PC, but when I ported the code from the PC to an NVIDIA TX2, errors occurred. I could run it with CPU but not with CUDA. The details are as follows:

terminate called after throwing an instance of 'std::runtime_error'
what():
Tensor for argument #4 'running_mean' is on CPU, but expected it to be on GPU (while checking arguments for cudnn_batch_norm) (checkSameGPU at /home/nvidia/workspace/wangh/libraries/pytorch/aten/src/ATen/TensorUtils.cpp:122)
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x7c (0x7f7dc40374 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libc10.so)
frame #1: at::checkSameGPU(char const*, at::TensorArg const&, at::TensorArg const&) + 0xbec (0x7f904c771c in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #2: at::checkAllSameGPU(char const*, c10::ArrayRef<at::TensorArg>) + 0x44 (0x7f904c8374 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #3: at::native::cudnn_batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double) + 0x3b8 (0x7f7e9ef918 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2_gpu.so)
frame #4: at::CUDAFloatType::cudnn_batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double) const + 0xd8 (0x7f7eadd968 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2_gpu.so)
frame #5: torch::autograd::VariableType::cudnn_batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double) const + 0x568 (0x7f93da2160 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #6: at::native::_batch_norm_impl_index(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) + 0x3a0 (0x7f905463e0 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #7: at::TypeDefault::_batch_norm_impl_index(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0xe4 (0x7f90976f74 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #8: torch::autograd::VariableType::_batch_norm_impl_index(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0x2a4 (0x7f93d1d1ac in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #9: at::native::batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) + 0x8c (0x7f90544dac in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #10: at::TypeDefault::batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0xe4 (0x7f90976e04 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libcaffe2.so)
frame #11: torch::autograd::VariableType::batch_norm(at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, at::Tensor const&, bool, double, double, bool) const + 0x21c (0x7f93d1ecdc in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #12: + 0x7cc8ac (0x7f93e6d8ac in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #13: + 0x88a660 (0x7f93f2b660 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #14: torch::jit::InterpreterState::run(std::vector<c10::IValue, std::allocator<c10::IValue> >&) + 0x38 (0x7f93f26838 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #15: torch::jit::GraphExecutor::run(std::vector<c10::IValue, std::allocator<c10::IValue> >&) + 0x188 (0x7f93f06cd0 in /home/nvidia/workspace/wangh/libraries/pytorch/torch/lib/libtorch.so.1)
frame #16: torch::jit::script::Method::run(std::vector<c10::IValue, std::allocator<c10::IValue> >&) + 0x98 (0x4dfe64 in ./yolo-app)
frame #17: torch::jit::script::Method::operator()(std::vector<c10::IValue, std::allocator<c10::IValue> >) + 0x30 (0x4dfec0 in ./yolo-app)
frame #18: torch::jit::script::Module::forward(std::vector<c10::IValue, std::allocator<c10::IValue> >) + 0x84 (0x4e0fa4 in ./yolo-app)
frame #19: classifier(std::shared_ptr<torch::jit::script::Module>, at::Tensor) + 0xfc (0x4d6760 in ./yolo-app)
frame #20: main + 0xc88 (0x4d8954 in ./yolo-app)
frame #21: __libc_start_main + 0xe0 (0x7f7cf718a0 in /lib/aarch64-linux-gnu/libc.so.6)
:
operation failed in interpreter:
op_version_set = 0
def forward(self,
    input_1: Tensor) -> Tensor:
  input_2 = torch.convolution(input_1, getattr(self.layer1, "0").weight, None, [1, 3], [0, 0], [1, 1], False, [0, 0], 1, False, False, True)
  _0 = torch.add(getattr(self.layer1, "1").num_batches_tracked, CONSTANTS.c0, alpha=1)
  input_3 = torch.batch_norm(input_2, getattr(self.layer1, "1").weight, getattr(self.layer1, "1").bias, getattr(self.layer1, "1").running_mean, getattr(self.layer1, "1").running_var, True, 0.10000000000000001, 1.0000000000000001e-05, True)
  ~~~~~~~~~~~~~~~~ <--- HERE
  input_4 = torch.threshold(input_3, 0., 0.)
  input_5, _1 = torch.max_pool2d_with_indices(input_4, [2, 2], [2, 2], [0, 0], [1, 1], False)
  input_6 = torch.convolution(input_5, getattr(self.layer2, "0").weight, None, [1, 1], [2, 0], [1, 1], False, [0, 0], 2, False, False, True)
  _2 = torch.add(getattr(self.layer2, "1").num_batches_tracked, CONSTANTS.c0, alpha=1)
  input_7 = torch.batch_norm(input_6, getattr(self.layer2, "1").weight, getattr(self.layer2, "1").bias, getattr(self.layer2, "1").running_mean, getattr(self.layer2, "1").running_var, True, 0.10000000000000001, 1.0000000000000001e-05, True)
  input_8 = torch.threshold(input_7, 0., 0.)
  input_9, _3 = torch.max_pool2d_with_indices(input_8, [3, 3], [2, 2], [1, 0], [1, 1], False)
  input_10 = torch._convolution(input_9, getattr(self.layer3, "0").weight, None, [1, 1], [1, 1], [1, 1], False, [0, 0], 1, False, False, True)
  _4 = torch.add(getattr(self.layer3, "1").num_batches_tracked, CONSTANTS.c0, alpha=1)
Aborted (core dumped)

Why?
I ran into almost the same problem. Have you solved it? If so, how? @nsjiow Thanks very much.
You can try loading the model directly onto the GPU, like:
std::shared_ptr<torch::jit::script::Module> module = torch::jit::load(argv[1], torch::kCUDA);
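For context, here is a minimal sketch of the full flow, assuming the same libtorch API used in this thread (where torch::jit::load returns a std::shared_ptr<torch::jit::script::Module>; newer releases return the Module by value). The input shape and the module->to(torch::kCUDA) alternative are illustrative assumptions, not taken from the original code; the key point is that the module's parameters and buffers (including running_mean) and the input tensor must all live on the same CUDA device.

#include <torch/script.h>
#include <iostream>
#include <memory>
#include <vector>

int main(int argc, const char* argv[]) {
  // Load the traced model and place its parameters/buffers on the GPU.
  std::shared_ptr<torch::jit::script::Module> module =
      torch::jit::load(argv[1], torch::kCUDA);
  // Alternative (if your libtorch version supports it): load on CPU first,
  // then move the whole module with module->to(torch::kCUDA);

  // The input must be on the same device as the module.
  // 1x3x224x224 is only a placeholder shape for illustration.
  torch::Tensor input = torch::ones({1, 3, 224, 224}).to(torch::kCUDA);

  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(input);
  torch::Tensor output = module->forward(inputs).toTensor();
  std::cout << output.sizes() << std::endl;
  return 0;
}

Loading (or moving) the module onto the GPU is what resolves the reported error: running_mean is a buffer stored inside the module, so if the module stays on the CPU while the input is on CUDA, cudnn_batch_norm sees mixed devices and aborts.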