PaddlePaddle · tizhou86 · Nov 30, 2021 · Nov 29, 2021
diff --git a/paddle/fluid/operators/bernoulli_op.cu b/paddle/fluid/operators/bernoulli_op.cu
@@ -61,16 +61,16 @@ class BernoulliOpKernel<platform::CUDADeviceContext, T>
         BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()).GetDeviceId();
     auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
     auto seed_offset = gen_cuda->IncrementOffset(1);
-    int gen_offset = size * seed_offset.second;
+    int64_t gen_offset = size * seed_offset.second;
     platform::Transform<platform::CUDADeviceContext> trans;
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     auto* context =
         static_cast<const platform::CUDADeviceContext*>(&ctx.device_context());
 
     trans(*context, index_sequence_begin, index_sequence_begin + size, in_data,
           out_data,
-          BernoulliCudaFunctor<T>(static_cast<unsigned int>(seed_offset.first),
-                                  static_cast<unsigned int>(gen_offset)));
+          BernoulliCudaFunctor<T>(static_cast<int64_t>(seed_offset.first),
+                                  static_cast<int64_t>(gen_offset)));
   }
 };
 

diff --git a/paddle/fluid/operators/gaussian_random_op.cu b/paddle/fluid/operators/gaussian_random_op.cu
@@ -59,7 +59,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
     }
     T mean = static_cast<T>(context.Attr<float>("mean"));
     T std = static_cast<T>(context.Attr<float>("std"));
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     auto shape = GetShape(context);
     tensor->Resize(shape);
     T* data = tensor->mutable_data<T>(context.GetPlace());
@@ -72,7 +72,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel<T> {
 
     if (gen_cuda->GetIsInitPy() && seed_flag) {
       auto seed_offset = gen_cuda->IncrementOffset(1);
-      int gen_offset = size * seed_offset.second;
+      int64_t gen_offset = size * seed_offset.second;
       thrust::transform(
           index_sequence_begin, index_sequence_begin + size,
           thrust::device_ptr<T>(data),
@@ -100,7 +100,7 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
     }
     T mean = static_cast<T>(context.Attr<float>("mean"));
     T std = static_cast<T>(context.Attr<float>("std"));
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     int64_t size = tensor->numel();
 
     int device_id =
@@ -109,7 +109,7 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
 
     if (gen_cuda->GetIsInitPy() && seed_flag) {
       auto seed_offset = gen_cuda->IncrementOffset(1);
-      int gen_offset = size * seed_offset.second;
+      int64_t gen_offset = size * seed_offset.second;
       thrust::transform(index_sequence_begin, index_sequence_begin + size,
                         thrust::device_ptr<T>(data),
                         GaussianGenerator<T>(mean, std, seed_offset.first,

diff --git a/paddle/fluid/operators/gumbel_softmax_op.cu b/paddle/fluid/operators/gumbel_softmax_op.cu
@@ -129,15 +129,15 @@ struct GumbleNoiseGenerator<platform::CUDADeviceContext, T> {
     int64_t size = size_to_axis * size_from_axis;
     T* random_data =
         random_tensor.mutable_data<T>({size}, platform::CUDAPlace());
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
 
     // generate gumbel noise
     int device_id =
         BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId();
     auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
     if (gen_cuda->GetIsInitPy()) {
       auto seed_offset = gen_cuda->IncrementOffset(1);
-      int gen_offset = size * seed_offset.second;
+      int64_t gen_offset = size * seed_offset.second;
       thrust::transform(
           index_sequence_begin, index_sequence_begin + size,
           thrust::device_ptr<T>(random_data),

diff --git a/paddle/fluid/operators/multinomial_op.cu b/paddle/fluid/operators/multinomial_op.cu
@@ -239,7 +239,7 @@ class MultinomialOpKernel<platform::CUDADeviceContext, T>
     auto* rng_data = rng_data_tensor.mutable_data<T>(
         {num_distributions, num_samples}, ctx.GetPlace());
 
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     platform::Transform<platform::CUDADeviceContext> trans;
     auto* context =
         static_cast<const platform::CUDADeviceContext*>(&ctx.device_context());

diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.cu b/paddle/fluid/operators/truncated_gaussian_random_op.cu
@@ -97,7 +97,7 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
     }
     T mean = static_cast<T>(context.Attr<float>("mean"));
     T std = static_cast<T>(context.Attr<float>("std"));
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     int64_t size = tensor->numel();
 
     int device_id =
@@ -106,7 +106,7 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
 
     if (gen_cuda->GetIsInitPy() && seed_flag) {
       auto seed_offset = gen_cuda->IncrementOffset(1);
-      int gen_offset = size * seed_offset.second;
+      int64_t gen_offset = size * seed_offset.second;
       thrust::transform(
           index_sequence_begin, index_sequence_begin + size,
           thrust::device_ptr<T>(data),

diff --git a/paddle/fluid/operators/uniform_random_inplace_op.cu b/paddle/fluid/operators/uniform_random_inplace_op.cu
@@ -118,14 +118,14 @@ class GPUUniformRandomInplaceKernel : public framework::OpKernel<T> {
     unsigned int diag_step =
         static_cast<unsigned int>(ctx.Attr<int>("diag_step"));
     T diag_val = static_cast<T>(ctx.Attr<float>("diag_val"));
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     int64_t size = tensor->numel();
     int device_id =
         BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()).GetDeviceId();
     auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
     if (gen_cuda->GetIsInitPy() && seed_flag) {
       auto seed_offset = gen_cuda->IncrementOffset(1);
-      int gen_offset = size * seed_offset.second;
+      int64_t gen_offset = size * seed_offset.second;
       thrust::transform(
           index_sequence_begin, index_sequence_begin + size,
           thrust::device_ptr<T>(data),

diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu
@@ -139,14 +139,14 @@ class GPUUniformRandomKernel : public framework::OpKernel<T> {
     unsigned int diag_step =
         static_cast<unsigned int>(context.Attr<int>("diag_step"));
     T diag_val = static_cast<T>(context.Attr<float>("diag_val"));
-    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
+    thrust::counting_iterator<int64_t> index_sequence_begin(0);
     int64_t size = tensor->numel();
     int device_id =
         BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId();
     auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id);
     if (gen_cuda->GetIsInitPy() && seed_flag) {
       auto seed_offset = gen_cuda->IncrementOffset(1);
-      int gen_offset = size * seed_offset.second;
+      int64_t gen_offset = size * seed_offset.second;
       thrust::transform(
           index_sequence_begin, index_sequence_begin + size,
           thrust::device_ptr<T>(data),