From c84dc0bc80a27371b7f0b16e4503e2876223be5f Mon Sep 17 00:00:00 2001 From: David Seiler Date: Mon, 1 Apr 2019 10:49:29 +0200 Subject: [PATCH] PDF operators for each distribution for which we have a random sampler (plus also the PDF of the Dirichlet). Supports probabilities and log-probabilities, as well as gradients. --- ci/windows/test_py2_cpu.ps1 | 2 +- ci/windows/test_py2_gpu.ps1 | 2 +- ci/windows/test_py3_cpu.ps1 | 2 +- ci/windows/test_py3_gpu.ps1 | 6 +- python/mxnet/contrib/amp/lists/symbol.py | 17 + src/operator/random/pdf_op.cc | 319 ++++++++++++ src/operator/random/pdf_op.cu | 48 ++ src/operator/random/pdf_op.h | 596 +++++++++++++++++++++++ tests/python/unittest/test_random.py | 111 ++++- 9 files changed, 1090 insertions(+), 13 deletions(-) create mode 100644 src/operator/random/pdf_op.cc create mode 100644 src/operator/random/pdf_op.cu create mode 100644 src/operator/random/pdf_op.h diff --git a/ci/windows/test_py2_cpu.ps1 b/ci/windows/test_py2_cpu.ps1 index 26b02d890fe3..6694794d8393 100644 --- a/ci/windows/test_py2_cpu.ps1 +++ b/ci/windows/test_py2_cpu.ps1 @@ -24,7 +24,7 @@ $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home') C:\Python27\Scripts\pip install -r tests\requirements.txt C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest -if (! $?) { Throw ("Error running unittest") } +if (! $?) { Throw ("Error running unittest $LastExitCode $error") } C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train if (! $?) { Throw ("Error running train tests") } # Adding this extra test since it's not possible to set env var on the fly in Windows. 
diff --git a/ci/windows/test_py2_gpu.ps1 b/ci/windows/test_py2_gpu.ps1 index 8418bd0b490d..f1988e34d8b9 100644 --- a/ci/windows/test_py2_gpu.ps1 +++ b/ci/windows/test_py2_gpu.ps1 @@ -24,7 +24,7 @@ $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home') C:\Python27\Scripts\pip install -r tests\requirements.txt C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest -if (! $?) { Throw ("Error running unittest") } +if (! $?) { Throw ("Error running unittest $LastExitCode $error") } C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py if (! $?) { Throw ("Error running tests") } C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_forward.xml tests\python\gpu\test_forward.py diff --git a/ci/windows/test_py3_cpu.ps1 b/ci/windows/test_py3_cpu.ps1 index 56bf5c6d4595..ebe4eafa5f28 100644 --- a/ci/windows/test_py3_cpu.ps1 +++ b/ci/windows/test_py3_cpu.ps1 @@ -24,7 +24,7 @@ $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home') C:\Python37\Scripts\pip install -r tests\requirements.txt C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest -if (! $?) { Throw ("Error running unittest") } +if (! $?) { Throw ("Error running unittest $LastExitCode $error") } C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train if (! $?) { Throw ("Error running train tests") } # Adding this extra test since it's not possible to set env var on the fly in Windows. 
diff --git a/ci/windows/test_py3_gpu.ps1 b/ci/windows/test_py3_gpu.ps1 index ef07b428fca2..6d55484e6977 100644 --- a/ci/windows/test_py3_gpu.ps1 +++ b/ci/windows/test_py3_gpu.ps1 @@ -24,11 +24,11 @@ $env:MXNET_HOME=[io.path]::combine($PSScriptRoot, 'mxnet_home') C:\Python37\Scripts\pip install -r tests\requirements.txt C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest -if (! $?) { Throw ("Error running unittest") } +if (! $?) { Throw ("Error running unittest $LastExitCode $error") } C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py -if (! $?) { Throw ("Error running tests") } +if (! $?) { Throw ("Error running tests $LastExitCode $error") } C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_forward.xml tests\python\gpu\test_forward.py -if (! $?) { Throw ("Error running tests") } +if (! $?) { Throw ("Error running tests $LastExitCode $error") } C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train if (! $?) { Throw ("Error running tests") } # Adding this extra test since it's not possible to set env var on the fly in Windows. 
diff --git a/python/mxnet/contrib/amp/lists/symbol.py b/python/mxnet/contrib/amp/lists/symbol.py index 9c99340ab75d..3a03c48710dc 100644 --- a/python/mxnet/contrib/amp/lists/symbol.py +++ b/python/mxnet/contrib/amp/lists/symbol.py @@ -600,6 +600,23 @@ '_sparse_elemwise_mul', '_sparse_elemwise_sub', '_sparse_sum', + + 'random_pdf_gamma', + 'random_pdf_exponential', + 'random_pdf_uniform', + 'random_pdf_negative_binomial', + 'random_pdf_generalized_negative_binomial', + 'random_pdf_dirichlet', + 'random_pdf_normal', + 'random_pdf_poisson', + '_random_pdf_gamma', + '_random_pdf_exponential', + '_random_pdf_uniform', + '_random_pdf_negative_binomial', + '_random_pdf_generalized_negative_binomial', + '_random_pdf_dirichlet', + '_random_pdf_normal', + '_random_pdf_poisson', ] LOSS_OUTPUT_FUNCTIONS = [ diff --git a/src/operator/random/pdf_op.cc b/src/operator/random/pdf_op.cc new file mode 100644 index 000000000000..070ca81d61ff --- /dev/null +++ b/src/operator/random/pdf_op.cc @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file pdf_op.cc + * \brief CPU-operators for computing the pdf of random distributions. 
+ */ + +#include "./pdf_op.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(PdfParam); + +#define MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, num_parms, \ + parm_name_1, parm_name_2, \ + parm_desc_1, parm_desc_2, \ + description, vectorparms) \ + NNVM_REGISTER_OP(_random_pdf_##distr) \ + .add_alias("random_pdf_" #distr) \ + .describe(description()+std::string(ADD_FILELINE)) \ + .set_num_inputs(num_parms+1) \ + .set_num_outputs(1) \ + .set_attr_parser(ParamParser) \ + .set_attr("FListInputNames", \ + [](const NodeAttrs& attrs) { \ + std::vector v = {"sample", parm_name_1, parm_name_2}; \ + v.resize(num_parms+1); \ + return v; \ + }) \ + .set_attr("FInferShape", PdfOpShape) \ + .set_attr("FInferType", ElemwiseType) \ + .set_attr("FCompute", PdfOpForward) \ + .set_attr("FGradient", ElemwiseGradUseInOut{"_backward_pdf_" #distr}) \ + .add_argument("sample", "NDArray-or-Symbol", "Samples from the distributions.") \ + .add_argument(parm_name_1, "NDArray-or-Symbol", parm_desc_1) \ + .add_arguments(PdfParam::__FIELDS__()) + +#define MXNET_OPERATOR_REGISTER_PDF_GRAD(distr, pdffunc, num_parms, vectorparms) \ + NNVM_REGISTER_OP(_backward_pdf_##distr) \ + .set_num_inputs(num_parms+3) \ + .set_num_outputs(num_parms+1) \ + .set_attr_parser(ParamParser) \ + .set_attr("FInplaceOption", [](const NodeAttrs& attrs) \ + { std::vector > v = {{1, 0}, {2, 1}, {3, 2}}; \ + v.resize(num_parms+1); \ + return v; }) \ + .set_attr("FResourceRequest", [](const NodeAttrs& attrs) \ + { return std::vector{ResourceRequest::kTempSpace}; }) \ + .set_attr("TIsBackward", true) \ + .set_attr("FCompute", PdfOpBackward); + + +#define MXNET_OPERATOR_REGISTER_PDF1(distr, pdffunc, parm_name, parm_desc, \ + description, vectorparms) \ + MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, 1, parm_name, parm_name, \ + parm_desc, parm_desc, description, vectorparms); \ + MXNET_OPERATOR_REGISTER_PDF_GRAD(distr, pdffunc, 1, vectorparms) + +#define MXNET_OPERATOR_REGISTER_PDF2(distr, pdffunc, parm_name_1, 
parm_name_2, \ + parm_desc_1, parm_desc_2, description) \ + MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, 2, parm_name_1, parm_name_2, \ + parm_desc_1, parm_desc_2, description, false) \ + .add_argument(parm_name_2, "NDArray-or-Symbol", parm_desc_2); \ + MXNET_OPERATOR_REGISTER_PDF_GRAD(distr, pdffunc, 2, false) + +inline std::string uniform_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +uniform distributions on the intervals given by *[low,high)*. + +*low* and *high* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *low* and *high*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *low* and *high* at index *i*. + +Examples:: + + random_pdf_uniform(sample=[[1,2,3,4]], low=[0], high=[10]) = [0.1, 0.1, 0.1, 0.1] + + sample = [[[1, 2, 3], + [1, 2, 3]], + [[1, 2, 3], + [1, 2, 3]]] + low = [[0, 0], + [0, 0]] + high = [[ 5, 10], + [15, 20]] + random_pdf_uniform(sample=sample, low=low, high=high) = + [[[0.2, 0.2, 0.2 ], + [0.1, 0.1, 0.1 ]], + [[0.06667, 0.06667, 0.06667], + [0.05, 0.05, 0.05 ]]] + +)code"); +} + +inline std::string normal_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +normal distributions with parameters *mu* (mean) and *sigma* (standard deviation). + +*mu* and *sigma* must have the same shape, which must match the leftmost subshape +of *sample*. 
That is, *sample* can have the same shape as *mu* and *sigma*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *mu* and *sigma* at index *i*. + +Examples:: + + sample = [[-2, -1, 0, 1, 2]] + random_pdf_normal(sample=sample, mu=[0], sigma=[1]) = + [[0.05399097, 0.24197073, 0.3989423, 0.24197073, 0.05399097]] + + random_pdf_normal(sample=sample*2, mu=[0,0], sigma=[1,2]) = + [[0.05399097, 0.24197073, 0.3989423, 0.24197073, 0.05399097], + [0.12098537, 0.17603266, 0.19947115, 0.17603266, 0.12098537]] +)code"); +} + +inline std::string gamma_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +gamma distributions with parameters *alpha* (shape) and *beta* (rate). + +*alpha* and *beta* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *alpha* and *beta*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *alpha* and *beta* at index *i*. 
+ +Examples:: + + random_pdf_gamma(sample=[[1,2,3,4,5]], alpha=[5], beta=[1]) = + [[0.01532831, 0.09022352, 0.16803136, 0.19536681, 0.17546739]] + + sample = [[1, 2, 3, 4, 5], + [2, 3, 4, 5, 6], + [3, 4, 5, 6, 7]] + + random_pdf_gamma(sample=sample, alpha=[5,6,7], beta=[1,1,1]) = + [[0.01532831, 0.09022352, 0.16803136, 0.19536681, 0.17546739], + [0.03608941, 0.10081882, 0.15629345, 0.17546739, 0.16062315], + [0.05040941, 0.10419563, 0.14622283, 0.16062315, 0.14900276]] +)code"); +} + +inline std::string exponential_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +exponential distributions with parameters *lam* (rate). + +The shape of *lam* must match the leftmost subshape of *sample*. That is, *sample* +can have the same shape as *lam*, in which case the output contains one density per +distribution, or *sample* can be a tensor of tensors with that shape, in which case +the output is a tensor of densities such that the densities at index *i* in the output +are given by the samples at index *i* in *sample* parameterized by the value of *lam* +at index *i*. + +Examples:: + + random_pdf_exponential(sample=[[1, 2, 3]], lam=[1]) = + [[0.36787945, 0.13533528, 0.04978707]] + + sample = [[1,2,3], + [1,2,3], + [1,2,3]] + + random_pdf_exponential(sample=sample, lam=[1,0.5,0.25]) = + [[0.36787945, 0.13533528, 0.04978707], + [0.30326533, 0.18393973, 0.11156508], + [0.1947002, 0.15163267, 0.11809164]] +)code"); +} + +inline std::string poisson_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +Poisson distributions with parameters *lam* (rate). + +The shape of *lam* must match the leftmost subshape of *sample*. 
That is, *sample* +can have the same shape as *lam*, in which case the output contains one density per +distribution, or *sample* can be a tensor of tensors with that shape, in which case +the output is a tensor of densities such that the densities at index *i* in the output +are given by the samples at index *i* in *sample* parameterized by the value of *lam* +at index *i*. + +Examples:: + + random_pdf_poisson(sample=[[0,1,2,3]], lam=[1]) = + [[0.36787945, 0.36787945, 0.18393973, 0.06131324]] + + sample = [[0,1,2,3], + [0,1,2,3], + [0,1,2,3]] + + random_pdf_poisson(sample=sample, lam=[1,2,3]) = + [[0.36787945, 0.36787945, 0.18393973, 0.06131324], + [0.13533528, 0.27067056, 0.27067056, 0.18044704], + [0.04978707, 0.14936121, 0.22404182, 0.22404182]] +)code"); +} + +inline std::string negative_binomial_desc() { + return std::string(R"code(Computes the value of the PDF of samples of +negative binomial distributions with parameters *k* (failure limit) and *p* (failure probability). + +*k* and *p* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *k* and *p*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *k* and *p* at index *i*. 
+ +Examples:: + + random_pdf_negative_binomial(sample=[[1,2,3,4]], k=[1], p=[0.5]) = + [[0.25, 0.125, 0.0625, 0.03125]] + + # Note that k may be real-valued + sample = [[1,2,3,4], + [1,2,3,4]] + random_pdf_negative_binomial(sample=sample, k=[1, 1.5], p=[0.5, 0.5]) = + [[0.25, 0.125, 0.0625, 0.03125 ], + [0.26516506, 0.16572815, 0.09667476, 0.05437956]] +)code"); +} + +inline std::string generalized_negative_binomial_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +generalized negative binomial distributions with parameters *mu* (mean) +and *alpha* (dispersion). This can be understood as a reparameterization of +the negative binomial, where *k* = *1 / alpha* and *p* = *1 / (mu \* alpha + 1)*. + +*mu* and *alpha* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *mu* and *alpha*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *mu* and *alpha* at index *i*. + +Examples:: + + random_pdf_generalized_negative_binomial(sample=[[1, 2, 3, 4]], alpha=[1], mu=[1]) = + [[0.25, 0.125, 0.0625, 0.03125]] + + sample = [[1,2,3,4], + [1,2,3,4]] + random_pdf_generalized_negative_binomial(sample=sample, alpha=[1, 0.6666], mu=[1, 1.5]) = + [[0.25, 0.125, 0.0625, 0.03125 ], + [0.26517063, 0.16573331, 0.09667706, 0.05437994]] +)code"); +} + +inline std::string dirichlet_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +Dirichlet distributions with parameter *alpha*. + +The shape of *alpha* must match the leftmost subshape of *sample*. 
That is, *sample* +can have the same shape as *alpha*, in which case the output contains one density per +distribution, or *sample* can be a tensor of tensors with that shape, in which case +the output is a tensor of densities such that the densities at index *i* in the output +are given by the samples at index *i* in *sample* parameterized by the value of *alpha* +at index *i*. + +Examples:: + + random_pdf_dirichlet(sample=[[1,2],[2,3],[3,4]], alpha=[2.5, 2.5]) = + [38.413498, 199.60245, 564.56085] + + sample = [[[1, 2, 3], [10, 20, 30], [100, 200, 300]], + [[0.1, 0.2, 0.3], [0.01, 0.02, 0.03], [0.001, 0.002, 0.003]]] + + random_pdf_dirichlet(sample=sample, alpha=[0.1, 0.4, 0.9]) = + [[2.3257459e-02, 5.8420084e-04, 1.4674458e-05], + [9.2589635e-01, 3.6860607e+01, 1.4674468e+03]] +)code"); +} + +MXNET_OPERATOR_REGISTER_PDF2(uniform, PDF_Uniform, "low", "high", + "Lower bounds of the distributions.", "Upper bounds of the distributions.", uniform_desc) +MXNET_OPERATOR_REGISTER_PDF2(normal, PDF_Normal, "mu", "sigma", + "Means of the distributions.", "Standard deviations of the distributions.", normal_desc) +MXNET_OPERATOR_REGISTER_PDF2(gamma, PDF_Gamma, "alpha", "beta", + "Alpha (shape) parameters of the distributions.", "Beta (scale) parameters of the distributions.", + gamma_desc) +MXNET_OPERATOR_REGISTER_PDF1(exponential, PDF_Exponential, "lam", + "Lambda (rate) parameters of the distributions.", exponential_desc, false) +MXNET_OPERATOR_REGISTER_PDF1(poisson, PDF_Poisson, "lam", + "Lambda (rate) parameters of the distributions.", poisson_desc, false) +MXNET_OPERATOR_REGISTER_PDF2(negative_binomial, PDF_NegativeBinomial, "k", "p", + "Limits of unsuccessful experiments.", "Failure probabilities in each experiment.", + negative_binomial_desc) +MXNET_OPERATOR_REGISTER_PDF2(generalized_negative_binomial, + PDF_GeneralizedNegativeBinomial, "mu", "alpha", + "Means of the distributions.", "Alpha (dispersion) parameters of the distributions.", + 
generalized_negative_binomial_desc) +MXNET_OPERATOR_REGISTER_PDF1(dirichlet, PDF_Dirichlet, "alpha", + "Concentration parameters of the distributions.", dirichlet_desc, true) + +} // namespace op +} // namespace mxnet diff --git a/src/operator/random/pdf_op.cu b/src/operator/random/pdf_op.cu new file mode 100644 index 000000000000..e77720b2c329 --- /dev/null +++ b/src/operator/random/pdf_op.cu @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file pdf_op.cu + * \brief GPU-operators for computing the pdf of random distributions. 
+ */ + +#include "./pdf_op.h" + +namespace mxnet { +namespace op { + +#define MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, num_parms, vector_parms) \ + NNVM_REGISTER_OP(_random_pdf_##distr) \ + .set_attr("FCompute", PdfOpForward); \ + NNVM_REGISTER_OP(_backward_pdf_##distr) \ + .set_attr("FCompute", PdfOpBackward); + +MXNET_OPERATOR_REGISTER_PDF(uniform, PDF_Uniform, 2, false) +MXNET_OPERATOR_REGISTER_PDF(normal, PDF_Normal, 2, false) +MXNET_OPERATOR_REGISTER_PDF(gamma, PDF_Gamma, 2, false) +MXNET_OPERATOR_REGISTER_PDF(exponential, PDF_Exponential, 1, false) +MXNET_OPERATOR_REGISTER_PDF(poisson, PDF_Poisson, 1, false) +MXNET_OPERATOR_REGISTER_PDF(negative_binomial, PDF_NegativeBinomial, 2, false) +MXNET_OPERATOR_REGISTER_PDF(generalized_negative_binomial, + PDF_GeneralizedNegativeBinomial, 2, false) +MXNET_OPERATOR_REGISTER_PDF(dirichlet, PDF_Dirichlet, 1, true) + +} // namespace op +} // namespace mxnet diff --git a/src/operator/random/pdf_op.h b/src/operator/random/pdf_op.h new file mode 100644 index 000000000000..14fdb53f705d --- /dev/null +++ b/src/operator/random/pdf_op.h @@ -0,0 +1,596 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2018 by Contributors + * \file pdf_op.h + * \brief Operators for computing the pdf of random distributions. + */ +#ifndef MXNET_OPERATOR_RANDOM_PDF_OP_H_ +#define MXNET_OPERATOR_RANDOM_PDF_OP_H_ + +#include +#include +#include +#include "../mshadow_op.h" +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" +#include "../special_functions-inl.h" +#include "../tensor/broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +template +MSHADOW_XINLINE static DType ceph_psi(DType val) { return special_functions::cephes::psi(val); } +template<> +MSHADOW_XINLINE mshadow::half::half_t ceph_psi(mshadow::half::half_t val) { + return special_functions::cephes::psi(val); +} + +template +struct PDF_Uniform { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *lower, IType2 *upper) { + const int index(start / sample_size); + const DType l(lower[index]), h(upper[index]); + const int end = start + length; + for (int i = start; i < end; ++i) { + // No check whether sample is in the support. + out[i] = logpdf ? -DType(log(h - l)) : DType(1.0) / (h - l); + } + } +}; + +template +struct PDF_Uniform_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *lower, IType2 *upper, + DType *grad_out, IType1 *grad_sample, IType2 *grad_lower, IType2 *grad_upper) { + const int index(start / sample_size); + const DType l(lower[index]), h(upper[index]); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType scaling(grad_out[i]*(logpdf ? 
DType(1) : out[i])); + grad_lower[i] = scaling / (h - l); + grad_upper[i] = scaling / (l - h); + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } +}; + +template +struct PDF_Normal { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *loc, IType2 *scale) { + const int index(start / sample_size); + const DType u(loc[index]), s(scale[index]), sq(s * s); + const DType normalizer(sqrt(2.0 * mxnet_op::PI) * s); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType exponent((DType(-0.5) * (x - u) * (x - u)) / (sq)); + out[i] = logpdf ? exponent - log(normalizer) : exp(exponent) / normalizer; + } + } +}; + +template +struct PDF_Normal_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *loc, IType2 *scale, + DType *grad_out, IType1 *grad_sample, IType2 *grad_loc, IType2 *grad_scale) { + const int index(start / sample_size); + const DType u(loc[index]), s(scale[index]), s_squared(s * s), s_cubed(s_squared * s); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? 
DType(1) : out[i])); + grad_loc[i] = scaling * (x - u) / s_squared; + grad_scale[i] = scaling * ((x - u) * (x - u) - s_squared) / s_cubed; + KERNEL_ASSIGN(grad_sample[i], req, scaling * (u - x) / s_squared); + } + } +}; + +template +struct PDF_Gamma { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *alpha, IType2 *beta) { + const int index(start / sample_size); + const DType a(alpha[index]), b(beta[index]), lgamma_a(lgamma(a)), a_log_b(a * log(b)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType lpdf(a_log_b + (a - 1) * log(x) - b * x - lgamma_a); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + +template +struct PDF_Gamma_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *alpha, IType2 *beta, + DType *grad_out, IType1 *grad_sample, IType2 *grad_alpha, IType2 *grad_beta) { + const int index(start / sample_size); + const DType a(alpha[index]), b(beta[index]), log_b(log(b)), ceph_psi_a(ceph_psi(a)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_alpha[i] = scaling * (log_b + log(x) - ceph_psi_a); + grad_beta[i] = scaling * (a / b - x); + KERNEL_ASSIGN(grad_sample[i], req, scaling * ((a - 1) / x - b)); + } + } +}; + +template +struct PDF_Exponential { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *lambda) { + const int index(start / sample_size); + const DType l(lambda[index]), log_l(log(l)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + out[i] = logpdf ? 
log_l - l * x : l * exp(-l * x); + } + } +}; + +template +struct PDF_Exponential_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *lambda, + DType *grad_out, IType1 *grad_sample, IType2 *grad_lambda) { + const int index(start / sample_size); + const DType l(lambda[index]); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_lambda[i] = scaling * (DType(1) / l - x); + KERNEL_ASSIGN(grad_sample[i], req, -scaling * l); + } + } +}; + +template +struct PDF_Poisson { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *lambda) { + const int index(start / sample_size); + const DType l(lambda[index]), log_l(log(l)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType lpdf((x * log_l - lgamma(x + 1)) - l); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + +template +struct PDF_Poisson_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *lambda, + DType *grad_out, IType1 *grad_sample, IType2 *grad_lambda) { + const int index(start / sample_size); + const DType l(lambda[index]); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? 
DType(1) : out[i])); + grad_lambda[i] = scaling * (x / l - DType(1)); + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } +}; + + +template +struct PDF_NegativeBinomial { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *limit, IType2 *prob) { + const int index(start / sample_size); + const DType l(limit[index]), p(prob[index]), lgamma_l(lgamma(l)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType lpdf((lgamma(x + l) - lgamma(x + 1) - lgamma_l) + l * log(p) + x * log(1 - p)); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } + + template + MSHADOW_XINLINE static DType LPDF(DType l, DType p, DType x) { + // Note that "p" is the failure and not the success probability. + return (lgamma(x + l) - lgamma(x + 1) - lgamma(l)) + l * log(p) + x * log(1 - p); + } +}; + +template +struct PDF_NegativeBinomial_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *limit, IType2 *prob, + DType *grad_out, IType1 *grad_sample, IType2 *grad_limit, IType2 *grad_prob) { + const int index(start / sample_size); + const int end = start + length; + for (int i = start; i < end; ++i) { + DType grad_l(0), grad_p(0); + LPDF_GRAD(DType(limit[index]), DType(prob[index]), + DType(sample[i]), out[i], + grad_out[i], &grad_l, &grad_p); + grad_limit[i] = grad_l; + grad_prob[i] = grad_p; + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } + + template + MSHADOW_XINLINE static void LPDF_GRAD(DType l, DType p, DType x, DType o, DType grad_o, + DType* grad_l, DType* grad_p) { + const DType scaling(grad_o * (logpdf ? 
DType(1) : o)); + *grad_l = scaling * ((ceph_psi(x + l) - ceph_psi(l)) + log(p)); + *grad_p = scaling * (l / p - x / (1 - p)); + } +}; + +template +struct PDF_GeneralizedNegativeBinomial { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *mu, IType2 *alpha) { + const int index(start / sample_size); + + // Reparameterize with limit = 1 / alpha, prob = 1 / (mu * alpha + 1) + const DType limit(1.0 / alpha[index]), prob(1.0 / (mu[index]*alpha[index]+1.0)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType lpdf(PDF_NegativeBinomial::LPDF(limit, prob, DType(sample[i]))); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + +template +struct PDF_GeneralizedNegativeBinomial_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *mu, IType2 *alpha, + DType *grad_out, IType1 *grad_sample, IType2 *grad_mu, IType2 *grad_alpha) { + const int index(start / sample_size); + const DType fmu(mu[index]), falpha(alpha[index]), den(fmu * falpha + 1.0); + + // Reparameterize with limit = 1 / alpha, prob = 1 / (mu * alpha + 1) + const DType limit(1.0 / falpha), prob(1.0 / (fmu * falpha + 1.0)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + // Grad returned as d_limit, d_prob + DType grad_l(0), grad_p(0); + PDF_NegativeBinomial_Grad::LPDF_GRAD(limit, prob, + DType(sample[i]), out[i], + grad_out[i], &grad_l, &grad_p); + grad_mu[i] = -grad_p * falpha / (den * den); + grad_alpha[i] = -grad_l / (falpha * falpha) - grad_p * fmu / (den * den); + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } +}; + +template +struct PDF_Dirichlet { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, int k, + DType *out, IType1 *sample, IType2 *alpha) { + const int index(start / sample_size); + const int end = start + length; + for (int i = start; i 
< end; ++i) { + const IType1 *cur_sample = sample + i * k; + const IType2 *cur_alpha = alpha + index * k; + DType sum_alpha(0), sum_lgamma(0), sum_sample(0); + for (int j = 0; j < k; ++j) { + sum_alpha += cur_alpha[j]; + sum_lgamma += lgamma(cur_alpha[j]); + sum_sample += (cur_alpha[j]-1) * log(cur_sample[j]); + } + DType lpdf(sum_sample + (lgamma(sum_alpha) - sum_lgamma)); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + + +template +struct PDF_Dirichlet_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + OpReqType req, int k, + DType *out, IType1 *sample, IType2 *alpha, + DType *grad_out, IType1 *grad_sample, IType2 *grad_alpha + ) { + + } +}; + +struct PdfParam : public dmlc::Parameter { + bool is_log; + DMLC_DECLARE_PARAMETER(PdfParam) { + DMLC_DECLARE_FIELD(is_log).set_default(false) + .describe("If set, compute the density of the log-probability instead of the probability."); + } +}; + +template +inline bool PdfOpShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_GT(in_attrs->size(), 1) + << "pdf operator takes at least 2 arguments (" << in_attrs->size() << " given)"; + CHECK_EQ(out_attrs->size(), 1); + // All inputs must be defined in order to infer output shape. + if ( std::all_of((*in_attrs).begin(), + (*in_attrs).end(), + [](const TShape& s){ return s.ndim() > 0; }) ) { + // Tensors of distribution parameters must have same shape. + for (size_t i = 2; i < in_attrs->size(); ++i) { + SHAPE_ASSIGN_CHECK(*in_attrs, i, (*in_attrs)[i - 1]); + } + // Tensors of distribution parameters must match leftmost subshape of samples. 
+ CHECK_LE((*in_attrs)[1].ndim(), (*in_attrs)[0].ndim()) + << "dimension of input samples (" << (*in_attrs)[0].ndim() + << ") must be at least dimension of distribution parameters ("<< (*in_attrs)[1].ndim() << ")"; + TShape tshape((*in_attrs)[0].begin(), (*in_attrs)[0].begin() + (*in_attrs)[1].ndim()); + if (vparm) { + *(tshape.end() - 1) = *((*in_attrs)[0].end() - 1); + } + for (size_t i = 1; i < in_attrs->size(); ++i) { + SHAPE_ASSIGN_CHECK(*in_attrs, i, tshape); + } + // Output shape must equal input tensor of samples except for last dimension if we are + // dealing with samples that are itself vectors. Be aware of the special case where we + // are dealing with a single vector sample. + if (vparm && ((*in_attrs)[0].ndim() == 1)) { + // Special case where we are dealing with a single vector sample. + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape1(1)); + } else { + TShape oshape((*in_attrs)[0].begin(), (*in_attrs)[0].end() - (vparm ? 1 : 0)); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); + } + return true; + } + return false; +} + +template +struct LaunchExWrapper { + template + MSHADOW_XINLINE static void Map(const int start, const int length, const int sample_size, + Args... args) { + // Apply the operator to the sample in strides of sample_size, so that + // the operators can assume that their distribution parameters are constant. + int i = start; + + // Get aligned + const int align_step = sample_size - (i % sample_size); + const int first_stride = length > align_step ? 
align_step : length; + OP::Map(i, first_stride, sample_size, args...); + i += first_stride; + + const int end = start + length - sample_size; + for (; i < end; i += sample_size) { + OP::Map(i, sample_size, sample_size, args...); + } + + // Last stride might not be aligned either + const int last_stride = start + length - i; + if (last_stride > 0) { // Don't overstep even if length <= sample_size + OP::Map(i, last_stride, sample_size, args...); + } + } +}; + +template +struct PdfCaller; + +template +struct PdfCaller { + static void op(const std::vector& inputs, + const std::vector& outputs, + mshadow::Stream *s) { + CHECK_EQ(inputs[0].Size()%inputs[1].Size(), 0); + CHECK_EQ(inputs[0].Size()%outputs[0].Size(), 0); + index_t num_samples(inputs[0].Size() / inputs[1].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, outputs[0].Size(), num_samples, + outputs[0].dptr(), inputs[0].dptr(), inputs[1].dptr()); + } +}; + +template +struct PdfCaller { + static void op(const std::vector& inputs, + const std::vector& outputs, + mshadow::Stream *s) { + CHECK_EQ(inputs[0].Size()%inputs[1].Size(), 0); + CHECK_EQ(inputs[0].Size()%outputs[0].Size(), 0); + index_t num_samples(inputs[0].Size() / inputs[1].Size()); + index_t sample_size(inputs[0].Size() / outputs[0].Size()); + // Covers distributions parametrized by a vector of parameters (Dirichlet distribution). 
+ mxnet_op::Kernel, xpu>::LaunchEx(s, outputs[0].Size(), + num_samples, sample_size, + outputs[0].dptr(), inputs[0].dptr(), inputs[1].dptr()); + } +}; + +template +struct PdfCaller { + static void op(const std::vector& inputs, + const std::vector& outputs, + mshadow::Stream *s) { + CHECK_EQ(inputs[0].Size()%inputs[1].Size(), 0); + CHECK_EQ(inputs[0].Size(), outputs[0].Size()); + index_t num_samples(inputs[0].Size() / inputs[1].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, outputs[0].Size(), num_samples, + outputs[0].dptr(), inputs[0].dptr(), + inputs[1].dptr(), inputs[2].dptr()); + } +}; + +template class pdf, int pnum, bool vparm> +void PdfOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_NE(req[0], kAddTo); + CHECK_EQ(inputs.size(), pnum + 1); + CHECK_EQ(outputs.size(), 1); + mshadow::Stream *s = ctx.get_stream(); + const PdfParam& param = nnvm::get(attrs.parsed); + MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + if ( param.is_log ) { + PdfCaller, pnum, vparm>::op(inputs, outputs, s); + } else { + PdfCaller, pnum, vparm>::op(inputs, outputs, s); + } + }); +} + + +template +struct PdfGradCaller; + +template +struct PdfGradCaller { + static void op(const std::vector& inputs, + const std::vector& req, + const std::vector& grads, + mshadow::Stream *s) { + index_t num_samples(inputs[1].Size() / inputs[2].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, inputs[0].Size(), + num_samples, req[0], + inputs[3].dptr(), inputs[1].dptr(), inputs[2].dptr(), + inputs[0].dptr(), grads[0].dptr(), grads[1].dptr()); + } +}; + +template +struct PdfGradCaller { + static void op(const std::vector& inputs, + const std::vector& req, + const std::vector& grads, + mshadow::Stream *s) { + index_t num_samples(inputs[1].Size() / inputs[2].Size()); + index_t sample_size(inputs[1].Size() / inputs[0].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, inputs[0].Size(), num_samples, + 
req[0], sample_size, + inputs[3].dptr(), inputs[1].dptr(), inputs[2].dptr(), + inputs[0].dptr(), grads[0].dptr(), grads[1].dptr()); + } +}; + +template +struct PdfGradCaller { + static void op(const std::vector& inputs, + const std::vector& req, + const std::vector& grads, + mshadow::Stream *s) { + index_t num_samples(inputs[1].Size() / inputs[2].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, inputs[0].Size(), + num_samples, req[0], + inputs[4].dptr(), inputs[1].dptr(), inputs[2].dptr(), + inputs[3].dptr(), inputs[0].dptr(), + grads[0].dptr(), grads[1].dptr(), grads[2].dptr()); + } +}; + +template class pdfgrad, int pnum, bool vparm> +void PdfOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + CHECK_EQ(inputs.size(), pnum + 3); + CHECK_EQ(outputs.size(), pnum + 1); + mshadow::Stream *s = ctx.get_stream(); + const PdfParam& param = nnvm::get(attrs.parsed); + const size_t N(outputs[1].Size()); + const TShape src_shape(Shape2(N, outputs[0].Size() / N)), dst_shape(Shape2(N, 1)); + // Inputs to PdfOpBackward: grad, samples, parm1, parm2, pdf. 
+ MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + const size_t red_work_size(broadcast::ReduceWorkspaceSize<2, DType>( + s, dst_shape, kAddTo, src_shape)); + const size_t tmp_size(outputs[0].Size() * pnum * sizeof(DType) + red_work_size); + Tensor tmp_space = + ctx.requested[0].get_space_typed(Shape1(tmp_size), s); + std::vector grads = {outputs[0]}; + grads.push_back(TBlob(tmp_space.dptr_, outputs[0].shape_, + outputs[1].dev_mask(), outputs[1].type_flag_, outputs[1].dev_id())); + if (pnum == 2) { + grads.push_back(TBlob(tmp_space.dptr_ + outputs[0].Size() * sizeof(DType), outputs[0].shape_, + outputs[2].dev_mask(), outputs[2].type_flag_, outputs[2].dev_id())); + } + if (param.is_log) { + PdfGradCaller, pnum, vparm>::op(inputs, req, grads, s); + } else { + PdfGradCaller, pnum, vparm>::op(inputs, req, grads, s); + } + Tensor red_work( + tmp_space.dptr_ + pnum * outputs[0].Size() * sizeof(DType), Shape1(red_work_size), s); + broadcast::Reduce( + s, outputs[1].reshape(dst_shape), req[1], red_work, grads[1].reshape(src_shape)); + if (pnum == 2) { + broadcast::Reduce( + s, outputs[2].reshape(dst_shape), req[2], red_work, grads[2].reshape(src_shape)); + } + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_RANDOM_PDF_OP_H_ diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py index 4d147197a150..8fdca619a5c8 100644 --- a/tests/python/unittest/test_random.py +++ b/tests/python/unittest/test_random.py @@ -25,6 +25,7 @@ from common import setup_module, with_seed, random_seed, teardown import scipy.stats as ss import unittest +from mxnet.test_utils import * def same(a, b): return np.sum(a != b) == 0 @@ -38,6 +39,9 @@ def check_with_device(device, dtype): 'name': 'normal', 'symbol': mx.sym.random.normal, 'ndop': mx.nd.random.normal, + 'pdfsymbol': mx.sym.random_pdf_normal, + 'pdffunc': ss.norm.pdf, + 'discrete': False, 'params': { 'loc': 10.0, 'scale': 0.5 }, 'inputs': [ ('loc',[ [ 0.0, 2.5 ], [ -9.75, 
-7.0 ] ]) , ('scale',[ [ 1.0, 3.7 ], [ 4.2, 1.5 ] ]) ], 'checks': [ @@ -68,6 +72,9 @@ def check_with_device(device, dtype): 'name': 'uniform', 'symbol': mx.sym.random.uniform, 'ndop': mx.nd.random.uniform, + 'pdfsymbol': mx.sym.random_pdf_uniform, + 'pdffunc': lambda x, low, high: ss.uniform.pdf(x, low, high-low), + 'discrete': False, 'params': { 'low': -1.5, 'high': 3.0 }, 'inputs': [ ('low', [ [ 0.0, 2.5 ], [ -9.75, -1.0 ] ]) , ('high', [ [ 1.0, 3.7 ], [ 4.2, 10.5 ] ]) ], 'checks': [ @@ -89,8 +96,11 @@ def check_with_device(device, dtype): 'name': 'gamma', 'symbol': mx.sym.random.gamma, 'ndop': mx.nd.random.gamma, + 'pdfsymbol': mx.sym.random_pdf_gamma, + 'pdffunc': lambda x, alpha, beta: ss.gamma.pdf(x, alpha, 0, 1/beta), + 'discrete': False, 'params': { 'alpha': 9.0, 'beta': 0.5 }, - 'inputs': [ ('alpha', [ [ 0.0, 2.5 ], [ 9.75, 11.0 ] ]) , ('beta', [ [ 1.0, 0.7 ], [ 0.5, 0.3 ] ]) ], + 'inputs': [ ('alpha', [ [ 0.1, 2.5 ], [ 9.75, 11.0 ] ]) , ('beta', [ [ 1.0, 0.7 ], [ 0.5, 0.3 ] ]) ], 'checks': [ ('mean', lambda x, params: np.mean(x.astype(np.float64)) - params['alpha'] * params['beta'], tol), ('std', lambda x, params: np.std(x.astype(np.float64)) - np.sqrt(params['alpha'] * params['beta'] ** 2), tol) @@ -110,6 +120,9 @@ def check_with_device(device, dtype): 'name': 'exponential', 'symbol': mx.sym.random.exponential, 'ndop': mx.nd.random.exponential, + 'pdfsymbol': mx.sym.random_pdf_exponential, + 'pdffunc': lambda x, lam: ss.expon.pdf(x, 0, 1/lam), + 'discrete': False, 'params': { 'scale': 1.0/4.0 }, 'inputs': [ ('scale', [ [ 1.0/1.0, 1.0/8.5 ], [ 1.0/2.7 , 1.0/0.5 ] ]) ], 'checks': [ @@ -131,6 +144,9 @@ def check_with_device(device, dtype): 'name': 'poisson', 'symbol': mx.sym.random.poisson, 'ndop': mx.nd.random.poisson, + 'pdfsymbol': mx.sym.random_pdf_poisson, + 'pdffunc': ss.poisson.pmf, + 'discrete': True, 'params': { 'lam': 4.0 }, 'inputs': [ ('lam', [ [ 25.0, 8.5 ], [ 2.7 , 0.5 ] ]) ], 'checks': [ @@ -152,6 +168,9 @@ def check_with_device(device, 
dtype): 'name': 'neg_binomial', 'symbol': mx.sym.random.negative_binomial, 'ndop': mx.nd.random.negative_binomial, + 'pdfsymbol': mx.sym.random_pdf_negative_binomial, + 'pdffunc': ss.nbinom.pmf, + 'discrete': True, 'params': { 'k': 3, 'p': 0.4 }, 'inputs': [ ('k', [ [ 3, 4 ], [ 5 , 6 ] ]) , ('p', [ [ 0.4 , 0.77 ], [ 0.5, 0.84 ] ]) ], 'checks': [ @@ -173,6 +192,9 @@ def check_with_device(device, dtype): 'name': 'gen_neg_binomial', 'symbol': mx.sym.random.generalized_negative_binomial, 'ndop': mx.nd.random.generalized_negative_binomial, + 'pdfsymbol': mx.sym.random_pdf_generalized_negative_binomial, + 'pdffunc': lambda x, mu, alpha: ss.nbinom.pmf(x, 1.0/alpha, 1.0/(mu*alpha+1.0)), + 'discrete': True, 'params': { 'mu': 2.0, 'alpha': 0.3 }, 'inputs': [ ('mu', [ [ 2.0, 2.5 ], [ 1.3, 1.9 ] ]) , ('alpha', [ [ 1.0, 0.1 ], [ 0.2, 0.5 ] ]) ], 'checks': [ @@ -195,6 +217,9 @@ def check_with_device(device, dtype): # Create enough samples such that we get a meaningful distribution. shape = (500, 500) + # Test pdf on smaller shapes as backward checks will take too long otherwise. + # This must be a subshape of the former one. 
+ pdfshape = (30, 30) for symbdic in symbols: name = symbdic['name'] ndop = symbdic['ndop'] @@ -270,7 +295,7 @@ def check_with_device(device, dtype): # check multi-distribution sampling symbol = symbdic['symbol'] params = { 'shape' : shape, 'dtype' : dtype } - single_param = len(symbdic['inputs']) == 1; + single_param = len(symbdic['inputs']) == 1 v1 = mx.sym.Variable('v1') v2 = mx.sym.Variable('v2') Y = symbol(v1,**params) if single_param else symbol(v1,v2,**params) @@ -290,12 +315,84 @@ def check_with_device(device, dtype): for check_name, check_func, tol in symbdic['checks']: assert np.abs(check_func(samples, params)) < tol, "symbolic test: %s check for `%s` did not pass" % (check_name, name) -@with_seed() -def test_random(): - check_with_device(mx.context.current_context(), 'float16') - check_with_device(mx.context.current_context(), 'float32') - check_with_device(mx.context.current_context(), 'float64') + # check pdfs with only a subset of the generated samples + un1 = np.resize(un1, (un1.shape[0], un1.shape[1], pdfshape[0], pdfshape[1])) + print(name) + symbol = symbdic['pdfsymbol'] + pdffunc = symbdic['pdffunc'] + v0 = mx.sym.Variable('v0') + v1 = mx.sym.Variable('v1') + v2 = mx.sym.Variable('v2') + p1 = np.array(symbdic['inputs'][0][1]) + p2 = None if single_param else np.array(symbdic['inputs'][1][1]) + # Move samples away from boundaries of support + if name == 'gamma' or name == 'exponential': + un1 = np.maximum(un1, 1e-1) + if name == 'uniform': + un1 = np.minimum(np.maximum(un1.reshape((un1.shape[0],un1.shape[1],-1)), p1.reshape((p1.shape[0],p1.shape[1],-1))+1e-4), + p2.reshape((p2.shape[0],p2.shape[1],-1))-1e-4).reshape(un1.shape) + for use_log in [False, True]: + test_pdf = symbol(v0, v1, is_log=use_log) if single_param else symbol(v0, v1, v2, is_log=use_log) + forw_atol = 1e-7 if dtype != np.float16 else 1e-3 + forw_rtol = 1e-4 if dtype != np.float16 else 5e-2 + backw_atol = 1e-3 + backw_rtol = 5e-2 + if single_param: + res = 
pdffunc(un1.reshape((un1.shape[0],un1.shape[1],-1)), + p1.reshape((p1.shape[0],p1.shape[1],-1))).reshape(un1.shape) + if use_log: + res = np.log(res) + check_symbolic_forward(test_pdf, [un1, p1], [res], atol=forw_atol, rtol=forw_rtol, dtype=dtype) + if dtype == np.float64: + grad_nodes = ['v1'] if symbdic['discrete'] else ['v0', 'v1'] + check_numeric_gradient(test_pdf, [un1, p1], grad_nodes=grad_nodes, atol=backw_atol, rtol=backw_rtol, dtype=dtype) + else: + res = pdffunc(un1.reshape((un1.shape[0],un1.shape[1],-1)), + p1.reshape((p1.shape[0],p1.shape[1],-1)), + p2.reshape((p2.shape[0],p2.shape[1],-1))).reshape(un1.shape) + if use_log: + res = np.log(res) + check_symbolic_forward(test_pdf, [un1, p1, p2], [res], atol=forw_atol, rtol=forw_rtol, dtype=dtype) + if dtype == np.float64: + grad_nodes = ['v1', 'v2'] if symbdic['discrete'] else ['v0', 'v1', 'v2'] + print(backw_rtol) + check_numeric_gradient(test_pdf, [un1, p1, p2], grad_nodes=grad_nodes, atol=backw_atol, rtol=backw_rtol, dtype=dtype) + +@with_seed(1000) +def test_dirichlet(): + num_classes = 2 + num = 100 + alpha = np.random.uniform(low=0.5, high=2, size=(4, num_classes)) + + samples = [] + results = [] + for a in alpha: + v = ss.dirichlet.rvs(a, size=num) + samples.append(v) + results.append(ss.dirichlet.logpdf(v.transpose(), a)) + samples = np.concatenate(samples, axis=0).reshape((2, 2, num, num_classes)) + results = np.concatenate(results, axis=0).reshape((2, 2, num)) + + alpha = alpha.reshape((2, 2, num_classes)) + + for dtype in [np.float32, np.float64]: + forw_atol = 1e-5 + forw_rtol = 1e-4 + for use_log in [False, True]: + v0 = mx.sym.Variable('v0') + v1 = mx.sym.Variable('v1') + test_pdf = mx.sym.random_pdf_dirichlet(v0, v1, is_log=use_log) + res = results if use_log else np.exp(results) + check_symbolic_forward(test_pdf, [samples, alpha], [res], atol=forw_atol, rtol=forw_rtol, dtype=dtype) + if dtype == np.float64: + backw_atol = 1e-2 + backw_rtol = 1e-2 + eps = 1e-5 + 
check_numeric_gradient(test_pdf, [samples, alpha], numeric_eps=eps, atol=backw_atol, rtol=backw_rtol, dtype=dtype) +def test_random(): + for dtype in [np.float16, np.float32, np.float64]: + check_with_device(mx.context.current_context(), dtype) # Set seed variously based on `start_seed` and `num_init_seeds`, then set seed finally to `final_seed` def set_seed_variously(init_seed, num_init_seeds, final_seed):