From 32dc121c6e46f68a7f6e359897014a1fcc1b8323 Mon Sep 17 00:00:00 2001 From: David Seiler Date: Mon, 1 Apr 2019 10:49:29 +0200 Subject: [PATCH] PDF operators for each distribution for which we have a random sampler (plus also the PDF of the Dirichlet). Supports probabilities and log-probabilities, as well as gradients. --- src/operator/random/pdf_op.cc | 319 ++++++++++++++ src/operator/random/pdf_op.cu | 48 +++ src/operator/random/pdf_op.h | 613 +++++++++++++++++++++++++++ tests/python/unittest/test_random.py | 110 ++++- 4 files changed, 1084 insertions(+), 6 deletions(-) create mode 100644 src/operator/random/pdf_op.cc create mode 100644 src/operator/random/pdf_op.cu create mode 100644 src/operator/random/pdf_op.h diff --git a/src/operator/random/pdf_op.cc b/src/operator/random/pdf_op.cc new file mode 100644 index 000000000000..070ca81d61ff --- /dev/null +++ b/src/operator/random/pdf_op.cc @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file pdf_op.cc + * \brief CPU-operators for computing the pdf of random distributions. + */ + +#include "./pdf_op.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(PdfParam); + +#define MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, num_parms, \ + parm_name_1, parm_name_2, \ + parm_desc_1, parm_desc_2, \ + description, vectorparms) \ + NNVM_REGISTER_OP(_random_pdf_##distr) \ + .add_alias("random_pdf_" #distr) \ + .describe(description()+std::string(ADD_FILELINE)) \ + .set_num_inputs(num_parms+1) \ + .set_num_outputs(1) \ + .set_attr_parser(ParamParser) \ + .set_attr("FListInputNames", \ + [](const NodeAttrs& attrs) { \ + std::vector v = {"sample", parm_name_1, parm_name_2}; \ + v.resize(num_parms+1); \ + return v; \ + }) \ + .set_attr("FInferShape", PdfOpShape) \ + .set_attr("FInferType", ElemwiseType) \ + .set_attr("FCompute", PdfOpForward) \ + .set_attr("FGradient", ElemwiseGradUseInOut{"_backward_pdf_" #distr}) \ + .add_argument("sample", "NDArray-or-Symbol", "Samples from the distributions.") \ + .add_argument(parm_name_1, "NDArray-or-Symbol", parm_desc_1) \ + .add_arguments(PdfParam::__FIELDS__()) + +#define MXNET_OPERATOR_REGISTER_PDF_GRAD(distr, pdffunc, num_parms, vectorparms) \ + NNVM_REGISTER_OP(_backward_pdf_##distr) \ + .set_num_inputs(num_parms+3) \ + .set_num_outputs(num_parms+1) \ + .set_attr_parser(ParamParser) \ + .set_attr("FInplaceOption", [](const NodeAttrs& attrs) \ + { std::vector > v = {{1, 0}, {2, 1}, {3, 2}}; \ + v.resize(num_parms+1); \ + return v; }) \ + .set_attr("FResourceRequest", [](const NodeAttrs& attrs) \ + { return std::vector{ResourceRequest::kTempSpace}; }) \ + .set_attr("TIsBackward", true) \ + .set_attr("FCompute", PdfOpBackward); + + +#define MXNET_OPERATOR_REGISTER_PDF1(distr, pdffunc, parm_name, parm_desc, \ + description, vectorparms) \ + MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, 1, parm_name, parm_name, \ + parm_desc, parm_desc, description, vectorparms); \ + MXNET_OPERATOR_REGISTER_PDF_GRAD(distr, pdffunc, 1, vectorparms) + +#define MXNET_OPERATOR_REGISTER_PDF2(distr, pdffunc, parm_name_1, parm_name_2, \ + parm_desc_1, parm_desc_2, description) \ + MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, 2, parm_name_1, parm_name_2, \ + parm_desc_1, parm_desc_2, description, false) \ + .add_argument(parm_name_2, "NDArray-or-Symbol", parm_desc_2); \ + MXNET_OPERATOR_REGISTER_PDF_GRAD(distr, pdffunc, 2, false) + +inline std::string uniform_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +uniform distributions on the intervals given by *[low,high)*. + +*low* and *high* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *low* and *high*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *low* and *high* at index *i*. + +Examples:: + + random_pdf_uniform(sample=[[1,2,3,4]], low=[0], high=[10]) = [0.1, 0.1, 0.1, 0.1] + + sample = [[[1, 2, 3], + [1, 2, 3]], + [[1, 2, 3], + [1, 2, 3]]] + low = [[0, 0], + [0, 0]] + high = [[ 5, 10], + [15, 20]] + random_pdf_uniform(sample=sample, low=low, high=high) = + [[[0.2, 0.2, 0.2 ], + [0.1, 0.1, 0.1 ]], + [[0.06667, 0.06667, 0.06667], + [0.05, 0.05, 0.05 ]]] + +)code"); +} + +inline std::string normal_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +normal distributions with parameters *mu* (mean) and *sigma* (standard deviation). + +*mu* and *sigma* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *mu* and *sigma*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *mu* and *sigma* at index *i*. + +Examples:: + + sample = [[-2, -1, 0, 1, 2]] + random_pdf_normal(sample=sample, mu=[0], sigma=[1]) = + [[0.05399097, 0.24197073, 0.3989423, 0.24197073, 0.05399097]] + + random_pdf_normal(sample=sample*2, mu=[0,0], sigma=[1,2]) = + [[0.05399097, 0.24197073, 0.3989423, 0.24197073, 0.05399097], + [0.12098537, 0.17603266, 0.19947115, 0.17603266, 0.12098537]] +)code"); +} + +inline std::string gamma_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +gamma distributions with parameters *alpha* (shape) and *beta* (rate). + +*alpha* and *beta* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *alpha* and *beta*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *alpha* and *beta* at index *i*. + +Examples:: + + random_pdf_gamma(sample=[[1,2,3,4,5]], alpha=[5], beta=[1]) = + [[0.01532831, 0.09022352, 0.16803136, 0.19536681, 0.17546739]] + + sample = [[1, 2, 3, 4, 5], + [2, 3, 4, 5, 6], + [3, 4, 5, 6, 7]] + + random_pdf_gamma(sample=sample, alpha=[5,6,7], beta=[1,1,1]) = + [[0.01532831, 0.09022352, 0.16803136, 0.19536681, 0.17546739], + [0.03608941, 0.10081882, 0.15629345, 0.17546739, 0.16062315], + [0.05040941, 0.10419563, 0.14622283, 0.16062315, 0.14900276]] +)code"); +} + +inline std::string exponential_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +exponential distributions with parameters *lam* (rate). + +The shape of *lam* must match the leftmost subshape of *sample*. That is, *sample* +can have the same shape as *lam*, in which case the output contains one density per +distribution, or *sample* can be a tensor of tensors with that shape, in which case +the output is a tensor of densities such that the densities at index *i* in the output +are given by the samples at index *i* in *sample* parameterized by the value of *lam* +at index *i*. + +Examples:: + + random_pdf_exponential(sample=[[1, 2, 3]], lam=[1]) = + [[0.36787945, 0.13533528, 0.04978707]] + + sample = [[1,2,3], + [1,2,3], + [1,2,3]] + + random_pdf_exponential(sample=sample, lam=[1,0.5,0.25]) = + [[0.36787945, 0.13533528, 0.04978707], + [0.30326533, 0.18393973, 0.11156508], + [0.1947002, 0.15163267, 0.11809164]] +)code"); +} + +inline std::string poisson_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +Poisson distributions with parameters *lam* (rate). + +The shape of *lam* must match the leftmost subshape of *sample*. That is, *sample* +can have the same shape as *lam*, in which case the output contains one density per +distribution, or *sample* can be a tensor of tensors with that shape, in which case +the output is a tensor of densities such that the densities at index *i* in the output +are given by the samples at index *i* in *sample* parameterized by the value of *lam* +at index *i*. + +Examples:: + + random_pdf_poisson(sample=[[0,1,2,3]], lam=[1]) = + [[0.36787945, 0.36787945, 0.18393973, 0.06131324]] + + sample = [[0,1,2,3], + [0,1,2,3], + [0,1,2,3]] + + random_pdf_poisson(sample=sample, lam=[1,2,3]) = + [[0.36787945, 0.36787945, 0.18393973, 0.06131324], + [0.13533528, 0.27067056, 0.27067056, 0.18044704], + [0.04978707, 0.14936121, 0.22404182, 0.22404182]] +)code"); +} + +inline std::string negative_binomial_desc() { + return std::string(R"code(Computes the value of the PDF of samples of +negative binomial distributions with parameters *k* (failure limit) and *p* (failure probability). + +*k* and *p* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *k* and *p*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *k* and *p* at index *i*. + +Examples:: + + random_pdf_negative_binomial(sample=[[1,2,3,4]], k=[1], p=a[0.5]) = + [[0.25, 0.125, 0.0625, 0.03125]] + + # Note that k may be real-valued + sample = [[1,2,3,4], + [1,2,3,4]] + random_pdf_negative_binomial(sample=sample, k=[1, 1.5], p=[0.5, 0.5]) = + [[0.25, 0.125, 0.0625, 0.03125 ], + [0.26516506, 0.16572815, 0.09667476, 0.05437956]] +)code"); +} + +inline std::string generalized_negative_binomial_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +generalized negative binomial distributions with parameters *mu* (mean) +and *alpha* (dispersion). This can be understood as a reparameterization of +the negative binomial, where *k* = *1 / alpha* and *p* = *1 / (mu \* alpha + 1)*. + +*mu* and *alpha* must have the same shape, which must match the leftmost subshape +of *sample*. That is, *sample* can have the same shape as *mu* and *alpha*, in which +case the output contains one density per distribution, or *sample* can be a tensor +of tensors with that shape, in which case the output is a tensor of densities such that +the densities at index *i* in the output are given by the samples at index *i* in *sample* +parameterized by the values of *mu* and *alpha* at index *i*. + +Examples:: + + random_pdf_generalized_negative_binomial(sample=[[1, 2, 3, 4]], alpha=[1], mu=[1]) = + [[0.25, 0.125, 0.0625, 0.03125]] + + sample = [[1,2,3,4], + [1,2,3,4]] + random_pdf_generalized_negative_binomial(sample=sample, alpha=[1, 0.6666], mu=[1, 1.5]) = + [[0.25, 0.125, 0.0625, 0.03125 ], + [0.26517063, 0.16573331, 0.09667706, 0.05437994]] +)code"); +} + +inline std::string dirichlet_desc() { + return std::string(R"code(Computes the value of the PDF of *sample* of +Dirichlet distributions with parameter *alpha*. + +The shape of *alpha* must match the leftmost subshape of *sample*. That is, *sample* +can have the same shape as *alpha*, in which case the output contains one density per +distribution, or *sample* can be a tensor of tensors with that shape, in which case +the output is a tensor of densities such that the densities at index *i* in the output +are given by the samples at index *i* in *sample* parameterized by the value of *alpha* +at index *i*. + +Examples:: + + random_pdf_dirichlet(sample=[[1,2],[2,3],[3,4]], alpha=[2.5, 2.5]) = + [38.413498, 199.60245, 564.56085] + + sample = [[[1, 2, 3], [10, 20, 30], [100, 200, 300]], + [[0.1, 0.2, 0.3], [0.01, 0.02, 0.03], [0.001, 0.002, 0.003]]] + + random_pdf_dirichlet(sample=sample, alpha=[0.1, 0.4, 0.9]) = + [[2.3257459e-02, 5.8420084e-04, 1.4674458e-05], + [9.2589635e-01, 3.6860607e+01, 1.4674468e+03]] +)code"); +} + +MXNET_OPERATOR_REGISTER_PDF2(uniform, PDF_Uniform, "low", "high", + "Lower bounds of the distributions.", "Upper bounds of the distributions.", uniform_desc) +MXNET_OPERATOR_REGISTER_PDF2(normal, PDF_Normal, "mu", "sigma", + "Means of the distributions.", "Standard deviations of the distributions.", normal_desc) +MXNET_OPERATOR_REGISTER_PDF2(gamma, PDF_Gamma, "alpha", "beta", + "Alpha (shape) parameters of the distributions.", "Beta (scale) parameters of the distributions.", + gamma_desc) +MXNET_OPERATOR_REGISTER_PDF1(exponential, PDF_Exponential, "lam", + "Lambda (rate) parameters of the distributions.", exponential_desc, false) +MXNET_OPERATOR_REGISTER_PDF1(poisson, PDF_Poisson, "lam", + "Lambda (rate) parameters of the distributions.", poisson_desc, false) +MXNET_OPERATOR_REGISTER_PDF2(negative_binomial, PDF_NegativeBinomial, "k", "p", + "Limits of unsuccessful experiments.", "Failure probabilities in each experiment.", + negative_binomial_desc) +MXNET_OPERATOR_REGISTER_PDF2(generalized_negative_binomial, + PDF_GeneralizedNegativeBinomial, "mu", "alpha", + "Means of the distributions.", "Alpha (dispersion) parameters of the distributions.", + generalized_negative_binomial_desc) +MXNET_OPERATOR_REGISTER_PDF1(dirichlet, PDF_Dirichlet, "alpha", + "Concentration parameters of the distributions.", dirichlet_desc, true) + +} // namespace op +} // namespace mxnet diff --git a/src/operator/random/pdf_op.cu b/src/operator/random/pdf_op.cu new file mode 100644 index 000000000000..e77720b2c329 --- /dev/null +++ b/src/operator/random/pdf_op.cu @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file pdf_op.cu + * \brief GPU-operators for computing the pdf of random distributions. + */ + +#include "./pdf_op.h" + +namespace mxnet { +namespace op { + +#define MXNET_OPERATOR_REGISTER_PDF(distr, pdffunc, num_parms, vector_parms) \ + NNVM_REGISTER_OP(_random_pdf_##distr) \ + .set_attr("FCompute", PdfOpForward); \ + NNVM_REGISTER_OP(_backward_pdf_##distr) \ + .set_attr("FCompute", PdfOpBackward); + +MXNET_OPERATOR_REGISTER_PDF(uniform, PDF_Uniform, 2, false) +MXNET_OPERATOR_REGISTER_PDF(normal, PDF_Normal, 2, false) +MXNET_OPERATOR_REGISTER_PDF(gamma, PDF_Gamma, 2, false) +MXNET_OPERATOR_REGISTER_PDF(exponential, PDF_Exponential, 1, false) +MXNET_OPERATOR_REGISTER_PDF(poisson, PDF_Poisson, 1, false) +MXNET_OPERATOR_REGISTER_PDF(negative_binomial, PDF_NegativeBinomial, 2, false) +MXNET_OPERATOR_REGISTER_PDF(generalized_negative_binomial, + PDF_GeneralizedNegativeBinomial, 2, false) +MXNET_OPERATOR_REGISTER_PDF(dirichlet, PDF_Dirichlet, 1, true) + +} // namespace op +} // namespace mxnet diff --git a/src/operator/random/pdf_op.h b/src/operator/random/pdf_op.h new file mode 100644 index 000000000000..45561c67a156 --- /dev/null +++ b/src/operator/random/pdf_op.h @@ -0,0 +1,613 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2018 by Contributors + * \file pdf_op.h + * \brief Operators for computing the pdf of random distributions. + */ +#ifndef MXNET_OPERATOR_RANDOM_PDF_OP_H_ +#define MXNET_OPERATOR_RANDOM_PDF_OP_H_ + +#include +#include +#include +#include "../mshadow_op.h" +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" +#include "../special_functions-inl.h" +#include "../tensor/broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +template +MSHADOW_XINLINE static DType ceph_psi(DType val) { return special_functions::cephes::psi(val); } +template<> +MSHADOW_XINLINE mshadow::half::half_t ceph_psi(mshadow::half::half_t val) { + return special_functions::cephes::psi(val); +} + +template +struct PDF_Uniform { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *lower, IType2 *upper) { + const int index(start / sample_size); + const DType l(lower[index]), h(upper[index]); + const int end = start + length; + for (int i = start; i < end; ++i) { + // No check whether sample is in the support. + out[i] = logpdf ? -DType(log(h-l)) : DType(1.0)/(h-l); + } + } +}; + +template +struct PDF_Uniform_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *lower, IType2 *upper, + DType *grad_out, IType1 *grad_sample, IType2 *grad_lower, IType2 *grad_upper) { + const int index(start / sample_size); + const DType l(lower[index]), h(upper[index]); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_lower[i] = scaling/(h-l); + grad_upper[i] = scaling/(l-h); + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } +}; + +template +struct PDF_Normal { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *loc, IType2 *scale) { + const int index(start / sample_size); + const DType u(loc[index]), s(scale[index]), sq(s*s); + const DType normalizer(sqrt(2.0*mxnet_op::PI)*s); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType exponent((DType(-0.5)*(x-u)*(x-u))/(sq)); + out[i] = logpdf ? exponent-log(normalizer) : exp(exponent)/normalizer; + } + } +}; + +template +struct PDF_Normal_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *loc, IType2 *scale, + DType *grad_out, IType1 *grad_sample, IType2 *grad_loc, IType2 *grad_scale) { + const int index(start / sample_size); + const DType u(loc[index]), s(scale[index]), s_squared(s*s), s_cubed(s_squared*s); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_loc[i] = scaling*(x-u)/s_squared; + grad_scale[i] = scaling*((x-u)*(x-u)-s_squared)/s_cubed; + KERNEL_ASSIGN(grad_sample[i], req, scaling*(u-x)/s_squared); + } + } +}; + +template +struct PDF_Gamma { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *alpha, IType2 *beta) { + const int index(start / sample_size); + const DType a(alpha[index]), b(beta[index]), lgamma_a(lgamma(a)), a_log_b(a*log(b)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType lpdf(a_log_b+(a-1)*log(x)-b*x-lgamma_a); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + +template +struct PDF_Gamma_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *alpha, IType2 *beta, + DType *grad_out, IType1 *grad_sample, IType2 *grad_alpha, IType2 *grad_beta) { + const int index(start / sample_size); + const DType a(alpha[index]), b(beta[index]), log_b(log(b)), ceph_psi_a(ceph_psi(a)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_alpha[i] = scaling*(log_b+log(x)-ceph_psi_a); + grad_beta[i] = scaling*(a/b-x); + KERNEL_ASSIGN(grad_sample[i], req, scaling*((a-1)/x-b)); + } + } +}; + +template +struct PDF_Exponential { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *lambda) { + const int index(start / sample_size); + const DType l(lambda[index]), log_l(log(l)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + out[i] = logpdf ? log_l-l*x : l*exp(-l*x); + } + } +}; + +template +struct PDF_Exponential_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *lambda, + DType *grad_out, IType1 *grad_sample, IType2 *grad_lambda) { + const int index(start / sample_size); + const DType l(lambda[index]); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_lambda[i] = scaling*(DType(1)/l-x); + KERNEL_ASSIGN(grad_sample[i], req, -scaling*l); + } + } +}; + +template +struct PDF_Poisson { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *lambda) { + const int index(start / sample_size); + const DType l(lambda[index]), log_l(log(l)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType lpdf((x*log_l-lgamma(x+1))-l); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + +template +struct PDF_Poisson_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *lambda, + DType *grad_out, IType1 *grad_sample, IType2 *grad_lambda) { + const int index(start / sample_size); + const DType l(lambda[index]); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + grad_lambda[i] = scaling*(x/l-DType(1)); + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } +}; + + +template +struct PDF_NegativeBinomial { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *limit, IType2 *prob) { + const int index(start / sample_size); + const DType l(limit[index]), p(prob[index]), lgamma_l(lgamma(l)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType x(sample[i]); + const DType lpdf((lgamma(x+l)-lgamma(x+1)-lgamma_l)+l*log(p)+x*log(1-p)); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } + + template + MSHADOW_XINLINE static DType LPDF(DType l, DType p, DType x) { + // Note that "p" is the failure and not the success probability. + return (lgamma(x+l)-lgamma(x+1)-lgamma(l))+l*log(p)+x*log(1-p); + } +}; + +template +struct PDF_NegativeBinomial_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *limit, IType2 *prob, + DType *grad_out, IType1 *grad_sample, IType2 *grad_limit, IType2 *grad_prob) { + const int index(start / sample_size); + const int end = start + length; + for (int i = start; i < end; ++i) { + DType grad_l(0), grad_p(0); + LPDF_GRAD(DType(limit[index]), DType(prob[index]), + DType(sample[i]), out[i], + grad_out[i], &grad_l, &grad_p); + grad_limit[i] = grad_l; + grad_prob[i] = grad_p; + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } + + template + MSHADOW_XINLINE static void LPDF_GRAD(DType l, DType p, DType x, DType o, DType grad_o, + DType* grad_l, DType* grad_p) { + const DType scaling(grad_o*(logpdf ? DType(1) : o)); + *grad_l = scaling*((ceph_psi(x+l)-ceph_psi(l))+log(p)); + *grad_p = scaling*(l/p-x/(1-p)); + } +}; + +template +struct PDF_GeneralizedNegativeBinomial { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + DType *out, IType1 *sample, IType2 *mu, IType2 *alpha) { + const int index(start / sample_size); + + // Reparameterize with limit = 1/alpha, prob = 1/(mu*alpha+1) + const DType limit(1.0/alpha[index]), prob(1.0/(mu[index]*alpha[index]+1.0)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + const DType lpdf(PDF_NegativeBinomial::LPDF(limit, prob, DType(sample[i]))); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + +template +struct PDF_GeneralizedNegativeBinomial_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, OpReqType req, + DType *out, IType1 *sample, IType2 *mu, IType2 *alpha, + DType *grad_out, IType1 *grad_sample, IType2 *grad_mu, IType2 *grad_alpha) { + const int index(start / sample_size); + const DType fmu(mu[index]), falpha(alpha[index]), den(fmu*falpha+1.0); + + // Reparameterize with limit = 1/alpha, prob = 1/(mu*alpha+1) + const DType limit(1.0/falpha), prob(1.0/(fmu*falpha+1.0)); + + const int end = start + length; + for (int i = start; i < end; ++i) { + // Grad returned as d_limit, d_prob + DType grad_l(0), grad_p(0); + PDF_NegativeBinomial_Grad::LPDF_GRAD(limit, prob, + DType(sample[i]), out[i], + grad_out[i], &grad_l, &grad_p); + grad_mu[i] = -grad_p*falpha/(den*den); + grad_alpha[i] = -grad_l/(falpha*falpha)-grad_p*fmu/(den*den); + KERNEL_ASSIGN(grad_sample[i], req, 0); + } + } +}; + +template +struct PDF_Dirichlet { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, int k, + DType *out, IType1 *sample, IType2 *alpha) { + const int index(start / sample_size); + const int end = start + length; + for (int i = start; i < end; ++i) { + const IType1 *cur_sample = sample+i*k; + const IType2 *cur_alpha = alpha+index*k; + DType sum_alpha(0), sum_lgamma(0), sum_sample(0); + for ( int j = 0; j < k; ++j ) { + sum_alpha += cur_alpha[j]; + sum_lgamma += lgamma(cur_alpha[j]); + sum_sample += (cur_alpha[j]-1)*log(cur_sample[j]); + } + DType lpdf(sum_sample+(lgamma(sum_alpha)-sum_lgamma)); + out[i] = logpdf ? lpdf : DType(exp(lpdf)); + } + } +}; + + +template +struct PDF_Dirichlet_Grad { + template + MSHADOW_XINLINE static void Map(int start, int length, int sample_size, + OpReqType req, int k, + DType *out, IType1 *sample, IType2 *alpha, + DType *grad_out, IType1 *grad_sample, IType2 *grad_alpha) { + const int index(start / sample_size); + const int end = start + length; + for (int i = start; i < end; ++i) { + // Digamma function + const IType1 *cur_sample = sample+i*k; + const IType2 *cur_alpha = alpha+index*k; + const DType scaling(grad_out[i]*(logpdf ? DType(1) : out[i])); + DType sum_alpha(0); + for ( int j = 0; j < k; ++j ) { + sum_alpha += cur_alpha[j]; + } + const DType psi_sum(ceph_psi(sum_alpha)); + for ( int j = 0; j < k; ++j ) { + // order grad_alpha differently to allow efficient reduction at the end. + grad_alpha[i%sample_size+sample_size*(j+k*index)] = + scaling * (log(cur_sample[j])+(psi_sum-ceph_psi(cur_alpha[j]))); + KERNEL_ASSIGN(grad_sample[i*k+j], req, scaling*(cur_alpha[j]-1)/cur_sample[j]); + } + } + } +}; + +struct PdfParam : public dmlc::Parameter { + bool is_log; + DMLC_DECLARE_PARAMETER(PdfParam) { + DMLC_DECLARE_FIELD(is_log).set_default(false) + .describe("If set, compute the density of the log-probability instead of the probability."); + } +}; + +template +inline bool PdfOpShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_GT(in_attrs->size(), 1) + << "pdf operator takes at least 2 arguments (" << in_attrs->size() << " given)"; + CHECK_EQ(out_attrs->size(), 1); + // All inputs must be defined in order to infer output shape. + if ( std::all_of((*in_attrs).begin(), + (*in_attrs).end(), + [](const TShape& s){ return s.ndim() > 0; }) ) { + // Tensors of distribution parameters must have same shape. + for (size_t i = 2; i < in_attrs->size(); ++i) { + SHAPE_ASSIGN_CHECK(*in_attrs, i, (*in_attrs)[i-1]); + } + // Tensors of distribution parameters must match leftmost subshape of samples. + CHECK_LE((*in_attrs)[1].ndim(), (*in_attrs)[0].ndim()) + << "dimension of input samples (" << (*in_attrs)[0].ndim() + << ") must be at least dimension of distribution parameters ("<< (*in_attrs)[1].ndim() << ")"; + TShape tshape((*in_attrs)[0].begin(), (*in_attrs)[0].begin()+(*in_attrs)[1].ndim()); + if (vparm) { + *(tshape.end()-1) = *((*in_attrs)[0].end()-1); + } + for (size_t i = 1; i < in_attrs->size(); ++i) { + SHAPE_ASSIGN_CHECK(*in_attrs, i, tshape); + } + // Output shape must equal input tensor of samples except for last dimension if we are + // dealing with samples that are itself vectors. Be aware of the special case where we + // are dealing with a single vector sample. + if (vparm && ((*in_attrs)[0].ndim() == 1)) { + // Special case where we are dealing with a single vector sample. + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape1(1)); + } else { + TShape oshape((*in_attrs)[0].begin(), (*in_attrs)[0].end()-(vparm ? 1 : 0)); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); + } + return true; + } + return false; +} + +template +struct LaunchExWrapper { + template + MSHADOW_XINLINE static void Map(const int start, const int length, const int sample_size, + Args... args) { + // Apply the operator to the sample in strides of sample_size, so that + // the operators can assume that their distribution parameters are constant. + int i = start; + + // Get aligned + const int align_step = sample_size - (i % sample_size); + const int first_stride = length > align_step ? align_step : length; + OP::Map(i, first_stride, sample_size, args...); + i += first_stride; + + const int end = start + length - sample_size; + for (; i < end; i += sample_size) { + OP::Map(i, sample_size, sample_size, args...); + } + + // Last stride might not be aligned either + const int last_stride = start + length - i; + if (last_stride > 0) { // Don't overstep even if length <= sample_size + OP::Map(i, last_stride, sample_size, args...); + } + } +}; + +template +struct PdfCaller; + +template +struct PdfCaller { + static void op(const std::vector& inputs, + const std::vector& outputs, + mshadow::Stream *s) { + CHECK_EQ(inputs[0].Size()%inputs[1].Size(), 0); + CHECK_EQ(inputs[0].Size()%outputs[0].Size(), 0); + index_t num_samples(inputs[0].Size()/inputs[1].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, outputs[0].Size(), num_samples, + outputs[0].dptr(), inputs[0].dptr(), inputs[1].dptr()); + } +}; + +template +struct PdfCaller { + static void op(const std::vector& inputs, + const std::vector& outputs, + mshadow::Stream *s) { + CHECK_EQ(inputs[0].Size()%inputs[1].Size(), 0); + CHECK_EQ(inputs[0].Size()%outputs[0].Size(), 0); + index_t num_samples(inputs[0].Size()/inputs[1].Size()); + index_t sample_size(inputs[0].Size()/outputs[0].Size()); + // Covers distributions parametrized by a vector of parameters (Dirichlet distribution). + mxnet_op::Kernel, xpu>::LaunchEx(s, outputs[0].Size(), + num_samples, sample_size, + outputs[0].dptr(), inputs[0].dptr(), inputs[1].dptr()); + } +}; + +template +struct PdfCaller { + static void op(const std::vector& inputs, + const std::vector& outputs, + mshadow::Stream *s) { + CHECK_EQ(inputs[0].Size()%inputs[1].Size(), 0); + CHECK_EQ(inputs[0].Size(), outputs[0].Size()); + index_t num_samples(inputs[0].Size()/inputs[1].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, outputs[0].Size(), num_samples, + outputs[0].dptr(), inputs[0].dptr(), + inputs[1].dptr(), inputs[2].dptr()); + } +}; + +template class pdf, int pnum, bool vparm> +void PdfOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_NE(req[0], kAddTo); + CHECK_EQ(inputs.size(), pnum+1); + CHECK_EQ(outputs.size(), 1); + mshadow::Stream *s = ctx.get_stream(); + const PdfParam& param = nnvm::get(attrs.parsed); + MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + if ( param.is_log ) { + PdfCaller, pnum, vparm>::op(inputs, outputs, s); + } else { + PdfCaller, pnum, vparm>::op(inputs, outputs, s); + } + }); +} + + +template +struct PdfGradCaller; + +template +struct PdfGradCaller { + static void op(const std::vector& inputs, + const std::vector& req, + const std::vector& grads, + mshadow::Stream *s) { + index_t num_samples(inputs[1].Size()/inputs[2].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, inputs[0].Size(), + num_samples, req[0], + inputs[3].dptr(), inputs[1].dptr(), inputs[2].dptr(), + inputs[0].dptr(), grads[0].dptr(), grads[1].dptr()); + } +}; + +template +struct PdfGradCaller { + static void op(const std::vector& inputs, + const std::vector& req, + const std::vector& grads, + mshadow::Stream *s) { + index_t num_samples(inputs[1].Size()/inputs[2].Size()); + index_t sample_size(inputs[1].Size()/inputs[0].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, inputs[0].Size(), num_samples, + req[0], sample_size, + inputs[3].dptr(), inputs[1].dptr(), inputs[2].dptr(), + inputs[0].dptr(), grads[0].dptr(), grads[1].dptr()); + } +}; + +template +struct PdfGradCaller { + static void op(const std::vector& inputs, + const std::vector& req, + const std::vector& grads, + mshadow::Stream *s) { + index_t num_samples(inputs[1].Size()/inputs[2].Size()); + mxnet_op::Kernel, xpu>::LaunchEx(s, inputs[0].Size(), + num_samples, req[0], + inputs[4].dptr(), inputs[1].dptr(), inputs[2].dptr(), + inputs[3].dptr(), inputs[0].dptr(), + grads[0].dptr(), grads[1].dptr(), grads[2].dptr()); + } +}; + +template class pdfgrad, int pnum, bool vparm> +void PdfOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + CHECK_EQ(inputs.size(), pnum+3); + CHECK_EQ(outputs.size(), pnum+1); + mshadow::Stream *s = ctx.get_stream(); + const PdfParam& param = nnvm::get(attrs.parsed); + const size_t N(outputs[1].Size()); + const TShape src_shape(Shape2(N, outputs[0].Size()/N)), dst_shape(Shape2(N, 1)); + // Inputs to PdfOpBackward: grad, samples, parm1, parm2, pdf. + MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + const size_t red_work_size(broadcast::ReduceWorkspaceSize<2, DType>( + s, dst_shape, kAddTo, src_shape)); + const size_t tmp_size(outputs[0].Size()*pnum*sizeof(DType)+red_work_size); + Tensor tmp_space = + ctx.requested[0].get_space_typed(Shape1(tmp_size), s); + std::vector grads = {outputs[0]}; + grads.push_back(TBlob(tmp_space.dptr_, outputs[0].shape_, + outputs[1].dev_mask(), outputs[1].type_flag_, outputs[1].dev_id())); + if (pnum == 2) { + grads.push_back(TBlob(tmp_space.dptr_+outputs[0].Size()*sizeof(DType), outputs[0].shape_, + outputs[2].dev_mask(), outputs[2].type_flag_, outputs[2].dev_id())); + } + if (param.is_log) { + PdfGradCaller, pnum, vparm>::op(inputs, req, grads, s); + } else { + PdfGradCaller, pnum, vparm>::op(inputs, req, grads, s); + } + Tensor red_work( + tmp_space.dptr_+pnum*outputs[0].Size()*sizeof(DType), Shape1(red_work_size), s); + broadcast::Reduce( + s, outputs[1].reshape(dst_shape), req[1], red_work, grads[1].reshape(src_shape)); + if (pnum == 2) { + broadcast::Reduce( + s, outputs[2].reshape(dst_shape), req[2], red_work, grads[2].reshape(src_shape)); + } + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_RANDOM_PDF_OP_H_ diff --git a/tests/python/unittest/test_random.py b/tests/python/unittest/test_random.py index 8fbd97d8a162..362ec44400b7 100644 --- a/tests/python/unittest/test_random.py +++ b/tests/python/unittest/test_random.py @@ -25,6 +25,7 @@ from common import setup_module, with_seed, random_seed, teardown import scipy.stats as ss import unittest +from mxnet.test_utils import * def same(a, b): return np.sum(a != b) == 0 @@ -38,6 +39,9 @@ def check_with_device(device, dtype): 'name': 'normal', 'symbol': mx.sym.random.normal, 'ndop': mx.nd.random.normal, + 'pdfsymbol': mx.sym.random_pdf_normal, + 'pdffunc': ss.norm.pdf, + 'discrete': False, 'params': { 'loc': 10.0, 'scale': 0.5 }, 'inputs': [ ('loc',[ [ 0.0, 2.5 ], [ -9.75, -7.0 ] ]) , ('scale',[ [ 1.0, 3.7 ], [ 4.2, 1.5 ] ]) ], 'checks': [ @@ -68,6 +72,9 @@ def check_with_device(device, dtype): 'name': 'uniform', 'symbol': mx.sym.random.uniform, 'ndop': mx.nd.random.uniform, + 'pdfsymbol': mx.sym.random_pdf_uniform, + 'pdffunc': lambda x, low, high: ss.uniform.pdf(x, low, high-low), + 'discrete': False, 'params': { 'low': -1.5, 'high': 3.0 }, 'inputs': [ ('low', [ [ 0.0, 2.5 ], [ -9.75, -1.0 ] ]) , ('high', [ [ 1.0, 3.7 ], [ 4.2, 10.5 ] ]) ], 'checks': [ @@ -89,8 +96,11 @@ def check_with_device(device, dtype): 'name': 'gamma', 'symbol': mx.sym.random.gamma, 'ndop': mx.nd.random.gamma, + 'pdfsymbol': mx.sym.random_pdf_gamma, + 'pdffunc': lambda x, alpha, beta: ss.gamma.pdf(x, alpha, 0, 1/beta), + 'discrete': False, 'params': { 'alpha': 9.0, 'beta': 0.5 }, - 'inputs': [ ('alpha', [ [ 0.0, 2.5 ], [ 9.75, 11.0 ] ]) , ('beta', [ [ 1.0, 0.7 ], [ 0.5, 0.3 ] ]) ], + 'inputs': [ ('alpha', [ [ 0.1, 2.5 ], [ 9.75, 11.0 ] ]) , ('beta', [ [ 1.0, 0.7 ], [ 0.5, 0.3 ] ]) ], 'checks': [ ('mean', lambda x, params: np.mean(x.astype(np.float64)) - params['alpha'] * params['beta'], tol), ('std', lambda x, params: np.std(x.astype(np.float64)) - np.sqrt(params['alpha'] * params['beta'] ** 2), tol) @@ -110,6 +120,9 @@ def check_with_device(device, dtype): 'name': 'exponential', 'symbol': mx.sym.random.exponential, 'ndop': mx.nd.random.exponential, + 'pdfsymbol': mx.sym.random_pdf_exponential, + 'pdffunc': lambda x, lam: ss.expon.pdf(x, 0, 1/lam), + 'discrete': False, 'params': { 'scale': 1.0/4.0 }, 'inputs': [ ('scale', [ [ 1.0/1.0, 1.0/8.5 ], [ 1.0/2.7 , 1.0/0.5 ] ]) ], 'checks': [ @@ -131,6 +144,9 @@ def check_with_device(device, dtype): 'name': 'poisson', 'symbol': mx.sym.random.poisson, 'ndop': mx.nd.random.poisson, + 'pdfsymbol': mx.sym.random_pdf_poisson, + 'pdffunc': ss.poisson.pmf, + 'discrete': True, 'params': { 'lam': 4.0 }, 'inputs': [ ('lam', [ [ 25.0, 8.5 ], [ 2.7 , 0.5 ] ]) ], 'checks': [ @@ -152,6 +168,9 @@ def check_with_device(device, dtype): 'name': 'neg_binomial', 'symbol': mx.sym.random.negative_binomial, 'ndop': mx.nd.random.negative_binomial, + 'pdfsymbol': mx.sym.random_pdf_negative_binomial, + 'pdffunc': ss.nbinom.pmf, + 'discrete': True, 'params': { 'k': 3, 'p': 0.4 }, 'inputs': [ ('k', [ [ 3, 4 ], [ 5 , 6 ] ]) , ('p', [ [ 0.4 , 0.77 ], [ 0.5, 0.84 ] ]) ], 'checks': [ @@ -173,6 +192,9 @@ def check_with_device(device, dtype): 'name': 'gen_neg_binomial', 'symbol': mx.sym.random.generalized_negative_binomial, 'ndop': mx.nd.random.generalized_negative_binomial, + 'pdfsymbol': mx.sym.random_pdf_generalized_negative_binomial, + 'pdffunc': lambda x, mu, alpha: ss.nbinom.pmf(x, 1.0/alpha, 1.0/(mu*alpha+1.0)), + 'discrete': True, 'params': { 'mu': 2.0, 'alpha': 0.3 }, 'inputs': [ ('mu', [ [ 2.0, 2.5 ], [ 1.3, 1.9 ] ]) , ('alpha', [ [ 1.0, 0.1 ], [ 0.2, 0.5 ] ]) ], 'checks': [ @@ -195,6 +217,9 @@ def check_with_device(device, dtype): # Create enough samples such that we get a meaningful distribution. shape = (500, 500) + # Test pdf on smaller shapes as backward checks will take too long otherwise. + # This must be a subshape of the former one. + pdfshape = (30, 30) for symbdic in symbols: name = symbdic['name'] ndop = symbdic['ndop'] @@ -270,7 +295,7 @@ def check_with_device(device, dtype): # check multi-distribution sampling symbol = symbdic['symbol'] params = { 'shape' : shape, 'dtype' : dtype } - single_param = len(symbdic['inputs']) == 1; + single_param = len(symbdic['inputs']) == 1 v1 = mx.sym.Variable('v1') v2 = mx.sym.Variable('v2') Y = symbol(v1,**params) if single_param else symbol(v1,v2,**params) @@ -290,11 +315,84 @@ def check_with_device(device, dtype): for check_name, check_func, tol in symbdic['checks']: assert np.abs(check_func(samples, params)) < tol, "symbolic test: %s check for `%s` did not pass" % (check_name, name) -@with_seed() + # check pdfs with only a subset of the generated samples + un1 = np.resize(un1, (un1.shape[0], un1.shape[1], pdfshape[0], pdfshape[1])) + print(name) + symbol = symbdic['pdfsymbol'] + pdffunc = symbdic['pdffunc'] + v0 = mx.sym.Variable('v0') + v1 = mx.sym.Variable('v1') + v2 = mx.sym.Variable('v2') + p1 = np.array(symbdic['inputs'][0][1]) + p2 = None if single_param else np.array(symbdic['inputs'][1][1]) + # Move samples away from boundaries of support + if name == 'gamma' or name == 'exponential': + un1 = np.maximum(un1, 1e-1) + if name == 'uniform': + un1 = np.minimum(np.maximum(un1.reshape((un1.shape[0],un1.shape[1],-1)), p1.reshape((p1.shape[0],p1.shape[1],-1))+1e-4), + p2.reshape((p2.shape[0],p2.shape[1],-1))-1e-4).reshape(un1.shape) + for use_log in [False, True]: + test_pdf = symbol(v0, v1, is_log=use_log) if single_param else symbol(v0, v1, v2, is_log=use_log) + forw_atol = 1e-7 if dtype != np.float16 else 1e-3 + forw_rtol = 1e-4 if dtype != np.float16 else 5e-2 + backw_atol = 1e-3 + backw_rtol = 5e-2 + if single_param: + res = pdffunc(un1.reshape((un1.shape[0],un1.shape[1],-1)), + p1.reshape((p1.shape[0],p1.shape[1],-1))).reshape(un1.shape) + if use_log: + res = np.log(res) + check_symbolic_forward(test_pdf, [un1, p1], [res], atol=forw_atol, rtol=forw_rtol, dtype=dtype) + if dtype == np.float64: + grad_nodes = ['v1'] if symbdic['discrete'] else ['v0', 'v1'] + check_numeric_gradient(test_pdf, [un1, p1], grad_nodes=grad_nodes, atol=backw_atol, rtol=backw_rtol, dtype=dtype) + else: + res = pdffunc(un1.reshape((un1.shape[0],un1.shape[1],-1)), + p1.reshape((p1.shape[0],p1.shape[1],-1)), + p2.reshape((p2.shape[0],p2.shape[1],-1))).reshape(un1.shape) + if use_log: + res = np.log(res) + check_symbolic_forward(test_pdf, [un1, p1, p2], [res], atol=forw_atol, rtol=forw_rtol, dtype=dtype) + if dtype == np.float64: + grad_nodes = ['v1', 'v2'] if symbdic['discrete'] else ['v0', 'v1', 'v2'] + check_numeric_gradient(test_pdf, [un1, p1, p2], grad_nodes=grad_nodes, atol=backw_atol, rtol=backw_rtol, dtype=dtype) + +@with_seed(1000) +def test_dirichlet(): + num_classes = 2 + num = 100 + alpha = np.random.uniform(low=0.5, high=10, size=(4, num_classes)) + + samples = [] + results = [] + for a in alpha: + v = ss.dirichlet.rvs(a, size=num) + samples.append(v) + results.append(ss.dirichlet.logpdf(v.transpose(), a)) + samples = np.concatenate(samples, axis=0).reshape((2, 2, num, num_classes)) + results = np.concatenate(results, axis=0).reshape((2, 2, num)) + + alpha = alpha.reshape((2, 2, num_classes)) + + for dtype in [np.float32, np.float64]: + forw_atol = 1e-5 + forw_rtol = 1e-4 + backw_atol = 1e-5 if dtype == np.float64 else 1e-3 + backw_rtol = 1e-4 if dtype == np.float64 else 5e-2 + for use_log in [False, True]: + print("use_log",use_log) + v0 = mx.sym.Variable('v0') + v1 = mx.sym.Variable('v1') + test_pdf = mx.sym.random_pdf_dirichlet(v0, v1, is_log=use_log) + res = results if use_log else np.exp(results) + check_symbolic_forward(test_pdf, [samples, alpha], [res], atol=forw_atol, rtol=forw_rtol, dtype=dtype) + if dtype == 'float64': + check_numeric_gradient(test_pdf, [samples, alpha], numeric_eps=1e-7, atol=backw_atol, rtol=backw_rtol, dtype=dtype) + def test_random(): - check_with_device(mx.context.current_context(), 'float16') - check_with_device(mx.context.current_context(), 'float32') - check_with_device(mx.context.current_context(), 'float64') + check_with_device(mx.context.current_context(), np.float16) + check_with_device(mx.context.current_context(), np.float32) + check_with_device(mx.context.current_context(), np.float64) # Set seed variously based on `start_seed` and `num_init_seeds`, then set seed finally to `final_seed`