From d58f6cb01803f255f8d42c740bc399d083db4640 Mon Sep 17 00:00:00 2001 From: zburning <798672141@qq.com> Date: Mon, 9 Dec 2019 11:23:27 +0800 Subject: [PATCH 01/62] Add micro averaging strategy to pearsonr metric (#16878) Strategy to be used for aggregating across mini-batches. "macro": average the pearsonr scores for each batch. "micro": compute a single pearsonr score across all batches. --- python/mxnet/metric.py | 79 +++++++++++++++++++++++----- tests/python/unittest/test_metric.py | 42 ++++++++++++--- 2 files changed, 102 insertions(+), 19 deletions(-) diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py index 6e2d66cb9d15..d1074c923337 100644 --- a/python/mxnet/metric.py +++ b/python/mxnet/metric.py @@ -590,8 +590,9 @@ def update(self, labels, preds): class _BinaryClassificationMetrics(object): - """Private container class for classification metric statistics. True/false positive and - true/false negative counts are sufficient statistics for various classification metrics. + """Private container class for classification metric statistics. + + True/false positive and true/false negative counts are sufficient statistics for various classification metrics. This class provides the machinery to track those statistics across mini-batches of (label, prediction) pairs. """ @@ -1430,6 +1431,10 @@ class PearsonCorrelation(EvalMetric): label_names : list of str, or None Name of labels that should be used when updating with update_dict. By default include all labels. + average : str, default 'macro' + Strategy to be used for aggregating across mini-batches. + "macro": average the pearsonr scores for each batch. + "micro": compute a single pearsonr score across all batches. Examples -------- @@ -1438,13 +1443,46 @@ class PearsonCorrelation(EvalMetric): >>> pr = mx.metric.PearsonCorrelation() >>> pr.update(labels, predicts) >>> print pr.get() - ('pearson-correlation', 0.42163704544016178) + ('pearsonr', 0.42163704544016178) """ def __init__(self, name='pearsonr', - output_names=None, label_names=None): + output_names=None, label_names=None, average='macro'): + self.average = average super(PearsonCorrelation, self).__init__( name, output_names=output_names, label_names=label_names, has_global_stats=True) + if self.average == 'micro': + self.reset_micro() + + def reset_micro(self): + self._sse_p = 0 + self._mean_p = 0 + self._sse_l = 0 + self._mean_l = 0 + self._pred_nums = 0 + self._label_nums = 0 + self._conv = 0 + + def reset(self): + self.num_inst = 0 + self.sum_metric = 0.0 + self.global_num_inst = 0 + self.global_sum_metric = 0.0 + if self.average == 'micro': + self.reset_micro() + + def update_variance(self, new_values, *aggregate): + #Welford's online algorithm for variance update + count, mean, m_2 = aggregate + count += len(new_values) + delta = new_values - mean + mean += numpy.sum(delta / count) + delta_2 = new_values - mean + m_2 += numpy.sum(delta * delta_2) + return count, mean, m_2 + + def update_cov(self, label, pred): + self._conv = self._conv + numpy.sum((label - self._mean_l) * (pred - self._mean_p)) def update(self, labels, preds): """Updates the internal evaluation result. @@ -1457,17 +1495,34 @@ def update(self, labels, preds): Predicted values. 
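+
+        Notes
+        -----
+        A minimal usage sketch (the values below are illustrative only, not taken
+        from the test suite): with ``average='micro'`` the metric keeps Welford-style
+        running statistics, so the result matches a single Pearson correlation
+        computed over all batches concatenated.
+
+        >>> micro = mx.metric.PearsonCorrelation(average='micro')
+        >>> labels = [mx.nd.array([1., 0., 1.]), mx.nd.array([0., 1., 1.])]
+        >>> preds = [mx.nd.array([0.9, 0.2, 0.8]), mx.nd.array([0.1, 0.7, 0.6])]
+        >>> for l, p in zip(labels, preds):
+        ...     micro.update([l], [p])
+        >>> # equals numpy.corrcoef over the concatenated labels and predictions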
""" labels, preds = check_label_shapes(labels, preds, True) - for label, pred in zip(labels, preds): check_label_shapes(label, pred, False, True) - label = label.asnumpy() - pred = pred.asnumpy() - pearson_corr = numpy.corrcoef(pred.ravel(), label.ravel())[0, 1] - self.sum_metric += pearson_corr - self.global_sum_metric += pearson_corr - self.num_inst += 1 - self.global_num_inst += 1 + label = label.asnumpy().ravel().astype(numpy.float64) + pred = pred.asnumpy().ravel().astype(numpy.float64) + if self.average == 'macro': + pearson_corr = numpy.corrcoef(pred, label)[0, 1] + self.sum_metric += pearson_corr + self.global_sum_metric += pearson_corr + self.num_inst += 1 + self.global_num_inst += 1 + else: + self.global_num_inst += 1 + self.num_inst += 1 + self._label_nums, self._mean_l, self._sse_l = \ + self.update_variance(label, self._label_nums, self._mean_l, self._sse_l) + self.update_cov(label, pred) + self._pred_nums, self._mean_p, self._sse_p = \ + self.update_variance(pred, self._pred_nums, self._mean_p, self._sse_p) + def get(self): + if self.num_inst == 0: + return (self.name, float('nan')) + if self.average == 'macro': + return (self.name, self.sum_metric / self.num_inst) + else: + n = self._label_nums + pearsonr = self._conv / ((n-1) * numpy.sqrt(self._sse_p / (n - 1)) * numpy.sqrt(self._sse_l / (n - 1))) + return (self.name, pearsonr) @register class PCC(EvalMetric): diff --git a/tests/python/unittest/test_metric.py b/tests/python/unittest/test_metric.py index 0ae8aeaa697f..a1e5128d8ac6 100644 --- a/tests/python/unittest/test_metric.py +++ b/tests/python/unittest/test_metric.py @@ -17,6 +17,7 @@ import mxnet as mx import numpy as np +import scipy import json import math from common import with_seed @@ -263,13 +264,40 @@ def test_perplexity(): assert perplexity == perplexity_expected def test_pearsonr(): - pred = mx.nd.array([[0.7, 0.3], [0.1, 0.9], [1., 0]]) - label = mx.nd.array([[0, 1], [1, 0], [1, 0]]) - pearsonr_expected = np.corrcoef(pred.asnumpy().ravel(), label.asnumpy().ravel())[0, 1] - metric = mx.metric.create('pearsonr') - metric.update([label], [pred]) - _, pearsonr = metric.get() - assert pearsonr == pearsonr_expected + pred1 = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6]]) + label1 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) + pearsonr_expected_np = np.corrcoef(pred1.asnumpy().ravel(), label1.asnumpy().ravel())[0, 1] + pearsonr_expected_scipy, _ = scipy.stats.pearsonr(pred1.asnumpy().ravel(), label1.asnumpy().ravel()) + macro_pr = mx.metric.create('pearsonr', average='macro') + micro_pr = mx.metric.create('pearsonr', average='micro') + + assert np.isnan(macro_pr.get()[1]) + assert np.isnan(micro_pr.get()[1]) + + macro_pr.update([label1], [pred1]) + micro_pr.update([label1], [pred1]) + + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) + np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) + np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) + + pred2 = mx.nd.array([[1, 2], [3, 2], [4, 6]]) + label2 = mx.nd.array([[1, 0], [0, 1], [0, 1]]) + # Note that pred12 = pred1 + pred2; label12 = label1 + label2 + pred12 = mx.nd.array([[0.3, 0.7], [0, 1.], [0.4, 0.6],[1, 2], [3, 2], [4, 6]]) + label12 = mx.nd.array([[1, 0], [0, 1], [0, 1], [1, 0], [0, 1], [0, 1]]) + + pearsonr_expected_np = np.corrcoef(pred12.asnumpy().ravel(), label12.asnumpy().ravel())[0, 1] + pearsonr_expected_scipy, _ = scipy.stats.pearsonr(pred12.asnumpy().ravel(), 
label12.asnumpy().ravel()) + + macro_pr.reset() + micro_pr.update([label2], [pred2]) + macro_pr.update([label12], [pred12]) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_np) + np.testing.assert_almost_equal(macro_pr.get()[1], pearsonr_expected_scipy) + np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_np) + np.testing.assert_almost_equal(micro_pr.get()[1], pearsonr_expected_scipy) def cm_batch(cm): # generate a batch yielding a given confusion matrix From b0098647742663a6937c96b5dea2707388e51da6 Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Sun, 8 Dec 2019 21:30:06 -0800 Subject: [PATCH 02/62] large tensor faq doc fix (#16953) --- .../src/pages/api/faq/large_tensor_support.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/static_site/src/pages/api/faq/large_tensor_support.md b/docs/static_site/src/pages/api/faq/large_tensor_support.md index 4c77ede24b02..ab251a78fb0b 100644 --- a/docs/static_site/src/pages/api/faq/large_tensor_support.md +++ b/docs/static_site/src/pages/api/faq/large_tensor_support.md @@ -69,13 +69,16 @@ The following are the cases for large tensor usage where you must specify `dtype * _randint():_ + ```python low_large_value = 2*32* *high_large_value = 2*34 # dtype is explicitly specified since default type is int32 for randint a = nd.random.randint(low_large_value, high_large_value, dtype=np.int64) ``` + * _ravel_multi_index()_ and _unravel_index()_: + ```python x1, y1 = rand_coord_2d((LARGE_X - 100), LARGE_X, 10, SMALL_Y) x2, y2 = rand_coord_2d((LARGE_X - 200), LARGE_X, 9, SMALL_Y) @@ -87,9 +90,11 @@ idx = mx.nd.ravel_multi_index(mx.nd.array(indices_2d, dtype=np.int64), indices_2d = mx.nd.unravel_index(mx.nd.array(idx_numpy, dtype=np.int64), shape=(LARGE_X, SMALL_Y)) ``` + * _argsort()_ and _topk()_ They both return indices which are specified by `dtype=np.int64`. + ```python b = create_2d_tensor(rows=LARGE_X, columns=SMALL_Y) # argsort @@ -97,9 +102,11 @@ s = nd.argsort(b, axis=0, is_ascend=False, dtype=np.int64) # topk k = nd.topk(b, k=10, axis=0, dtype=np.int64) ``` + * _index_copy()_ Again whenever we are passing indices as arguments and using large tensor, the `dtype` of indices must be `int64`. + ```python x = mx.nd.zeros((LARGE_X, SMALL_Y)) t = mx.nd.arange(1, SMALL_Y + 1).reshape((1, SMALL_Y)) @@ -107,9 +114,11 @@ t = mx.nd.arange(1, SMALL_Y + 1).reshape((1, SMALL_Y)) index = mx.nd.array([LARGE_X - 1], dtype="int64") x = mx.nd.contrib.index_copy(x, index, t) ``` + * _one_hot()_ Here again array is used as indices that act as location of bits inside the large vector that need to be activated. + ```python # a is the index array here whose dtype should be int64. a = nd.array([1, (VLARGE_X - 1)], dtype=np.int64) @@ -142,6 +151,7 @@ Not supported: Randint operator is flaky: https://github.com/apache/incubator-mxnet/issues/16172 dgemm operations using BLAS libraries currently don’t support int64. linspace() is not supported. + ```python a = mx.sym.Variable('a') b = mx.sym.Variable('b') @@ -156,7 +166,9 @@ Traceback (most recent call last): py_array('i', provided_arg_shape_data)), OverflowError: signed integer is greater than maximum} ``` + Symbolic reshape is not supported. Please see the following example. + ```python a = mx.sym.Variable('a') b = mx.sym.Variable('b') @@ -174,6 +186,7 @@ OverflowError: signed integer is greater than maximum ## Working DGL Example(dgl.ai) The following is a sample running code for DGL which works with int64 but not with int32. 
+ ```python import mxnet as mx from mxnet import gluon From 3b8fdacd16fd10143e841f202df551cebb89fbf6 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Mon, 9 Dec 2019 13:57:52 +0800 Subject: [PATCH 03/62] skip quantized conv flaky case (#16866) * Fix quantized concat when inputs are mixed int8 and uint8 Change-Id: I4da04bf4502425134a466823fb5f73da2d7a419b * skip flaky test * trigger ci --- tests/python/quantization/test_quantization.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index 723873ac6fb3..6fe33f5ee52b 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -200,8 +200,9 @@ def check_quantized_conv(data_shape, kernel, num_filter, pad, stride, no_bias, q if is_test_for_native_cpu(): print('skipped testing quantized_conv for native cpu since it is not supported yet') return - elif qdtype == 'int8' and is_test_for_mkldnn(): - print('skipped testing quantized_conv for mkldnn cpu int8 since it is not supported yet') + elif is_test_for_mkldnn(): + # (TODO)Xinyu: https://github.com/apache/incubator-mxnet/issues/16830 + print('skipped testing quantized_conv for mkldnn cpu since it is a flaky case') return elif qdtype == 'uint8' and is_test_for_gpu(): print('skipped testing quantized_conv for gpu uint8 since it is not supported yet') From 7736bfdb7843e3cac7ecea4ee1a66e55716142ac Mon Sep 17 00:00:00 2001 From: alicia <32725332+Alicia1529@users.noreply.github.com> Date: Mon, 9 Dec 2019 15:19:12 +0800 Subject: [PATCH 04/62] [Numpy] add op full_like, c++ impl, fix zeros_like, ones_like type inference (#16804) --- python/mxnet/_numpy_op_doc.py | 71 ------- python/mxnet/ndarray/numpy/_op.py | 175 +++++++++++++++++- python/mxnet/numpy/multiarray.py | 173 ++++++++++++++++- python/mxnet/numpy_dispatch_protocol.py | 3 +- python/mxnet/symbol/numpy/_symbol.py | 132 ++++++++++++- src/operator/numpy/np_init_op.cc | 29 +-- src/operator/numpy/np_init_op.cu | 7 +- src/operator/tensor/init_op.h | 46 +++++ .../unittest/test_numpy_interoperability.py | 20 +- tests/python/unittest/test_numpy_op.py | 48 +++++ 10 files changed, 596 insertions(+), 108 deletions(-) diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py index cf991fc8949f..7679d21f0e59 100644 --- a/python/mxnet/_numpy_op_doc.py +++ b/python/mxnet/_numpy_op_doc.py @@ -20,77 +20,6 @@ """Doc placeholder for numpy ops with prefix _np.""" -def _np_ones_like(a): - """ - Return an array of ones with the same shape and type as a given array. - - Parameters - ---------- - a : ndarray - The shape and data-type of `a` define these same attributes of - the returned array. - - Returns - ------- - out : ndarray - Array of ones with the same shape and type as `a`. - - Examples - -------- - >>> x = np.arange(6) - >>> x = x.reshape((2, 3)) - >>> x - array([[0., 1., 2.], - [3., 4., 5.]]) - >>> np.ones_like(x) - array([[1., 1., 1.], - [1., 1., 1.]]) - - >>> y = np.arange(3, dtype=float) - >>> y - array([0., 1., 2.], dtype=float64) - >>> - >>> np.ones_like(y) - array([1., 1., 1.], dtype=float64) - """ - pass - - -def _np_zeros_like(a): - """ - Return an array of zeros with the same shape and type as a given array. - - Parameters - ---------- - a : ndarray - The shape and data-type of `a` define these same attributes of - the returned array. - - Returns - ------- - out : ndarray - Array of zeros with the same shape and type as `a`. 
- - Examples - -------- - >>> x = np.arange(6) - >>> x = x.reshape((2, 3)) - >>> x - array([[0., 1., 2.], - [3., 4., 5.]]) - >>> np.zeros_like(x) - array([[0., 0., 0.], - [0., 0., 0.]]) - >>> y = np.arange(3, dtype=float) - >>> y - array([0., 1., 2.], dtype=float64) - >>> - >>> np.zeros_like(y) - array([0., 0., 0.], dtype=float64) - """ - pass - - def _np_cumsum(a, axis=None, dtype=None, out=None): """ Return the cumulative sum of the elements along a given axis. diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 2dde199fe5ba..0925f93a7740 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -28,7 +28,8 @@ from . import _internal as _npi from ..ndarray import NDArray -__all__ = ['shape', 'zeros', 'ones', 'full', 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', +__all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', @@ -145,7 +146,119 @@ def ones(shape, dtype=_np.float32, order='C', ctx=None): # pylint: disable=rede # pylint: disable=too-many-arguments, redefined-outer-name @set_module('mxnet.ndarray.numpy') -def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): +def zeros_like(a, dtype=None, order='C', ctx=None, out=None): + """ + Return an array of zeros with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray + Array of zeros with the same shape and type as a. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full : Return a new array of given shape filled with value. + + Examples + -------- + >>> x = np.arange(6) + >>> x = x.reshape((2, 3)) + >>> x + array([[0., 1., 2.], + [3., 4., 5.]]) + >>> np.zeros_like(x) + array([[0., 0., 0.], + [0., 0., 0.]]) + >>> np.zeros_like(x, int) + array([[0, 0, 0], + [0, 0, 0]], dtype=int64) + >>> y = np.arange(3, dtype=float) + >>> y + array([0., 1., 2.], dtype=float64) + >>> np.zeros_like(y) + array([0., 0., 0.], dtype=float64) + """ + return _npi.full_like(a, fill_value=0, dtype=dtype, ctx=None, out=None) + + +@set_module('mxnet.ndarray.numpy') +def ones_like(a, dtype=None, order='C', ctx=None, out=None): + """ + Return an array of ones with the same shape and type as a given array. 
+ + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray + Array of ones with the same shape and type as a. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full_like : Return a new array with shape of input filled with value. + ones : Return a new array setting values to one. + + Examples + -------- + >>> x = np.arange(6) + >>> x = x.reshape((2, 3)) + >>> x + array([[0., 1., 2.], + [3., 4., 5.]]) + >>> np.ones_like(x) + array([[1., 1., 1.], + [1., 1., 1.]]) + >>> np.ones_like(x, int) + array([[1, 1, 1], + [1, 1, 1]], dtype=int64) + >>> y = np.arange(3, dtype=float) + >>> y + array([0., 1., 2.], dtype=float64) + >>> np.ones_like(y) + array([1., 1., 1.], dtype=float64) + """ + return _npi.full_like(a, fill_value=1, dtype=dtype, ctx=None, out=None) + + +@set_module('mxnet.ndarray.numpy') +def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments """ Return a new array of given shape and type, filled with `fill_value`. Parameters @@ -200,6 +313,64 @@ def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: enable=too-many-arguments, redefined-outer-name +@set_module('mxnet.ndarray.numpy') +def full_like(a, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return a full array with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + fill_value : scalar + Fill value. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray + Array of `fill_value` with the same shape and type as `a`. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full : Return a new array of given shape filled with value. 
+ + Examples + -------- + >>> x = np.arange(6, dtype=int) + >>> np.full_like(x, 1) + array([1, 1, 1, 1, 1, 1], dtype=int64) + >>> np.full_like(x, 0.1) + array([0, 0, 0, 0, 0, 0], dtype=int64) + >>> np.full_like(x, 0.1, dtype=np.float64) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1], dtype=float64) + >>> np.full_like(x, np.nan, dtype=np.double) + array([nan, nan, nan, nan, nan, nan], dtype=float64) + >>> y = np.arange(6, dtype=np.float32) + >>> np.full_like(y, 0.1) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) + """ + if order != 'C': + raise NotImplementedError + if ctx is None: + ctx = current_context() + return _npi.full_like(a, fill_value=fill_value, dtype=dtype, ctx=ctx, out=out) + + @set_module('mxnet.ndarray.numpy') def arange(start, stop=None, step=1, dtype=None, ctx=None): """Return evenly spaced values within a given interval. diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 701dc06e043d..5aecae4c3997 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -46,8 +46,9 @@ from ..ndarray.numpy import _internal as _npi from ..ndarray.ndarray import _storage_type -__all__ = ['ndarray', 'empty', 'array', 'shape', 'zeros', 'ones', 'full', 'add', 'subtract', 'multiply', 'divide', - 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', +__all__ = ['ndarray', 'empty', 'array', 'shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', + 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'append', 'argsort', @@ -946,7 +947,7 @@ def attach_grad(self, grad_req='write'): # pylint: disable=arguments-differ - 'add': gradient will be added to existing value on every backward. - 'null': do not compute gradient for this NDArray. """ - grad = _mx_np_op.zeros_like(self) # pylint: disable=undefined-variable + grad = _mx_nd_np.zeros_like(self) # pylint: disable=undefined-variable grad_req = _GRAD_REQ_MAP[grad_req] check_call(_LIB.MXAutogradMarkVariables( 1, ctypes.pointer(self.handle), @@ -7425,6 +7426,172 @@ def resize(a, new_shape): return _mx_nd_np.resize(a, new_shape) +@set_module('mxnet.numpy') +def full_like(a, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return a full array with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + fill_value : scalar + Fill value. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray + Array of `fill_value` with the same shape and type as `a`. 
+ + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full : Return a new array of given shape filled with value. + + Examples + -------- + >>> x = np.arange(6, dtype=int) + >>> np.full_like(x, 1) + array([1, 1, 1, 1, 1, 1], dtype=int64) + >>> np.full_like(x, 0.1) + array([0, 0, 0, 0, 0, 0], dtype=int64) + >>> np.full_like(x, 0.1, dtype=np.float64) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1], dtype=float64) + >>> np.full_like(x, np.nan, dtype=np.float64) + array([nan, nan, nan, nan, nan, nan], dtype=float64) + >>> y = np.arange(6, dtype=np.float32) + >>> np.full_like(y, 0.1) + array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) + """ + return _mx_nd_np.full_like(a, fill_value=fill_value, dtype=dtype, order=order, ctx=None, out=None) + + +@set_module('mxnet.numpy') +def zeros_like(a, dtype=None, order='C', ctx=None, out=None): + """ + Return an array of zeros with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray + Array of zeros with the same shape and type as a. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full : Return a new array of given shape filled with value. + + Examples + -------- + >>> x = np.arange(6) + >>> x = x.reshape((2, 3)) + >>> x + array([[0., 1., 2.], + [3., 4., 5.]]) + >>> np.zeros_like(x) + array([[0., 0., 0.], + [0., 0., 0.]]) + >>> np.zeros_like(x, int) + array([[0, 0, 0], + [0, 0, 0]], dtype=int64) + >>> y = np.arange(3, dtype=float) + >>> y + array([0., 1., 2.], dtype=float64) + >>> np.zeros_like(y) + array([0., 0., 0.], dtype=float64) + """ + return _mx_nd_np.full_like(a, fill_value=0, dtype=dtype, order=order, ctx=None, out=None) + + +@set_module('mxnet.numpy') +def ones_like(a, dtype=None, order='C', ctx=None, out=None): + """ + Return an array of ones with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. 
+ + Returns + ------- + out : ndarray + Array of ones with the same shape and type as a. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full_like : Return a new array with shape of input filled with value. + ones : Return a new array setting values to one. + + Examples + -------- + >>> x = np.arange(6) + >>> x = x.reshape((2, 3)) + >>> x + array([[0., 1., 2.], + [3., 4., 5.]]) + >>> np.ones_like(x) + array([[1., 1., 1.], + [1., 1., 1.]]) + >>> np.ones_like(x, int) + array([[1, 1, 1], + [1, 1, 1]], dtype=int64) + >>> y = np.arange(3, dtype=float) + >>> y + array([0., 1., 2.], dtype=float64) + >>> np.ones_like(y) + array([1., 1., 1.], dtype=float64) + """ + return _mx_nd_np.full_like(a, fill_value=1, dtype=dtype, order=order, ctx=None, out=None) + + @set_module('mxnet.numpy') def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None, **kwargs): """ diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 0f926102ba8c..a6bceb51cd01 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -142,6 +142,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'diff', 'resize', 'where', + 'full_like', ] @@ -239,7 +240,7 @@ def _register_array_function(): 'less', 'less_equal', 'greater', - 'greater_equal' + 'greater_equal', ] diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 0385ef85ea03..8303f3246e87 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -36,7 +36,8 @@ except ImportError: from builtins import slice as py_slice -__all__ = ['zeros', 'ones', 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', +__all__ = ['zeros', 'zeros_like', 'ones', 'ones_like', 'full_like', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', @@ -1094,6 +1095,135 @@ def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylin return _npi.full(shape=shape, value=fill_value, ctx=ctx, dtype=dtype, out=out) +@set_module('mxnet.symbol.numpy') +def full_like(a, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return a full array with the same shape and type as a given array. + + Parameters + ---------- + a : _Symbol + The shape and data-type of `a` define these same attributes of + the returned array. + fill_value : scalar + Fill value. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : _Symbol + Array `fill_value` with the same shape and type as `a`. 
+ + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + full : Return a new array of given shape filled with value. + """ + if order != 'C': + raise NotImplementedError + if ctx is None: + ctx = current_context() + return _npi.full_like(a, fill_value=fill_value, ctx=ctx, dtype=dtype, out=out) + + +@set_module('mxnet.symbol.numpy') +def zeros_like(a, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return an array of zeros with the same shape and type as a given array. + + Parameters + ---------- + a : _Symbol + The shape and data-type of `a` define these same attributes of + the returned array. + fill_value : scalar + Fill value. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : _Symbol + Array of zeros with the same shape and type as `a`. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + zeros : Return a new array of given shape filled with zeros. + """ + if order != 'C': + raise NotImplementedError + if ctx is None: + ctx = current_context() + return _npi.full_like(a, fill_value=0, ctx=ctx, dtype=dtype, out=out) + + +@set_module('mxnet.symbol.numpy') +def ones_like(a, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments + """ + Return an array of ones with the same shape and type as a given array. + + Parameters + ---------- + a : _Symbol + The shape and data-type of `a` define these same attributes of + the returned array. + fill_value : scalar + Fill value. + dtype : data-type, optional + Overrides the data type of the result. + Temporarily do not support boolean type. + order : {'C'}, optional + Whether to store multidimensional data in C- or Fortran-contiguous + (row- or column-wise) order in memory. Currently only supports C order. + ctx: to specify the device, e.g. the i-th GPU. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + out : _Symbol + Array of ones with the same shape and type as `a`. + + See Also + -------- + empty_like : Return an empty array with shape and type of input. + ones_like : Return an array of ones with shape and type of input. + zeros_like : Return an array of zeros with shape and type of input. + zeros : Return a new array of given shape filled with zeros. 
+ """ + if order != 'C': + raise NotImplementedError + if ctx is None: + ctx = current_context() + return _npi.full_like(a, fill_value=1, ctx=ctx, dtype=dtype, out=out) + + @set_module('mxnet.symbol.numpy') def identity(n, dtype=None, ctx=None): """ diff --git a/src/operator/numpy/np_init_op.cc b/src/operator/numpy/np_init_op.cc index f2446c85b182..ee52a2939e12 100644 --- a/src/operator/numpy/np_init_op.cc +++ b/src/operator/numpy/np_init_op.cc @@ -34,6 +34,7 @@ namespace op { DMLC_REGISTER_PARAMETER(NumpyEyeParam); DMLC_REGISTER_PARAMETER(IndicesOpParam); DMLC_REGISTER_PARAMETER(LogspaceParam); +DMLC_REGISTER_PARAMETER(FullLikeOpParam); inline bool NumpyIndicesShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shapes, @@ -97,11 +98,12 @@ NNVM_REGISTER_OP(_npi_identity) .set_attr("FCompute", IdentityCompute) .add_arguments(InitOpParam::__FIELDS__()); -NNVM_REGISTER_OP(_np_zeros_like) +NNVM_REGISTER_OP(_npi_full_like) .set_num_inputs(1) .set_num_outputs(1) +.set_attr_parser(ParamParser) .set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FInferType", FullLikeOpType) .set_attr("FIgnoreInputs", [](const NodeAttrs& attrs) { return std::vector(1, 0); @@ -110,28 +112,11 @@ NNVM_REGISTER_OP(_np_zeros_like) [](const NodeAttrs& attrs) { return std::vector{"a"}; }) -.set_attr("FCompute", FillCompute) -.set_attr("FGradient", MakeZeroGradNodes) -.add_argument("a", "NDArray-or-Symbol", - "The shape and data-type of a define these same attributes of the returned array."); - -NNVM_REGISTER_OP(_np_ones_like) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FIgnoreInputs", - [](const NodeAttrs& attrs) { - return std::vector(1, 0); - }) -.set_attr("FListInputNames", - [](const NodeAttrs& attrs) { - return std::vector{"a"}; - }) -.set_attr("FCompute", FillCompute) +.set_attr("FCompute", FullLikeOpCompute) .set_attr("FGradient", MakeZeroGradNodes) .add_argument("a", "NDArray-or-Symbol", - "The shape and data-type of a define these same attributes of the returned array."); + "The shape and data-type of a define these same attributes of the returned array.") +.add_arguments(FullLikeOpParam::__FIELDS__()); NNVM_REGISTER_OP(_npi_arange) .set_num_inputs(0) diff --git a/src/operator/numpy/np_init_op.cu b/src/operator/numpy/np_init_op.cu index b6e2f93e8d3c..5095fe60bdef 100644 --- a/src/operator/numpy/np_init_op.cu +++ b/src/operator/numpy/np_init_op.cu @@ -38,11 +38,8 @@ NNVM_REGISTER_OP(_npi_ones) NNVM_REGISTER_OP(_npi_identity) .set_attr("FCompute", IdentityCompute); -NNVM_REGISTER_OP(_np_zeros_like) -.set_attr("FCompute", FillCompute); - -NNVM_REGISTER_OP(_np_ones_like) -.set_attr("FCompute", FillCompute); +NNVM_REGISTER_OP(_npi_full_like) +.set_attr("FCompute", FullLikeOpCompute); NNVM_REGISTER_OP(_npi_arange) .set_attr("FCompute", RangeCompute); diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h index a0139f7fde2d..c7afdc6baaa3 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -80,6 +80,39 @@ struct InitOpWithoutDTypeParam : public dmlc::Parameter } }; +struct FullLikeOpParam : public dmlc::Parameter { + double fill_value; + std::string ctx; + dmlc::optional dtype; + DMLC_DECLARE_PARAMETER(FullLikeOpParam) { + DMLC_DECLARE_FIELD(fill_value) + .describe("Value with which to fill newly created tensor"); + DMLC_DECLARE_FIELD(ctx) + .set_default("") + .describe("Context of output, in format 
[cpu|gpu|cpu_pinned](n)." + "Only used for imperative calls."); + DMLC_DECLARE_FIELD(dtype).set_default(dmlc::optional()) + MXNET_ADD_ALL_TYPES + .describe("Target data type."); + } +}; + +/*! \brief Infer type of FullLikeOpCompute*/ +template +inline bool FullLikeOpType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const ParamType& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if (param.dtype.has_value()) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + } + return out_attrs->at(0) != -1;; +} + struct EyeParam : public dmlc::Parameter { nnvm::dim_t N; nnvm::dim_t M; @@ -402,6 +435,19 @@ void FillCompute(const nnvm::NodeAttrs& attrs, Fill(ctx.get_stream(), outputs[0], req[0], value); } +/*! \brief Fill output with a scalar integer value */ +template +void FullLikeOpCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + const auto& param = nnvm::get(attrs.parsed); + Fill(ctx.get_stream(), outputs[0], req[0], param.fill_value); +} + /*! \brief Fill output with an arbitrary value */ template void InitFillWithScalarCompute(const nnvm::NodeAttrs &attrs, diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 797bb43824af..53bd8f4d9235 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -18,6 +18,8 @@ # pylint: skip-file from __future__ import absolute_import from __future__ import division +from distutils.version import StrictVersion +import platform import itertools import numpy as _np from mxnet import np @@ -776,9 +778,17 @@ def _add_workload_var(array_pool): def _add_workload_zeros_like(array_pool): OpArgMngr.add_workload('zeros_like', array_pool['4x1']) - OpArgMngr.add_workload('zeros_like', np.random.uniform(size=(3, 3)).astype(np.float64)) - OpArgMngr.add_workload('zeros_like', np.random.uniform(size=(3, 3)).astype(np.float32)) - OpArgMngr.add_workload('zeros_like', np.random.randint(2, size = (3, 3))) + OpArgMngr.add_workload('zeros_like', np.random.uniform(size=(3, 3)).astype(np.float64), np.int64) + OpArgMngr.add_workload('zeros_like', np.random.uniform(size=(3, 3)).astype(np.float32), np.float64) + OpArgMngr.add_workload('zeros_like', np.random.randint(2, size = (3, 3)), int) + + +def _add_workload_full_like(array_pool): + OpArgMngr.add_workload('full_like', array_pool['4x1'], 1) + OpArgMngr.add_workload('full_like', np.random.uniform(low=0, high=100, size=(1,3,4), dtype='float64'), 1) + OpArgMngr.add_workload('full_like', np.random.uniform(low=0, high=100, size=(9,3,1)), 2, np.int64) + OpArgMngr.add_workload('full_like', np.random.uniform(low=0, high=100, size=(9,3)), np.nan) + OpArgMngr.add_workload('full_like', np.random.uniform(low=0, high=100, size=(2,0)), 0, np.float32) def _add_workload_outer(): @@ -1432,6 +1442,7 @@ def _prepare_workloads(): _add_workload_shape() _add_workload_diff() _add_workload_resize() + _add_workload_full_like(array_pool) _prepare_workloads() @@ -1478,6 +1489,9 @@ def check_interoperability(op_list): continue if name in ['shares_memory', 'may_share_memory']: # skip list continue + if name in ['full_like', 'zeros_like', 'ones_like'] and \ + StrictVersion(platform.python_version()) < 
StrictVersion('3.0.0'): + continue print('Dispatch test:', name) workloads = OpArgMngr.get_workloads(name) assert workloads is not None, 'Workloads for operator `{}` has not been ' \ diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 38633760c400..5c14f0d6c701 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -17,6 +17,7 @@ # pylint: skip-file from __future__ import absolute_import +from distutils.version import StrictVersion import sys import unittest import itertools @@ -3661,6 +3662,53 @@ def g(data): assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) +@with_seed() +@use_np +def test_np_full_like(): + class TestFullLike(HybridBlock): + def __init__(self, fill_value, dtype, ctx): + super(TestFullLike, self).__init__() + self._fill_value = fill_value + self._dtype = dtype + self._ctx = ctx + + def hybrid_forward(self, F, x, *args, **kwargs): + return F.np.full_like(x, self._fill_value, dtype=self._dtype, ctx=self._ctx) + + if StrictVersion(platform.python_version()) < StrictVersion('3.0.0'): + return + + dtypes = ['float64', 'float32', 'float16', 'int64', 'int32', 'int8'] + shapes = [ + (), + (1,), + (4, 3), + (4, 5), + (2, 1), + (6, 5, 6), + (4, 2, 1, 2), + (5, 1, 3, 3), + (3, 3, 1, 0), + ] + # numpy.full_like operator in py2 cannot handle shape like (5, 0, 3) properly + fill_values = [0, 1, 2, 3, 4, 5, 6] + flags = [True, False] + for fill_value, dtype, shape, hybridize in itertools.product( + fill_values, dtypes, shapes, flags): + param_dtype= _np.random.choice(dtypes) + a = np.random.uniform(low=0, high=100, size=shape, dtype='float64').astype(dtype) + test = TestFullLike(fill_value, param_dtype, npx.current_context()) + expected_ret = _np.full_like(a.asnumpy(), fill_value=fill_value, dtype=param_dtype) + if hybridize: + test.hybridize() + ret = test(a) + assert_almost_equal(ret.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5) + + # check imperative again + ret = np.full_like(a, fill_value, param_dtype) + assert_almost_equal(ret.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5) + + @with_seed() @use_np def test_np_roll(): From 71b62725a0af8ffa74a76c47589b2473d278db30 Mon Sep 17 00:00:00 2001 From: dw_sjtu <46704444+sjtuWangDing@users.noreply.github.com> Date: Mon, 9 Dec 2019 15:54:21 +0800 Subject: [PATCH 05/62] use identity_with_cast (#16913) change the doc move shape and dtype inference func to .cc file fix format fix bug in test fix bug in MXNET_LAPACK_FSIG_GESV fix format fix undefined #gesv --- python/mxnet/ndarray/numpy/linalg.py | 56 +- python/mxnet/numpy/linalg.py | 56 +- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/linalg.py | 55 +- src/operator/c_lapack_api.cc | 10 + src/operator/c_lapack_api.h | 39 +- src/operator/numpy/linalg/np_solve-inl.h | 496 ++++++++++++++++++ src/operator/numpy/linalg/np_solve.cc | 116 ++++ src/operator/numpy/linalg/np_solve.cu | 43 ++ .../unittest/test_numpy_interoperability.py | 22 + tests/python/unittest/test_numpy_op.py | 91 ++++ 11 files changed, 980 insertions(+), 5 deletions(-) create mode 100644 src/operator/numpy/linalg/np_solve-inl.h create mode 100644 src/operator/numpy/linalg/np_solve.cc create mode 100644 src/operator/numpy/linalg/np_solve.cu diff --git a/python/mxnet/ndarray/numpy/linalg.py b/python/mxnet/ndarray/numpy/linalg.py index 74ba41f22979..a85c6324f685 100644 --- a/python/mxnet/ndarray/numpy/linalg.py +++ b/python/mxnet/ndarray/numpy/linalg.py @@ -21,7 +21,7 @@ from . 
import _op as _mx_nd_np from . import _internal as _npi -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve'] def norm(x, ord=None, axis=None, keepdims=False): @@ -352,3 +352,57 @@ def slogdet(a): (1., -1151.2925464970228) """ return _npi.slogdet(a) + + +def solve(a, b): + r""" + Solve a linear matrix equation, or system of linear scalar equations. + + Computes the "exact" solution, `x`, of the well-determined, i.e., full + rank, linear matrix equation `ax = b`. + + Parameters + ---------- + a : (..., M, M) ndarray + Coefficient matrix. + b : {(..., M,), (..., M, K)}, ndarray + Ordinate or "dependent variable" values. + + Returns + ------- + x : {(..., M,), (..., M, K)} ndarray + Solution to the system a x = b. Returned shape is identical to `b`. + + Raises + ------ + MXNetError + If `a` is singular or not square. + + Notes + ----- + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The solutions are computed using LAPACK routine ``_gesv``. + + `a` must be square and of full-rank, i.e., all rows (or, equivalently, + columns) must be linearly independent; if either is not true, use + `lstsq` for the least-squares best "solution" of the + system/equation. + + Examples + -------- + Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``: + + >>> a = np.array([[3,1], [1,2]]) + >>> b = np.array([9,8]) + >>> x = np.linalg.solve(a, b) + >>> x + array([2., 3.]) + + Check that the solution is correct: + + >>> np.allclose(np.dot(a, x), b) + True + """ + return _npi.solve(a, b) diff --git a/python/mxnet/numpy/linalg.py b/python/mxnet/numpy/linalg.py index fbe3631eb6e6..33d636b7044c 100644 --- a/python/mxnet/numpy/linalg.py +++ b/python/mxnet/numpy/linalg.py @@ -20,7 +20,7 @@ from __future__ import absolute_import from ..ndarray import numpy as _mx_nd_np -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve'] def norm(x, ord=None, axis=None, keepdims=False): @@ -370,3 +370,57 @@ def slogdet(a): (1., -1151.2925464970228) """ return _mx_nd_np.linalg.slogdet(a) + + +def solve(a, b): + r""" + Solve a linear matrix equation, or system of linear scalar equations. + + Computes the "exact" solution, `x`, of the well-determined, i.e., full + rank, linear matrix equation `ax = b`. + + Parameters + ---------- + a : (..., M, M) ndarray + Coefficient matrix. + b : {(..., M,), (..., M, K)}, ndarray + Ordinate or "dependent variable" values. + + Returns + ------- + x : {(..., M,), (..., M, K)} ndarray + Solution to the system a x = b. Returned shape is identical to `b`. + + Raises + ------ + MXNetError + If `a` is singular or not square. + + Notes + ----- + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The solutions are computed using LAPACK routine ``_gesv``. + + `a` must be square and of full-rank, i.e., all rows (or, equivalently, + columns) must be linearly independent; if either is not true, use + `lstsq` for the least-squares best "solution" of the + system/equation. 
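+
+    For stacked inputs the equation is solved matrix-wise. A small sketch of the
+    batched case (values chosen only for illustration): a stack ``a`` of shape
+    (2, 2, 2) together with ``b`` of shape (2, 2, 1) yields a solution of shape
+    (2, 2, 1), matching ``b``:
+
+    >>> a = np.array([[[1., 0.], [0., 2.]], [[3., 0.], [0., 4.]]])  # shape (2, 2, 2)
+    >>> b = np.array([[[1.], [2.]], [[3.], [4.]]])                  # shape (2, 2, 1)
+    >>> np.linalg.solve(a, b).shape
+    (2, 2, 1)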
+ + Examples + -------- + Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``: + + >>> a = np.array([[3,1], [1,2]]) + >>> b = np.array([9,8]) + >>> x = np.linalg.solve(a, b) + >>> x + array([2., 3.]) + + Check that the solution is correct: + + >>> np.allclose(np.dot(a, x), b) + True + """ + return _mx_nd_np.linalg.solve(a, b) diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index a6bceb51cd01..c8b11d85b000 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -131,6 +131,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'linalg.norm', 'linalg.cholesky', 'linalg.inv', + 'linalg.solve', 'shape', 'trace', 'tril', diff --git a/python/mxnet/symbol/numpy/linalg.py b/python/mxnet/symbol/numpy/linalg.py index cf33777b2637..1aaf4b990e31 100644 --- a/python/mxnet/symbol/numpy/linalg.py +++ b/python/mxnet/symbol/numpy/linalg.py @@ -22,7 +22,7 @@ from . import _op as _mx_sym_np from . import _internal as _npi -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve'] def norm(x, ord=None, axis=None, keepdims=False): @@ -339,3 +339,56 @@ def slogdet(a): (1., -1151.2925464970228) """ return _npi.slogdet(a) + +def solve(a, b): + r""" + Solve a linear matrix equation, or system of linear scalar equations. + + Computes the "exact" solution, `x`, of the well-determined, i.e., full + rank, linear matrix equation `ax = b`. + + Parameters + ---------- + a : (..., M, M) ndarray + Coefficient matrix. + b : {(..., M,), (..., M, K)}, ndarray + Ordinate or "dependent variable" values. + + Returns + ------- + x : {(..., M,), (..., M, K)} ndarray + Solution to the system a x = b. Returned shape is identical to `b`. + + Raises + ------ + MXNetError + If `a` is singular or not square. + + Notes + ----- + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The solutions are computed using LAPACK routine ``_gesv``. + + `a` must be square and of full-rank, i.e., all rows (or, equivalently, + columns) must be linearly independent; if either is not true, use + `lstsq` for the least-squares best "solution" of the + system/equation. + + Examples + -------- + Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``: + + >>> a = np.array([[3,1], [1,2]]) + >>> b = np.array([9,8]) + >>> x = np.linalg.solve(a, b) + >>> x + array([2., 3.]) + + Check that the solution is correct: + + >>> np.allclose(np.dot(a, x), b) + True + """ + return _npi.solve(a, b) diff --git a/src/operator/c_lapack_api.cc b/src/operator/c_lapack_api.cc index e7a97848700d..73b6138df5ea 100644 --- a/src/operator/c_lapack_api.cc +++ b/src/operator/c_lapack_api.cc @@ -71,6 +71,13 @@ return 1; \ } + #define MXNET_LAPACK_CWRAPPER7(func, dtype) \ + int MXNET_LAPACK_##func(int matrix_order, int n, int nrhs, dtype *a, \ + int lda, int *ipiv, dtype *b, int ldb) { \ + LOG(FATAL) << "MXNet build without lapack. Function " << #func << " is not available."; \ + return 1; \ + } + #define MXNET_LAPACK_UNAVAILABLE(func) \ int mxnet_lapack_##func(...) { \ LOG(FATAL) << "MXNet build without lapack. 
Function " << #func << " is not available."; \ @@ -101,4 +108,7 @@ MXNET_LAPACK_CWRAPPER6(sgesvd, float) MXNET_LAPACK_CWRAPPER6(dgesvd, double) + MXNET_LAPACK_CWRAPPER7(sgesv, float) + MXNET_LAPACK_CWRAPPER7(dgesv, double) + #endif // MSHADOW_USE_MKL == 0 diff --git a/src/operator/c_lapack_api.h b/src/operator/c_lapack_api.h index a47bbd0b5857..8a7cbc067feb 100644 --- a/src/operator/c_lapack_api.h +++ b/src/operator/c_lapack_api.h @@ -150,6 +150,19 @@ extern "C" { MXNET_LAPACK_FSIG_GETRI(sgetri, float) MXNET_LAPACK_FSIG_GETRI(dgetri, double) + + #ifdef __ANDROID__ + #define MXNET_LAPACK_FSIG_GESV(func, dtype) \ + int func##_(int *n, int *nrhs, dtype *a, int *lda, \ + int *ipiv, dtype *b, int *ldb, int *info); + #else + #define MXNET_LAPACK_FSIG_GESV(func, dtype) \ + void func##_(int *n, int *nrhs, dtype *a, int *lda, \ + int *ipiv, dtype *b, int *ldb, int *info); + #endif + + MXNET_LAPACK_FSIG_GESV(sgesv, float) + MXNET_LAPACK_FSIG_GESV(dgesv, double) } #endif // MSHADOW_USE_MKL == 0 @@ -197,6 +210,8 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) { #define MXNET_LAPACK_dpotri LAPACKE_dpotri #define mxnet_lapack_sposv LAPACKE_sposv #define mxnet_lapack_dposv LAPACKE_dposv + #define MXNET_LAPACK_dgesv LAPACKE_dgesv + #define MXNET_LAPACK_sgesv LAPACKE_sgesv // The following functions differ in signature from the // MXNET_LAPACK-signature and have to be wrapped. @@ -440,9 +455,23 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) { MXNET_LAPACK_CWRAP_GETRI(s, float) MXNET_LAPACK_CWRAP_GETRI(d, double) -#else - + #define MXNET_LAPACK_CWRAP_GESV(prefix, dtype) \ + inline int MXNET_LAPACK_##prefix##gesv(int matrix_layout, \ + int n, int nrhs, dtype *a, int lda, \ + int *ipiv, dtype *b, int ldb) { \ + if (matrix_layout == MXNET_LAPACK_ROW_MAJOR) { \ + CHECK(false) << "MXNET_LAPACK_" << #prefix << "gesv implemented for col-major layout only"; \ + return 1; \ + } else { \ + int info(0); \ + prefix##gesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, &info); \ + return info; \ + } \ + } + MXNET_LAPACK_CWRAP_GESV(s, float) + MXNET_LAPACK_CWRAP_GESV(d, double) +#else #define MXNET_LAPACK_ROW_MAJOR 101 #define MXNET_LAPACK_COL_MAJOR 102 @@ -473,6 +502,9 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) { int ldut, dtype* s, dtype* v, int ldv, \ dtype* work, int lwork); + #define MXNET_LAPACK_CWRAPPER7(func, dtype) \ + int MXNET_LAPACK_##func(int matrix_order, int n, int nrhs, dtype *a, \ + int lda, int *ipiv, dtype *b, int ldb); \ #define MXNET_LAPACK_UNAVAILABLE(func) \ int mxnet_lapack_##func(...); @@ -501,6 +533,9 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) { MXNET_LAPACK_CWRAPPER6(sgesvd, float) MXNET_LAPACK_CWRAPPER6(dgesvd, double) + MXNET_LAPACK_CWRAPPER7(sgesv, float) + MXNET_LAPACK_CWRAPPER7(dgesv, double) + #undef MXNET_LAPACK_CWRAPPER1 #undef MXNET_LAPACK_CWRAPPER2 #undef MXNET_LAPACK_CWRAPPER3 diff --git a/src/operator/numpy/linalg/np_solve-inl.h b/src/operator/numpy/linalg/np_solve-inl.h new file mode 100644 index 000000000000..03134f8b5688 --- /dev/null +++ b/src/operator/numpy/linalg/np_solve-inl.h @@ -0,0 +1,496 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_solve-inl.h + * \brief Placeholder for solve linear equation + */ +#ifndef MXNET_OPERATOR_NUMPY_LINALG_NP_SOLVE_INL_H_ +#define MXNET_OPERATOR_NUMPY_LINALG_NP_SOLVE_INL_H_ + +#include +#include +#include "../../tensor/la_op.h" +#include "../../tensor/la_op-inl.h" +#include "../../linalg.h" +#include "../../operator_common.h" +#include "../../mshadow_op.h" + +namespace mxnet { +namespace op { + +using namespace mshadow; + +template +void linalg_solve(const Tensor& A, + const Tensor& X, + const Tensor& ipiv, + Stream *s); + +template +void linalg_batch_solve(const Tensor& A, + const Tensor& X, + const Tensor& ipiv, + const mxnet::OpContext& ctx); + +template inline +int linalg_dn_getrf_workspace_query(const Tensor& A, + Stream *s); + +template inline +void linalg_dn_getrf(const Tensor& A, + const Tensor& ipiv, + Stream *s); + +template inline +void linalg_dn_getrs(const Tensor& A, + const Tensor& X, + const Tensor& ipiv, + Stream *s); + +// kernel for transpose +struct SolveTypeTransposeHelper { + template + MSHADOW_XINLINE static void Map(int i, const InDType *in_data, OutDType *out_data, + const int ncol1, const int ncol2, const int step) { + int idx = i / step, row = (i % step) / ncol1, col = (i % step) % ncol1; + out_data[idx * step + row + col * ncol2] = static_cast(in_data[i]); + } +}; + +template +inline void check_solve(const Tensor& A, + const Tensor& B) { + CHECK_EQ(A.size(0), A.size(1)) << "A must bu square matrix"; + CHECK_EQ(A.size(1), B.size(1)) << "A, B have incompatible sizes"; +} + +#define LINALG_CPU_SOLVE(fname, DType) \ +template<> inline \ +void linalg_solve(const Tensor& A, \ + const Tensor& X, \ + const Tensor& ipiv, \ + Stream *s) { \ + check_solve(A, X); \ + const int N = X.size(1), nrhs = X.size(0); \ + const int lda = (N == 0 ? 1 : N), ldx = (N == 0 ? 1 : N); \ + int res(MXNET_LAPACK_##fname(MXNET_LAPACK_COL_MAJOR, N, nrhs, \ + A.dptr_, lda, ipiv.dptr_, X.dptr_, ldx)); \ + CHECK_LE(res, 0) << #fname << ": U(" << res << ", " << res \ + << ") is exactly zero. The factorization has been completed," \ + << "but the factor U is exactly singular, so the solution could not be computed."; \ + CHECK_GE(res, 0) << #fname << ": the " << -res \ + << "-th argument had an illegal value"; \ +} +LINALG_CPU_SOLVE(sgesv, float) +LINALG_CPU_SOLVE(dgesv, double) + +#ifdef __CUDACC__ + +#if CUDA_VERSION >= 8000 + +#define LINALG_GPU_DN_GETRF_WORKSPACE_QUERY(fname, DType) \ +template<> inline \ +int linalg_dn_getrf_workspace_query(const Tensor& A, \ + Stream *s) { \ + using namespace mxnet; \ + using mshadow::gpu; \ + int lwork(0); \ + CUSOLVER_CALL(cusolver##fname##_bufferSize(Stream::GetSolverHandle(s), \ + A.size(1), A.size(1), A.dptr_, \ + (A.size(1) == 0 ? 
1 : A.size(1)), &lwork)); \ + return lwork; \ +} + +#define LINALG_GPU_DN_GETRF(fname, DType) \ +template<> inline \ +void linalg_dn_getrf(const Tensor& A, \ + const Tensor& ipiv, \ + Stream *s) { \ + using namespace mxnet; \ + using mshadow::gpu; \ + Storage::Handle info = Storage::Get()->Alloc(sizeof(int), Context::GPU()); \ + const int lwork = linalg_dn_getrf_workspace_query(A, s); \ + Storage::Handle workspace = Storage::Get()->Alloc(sizeof(DType) * lwork, Context::GPU()); \ + CUSOLVER_CALL(cusolver##fname(Stream::GetSolverHandle(s), \ + A.size(1), A.size(1), A.dptr_, (A.size(1) == 0 ? 1 : A.size(1)), \ + static_cast(workspace.dptr), ipiv.dptr_, \ + static_cast(info.dptr))); \ + Storage::Get()->Free(info); \ + Storage::Get()->Free(workspace); \ +} + +#define LINALG_GPU_DN_GETRS(fname, DType) \ +template<> inline \ +void linalg_dn_getrs(const Tensor& A, \ + const Tensor& X, \ + const Tensor& ipiv, \ + Stream *s) { \ + using namespace mxnet; \ + using mshadow::gpu; \ + const int N = A.size(0), nrhs = X.size(0); \ + const int lda = (A.size(1) == 0 ? 1 : A.size(1)), ldx = (X.size(1) == 0 ? 1 : X.size(1)); \ + Storage::Handle info = Storage::Get()->Alloc(sizeof(int), Context::GPU()); \ + CUSOLVER_CALL(cusolver##fname(Stream::GetSolverHandle(s), \ + CUBLAS_OP_N, N, nrhs, \ + A.dptr_, lda, ipiv.dptr_, X.dptr_, ldx, \ + static_cast(info.dptr))); \ + Storage::Get()->Free(info); \ +} + +#define LINALG_GPU_SOLVE(DType) \ +template<> inline \ +void linalg_solve(const Tensor& A, \ + const Tensor& X, \ + const Tensor& ipiv, \ + Stream *s) { \ + using namespace mxnet; \ + using mshadow::gpu; \ + CHECK_NOTNULL(s); \ + check_solve(A, X); \ + linalg_dn_getrf(A, ipiv, s); \ + linalg_dn_getrs(A, X, ipiv, s); \ +} + +#else // CUDA_VERSION >= 8000 + +#define LINALG_GPU_DN_GETRF_WORKSPACE_QUERY(fname, DType) \ +template<> inline \ +int linalg_dn_getrf_workspace_query(const Tensor& A, \ + Stream *s) { \ + LOG(FATAL) << "Dn_getrf_workspace_query requires CUDA version >= 8.0!"; \ +} + +#define LINALG_GPU_DN_GETRF(fname, DType) \ +template<> inline \ +void linalg_dn_getrf(const Tensor& A, \ + const Tensor& ipiv, \ + Stream *s) { \ + LOG(FATAL) << "Dn_getrf requires CUDA version >= 8.0!"; \ +} + +#define LINALG_GPU_DN_GETRS(fname, DType) \ +template<> inline \ +void linalg_dn_getrs(const Tensor& A, \ + const Tensor& X, \ + const Tensor& ipiv, \ + Stream *s) { \ + LOG(FATAL) << "Dn_getrs requires CUDA version >= 8.0!"; \ +} + +#define LINALG_GPU_SOLVE(DType) \ +template<> inline \ +void linalg_solve(const Tensor& A, \ + const Tensor& X, \ + const Tensor& ipiv, \ + Stream *s) { \ + LOG(FATAL) << "gpu solve requires CUDA version >= 8.0!"; \ +} + +#endif // CUDA_VERSION >= 8000 + +LINALG_GPU_DN_GETRF_WORKSPACE_QUERY(DnSgetrf, float) +LINALG_GPU_DN_GETRF_WORKSPACE_QUERY(DnDgetrf, double) + +LINALG_GPU_DN_GETRF(DnSgetrf, float) +LINALG_GPU_DN_GETRF(DnDgetrf, double) + +LINALG_GPU_DN_GETRS(DnSgetrs, float) +LINALG_GPU_DN_GETRS(DnDgetrs, double) + +LINALG_GPU_SOLVE(float) +LINALG_GPU_SOLVE(double) + +#endif // __CUDACC__ + +#define LINALG_XPU_BATCH_SOLVE(xpu, DType) \ +template<> inline \ +void linalg_batch_solve(const Tensor& A, \ + const Tensor& X, \ + const Tensor& ipiv, \ + const mxnet::OpContext& ctx) { \ + Stream *s = ctx.get_stream(); \ + for (index_t i = 0; i < A.size(0); ++i) { \ + linalg_solve(A[i], X[i], ipiv[i], s); \ + } \ +} +LINALG_XPU_BATCH_SOLVE(cpu, float) +LINALG_XPU_BATCH_SOLVE(cpu, double) + +#ifdef __CUDACC__ + +LINALG_XPU_BATCH_SOLVE(gpu, float) +LINALG_XPU_BATCH_SOLVE(gpu, double) + +#endif // 
__CUDACC__ + +struct solve { + template + static void op(const Tensor& A, + const Tensor& X, + const Tensor& ipiv, + const OpContext& ctx, + const nnvm::NodeAttrs& attrs) { + linalg_batch_solve(A, X, ipiv, ctx); // ipiv for work_space in Lapacke_#gesv + } +}; + +template +void LaOpForwardSolve(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + CHECK_EQ(inputs.size(), inum); + CHECK_EQ(outputs.size(), onum); + CHECK_EQ(req.size(), onum); + MSHADOW_SGL_DBL_TYPE_SWITCH(outputs[0].type_flag_, OType, { + mshadow::Stream *s = ctx.get_stream(); + const mxnet::TBlob& a_tblob = inputs[0]; + const mxnet::TBlob& b_tblob = inputs[1]; + const mxnet::TBlob& x_tblob = outputs[0]; + const mxnet::TShape& a_shape = a_tblob.shape_; + mxnet::TShape b_shape(a_shape.ndim(), 1); + for (int i = 0; i < a_shape.ndim() - 1; ++i) { b_shape[i] = b_tblob.shape_[i]; } + if (b_tblob.shape_.ndim() == a_shape.ndim()) { + b_shape[a_shape.ndim() - 1] = b_tblob.shape_[a_shape.ndim() - 1]; + } + const int ndim = a_shape.ndim(); + mxnet::TShape ipiv_shape(a_shape); + ipiv_shape[ndim - 1] = 1; + if (0 == a_shape[ndim - 1] || 0 == a_shape[ndim - 2] || + 0 == b_shape[ndim - 1] || 0 == b_shape[ndim - 2]) { return; } + + const int work_space_size = + sizeof(OType) * (a_shape.Size() + b_shape.Size()) + sizeof(int) * ipiv_shape.Size(); + Tensor work_buffer = + ctx.requested[0].get_space_typed(Shape1(work_space_size), s); + MSHADOW_TYPE_SWITCH(a_tblob.type_flag_, AType, { + // cast type and transpose + mxnet_op::Kernel::Launch( + s, a_shape.Size(), + a_tblob.dptr(), + reinterpret_cast(work_buffer.dptr_), + a_shape[ndim - 1], a_shape[ndim - 2], a_shape[ndim - 1] * a_shape[ndim - 2]); + }); + MSHADOW_TYPE_SWITCH(b_tblob.type_flag_, BType, { + // cast type and transpose + mxnet_op::Kernel::Launch( + s, b_shape.Size(), + b_tblob.dptr(), + reinterpret_cast(work_buffer.dptr_) + a_shape.Size(), + b_shape[ndim - 1], b_shape[ndim - 2], b_shape[ndim - 1] * b_shape[ndim - 2]); + }); + // transpose shape + int temp = b_shape[ndim - 1]; + b_shape[ndim - 1] = b_shape[ndim - 2]; + b_shape[ndim - 2] = temp; + mxnet::TBlob a_transpose_tblob(reinterpret_cast(work_buffer.dptr_), + a_shape, a_tblob.dev_mask(), a_tblob.dev_id()); + mxnet::TBlob b_transpose_tblob(reinterpret_cast(work_buffer.dptr_) + a_shape.Size(), + b_shape, b_tblob.dev_mask(), b_tblob.dev_id()); + mxnet::TBlob ipiv_tblob(reinterpret_cast( + reinterpret_cast(work_buffer.dptr_) + a_shape.Size() + b_shape.Size()), + ipiv_shape, b_tblob.dev_mask(), b_tblob.dev_id()); + + laop::op(a_transpose_tblob.FlatToKD(s), + b_transpose_tblob.FlatToKD(s), + ipiv_tblob.FlatToKD(s), + ctx, + attrs); + // X = transpose(B) + mxnet_op::Kernel::Launch( + s, b_shape.Size(), + b_transpose_tblob.dptr(), + x_tblob.dptr(), + b_shape[ndim - 1], b_shape[ndim - 2], b_shape[ndim - 1] * b_shape[ndim - 2]); + }); +} + +// X = (inv_A) * B +struct solve_backward { + template + static void op(const Tensor& dX, + const Tensor& inv_A, + const Tensor& B, + const Tensor& X, + const Tensor& dA, + const Tensor& dB, + const OpContext& ctx, + const nnvm::NodeAttrs& attrs) { + // (1) calcualte dB = trans(inv(A)) * dX + // (2) calcualte dA = dB * trans(X) + Stream *s = ctx.get_stream(); + gemm2::op(inv_A, dX, dB, DType(1), true, false, s); + gemm2::op(dB, X, dA, DType(-1), false, true, s); + } +}; + +template +inline void batch_inverse(const Tensor& inv_A, + const Tensor& LU, + const Tensor& pivot, + const 
mxnet::OpContext& ctx); + +#define CPU_BATCH_INVERSE(xpu, DType) \ +template<> inline \ +void batch_inverse(const Tensor& inv_A, \ + const Tensor& LU, \ + const Tensor& pivot, \ + const mxnet::OpContext& ctx) { \ + Stream *s = ctx.get_stream(); \ + for (index_t i = 0; i < inv_A.size(0); ++i) { \ + linalg_getrf(inv_A[i], pivot[i], true, s); \ + const Tensor work( \ + LU[i].dptr_, Shape1(LU.size(1) * LU.size(2))); \ + linalg_getri(inv_A[i], pivot[i], work, s); \ + } \ +} +CPU_BATCH_INVERSE(cpu, float) +CPU_BATCH_INVERSE(cpu, double) + +#ifdef __CUDACC__ + +// GETRF and GETRI only available with cuda8 or higher. +#if CUDA_VERSION >= 8000 + +#define GPU_BATCH_INVERSE(xpu, DType) \ +template<> inline \ +void batch_inverse(const Tensor& inv_A, \ + const Tensor& LU, \ + const Tensor& pivot, \ + const mxnet::OpContext& ctx) { \ + Stream *s = ctx.get_stream(); \ + if (LU.dptr_ != inv_A.dptr_) Copy(LU, inv_A, s); \ + linalg_batch_getrf(LU, pivot, true, s); \ + linalg_batch_getri(inv_A, LU, pivot, s); \ +} + +#else // CUDA_VERSION >= 8000 + +#define GPU_BATCH_INVERSE(xpu, DType) \ +template<> inline \ +void batch_inverse(const Tensor& inv_A, \ + const Tensor& LU, \ + const Tensor& pivot, \ + const mxnet::OpContext& ctx) { \ + LOG(FATAL) << "gpu matrix inverse requires CUDA version >= 8.0!"; \ +} + +#endif // CUDA_VERSION >= 8000 + +GPU_BATCH_INVERSE(gpu, float) +GPU_BATCH_INVERSE(gpu, double) + +#endif // __CUDACC__ + +template +void LaOpBackwardSolve(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + CHECK_EQ(inputs.size(), inum); + CHECK_EQ(outputs.size(), onum); + CHECK_EQ(req.size(), onum); + MSHADOW_SGL_DBL_TYPE_SWITCH(outputs[0].type_flag_, OType, { + mshadow::Stream *s = ctx.get_stream(); + const mxnet::TBlob& a_tblob = inputs[1]; + const mxnet::TBlob& b_tblob = inputs[2]; + const mxnet::TBlob& x_tblob = inputs[3]; + + const mxnet::TShape& a_shape = a_tblob.shape_; + mxnet::TShape b_shape(a_shape.ndim(), 1); + for (int i = 0; i < a_shape.ndim() - 1; ++i) { b_shape[i] = b_tblob.shape_[i]; } + if (b_tblob.shape_.ndim() == a_shape.ndim()) { + b_shape[a_shape.ndim() - 1] = b_tblob.shape_[a_shape.ndim() - 1]; + } + const int ndim = a_shape.ndim(); + const int N = a_shape[ndim - 1]; + if (0 == a_shape[ndim - 1] || 0 == a_shape[ndim - 2] || + 0 == b_shape[ndim - 1] || 0 == b_shape[ndim - 2]) { return; } + + const Tensor A = a_tblob.FlatToKD(s); + int work_space_size = sizeof(OType) * a_shape.Size(); // for inverse(A) + work_space_size += sizeof(OType) * a_shape.Size(); // for getri work space + work_space_size += 2 * sizeof(OType) * b_shape.Size(); // for B and X + work_space_size += sizeof(int) * A.size(0) * N; // for pivot work space + Tensor work_buffer = + ctx.requested[0].get_space_typed(Shape1(work_space_size), s); + + MSHADOW_TYPE_SWITCH(a_tblob.type_flag_, AType, { + mxnet_op::Kernel::Launch( + s, a_shape.Size(), + reinterpret_cast(work_buffer.dptr_), + a_tblob.dptr()); + }); + mxnet::TBlob a_inverse_tblob(reinterpret_cast(work_buffer.dptr_), + a_shape, a_tblob.dev_mask(), a_tblob.dev_id()); + const Tensor inv_A = a_inverse_tblob.FlatToKD(s); + + mxnet::TBlob lu_tblob(reinterpret_cast(work_buffer.dptr_) + a_shape.Size(), + inv_A.shape_, a_tblob.dev_mask(), a_tblob.dev_id()); + const Tensor LU = lu_tblob.FlatToKD(s); + + MSHADOW_TYPE_SWITCH(b_tblob.type_flag_, BType, { + mxnet_op::Kernel::Launch( + s, b_shape.Size(), + reinterpret_cast(work_buffer.dptr_) + 2 * a_shape.Size(), 
+ b_tblob.dptr()); + }); + mxnet::TBlob b_cp_tblob(reinterpret_cast(work_buffer.dptr_) + 2 * a_shape.Size(), + b_shape, b_tblob.dev_mask(), b_tblob.dev_id()); + const Tensor B = b_cp_tblob.FlatToKD(s); + + MSHADOW_TYPE_SWITCH(x_tblob.type_flag_, XType, { + mxnet_op::Kernel::Launch( + s, b_shape.Size(), + reinterpret_cast(work_buffer.dptr_) + 2 * a_shape.Size() + b_shape.Size(), + x_tblob.dptr()); + }); + mxnet::TBlob x_cp_tblob( + reinterpret_cast(work_buffer.dptr_) + 2 * a_shape.Size() + b_shape.Size(), + b_shape, b_tblob.dev_mask(), b_tblob.dev_id()); + const Tensor X = x_cp_tblob.FlatToKD(s); + + mxnet::TBlob pivot_tblob(reinterpret_cast( + reinterpret_cast(work_buffer.dptr_) + 2 * a_shape.Size() + 2 * b_shape.Size()), + Shape2(A.size(0), N), a_tblob.dev_mask(), a_tblob.dev_id()); + const Tensor pivot = pivot_tblob.FlatToKD(s); + + // calculate inverse(A) on CPU or GPU + batch_inverse(inv_A, LU, pivot, ctx); + laop::op(inputs[0].FlatToKD(s), + inv_A, + B, + X, + outputs[0].FlatToKD(s), + outputs[1].FlatToKD(s), + ctx, + attrs); + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_LINALG_NP_SOLVE_INL_H_ diff --git a/src/operator/numpy/linalg/np_solve.cc b/src/operator/numpy/linalg/np_solve.cc new file mode 100644 index 000000000000..55d02f18d4dc --- /dev/null +++ b/src/operator/numpy/linalg/np_solve.cc @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_solve.cc + * \brief CPU implementation placeholder of Solve Operator + */ +#include +#include +#include "../../mxnet_op.h" +#include "../../operator_common.h" +#include "../../elemwise_op_common.h" + +#include "./np_solve-inl.h" + +namespace mxnet { +namespace op { + +inline bool SolveOpShape(const nnvm::NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + const mxnet::TShape& in_a_shape = (*in_attrs)[0]; + const mxnet::TShape& in_b_shape = (*in_attrs)[1]; + if (!ndim_is_known(in_a_shape)) { return false; } + int in_a_ndim = in_a_shape.ndim(), in_b_ndim = in_b_shape.ndim(); + + CHECK_GE(in_a_ndim, 2) + << "Array must be at least two-dimensional"; + CHECK_EQ(in_a_shape[in_a_ndim - 2], in_a_shape[in_a_ndim - 1]) + << "Input A's last two dimension must be equal"; + + if (in_a_ndim == in_b_ndim + 1) { + CHECK_EQ(in_a_shape[in_a_ndim - 1], in_b_shape[in_b_ndim - 1]) + << "Input A's and B's last dimension must be equal"; + } else if (in_a_ndim == in_b_ndim) { + CHECK_EQ(in_a_shape[in_a_ndim - 1], in_b_shape[in_b_ndim - 2]) + << "Input A's and B's last second dimension must be equal"; + } else { + dmlc::LogMessageFatal(__FILE__, __LINE__).stream() << "A's and B's dimensions don't match"; + } + for (int i = 0; i < in_a_ndim - 2; ++i) { + CHECK_EQ(in_a_shape[i], in_b_shape[i]) << "A's and B's dimensions don't match"; + } + + SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_b_shape); + return !mxnet::op::shape_is_none(in_b_shape) && !mxnet::op::shape_is_none(out_attrs->at(0)); +} + +inline bool SolveOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + int a_type = in_attrs->at(0); + int b_type = in_attrs->at(1); + // unsupport float16 + CHECK_NE(a_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg"; + CHECK_NE(b_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg"; + if (mshadow::kFloat32 == a_type && mshadow::kFloat32 == b_type) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(1)); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64); + } + return out_attrs->at(0) != -1; +} + +NNVM_REGISTER_OP(_npi_solve) +.describe(R"code()code" ADD_FILELINE) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("FListInputNames", [](const NodeAttrs& attrs){ + return std::vector{"A", "B"}; +}) +.set_attr("FInferShape", SolveOpShape) +.set_attr("FInferType", SolveOpType) +.set_attr("FResourceRequest", [](const NodeAttrs& attrs){ + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("THasDeterministicOutput", true) +.set_attr("FCompute", LaOpForwardSolve) +.set_attr("FGradient", ElemwiseGradUseInOut{"_backward_npi_solve"}) +.add_argument("A", "NDArray-or-Symbol", "Tensor of square matrix") +.add_argument("B", "NDArray-or-Symbol", "Tensor of right side vector"); + +NNVM_REGISTER_OP(_backward_npi_solve) +.set_num_inputs(4) +.set_num_outputs(2) +.set_attr("FResourceRequest", [](const NodeAttrs& ){ + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("TIsBackward", true) +.set_attr("FCompute", LaOpBackwardSolve); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/linalg/np_solve.cu b/src/operator/numpy/linalg/np_solve.cu new file mode 100644 index 000000000000..b849cf55540e --- /dev/null +++ b/src/operator/numpy/linalg/np_solve.cu @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_solve.cu + * \brief GPU implementation of the Solve Operator + */ + +#include +#include +#include "./np_solve-inl.h" + +namespace mxnet { +namespace op { + +#if MXNET_USE_CUSOLVER == 1 + +NNVM_REGISTER_OP(_npi_solve) +.set_attr("FCompute", LaOpForwardSolve); + +NNVM_REGISTER_OP(_backward_npi_solve) +.set_attr("FCompute", LaOpBackwardSolve); + +#endif + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 53bd8f4d9235..6b5efa0c96b0 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -319,6 +319,27 @@ def _add_workload_linalg_inv(): OpArgMngr.add_workload('linalg.inv', np.array(_np.ones((0, 1, 1)), dtype=np.float64)) +def _add_workload_linalg_solve(): + shapes = [(0,0), (1,1), (5,5), (20,20), (3,5,5), (3,0,0), (2,20,20), (0,20,20), (2,3,20,20)] + nrhs = (0, 1, 2, 10) + dtypes = (np.float32, np.float64) + for dtype, shape in itertools.product(dtypes, shapes): + a = _np.random.rand(*shape) + shape_b = list(shape) + shape_b[-1] = 1 + x = _np.random.rand(*shape_b) + b = _np.matmul(a, x) + shape_b.pop() + b = b.reshape(shape_b) + OpArgMngr.add_workload('linalg.solve', np.array(a, dtype=dtype), np.array(b, dtype=dtype)) + for nrh in nrhs: + shape_b = list(shape) + shape_b[-1] = nrh + x = _np.random.rand(*shape_b) + b = _np.matmul(a, x) + OpArgMngr.add_workload('linalg.solve', np.array(a, dtype=dtype), np.array(b, dtype=dtype)) + + def _add_workload_linalg_det(): OpArgMngr.add_workload('linalg.det', np.array(_np.ones((2, 2)), dtype=np.float32)) OpArgMngr.add_workload('linalg.det', np.array(_np.ones((0, 1, 1)), dtype=np.float64)) @@ -1374,6 +1395,7 @@ def _prepare_workloads(): _add_workload_linalg_norm() _add_workload_linalg_cholesky() _add_workload_linalg_inv() + _add_workload_linalg_solve() _add_workload_linalg_det() _add_workload_linalg_slogdet() _add_workload_trace() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 5c14f0d6c701..6b62a4386524 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -3506,6 +3506,97 @@ def check_inv(A_inv, data_np): check_inv(A_inv, data_np) +@with_seed() +@use_np +def test_np_linalg_solve(): + class TestSolve(HybridBlock): + def __init__(self): + super(TestSolve, self).__init__() + + def hybrid_forward(self, F, a, b): + return F.np.linalg.solve(a, b) + + def check_solve(x, a_np, b_np): + try: + x_expected = _np.linalg.solve(a_np, b_np) + except Exception as e: + print("a:", a_np) + print("a shape:", a_np.shape) + print("b", b_np) + print("b shape:", b_np.shape) + print(e) + else: + assert 
x.shape == x_expected.shape + assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + + def get_grad_b(A, X): + dX = _np.ones_like(X) + A_inv = _np.linalg.inv(A) + A_inv_trans = _np.swapaxes(A_inv, -1, -2) + return _np.matmul(A_inv_trans, dX) + + shapes = [ + (0, 0), + (1, 1), + (3, 3), + (20, 20), + (3, 20, 20), + (1, 0, 0), + (0, 1, 1), + (0, 5, 3, 3), + (5, 0, 0, 0), + (2, 3, 10, 10) + ] + nrhs = (-1, 0, 1, 2, 5) + dtypes = ['float32', 'float64'] + for hybridize, shape, dtype, nrh in itertools.product([False, True], shapes, dtypes, nrhs): + rtol = 1e-3 + atol = 1e-5 + test_solve = TestSolve() + if hybridize: + test_solve.hybridize() + + if 0 in shape: + a = _np.ones(shape) + b = _np.ones(shape) + else: + shape_a = shape + a = _np.random.rand(*shape_a) + shape_b = list(shape_a) + if nrh == -1: + shape_b[-1] = 1 + x = _np.random.rand(*shape_b) + b = _np.matmul(a, x) + shape_b.pop() + b = b.reshape(shape_b) + else : + shape_b[-1] = nrh + x = _np.random.rand(*shape_b) + b = _np.matmul(a, x) + a = np.array(a, dtype=dtype) + b = np.array(b, dtype=dtype) + a.attach_grad() + b.attach_grad() + with mx.autograd.record(): + mx_out = test_solve(a, b) + # check solve validity + assert mx_out.shape == b.shape + check_solve(mx_out, a, b) + + # check backward. backward does not support empty input + if 0 not in mx_out.shape: + if nrh != -1: + mx.autograd.backward(mx_out) + b_backward_expected = get_grad_b(a.asnumpy(), mx_out.asnumpy()) + a_backward_expected = -_np.matmul(b_backward_expected, _np.swapaxes(mx_out, -1, -2).asnumpy()) + assert_almost_equal(a.grad.asnumpy(), a_backward_expected, rtol=rtol, atol=atol) + assert_almost_equal(b.grad.asnumpy(), b_backward_expected, rtol=rtol, atol=atol) + + # check imperative once again + mx_out = np.linalg.solve(a, b) + check_solve(mx_out, a, b) + + @with_seed() @use_np def test_np_linalg_det(): From 44cd63e5a8f6cd9c3d786974a487e5c993cbee59 Mon Sep 17 00:00:00 2001 From: Li Jun <875241499@qq.com> Date: Mon, 9 Dec 2019 15:56:49 +0800 Subject: [PATCH 06/62] [Numpy] Implement numpy operator 'average' (#16720) * Complete forward and backward Add tests Implement returned Implement none weights returned Replace reduce implementation with workspace version Temp space can be requested only once. 
Use FNumVisibleOutputs attribute, always return sum of weights Fix bug Fix GPU bug Fix GPU bug Formatting Apply suggestions from code review two blank lines between python functions Fix pylint Fix GPU bug Add support for null and addTo reqs Remove TBlob constructors Refactor * Retrigger * Retrigger * Retrigger * Retrigger --- python/mxnet/ndarray/numpy/_op.py | 100 ++++- python/mxnet/numpy/multiarray.py | 101 ++++- python/mxnet/symbol/numpy/_symbol.py | 98 ++++- src/operator/numpy/np_broadcast_reduce_op.h | 348 ++++++++++++++++++ .../numpy/np_broadcast_reduce_op_value.cc | 71 ++++ .../numpy/np_broadcast_reduce_op_value.cu | 6 + tests/python/unittest/test_numpy_op.py | 113 ++++++ 7 files changed, 823 insertions(+), 14 deletions(-) diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 0925f93a7740..538d5202942d 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -35,13 +35,14 @@ 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'argsort', 'tensordot', 'histogram', 'eye', 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', 'split', 'vsplit', 'concatenate', 'append', - 'stack', 'vstack', 'column_stack', 'dstack', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', - 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', - 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', - 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', + 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', + 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', + 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', + 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'nonzero', 'shares_memory', 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where'] + @set_module('mxnet.ndarray.numpy') def shape(a): """ @@ -3706,6 +3707,97 @@ def argmin(a, axis=None, out=None): return _npi.argmin(a, axis=axis, keepdims=False, out=out) +@set_module('mxnet.ndarray.numpy') +def average(a, axis=None, weights=None, returned=False, out=None): + """ + Compute the weighted average along the specified axis. + + Parameters + -------- + a : ndarray + Array containing data to be averaged. + axis : None or int or tuple of ints, optional + Axis or axes along which to average a. + The default, axis=None, will average over + all of the elements of the input array. + If axis is negative it counts from the last to the first axis. + New in version 1.7.0. + If axis is a tuple of ints, averaging is + performed on all of the axes specified in the tuple + instead of a single axis or all the axes as before. + weights : ndarray, optional + An array of weights associated with the values in a, must be the same dtype with a. + Each value in a contributes to the average according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of a along the given axis) or of the same shape as a. + If weights=None, then all data in a are assumed to have a weight equal to one. 
+ The 1-D calculation is: avg = sum(a * weights) / sum(weights) + The only constraint on weights is that sum(weights) must not be 0. + returned : bool, optional + Default is False. + If True, the tuple (average, sum_of_weights) is returned, + otherwise only the average is returned. + If weights=None, sum_of_weights is equivalent to + the number of elements over which the average is taken. + out : ndarray, optional + If provided, the calculation is done into this array. + + Returns + -------- + retval, [sum_of_weights] : ndarray + Return the average along the specified axis. + When returned is True, return a tuple with the average as the first element + and the sum of the weights as the second element. sum_of_weights is of the same type as retval. + If a is integral, the result dtype will be float32, otherwise it will be the same as dtype of a. + + Raises + -------- + MXNetError + - When all weights along axis sum to zero. + - When the length of 1D weights is not the same as the shape of a along axis. + - When given 1D weights, the axis is not specified or is not int. + - When the shape of weights and a differ, but weights are not 1D. + + See also + -------- + mean + + Notes + -------- + This function differs from the original `numpy.average` + `_ in + the following way(s): + + - Does not guarantee the same behavior with numpy when given float16 dtype and overflow happens + - Does not support complex dtype + - The dtypes of a and weights must be the same + - Integral a results in float32 returned dtype, not float64 + + Examples + -------- + >>> data = np.arange(1, 5) + >>> data + array([1., 2., 3., 4.]) + >>> np.average(data) + array(2.5) + >>> np.average(np.arange(1, 11), weights=np.arange(10, 0, -1)) + array(4.) + >>> data = np.arange(6).reshape((3,2)) + >>> data + array([[0., 1.], + [2., 3.], + [4., 5.]]) + >>> weights = np.array([0.25, 0.75]) + array([0.25, 0.75]) + >>> np.average(data, axis=1, weights=weights) + array([0.75, 2.75, 4.75]) + """ + if weights is None: + return _npi.average(a, axis=axis, weights=None, returned=returned, weighted=False, out=out) + else: + return _npi.average(a, axis=axis, weights=weights, returned=returned, out=out) + + @set_module('mxnet.ndarray.numpy') def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ """ diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 5aecae4c3997..aa0762bf0e3f 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -53,12 +53,13 @@ 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'append', 'argsort', 'tensordot', 'histogram', 'eye', 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', - 'split', 'vsplit', 'concatenate', 'stack', 'vstack', 'column_stack', 'dstack', 'mean', 'maximum', 'minimum', - 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', - 'hanning', 'hamming', 'blackman', 'flip', 'around', 'arctan2', 'hypot', 'bitwise_xor', 'bitwise_or', - 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', - 'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', - 'true_divide', 'nonzero', 'shares_memory', 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where'] + 'split', 'vsplit', 'concatenate', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', + 
'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', + 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'arctan2', 'hypot', + 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', 'take', + 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', + 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'nonzero', 'shares_memory', + 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where'] # Return code for dispatching indexing function call _NDARRAY_UNSUPPORTED_INDEXING = -1 @@ -5476,6 +5477,94 @@ def argmin(a, axis=None, out=None): return _mx_nd_np.argmin(a, axis, out) +@set_module('mxnet.numpy') +def average(a, axis=None, weights=None, returned=False, out=None): + """ + Compute the weighted average along the specified axis. + + Parameters + -------- + a : ndarray + Array containing data to be averaged. + axis : None or int or tuple of ints, optional + Axis or axes along which to average a. + The default, axis=None, will average over + all of the elements of the input array. + If axis is negative it counts from the last to the first axis. + New in version 1.7.0. + If axis is a tuple of ints, averaging is + performed on all of the axes specified in the tuple + instead of a single axis or all the axes as before. + weights : ndarray, optional + An array of weights associated with the values in a, must be the same dtype with a. + Each value in a contributes to the average according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of a along the given axis) or of the same shape as a. + If weights=None, then all data in a are assumed to have a weight equal to one. + The 1-D calculation is: avg = sum(a * weights) / sum(weights) + The only constraint on weights is that sum(weights) must not be 0. + returned : bool, optional + Default is False. + If True, the tuple (average, sum_of_weights) is returned, + otherwise only the average is returned. + If weights=None, sum_of_weights is equivalent to + the number of elements over which the average is taken. + out : ndarray, optional + If provided, the calculation is done into this array. + + Returns + -------- + retval, [sum_of_weights] : ndarray + Return the average along the specified axis. + When returned is True, return a tuple with the average as the first element + and the sum of the weights as the second element. sum_of_weights is of the same type as retval. + If a is integral, the result dtype will be float32, otherwise it will be the same as dtype of a. + + Raises + -------- + MXNetError + - When all weights along axis sum to zero. + - When the length of 1D weights is not the same as the shape of a along axis. + - When given 1D weights, the axis is not specified or is not int. + - When the shape of weights and a differ, but weights are not 1D. + + See also + -------- + mean + + Notes + -------- + This function differs from the original `numpy.average` + `_ in + the following way(s): + + - Does not guarantee the same behavior with numpy when given float16 dtype and overflow happens + - Does not support complex dtype + - The dtypes of a and weights must be the same + - Integral a results in float32 returned dtype, not float64 + + Examples + -------- + >>> data = np.arange(1, 5) + >>> data + array([1., 2., 3., 4.]) + >>> np.average(data) + array(2.5) + >>> np.average(np.arange(1, 11), weights=np.arange(10, 0, -1)) + array(4.) 
+ >>> data = np.arange(6).reshape((3,2)) + >>> data + array([[0., 1.], + [2., 3.], + [4., 5.]]) + >>> weights = np.array([0.25, 0.75]) + array([0.25, 0.75]) + >>> np.average(data, axis=1, weights=weights) + array([0.75, 2.75, 4.75]) + """ + return _mx_nd_np.average(a, axis=axis, weights=weights, returned=returned, out=out) + + @set_module('mxnet.numpy') def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ """ diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 8303f3246e87..4b06bbec7cae 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -43,10 +43,10 @@ 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'argsort', 'tensordot', 'histogram', 'eye', 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', 'split', 'vsplit', 'concatenate', 'append', - 'stack', 'vstack', 'column_stack', 'dstack', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', - 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', - 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', - 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', + 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', + 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', + 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', + 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'shares_memory', 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where'] @@ -3639,6 +3639,96 @@ def argmin(a, axis=None, out=None): return _npi.argmin(a, axis=axis, keepdims=False, out=out) +def average(a, axis=None, weights=None, returned=False, out=None): + """ + Compute the weighted average along the specified axis. + + Parameters + -------- + a : _Symbol + Array containing data to be averaged. + axis : None or int or tuple of ints, optional + Axis or axes along which to average a. + The default, axis=None, will average over + all of the elements of the input array. + If axis is negative it counts from the last to the first axis. + New in version 1.7.0. + If axis is a tuple of ints, averaging is + performed on all of the axes specified in the tuple + instead of a single axis or all the axes as before. + weights : _Symbol, optional + An array of weights associated with the values in a, must be the same dtype with a. + Each value in a contributes to the average according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of a along the given axis) or of the same shape as a. + If weights=None, then all data in a are assumed to have a weight equal to one. + The 1-D calculation is: avg = sum(a * weights) / sum(weights) + The only constraint on weights is that sum(weights) must not be 0. + returned : bool, optional + Default is False. + If True, the tuple (average, sum_of_weights) is returned, + otherwise only the average is returned. + If weights=None, sum_of_weights is equivalent to + the number of elements over which the average is taken. 
+ out : _Symbol, optional + If provided, the calculation is done into this array. + + Returns + -------- + retval, [sum_of_weights] : _Symbol + Return the average along the specified axis. + When returned is True, return a tuple with the average as the first element + and the sum of the weights as the second element. sum_of_weights is of the same type as retval. + If a is integral, the result dtype will be float32, otherwise it will be the same as dtype of a. + + Raises + -------- + MXNetError + - When all weights along axis sum to zero. + - When the length of 1D weights is not the same as the shape of a along axis. + - When given 1D weights, the axis is not specified or is not int. + - When the shape of weights and a differ, but weights are not 1D. + + See also + -------- + mean + + Notes + -------- + This function differs from the original `numpy.average` + `_ in + the following way(s): + + - Does not guarantee the same behavior with numpy when given float16 dtype and overflow happens + - Does not support complex dtype + - The dtypes of a and weights must be the same + - Integral a results in float32 returned dtype, not float64 + + Examples + -------- + >>> data = np.arange(1, 5) + >>> data + array([1., 2., 3., 4.]) + >>> np.average(data) + array(2.5) + >>> np.average(np.arange(1, 11), weights=np.arange(10, 0, -1)) + array(4.) + >>> data = np.arange(6).reshape((3,2)) + >>> data + array([[0., 1.], + [2., 3.], + [4., 5.]]) + >>> weights = np.array([0.25, 0.75]) + array([0.25, 0.75]) + >>> np.average(data, axis=1, weights=weights) + array([0.75, 2.75, 4.75]) + """ + if weights is None: + return _npi.average(a, axis=axis, weights=None, returned=returned, weighted=False, out=out) + else: + return _npi.average(a, axis=axis, weights=weights, returned=returned, out=out) + + @set_module('mxnet.symbol.numpy') def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ """ diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h index 3566323f1eb3..df9a7c932490 100644 --- a/src/operator/numpy/np_broadcast_reduce_op.h +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -30,6 +30,7 @@ #include #include "../nn/moments-inl.h" #include "../tensor/broadcast_reduce_op.h" +#include "../tensor/elemwise_binary_broadcast_op.h" namespace mxnet { namespace op { @@ -406,6 +407,353 @@ void ReduceAxesComputeWithWorkspaceImpl(const OpContext& ctx, }); } +struct NumpyWeightedAverageParam : public dmlc::Parameter { + dmlc::optional> axis; + bool returned; + bool weighted; + + DMLC_DECLARE_PARAMETER(NumpyWeightedAverageParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional>()) + .describe("Axis or axes along which a average is performed. " + "The default, axis=None, will average " + "all of the elements of the input array. If axis is negative it counts from the " + "last to the first axis."); + DMLC_DECLARE_FIELD(returned) + .set_default(false) + .describe("If True, the tuple (average, sum_of_weights) is returned," + "otherwise only the average is returned." + "If weights=None, sum_of_weights is equivalent to" + "the number of elements over which the average is taken."); + DMLC_DECLARE_FIELD(weighted) + .set_default(true) + .describe("Auxiliary flag to deal with none weights."); + } +}; + +inline bool NumpyWeightedAverageShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const auto& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), (param.weighted ? 
2U : 1U)); + CHECK_EQ(out_attrs->size(), 2U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + + const TShape& a_shape = (*in_attrs)[0]; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, + NumpyReduceAxesShapeImpl(a_shape, param.axis, false)); + + if (param.weighted) { + const TShape& w_shape = (*in_attrs)[1]; + if (w_shape.ndim() != a_shape.ndim()) { + CHECK_EQ(w_shape.ndim(), 1U) + << "1D weights expected when shapes of a and weights differ."; + CHECK_EQ(param.axis.has_value(), true) + << "Axis must be specified when shapes of a and weights differ."; + mxnet::Tuple axes(param.axis.value()); + CHECK_EQ(axes.ndim(), 1U) << "Axis must be int when shapes of a and weights differ."; + int red_axis = axes[0] < 0 ? axes[0] + a_shape.ndim() : axes[0]; + CHECK_EQ(a_shape[red_axis], w_shape[0]) + << "Length of weights not compatible with specified axis."; + SHAPE_ASSIGN_CHECK(*out_attrs, 1, + NumpyReduceAxesShapeImpl( + w_shape, dmlc::optional>(), false)); + } else { + for (int i = 0; i < w_shape.ndim(); i++) { + CHECK_EQ(w_shape[i], a_shape[i]); + } + SHAPE_ASSIGN_CHECK(*out_attrs, 1, + NumpyReduceAxesShapeImpl(w_shape, param.axis, false)); + } + } else { + SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape(0, -1)); + } + + return shape_is_known(out_attrs->at(0)) && shape_is_known(out_attrs->at(1)); +} + +template +struct avg_grad_a_kernel { + template + MSHADOW_XINLINE static void Map(int i, + DType* out, + const DType* w, + const DType* scl, + const DType* ograd, + mshadow::Shape small, + mshadow::Shape big) { + // partial a = w / sum(w) + size_t big_idx = i; + size_t small_idx = i; + size_t big_stride = 1; + size_t small_stride = 1; + size_t red_axis_idx = 0; + for (int axis = NDim-1; axis >= 0; --axis) { + size_t axis_idx = big_idx % big[axis]; + small_idx -= axis_idx * big_stride; + if (small[axis] != 1) { + small_idx += axis_idx * small_stride; + } else if (onedim && small[axis] != big[axis]) { + red_axis_idx = axis_idx; + } + big_idx /= big[axis]; + big_stride *= big[axis]; + small_stride *= small[axis]; + } + if (onedim) { + KERNEL_ASSIGN(out[i], req, (ograd[small_idx] * (w[red_axis_idx] / *scl))); + } else { + KERNEL_ASSIGN(out[i], req, (ograd[small_idx] * (w[i] / scl[small_idx]))); + } + } +}; + +template +struct avg_grad_w_kernel { + template + MSHADOW_XINLINE static void Map(int i, + DType* out, + const DType* a, + const DType* scl, + const DType* sum_of_wa, + const DType* ograd, + mshadow::Shape small, + mshadow::Shape big) { + // partial w = (a * sum(w) - sum(a*w)) / (sum(w) * sum(w)) + size_t big_idx = i; + size_t small_idx = i; + size_t big_stride = 1; + size_t small_stride = 1; + for (int axis = NDim-1; axis >= 0; --axis) { + size_t axis_idx = big_idx % big[axis]; + small_idx -= axis_idx * big_stride; + if (small[axis] != 1) { + small_idx += axis_idx * small_stride; + } + big_idx /= big[axis]; + big_stride *= big[axis]; + small_stride *= small[axis]; + } + DType ret = ograd[small_idx] * + (((a[i] * scl[small_idx] - sum_of_wa[small_idx]) / scl[small_idx]) / scl[small_idx]); + KERNEL_ASSIGN(out[i], req, ret); + } +}; + +template +struct avg_grad_w_1D_kernel { + template + MSHADOW_XINLINE static void Map(int i, + DType* out, + const DType* a, + const DType* scl, + const DType* sum_of_wa, + const DType* ograd, + mshadow::Shape big, + const int red_axis) { + DType scl_val = *scl; + size_t tail = 1; + size_t head = 1; + for (int axis = NDim-1; axis > red_axis; --axis) { + tail *= big[axis]; + } + for (int axis = 0; axis < red_axis; ++axis) { + head *= big[axis]; + } + DType ret = 0; + for 
(size_t j = 0; j < head; ++j) { + for (size_t k = 0; k < tail; ++k) { + size_t a_idx = j*(tail*big[red_axis]) + i * tail + k; + size_t small_idx = j*tail + k; + ret += (ograd[small_idx] * + (((a[a_idx] * scl_val - sum_of_wa[small_idx]) / scl_val) / scl_val)); + } + } + KERNEL_ASSIGN(out[i], req, ret); + } +}; + +template +void NumpyWeightedAverageComputeImpl(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs, + const dmlc::optional>& axis) { + using namespace mshadow; + using namespace mxnet_op; + Stream* s = ctx.get_stream(); + const TBlob& data = inputs[0]; + TShape small1 = NumpyReduceAxesShapeImpl(data.shape_, axis, true); + // Reshape weights + TShape small2 = small1; + TBlob weights = inputs[1]; + + bool one_dim = weights.shape_.ndim() != data.shape_.ndim(); + + int red_axis = -1; + + if (one_dim) { + CHECK_EQ(weights.shape_.ndim(), 1U) + << "1D weights expected when shapes of a and weights differ."; + CHECK_EQ(axis.has_value(), true) + << "Axis must be specified when shapes of a and weights differ."; + Tuple axes(axis.value()); + CHECK_EQ(axes.ndim(), 1U) + << "Axis must be int when shapes of a and weights differ."; + red_axis = axes[0] < 0 ? axes[0] + data.shape_.ndim() : axes[0]; + CHECK_EQ(weights.shape_[0], data.shape_[red_axis]) + << "Length of weights not compatible with specified axis."; + TShape new_w_shape(data.shape_.ndim(), 1); + new_w_shape[red_axis] = weights.shape_[0]; + weights = weights.reshape(new_w_shape); + small2 = TShape(new_w_shape.ndim(), 1); + } + MSHADOW_TYPE_SWITCH(data.type_flag_, DType, { + // Get temp space + size_t temp_data_size = data.shape_.Size() * sizeof(DType); + size_t temp_sum_size = small1.Size() * sizeof(DType); + TShape src_shape, dst_shape; + BroadcastReduceShapeCompact(data.shape_, small1, &src_shape, &dst_shape); + size_t workspace_size = 0; + MXNET_NDIM_SWITCH(dst_shape.ndim(), NDim, { + workspace_size = broadcast::ReduceWorkspaceSize( + s, dst_shape, {kWriteTo}, src_shape); + }); + size_t temp_mem_size = temp_data_size + temp_sum_size + workspace_size; + Tensor temp_mem = + ctx.requested[0].get_space_typed(Shape1(temp_mem_size), s); + DType *temp_data_ptr = reinterpret_cast(temp_mem.dptr_); + DType *temp_sum_ptr = reinterpret_cast(temp_mem.dptr_ + temp_data_size); + char *workspace_ptr = temp_mem.dptr_ + temp_data_size + temp_sum_size; + Tensor workspace(workspace_ptr, Shape1(workspace_size), s); + + // Compute weighted data + TBlob wa = TBlob(temp_data_ptr, data.shape_, xpu::kDevMask); + BinaryBroadcastCompute( + attrs, ctx, {data, weights}, {kWriteTo}, {wa}); + + // Compute sum of weighted data + TBlob sum_of_wa = TBlob(temp_sum_ptr, small1, xpu::kDevMask); + ReduceAxesComputeWithWorkspaceImpl( + ctx, {wa}, {kWriteTo}, {sum_of_wa}, workspace, src_shape, dst_shape); + if (!back) { + const TBlob& avg = outputs[0]; + const TBlob& sum_of_weights = outputs[1]; + TShape w_src_shape, w_dst_shape; + BroadcastReduceShapeCompact(weights.shape_, small2, &w_src_shape, &w_dst_shape); + // Compute sum of weight + TBlob scl = sum_of_weights.reshape(small2); + ReduceAxesComputeWithWorkspaceImpl( + ctx, {weights}, {kWriteTo}, {scl}, workspace, w_src_shape, w_dst_shape); + + // Compute avg and assign output + BinaryBroadcastCompute( + attrs, ctx, {sum_of_wa, scl}, req, {avg.reshape(small1)}); + } else { + // Compute and assign the derivatives of a and weights + const TBlob& igrad_a = outputs[0]; + const TBlob& igrad_w = outputs[1]; + const TBlob& scl = inputs[2]; + 
const TBlob& ograd = inputs[3]; + MXNET_NDIM_SWITCH(igrad_a.shape_.ndim(), NDim, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_a, { + if (one_dim) { + // 1D weights + Kernel, xpu>::Launch( + s, igrad_a.shape_.Size(), igrad_a.dptr(), + weights.dptr(), scl.dptr(), ograd.dptr(), + small1.get(), + igrad_a.shape_.get()); + } else { + Kernel, xpu>::Launch( + s, igrad_a.shape_.Size(), igrad_a.dptr(), + weights.dptr(), scl.dptr(), ograd.dptr(), + small1.get(), + igrad_a.shape_.get()); + } + }); + MXNET_ASSIGN_REQ_SWITCH(req[1], req_w, { + if (one_dim) { + Kernel, xpu>::Launch( + s, igrad_w.shape_.Size(), igrad_w.dptr(), + data.dptr(), scl.dptr(), sum_of_wa.dptr(), ograd.dptr(), + data.shape_.get(), + red_axis); + } else { + Kernel, xpu>::Launch( + s, igrad_w.shape_.Size(), igrad_w.dptr(), + data.dptr(), scl.dptr(), sum_of_wa.dptr(), ograd.dptr(), + small1.get(), + igrad_w.shape_.get()); + } + }); + }) + } + }); +} + +template +void NumpyWeightedAverageForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + if (req[0] == kNullOp) return; + CHECK_NE(req[0], kWriteInplace) << "Average does not support write in-place"; + const auto& param = nnvm::get(attrs.parsed); + const TBlob& data = inputs[0]; + MSHADOW_TYPE_SWITCH(data.type_flag_, DType, { + if (!param.weighted) { + TShape small = NumpyReduceAxesShapeImpl(data.shape_, param.axis, true); + // Compute sum of weights which equals to the product of sizes of reduced axes + Stream* s = ctx.get_stream(); + auto ret = outputs[1].FlatTo1D(s); + ret = scalar(data.shape_.Size()/small.Size()); + // Compute mean + ReduceAxesComputeImpl( + ctx, inputs, req, {outputs[0]}, small); + } else { + NumpyWeightedAverageComputeImpl( + attrs, ctx, inputs, req, outputs, param.axis); + } + }); +} + +template +void NumpyWeightedAverageBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + const auto& param = nnvm::get(attrs.parsed); + if (req[0] == kNullOp && !param.weighted) return; + CHECK_EQ(inputs.size(), (param.weighted ? 6U : 5U)); + CHECK_EQ(outputs.size(), (param.weighted ? 
2U : 1U)); + const TBlob& ograd = inputs[0]; + const TBlob& data = inputs[2]; + MSHADOW_TYPE_SWITCH(data.type_flag_, DType, { + if (!param.weighted) { + TShape small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true); + Stream* s = ctx.get_stream(); + auto ograd_tensor = ograd.FlatTo1D(s); + ograd_tensor /= scalar(data.shape_.Size()/small.Size()); + BroadcastComputeImpl(attrs, ctx, {ograd}, req, {outputs[0]}, small); + } else { + const TBlob& weights = inputs[3]; + const TBlob& scl = inputs[5]; + NumpyWeightedAverageComputeImpl( + attrs, ctx, {data, weights, scl, ograd}, req, outputs, param.axis); + } + }); +} + template void NumpyMomentsForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc index fb133568a7a5..2a1bc5261701 100644 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cc +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc @@ -35,6 +35,7 @@ namespace op { DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam); DMLC_REGISTER_PARAMETER(NumpyReduceAxesNoDTypeParam); DMLC_REGISTER_PARAMETER(NumpyMomentsParam); +DMLC_REGISTER_PARAMETER(NumpyWeightedAverageParam); inline bool NumpySumType(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, @@ -249,6 +250,76 @@ inline bool IsIntType(const int dtype) { dtype == mshadow::kInt64); } +inline bool NumpyWeightedAverageType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const auto ¶m = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), (param.weighted ? 2U : 1U)); + CHECK_EQ(out_attrs->size(), 2U); + + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + if (param.weighted) { + TYPE_ASSIGN_CHECK(*in_attrs, 1, in_attrs->at(0)); + } + TYPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(0)); + + return in_attrs->at(0) != -1 && out_attrs->at(0) != -1 && + (!param.weighted || (in_attrs->at(1) != -1)) && + out_attrs->at(1) != -1; +} + +NNVM_REGISTER_OP(_npi_average) +.set_num_inputs( + [](const NodeAttrs& attrs) { + const auto& param = nnvm::get(attrs.parsed); + return param.weighted ? 2 : 1; + }) +.set_num_outputs(2) +.set_attr("FNumVisibleOutputs", + [](const NodeAttrs& attrs) { + const auto& param = nnvm::get(attrs.parsed); + return param.returned ? 2 : 1; + }) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyWeightedAverageShape) +.set_attr("FInferType", NumpyWeightedAverageType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + const auto& param = nnvm::get(attrs.parsed); + return param.weighted ? + std::vector{"a", "weights"} : + std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_argument("weights", "NDArray-or-Symbol", "The weights to calculate average") +.add_arguments(NumpyWeightedAverageParam::__FIELDS__()) +.set_attr("FCompute", NumpyWeightedAverageForward) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ElemwiseGradUseInOut{"_backward_np_average"}); + +NNVM_REGISTER_OP(_backward_np_average) +.set_num_outputs( + [](const NodeAttrs& attrs) { + const auto& param = nnvm::get(attrs.parsed); + return param.weighted ? 2 : 1; + }) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_num_inputs( + [](const NodeAttrs& attrs) { + const auto& param = nnvm::get(attrs.parsed); + return param.weighted ? 
6 : 5; + }) +.set_attr("FCompute", NumpyWeightedAverageBackward) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; +}); + inline bool NumpyMeanType(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu index 53e78787d47d..56194ff34a7e 100644 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cu +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cu @@ -50,6 +50,12 @@ NNVM_REGISTER_OP(_np_prod) NNVM_REGISTER_OP(_backward_np_prod) .set_attr("FCompute", NumpyReduceAxesBackwardUseInOut); +NNVM_REGISTER_OP(_npi_average) +.set_attr("FCompute", NumpyWeightedAverageForward); + +NNVM_REGISTER_OP(_backward_np_average) +.set_attr("FCompute", NumpyWeightedAverageBackward); + NNVM_REGISTER_OP(_npi_mean) .set_attr("FCompute", NumpyReduceAxesCompute); diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 6b62a4386524..549141b92c4c 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -600,6 +600,119 @@ def _test_np_exception(func, shape, dim): _test_np_exception(func, shape, dim) +@with_seed() +@use_np +def test_np_average(): + class TestAverage(HybridBlock): + def __init__(self, axis=None, returned=False): + super(TestAverage, self).__init__() + # necessary initializations + self._axis = axis + self._returned = returned + + def hybrid_forward(self, F, a, weights): + return F.np.average(a, weights=weights, axis=self._axis, returned=self._returned) + + def avg_backward(a, w, avg, axes, init_a_grad=None, init_w_grad=None): + # avg = sum(a * w) / sum(w) + if axes is not None and not isinstance(axes, tuple) and axes < 0: + axes += a.ndim + if w is None: + a_grad = _np.ones(shape=a.shape, dtype=a.dtype)/(a.size/avg.size) + if init_a_grad is not None: + a_grad += init_a_grad.asnumpy() + return [a_grad, None] + onedim = a.ndim != w.ndim + if onedim: + new_shape = [a.shape[i] if i == axes else 1 for i in range(a.ndim)] + w = w.reshape(new_shape) + w = _np.broadcast_to(w, a.shape) + + # partial a = w / sum(w) + # partial w = (a*sum(w) - sum(a*w)) / (sum(w) * sum(w)) + scl = _np.sum(w, axis=axes, keepdims=True) + a_grad = _np.divide(w, scl) + w_grad = _np.divide(a*scl-_np.sum(a*w, axis=axes, keepdims=True), scl*scl) + + if onedim: + axis = list(range(a.ndim)) + axis.remove(axes) + w_grad = _np.sum(w_grad, axis=tuple(axis)) + if init_a_grad is not None: + a_grad += init_a_grad.asnumpy() + if init_w_grad is not None: + w_grad += init_w_grad.asnumpy() + return [a_grad, w_grad] + + tensor_shapes = [ + ((3, 5), (3, 5), None), # (a_shape, w_shape, axes) + ((4, 5, 6), (4, 5, 6), (0, 2)), + ((3,), (3,), 0), + ((2, 3), (3,), 1), + ((2, 3, 4), (2,), 0), + ((2, 3, 4), (3,), 1), + ((2, 3, 4), (4,), -1), + ((2, 3, 4, 5), (5,), 3) + ] + + flags = [True, False] + dtypes = ['float32', 'float64'] + reqs = ['null', 'add', 'write'] + for hybridize, returned, (a_shape, w_shape, axes), dtype, is_weighted, req_a in \ + itertools.product(flags, flags, tensor_shapes, dtypes, flags, reqs): + if req_a == 'null' and not is_weighted: + continue + rtol, atol = 1e-3, 1e-4 + test_average = TestAverage(axes, returned) + if hybridize: + test_average.hybridize() + a = np.random.uniform(-1.0, 1.0, size=a_shape, dtype=dtype) + a.attach_grad(req_a) + init_a_grad = np.random.uniform(-1.0, 1.0, size=a_shape, dtype=dtype) if req_a == 'add' else None + 
init_w_grad = None + req_w = req_a + w, np_w = None, None + if is_weighted: + w = np.random.uniform(-1.0, 1.0, size=w_shape, dtype=dtype) + if req_a == 'null': + req_w = random.choice(['add', 'write']) + w.attach_grad(req_w) + if req_w == 'add': + init_w_grad = np.random.uniform(-1.0, 1.0, size=w_shape, dtype=dtype) + np_w = w.asnumpy() + np_out = _np.average(a.asnumpy(), axis=axes, weights=np_w, returned=returned) + with mx.autograd.record(): + mx_out = test_average(a, w) + if returned: + np_out, np_sum_of_weights = np_out + mx_out, mx_sum_of_weights = mx_out + assert_almost_equal(mx_sum_of_weights.asnumpy(), np_sum_of_weights, rtol=rtol, atol=atol) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol) + if req_a == 'add': + a.grad[:] = init_a_grad + if is_weighted and req_w == 'add': + w.grad[:] = init_w_grad + mx_out.backward() + # Code to get reference backward value + a_grad, w_grad = avg_backward(a.asnumpy(), np_w, np_out, axes, init_a_grad, init_w_grad) + if is_weighted: + assert_almost_equal(w.grad.asnumpy(), w_grad, rtol=rtol*10, atol=atol*10) + if req_a == 'null': + assert a.grad is None + else: + assert_almost_equal(a.grad.asnumpy(), a_grad, rtol=rtol, atol=atol) + + # Test imperative once again + np_out = _np.average(a.asnumpy(), weights=np_w, axis=axes, returned=returned) + mx_out = np.average(a, weights=w, axis=axes, returned=returned) + if returned: + np_out, np_sum_of_weights = np_out + mx_out, mx_sum_of_weights = mx_out + assert_almost_equal(mx_sum_of_weights.asnumpy(), np_sum_of_weights, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out.astype(dtype), rtol=rtol, atol=atol) + + @with_seed() @use_np def test_np_mean(): From e40cceb271a10e15fbdc6a5e9e42e88694ca43cb Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Mon, 9 Dec 2019 08:19:28 +0000 Subject: [PATCH 07/62] Upgrade 3rdparty/openmp to release_90 version (#17012) Fixes https://github.com/apache/incubator-mxnet/issues/10856 --- 3rdparty/openmp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/openmp b/3rdparty/openmp index 37c72127e903..b76842ed1698 160000 --- a/3rdparty/openmp +++ b/3rdparty/openmp @@ -1 +1 @@ -Subproject commit 37c72127e90360a020f351f18d9cccfc30e5145a +Subproject commit b76842ed16984ae5edcbbc4b00a94fda20419431 From ce97e22813442b55211703c1bfa8fa20d58ef0e8 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Mon, 9 Dec 2019 16:53:41 +0800 Subject: [PATCH 08/62] fix axis=-1 bug (#17016) --- src/operator/numpy/np_broadcast_reduce_op.h | 2 +- tests/python/unittest/test_numpy_op.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h index df9a7c932490..7d0025a62ad2 100644 --- a/src/operator/numpy/np_broadcast_reduce_op.h +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -188,7 +188,7 @@ inline bool NumpyReduceAxesNoDTypeShape(const nnvm::NodeAttrs& attrs, if (param.axis.has_value()) { const mxnet::Tuple& axes = param.axis.value(); for (int i = 0; i < axes.ndim(); ++i) { - if (ishape[axes[i]] == 0) { + if ((axes[i] >= 0) && (ishape[axes[i]] == 0)) { is_all_reducded_axes_not_zero = false; break; } diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 549141b92c4c..0947b6112158 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -532,7 +532,7 @@ def get_grad(axis, func_name): elif axis == 2: 
temp[:,:,index,:] = 1 return temp - elif axis == 3: + elif (axis == 3 or axis == -1): temp[:,:,:,index] = 1 return temp elif not axis: @@ -550,7 +550,7 @@ def _test_np_exception(func, shape, dim): for func in ['max', 'min']: for hybridize in [False, True]: for keepdims in [True, False]: - for axis in ([i for i in range(in_data_dim)] + [(), None]): + for axis in ([i for i in range(in_data_dim)] + [(), None] + [-1]): for itype in ['float16', 'float32', 'float64', 'int']: # test gluon if func == 'max': From 538b18bb4ea25448334092d0a15af174dcb44663 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Mon, 9 Dec 2019 20:36:26 +0800 Subject: [PATCH 09/62] [MKL-DNN] Enable and Optimization for s8 eltwise_add (#16931) * optimization for s8 sum * fix lint * fix lint * exclude sum in lstm cell * remove debug info * remove todo --- src/operator/operator_common.h | 5 ++ .../mkldnn/mkldnn_quantized_elemwise_add.cc | 58 ++++++++++++++++++- .../quantization/quantized_elemwise_add.cc | 4 -- .../python/quantization/test_quantization.py | 2 + 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index c23a5a852dcb..f6af58bce995 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -607,6 +607,11 @@ class OpSignature { eles.push_back(val); } + void AddSign(float val) { + hash = dmlc::HashCombine(hash, val); + eles.push_back(val); + } + bool operator==(const OpSignature &sign) const { if (hash != sign.hash) return false; diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc index 2078ac4fead8..06a0ea37f95b 100644 --- a/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc +++ b/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc @@ -39,6 +39,57 @@ static inline float GetScale(const NDArray& data, float min, float max) { return data_range / MaxAbs(min, max); } +class MKLDNNQuantizedElemwiseAddFwd { + public: + mkldnn::sum::primitive_desc fwd_pd; + + MKLDNNQuantizedElemwiseAddFwd( + const mkldnn::memory::desc &output_desc, + const std::vector &scales, + const std::vector &data_md) + : fwd_pd(output_desc, scales, data_md, CpuEngine::Get()->get_engine()) { + fwd_ = std::make_shared(fwd_pd); + data_.resize(data_md.size()); + } + + const mkldnn::sum &GetFwd() const { return *fwd_; } + + private: + std::shared_ptr fwd_; + std::vector> data_; + std::shared_ptr out_; +}; + +static MKLDNNQuantizedElemwiseAddFwd &GetQuantizedElemwiseAddForward( + const mkldnn::memory::desc &output_desc, const std::vector &scales, + const std::vector &in_data, const std::vector &out_data, + const std::vector &data_md) { +#if DMLC_CXX11_THREAD_LOCAL + static thread_local std::unordered_map fwds; +#else + static MX_THREAD_LOCAL std::unordered_map fwds; +#endif + OpSignature key; + key.AddSign(in_data); + key.AddSign(in_data[quantized_elemwise_add_enum::kAMin].data().dptr()[0]); + key.AddSign(in_data[quantized_elemwise_add_enum::kAMax].data().dptr()[0]); + key.AddSign(in_data[quantized_elemwise_add_enum::kBMin].data().dptr()[0]); + key.AddSign(in_data[quantized_elemwise_add_enum::kBMax].data().dptr()[0]); + key.AddSign(out_data); + key.AddSign(out_data[quantized_elemwise_add_enum::kMin].data().dptr()[0]); + key.AddSign(out_data[quantized_elemwise_add_enum::kMax].data().dptr()[0]); + + auto it = fwds.find(key); + if (it == fwds.end()) { + MKLDNNQuantizedElemwiseAddFwd fwd(output_desc, scales, data_md); + it = AddToCache(&fwds, 
key, fwd); + } + return it->second; +} + + static void MKLDNNQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, const std::vector& in_data, const std::vector& req, @@ -166,16 +217,17 @@ static void MKLDNNQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs, cons auto output_desc = mkldnn::memory::desc(i_dims, output_data_type, mkldnn::memory::format_tag::any); - mkldnn::sum::primitive_desc pdesc(output_desc, scales, in_desc, engine); + MKLDNNQuantizedElemwiseAddFwd &fwd = GetQuantizedElemwiseAddForward(output_desc, scales, + in_data, out_data, in_desc); auto mem = CreateMKLDNNMem(out_data[quantized_elemwise_add_enum::kOut], - pdesc.dst_desc(), + fwd.fwd_pd.dst_desc(), req[0], &in_data[0]); mkldnn_args_map_t args({{MKLDNN_ARG_MULTIPLE_SRC, *dataA_mem}, {MKLDNN_ARG_MULTIPLE_SRC + 1, *dataB_mem}, {MKLDNN_ARG_DST, *mem.second}}); MKLDNNStream *stream = MKLDNNStream::Get(); - stream->RegisterPrimArgs(mkldnn::sum(pdesc), args); + stream->RegisterPrimArgs(fwd.GetFwd(), args); CommitOutput(out_data[quantized_elemwise_add_enum::kOut], mem); stream->Submit(); diff --git a/src/operator/quantization/quantized_elemwise_add.cc b/src/operator/quantization/quantized_elemwise_add.cc index 0e7034e88b8c..f821e6598192 100644 --- a/src/operator/quantization/quantized_elemwise_add.cc +++ b/src/operator/quantization/quantized_elemwise_add.cc @@ -125,9 +125,6 @@ and max thresholds representing the threholds for quantizing the float32 output .add_argument("rhs_max", "NDArray-or-Symbol", "6th input"); -// TODO(zhangrong): need extra condition check if there's benefited if it's switched on -// Since it's not compute-intensive. -#if 0 NNVM_REGISTER_OP(elemwise_add) .set_attr("FQuantizedOp", [](const NodeAttrs& attrs) { nnvm::NodePtr node = nnvm::Node::Create(); @@ -139,7 +136,6 @@ NNVM_REGISTER_OP(elemwise_add) } return node; }); -#endif } // namespace op } // namespace mxnet diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index 6fe33f5ee52b..a371abddd22e 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -958,6 +958,8 @@ def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape, label_shape=N excluded_sym_names = excluded_names else: excluded_sym_names = excluded_names + optional_names + if name == 'sym4': + excluded_op_names += ['elemwise_add'] qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=s, arg_params=arg_params, From 2d41f2a83d4205831fccecdd23cf1b547c2f1244 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Mon, 9 Dec 2019 20:37:29 +0800 Subject: [PATCH 10/62] add inference benchmark script (#16978) --- example/quantization/README.md | 53 ++++++--- .../quantization/launch_inference_mkldnn.sh | 111 ++++++++++++++++++ 2 files changed, 149 insertions(+), 15 deletions(-) create mode 100644 example/quantization/launch_inference_mkldnn.sh diff --git a/example/quantization/README.md b/example/quantization/README.md index 032ca97176df..8cdc1bb7e06f 100644 --- a/example/quantization/README.md +++ b/example/quantization/README.md @@ -80,6 +80,29 @@ optional arguments: if calibration mode is enabled ``` +A new benchmark script `launch_inference_mkldnn.sh` has been designed to launch performance benchmark for float32 or int8 image-classification models with Intel® MKL-DNN. 
+``` +usage: bash ./launch_inference_mkldnn.sh [[[-s symbol_file ] [-b batch_size] [-iter iteraton] [-ins instance] [-c cores/instance]] | [-h]] + +optional arguments: + -h, --help show this help message and exit + -s, --symbol_file symbol file for benchmark + -b, --batch_size inference batch size + default: 64 + -iter, --iteration inference iteration + default: 500 + -ins, --instance launch multi-instance inference + default: one instance per socket + -c, --core number of cores per instance + default: divide full physical cores + +example: resnet int8 performance benchmark on c5.24xlarge(duo sockets, 24 physical cores per socket). + + bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-quantized-5batches-naive-symbol.json + +will launch two instances for throughput benchmark and each instance will use 24 physical cores. +``` + Use the following command to install [Gluon-CV](https://gluon-cv.mxnet.io/): ``` @@ -120,8 +143,8 @@ python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --par python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --param-file=./model/resnet50_v1-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-quantized-5batches-naive-symbol.json ```
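Not part of this patch, but for context: the dummy-data throughput reported by the script can be approximated from plain Python with the Module API. The sketch below is illustrative only; the symbol path is taken from the ResNet50 V1 commands above, while the batch size and 3x224x224 input shape are assumptions meant to match the script's defaults.

```
import time
import mxnet as mx

# Assumed inputs: the FP32 ResNet50 V1 symbol downloaded above and a 224x224 ImageNet shape.
sym = mx.sym.load('./model/resnet50_v1-symbol.json')
batch_size = 64
data_shape = (batch_size, 3, 224, 224)

mod = mx.mod.Module(symbol=sym, context=mx.cpu(), label_names=None)
mod.bind(for_training=False, data_shapes=[('data', data_shape)])
mod.init_params()  # dummy-data benchmark: randomly initialized parameters are enough for timing

batch = mx.io.DataBatch([mx.nd.ones(data_shape)])
for _ in range(5):                      # warm-up passes
    mod.forward(batch, is_train=False)
mx.nd.waitall()

num_batches = 100
start = time.time()
for _ in range(num_batches):
    mod.forward(batch, is_train=False)
mx.nd.waitall()                         # wait for all asynchronous work before stopping the clock
elapsed = time.time() - start
print('throughput: %.1f image/sec' % (num_batches * batch_size / elapsed))
```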

SqueezeNet 1.0

@@ -142,8 +165,8 @@ python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --p python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/squeezenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/squeezenet1.0-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/squeezenet1.0-quantized-5batches-naive-symbol.json ```

MobileNet 1.0

@@ -164,8 +187,8 @@ python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --pa python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/mobilenet1.0-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/mobilenet1.0-quantized-5batches-naive-symbol.json ```

MobileNetV2 1.0

@@ -186,8 +209,8 @@ python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json - python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenetv2_1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/mobilenetv2_1.0-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json ```

Inception-V3

@@ -208,8 +231,8 @@ python imagenet_inference.py --symbol-file=./model/inceptionv3-symbol.json --par python imagenet_inference.py --symbol-file=./model/inceptionv3-quantized-5batches-naive-symbol.json --param-file=./model/inceptionv3-quantized-0000.params --image-shape=3,299,299 --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/inceptionv3-symbol.json --image-shape=3,299,299 --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/inceptionv3-quantized-5batches-naive-symbol.json --image-shape=3,299,299 --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/inceptionv3-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/inceptionv3-quantized-5batches-naive-symbol.json ```

ResNet152-V2

@@ -231,8 +254,8 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-symbol. python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json --param-file=./model/imagenet1k-resnet-152-quantized-0000.params --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-resnet-152-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json ```

Inception-BN

@@ -254,8 +277,8 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbo python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --param-file=./model/imagenet1k-inception-bn-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-inception-bn-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json ```

SSD-VGG16

@@ -307,7 +330,7 @@ python imagenet_gen_qsym_mkldnn.py --model=custom --num-calib-batches=5 --calib- python imagenet_inference.py --symbol-file=./model/*.json --param-file=./model/*.params --rgb-mean=* --rgb-std=* --num-skipped-batches=* --batch-size=* --num-inference-batches=*--dataset=./data/* --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/*.json --batch-size=* --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/*.json ```
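The `launch_inference_mkldnn.sh` script added below splits the physical cores evenly across instances, binds each instance to the NUMA node that owns its core range via numactl, and finally sums the per-instance image/sec figures. A minimal Python sketch of that bookkeeping, using hypothetical topology and throughput numbers in place of the values the script reads from lscpu and the BENCHMARK_*.log files:

```
# Hypothetical topology matching the c5.24xlarge example above: 2 sockets, 24 cores each.
num_sockets = 2
cores_per_socket = 24
num_numa_nodes = 2

num_cores = num_sockets * cores_per_socket      # 48 physical cores in total
cores_per_numa = num_cores // num_numa_nodes    # 24 cores per NUMA node

instances = num_sockets                         # default: one instance per socket
cores_per_instance = num_cores // instances     # default: split all physical cores evenly

for i in range(instances):
    first = i * cores_per_instance
    last = first + cores_per_instance - 1
    numa_node = last // cores_per_numa          # --membind target, mirroring the script
    print('instance %d -> cores %d-%d, NUMA node %d' % (i, first, last, numa_node))

# Aggregation at the end of the script: sum the per-instance throughput and
# report latency as milliseconds per image for a single instance.
per_instance_fps = [750.0, 745.0]               # hypothetical image/sec parsed from BENCHMARK_*.log
total_fps = sum(per_instance_fps)
latency_ms = 1000.0 * instances / total_fps
print('overall throughput: %.1f image/sec, latency per instance: %.2f ms' % (total_fps, latency_ms))
```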

Model Quantization with CUDNN

diff --git a/example/quantization/launch_inference_mkldnn.sh b/example/quantization/launch_inference_mkldnn.sh new file mode 100644 index 000000000000..f67787b41b03 --- /dev/null +++ b/example/quantization/launch_inference_mkldnn.sh @@ -0,0 +1,111 @@ +#!/bin/sh + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +usage() +{ + echo "usage: bash ./launch_inference_mkldnn.sh [[[-s symbol_file ] [-b batch_size] [-iter iteraton] [-ins instance] [-c cores/instance]] | [-h]]" +} + +while [ $# -gt 0 ]; do + case "$1" in + --symbol | -s) + shift + SYMBOL=$1 + ;; + --batch-size | -b) + shift + BS=$1 + ;; + --iteration | -iter) + shift + ITERATIONS=$1 + ;; + --instance | -ins) + shift + INS=$1 + ;; + --core | -c) + shift + CORES=$1 + ;; + --help | -h) + usage + exit 1 + ;; + *) + usage + exit 1 + esac + shift +done + +NUM_SOCKET=`lscpu | grep 'Socket(s)' | awk '{print $NF}'` +NUM_NUMA_NODE=`lscpu | grep 'NUMA node(s)' | awk '{print $NF}'` +CORES_PER_SOCKET=`lscpu | grep 'Core(s) per socket' | awk '{print $NF}'` +NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET)) +CORES_PER_NUMA=$((NUM_CORES / NUM_NUMA_NODE)) +echo "target machine has $NUM_CORES physical core(s) on $NUM_NUMA_NODE numa nodes of $NUM_SOCKET socket(s)." + +if [ -z $SYMBOL ]; then + echo "Error: Need a symbol file as input." +fi +if [ -z $INS ]; then + echo "Default: launch one instance per socket." + INS=$NUM_SOCKET +fi +if [ -z $CORES ]; then + echo "Default: divide full physical cores." + CORES=$((NUM_CORES / $INS)) +fi +if [ -z $BS ]; then + echo "Default: set batch size to 64." + BS=64 +fi +if [ -z $ITERATIONS ]; then + echo "Default: set iterations to 500." + ITERATIONS=500 +fi + +echo " benchmark configs" +echo " cores per instance: $CORES" +echo " total instances: $INS" +echo " batch size: $BS" +echo " iterations: $ITERATIONS" +echo "" + +rm BENCHMARK_*.log || echo "benchmarking..." 
+ +for((i=0;i<$INS;i++)); +do + ((a=$i*$CORES)) + ((b=$a+$CORES-1)) + memid=$((b/CORES_PER_NUMA)) + LOG=BENCHMARK_$i.log + echo " $i instance use $a-$b cores and $memid mem with $LOG" + KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0 \ + OMP_NUM_THREADS=$CORES \ + nohup numactl --physcpubind=$a-$b --membind=$memid python imagenet_inference.py --symbol-file=$SYMBOL --batch-size=$BS --num-inference-batches=$ITERATIONS --ctx=cpu --benchmark=True > $LOG 2>&1 & +done +wait + +fps=`grep image/sec BENCHMARK_*.log | awk '{ sum += $(NF) }; END { print sum }'` +latency=$(echo "scale=2; 1000*$INS/$fps" | bc) +echo "overall throughput: $fps" +echo "latency per instance: $latency" +echo "benchmark finish:)" From e48ff96ef4375f3d7c505e152b73b1f15a8b7afe Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Mon, 9 Dec 2019 20:38:43 +0800 Subject: [PATCH 11/62] Quantized Embedding (#16691) * add quantized embedding * add quantized embedding * add quantized embedding * imporve lint * change to ksupport * fix lint * add quantized embedding test case * skip gpu ut --- .../quantization/quantized_indexing_op.cc | 192 ++++++++++++++++++ src/operator/tensor/indexing_op.h | 6 + .../python/quantization/test_quantization.py | 46 +++++ 3 files changed, 244 insertions(+) create mode 100644 src/operator/quantization/quantized_indexing_op.cc diff --git a/src/operator/quantization/quantized_indexing_op.cc b/src/operator/quantization/quantized_indexing_op.cc new file mode 100644 index 000000000000..b4af3ecb704f --- /dev/null +++ b/src/operator/quantization/quantized_indexing_op.cc @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2017 by Contributors + * \file quantized_indexing_op.cc +*/ +#include +#include "../tensor/indexing_op.h" + +namespace mxnet { +namespace op { + + +inline bool QuantizedEmbeddingOpShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + using namespace mshadow; + const mxnet::TShape &dshape = (*in_attrs)[quantized_embedding::kData]; + if (!ndim_is_known(dshape)) return false; + const EmbeddingParam& param = nnvm::get(attrs.parsed); + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_embedding::kWeight, Shape2(param.input_dim, + param.output_dim)); + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_embedding::kWeightMin, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*in_attrs, quantized_embedding::kWeightMax, mxnet::TShape(1, 1)); + out_attrs->clear(); + + mxnet::TShape oshape(dshape.ndim()+1, -1); + for (int i = 0; i < dshape.ndim(); ++i) { + oshape[i] = dshape[i]; + } + oshape[dshape.ndim()] = param.output_dim; + out_attrs->push_back(oshape); + out_attrs->push_back(mxnet::TShape(1, 1)); + out_attrs->push_back(mxnet::TShape(1, 1)); + return shape_is_known(oshape); +} + +inline bool QuantizedEmbeddingOpType(const nnvm::NodeAttrs& attrs, + std::vector *in_type, + std::vector *out_type) { + CHECK_EQ(in_type->size(), 4U); + CHECK_GE(out_type->size(), 3U); + int itype = (*in_type)[0]; + CHECK_NE(itype, -1) << "First input must have specified type"; + TYPE_ASSIGN_CHECK(*in_type, 1, mshadow::kInt8); + TYPE_ASSIGN_CHECK(*in_type, 2, mshadow::kFloat32); + TYPE_ASSIGN_CHECK(*in_type, 3, mshadow::kFloat32); + out_type->clear(); + out_type->push_back(mshadow::kInt8); + int dtype_out_min = 0; + int dtype_out_max = 0; + out_type->push_back(dtype_out_min); + out_type->push_back(dtype_out_max); + return true; +} + +// storage type inference function for Embedding +inline bool QuantizedEmbeddingOpForwardStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 4U); + CHECK_EQ(out_attrs->size(), 3U); + const int& data_stype = in_attrs->at(quantized_embedding::kData); + const int& weight_stype = in_attrs->at(quantized_embedding::kWeight); + const int& weight_min_stype = in_attrs->at(quantized_embedding::kWeightMin); + const int& weight_max_stype = in_attrs->at(quantized_embedding::kWeightMax); + int& out_stype = out_attrs->at(quantized_embedding::kOut); + int& out_stype_min = out_attrs->at(quantized_embedding::kOutMin); + int& out_stype_max = out_attrs->at(quantized_embedding::kOutMax); + bool dispatched = false; + CHECK_EQ(weight_min_stype, kDefaultStorage); + CHECK_EQ(weight_max_stype, kDefaultStorage); + if (!dispatched && data_stype == kDefaultStorage && weight_stype == kDefaultStorage) { + // dns, dns -> dns + dispatched = storage_type_assign(&out_stype, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + dispatched = storage_type_assign(&out_stype_min, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + dispatched = storage_type_assign(&out_stype_max, kDefaultStorage, + dispatch_mode, DispatchMode::kFCompute); + } + if (!dispatched && data_stype == kDefaultStorage && weight_stype == kRowSparseStorage) { + // dns, rsp -> dns + dispatched = storage_type_assign(&out_stype, kDefaultStorage, + dispatch_mode, DispatchMode::kFComputeEx); + } + return dispatched; +} + +void QuantizedEmbeddingOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& 
outputs) { + CHECK_EQ(req[quantized_embedding::kOut], kWriteTo); + CHECK_EQ(inputs.size(), 4U); + CHECK_EQ(outputs.size(), 3U); + CHECK_EQ(inputs[quantized_embedding::kWeight].ndim(), 2U) + << "Embedding layer expects its weight to be two-dimensional. " + << inputs[quantized_embedding::kWeight].ndim() + << " dimensional input is given instead"; + mshadow::Stream *s = ctx.get_stream(); + EmbeddingOpForwardDnsImpl(s, inputs[quantized_embedding::kData], + inputs[quantized_embedding::kWeight], + req[quantized_embedding::kOut], + outputs[quantized_embedding::kOut]); + float min_weight = inputs[quantized_embedding::kWeightMin].dptr()[0]; + float max_weight = inputs[quantized_embedding::kWeightMax].dptr()[0]; + outputs[quantized_embedding::kOutMin].dptr()[0] = min_weight; + outputs[quantized_embedding::kOutMax].dptr()[0] = max_weight; +} + +NNVM_REGISTER_OP(_contrib_quantized_embedding) +.describe(R"code(Maps integer indices to int8 vector representations (embeddings). +)code" ADD_FILELINE) +.set_num_inputs(4) +.set_num_outputs(3) +.set_attr_parser(ParamParser) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data", "weight", "min_weight", "max_weight"}; + }) +.set_attr("FListOutputNames", + [](const NodeAttrs& attrs) { + return std::vector{"output", "min_output", "max_output"}; + }) +.set_attr("FInferShape", QuantizedEmbeddingOpShape) +.set_attr("FInferType", QuantizedEmbeddingOpType) +.set_attr("FInferStorageType", QuantizedEmbeddingOpForwardStorageType) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", QuantizedEmbeddingOpForward) +// TODO(Xinyu): a temp solution to enable GluonCV INT8 flow, +// will be reverted after the improvement of CachedOP is done. 
+.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("data", "NDArray-or-Symbol", "The input array to the embedding operator.") +.add_argument("weight", "NDArray-or-Symbol", "The embedding weight matrix.") +.add_argument("min_weight", "NDArray-or-Symbol", "Minimum value of data.") +.add_argument("max_weight", "NDArray-or-Symbol", "Maximum value of data.") +.add_arguments(EmbeddingParam::__FIELDS__()); + +NNVM_REGISTER_OP(Embedding) +.set_attr("FQuantizable", [](const NodeAttrs& attrs) { + return QuantizeType::kSupport; +}) +.set_attr("FQuantizedOp", [](const NodeAttrs& attrs) { + EmbeddingParam param; + param.Init(attrs.dict); + nnvm::NodePtr node = nnvm::Node::Create(); + if (param.dtype == mshadow::kFloat32) { + node->attrs.op = Op::Get("_contrib_quantized_embedding"); + node->attrs.name = "quantized_" + attrs.name; + } else { + node->attrs.op = Op::Get("Embedding"); + node->attrs.name = attrs.name; + } + node->attrs.dict = attrs.dict; + if (node->op()->attr_parser != nullptr) { + node->op()->attr_parser(&(node->attrs)); + } + return node; + }) +.set_attr("FAvoidQuantizeInput", [](const NodeAttrs &attrs, size_t index) { + if (index == 0) + return true; + else + return false; +}); +} // namespace op +} // namespace mxnet + diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h index 828d761fefd4..f81d831b562b 100644 --- a/src/operator/tensor/indexing_op.h +++ b/src/operator/tensor/indexing_op.h @@ -58,6 +58,12 @@ enum EmbeddingOpOutputs {kOut}; enum EmbeddingOpResource {kTempSpace}; } // namespace embedding +namespace quantized_embedding { +enum QuantizedEmbeddingOpInputs {kData, kWeight, kWeightMin, kWeightMax}; +enum QuantizedEmbeddingOpOutputs {kOut, kOutMin, kOutMax}; +enum QuantizedEmbeddingOpResource {kTempSpace}; +} // namespace quantized_embedding + struct SparseEmbeddingParam: public dmlc::Parameter { int input_dim; diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py index a371abddd22e..527737e03cd7 100644 --- a/tests/python/quantization/test_quantization.py +++ b/tests/python/quantization/test_quantization.py @@ -517,6 +517,52 @@ def maxabs(a, b): check_quantized_fc((256, 2048, 2, 2), 800, True, qdtype) check_quantized_fc((256, 111, 2, 2), 800, True, qdtype) +@with_seed() +def test_quantized_embedding(): + def check_quantized_embedding(data_shape, input_dim, output_dim): + if is_test_for_gpu(): + print('skipped testing test_quantized_embedding for gpu since it is not supported yet') + return + + def maxabs(a, b): + return mx.nd.maximum(mx.nd.abs(a), mx.nd.abs(b)) + + data0 = mx.sym.Variable(name='data', shape=data_shape, dtype='int32') + embedding_fp32 = mx.sym.Embedding(data=data0, input_dim=input_dim, output_dim=output_dim) + arg_shapes, _, _ = embedding_fp32.infer_shape(data=data_shape) + arg_names = embedding_fp32.list_arguments() + embedding_fp32_exe = embedding_fp32.simple_bind(ctx=mx.current_context(), grad_req='null') + int8_range = 127.0 + data = mx.nd.random.uniform(low=0, high=input_dim, + shape=arg_shapes[0]).astype('int32') + weight = mx.nd.random.uniform(low=-int8_range, high=int8_range, + shape=arg_shapes[1]).astype('int32') + embedding_fp32_exe.arg_dict[arg_names[0]][:] = data + embedding_fp32_exe.arg_dict[arg_names[1]][:] = weight + + weight_min = mx.nd.min(weight).astype('float32') + weight_max = mx.nd.max(weight).astype('float32') + weight_range = maxabs(weight_min, weight_max) + + output = embedding_fp32_exe.forward()[0] + + embedding_int8 = 
mx.sym.contrib.quantized_embedding(data=data0, input_dim=input_dim, output_dim=output_dim) + qarg_names = embedding_int8.list_arguments() + type_dict = {qarg_names[1]: 'int8'} + embedding_int8_exe = embedding_int8.simple_bind(ctx=mx.current_context(), type_dict=type_dict, grad_req='null') + embedding_int8_exe.arg_dict[qarg_names[0]][:] = embedding_fp32_exe.arg_dict[arg_names[0]] + embedding_int8_exe.arg_dict[qarg_names[1]][:] = embedding_fp32_exe.arg_dict[arg_names[1]].astype('int8') + embedding_int8_exe.arg_dict[qarg_names[2]][:] = -weight_range + embedding_int8_exe.arg_dict[qarg_names[3]][:] = weight_range + qoutput, min_range, max_range = embedding_int8_exe.forward() + + assert_almost_equal(output.asnumpy(), qoutput.asnumpy()) + + check_quantized_embedding((1,), 1000, 256) + check_quantized_embedding((1,), 1024, 512) + check_quantized_embedding((32,), 1000, 256) + check_quantized_embedding((32,), 1024, 512) + @with_seed() def test_quantized_flatten(): def check_quantized_flatten(shape, qdtype): From c573154e9deb3881ee3a64a9fa0886248622bacf Mon Sep 17 00:00:00 2001 From: Talia <31782251+TEChopra1000@users.noreply.github.com> Date: Mon, 9 Dec 2019 10:20:03 -0800 Subject: [PATCH 12/62] Link fixes5 (#16986) * link fixes * fixing broken links, added data tutorials back into gluon package section * Added autograd images back in to the tutorial in the python package docs * created relative link for symbol api reference * renaming data folder * incorporating new folder-name on gluon index cards * fixed gotcha link * Fixed relative link * fixing links on performance index page * took out data files, checked and modified index files for python tutorials * added license header to multi_devices.md * link fixes * fixing broken links, added data tutorials back into gluon package section * Added autograd images back in to the tutorial in the python package docs * created relative link for symbol api reference * renaming data folder * incorporating new folder-name on gluon index cards * fixed gotcha link * fixing links on performance index page * took out data files, checked and modified index files for python tutorials * added license header to multi_devices.md * adding updated submodule --- docs/python_docs/python/tutorials/index.rst | 2 +- .../_static/autograd_control_flow.png | Bin 0 -> 21635 bytes .../_static/autograd_control_flow_grad.png | Bin 0 -> 20585 bytes .../_static/autograd_forward_backward.png | Bin 0 -> 63061 bytes .../autograd/_static/autograd_grad_req.mp4 | Bin 0 -> 71224 bytes .../autograd/_static/autograd_gradient.png | Bin 0 -> 39559 bytes .../autograd/_static/autograd_graph.mp4 | Bin 0 -> 113606 bytes .../autograd/_static/autograd_head_grad.mp4 | Bin 0 -> 59549 bytes .../tutorials/packages/autograd/index.md | 14 +- .../tutorials/packages/gluon/image/index.rst | 10 - .../packages/gluon/image/info_gan.md | 454 ++++++++++++++++++ .../python/tutorials/packages/gluon/index.rst | 22 +- .../python/tutorials/packages/index.rst | 6 +- .../tutorials/performance/backend/index.rst | 2 +- .../python/tutorials/performance/index.rst | 19 +- .../src/pages/api/faq/multi_devices.md | 217 +++++++++ 16 files changed, 701 insertions(+), 45 deletions(-) create mode 100644 docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow.png create mode 100644 docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow_grad.png create mode 100644 docs/python_docs/python/tutorials/packages/autograd/_static/autograd_forward_backward.png create mode 100644 
docs/python_docs/python/tutorials/packages/autograd/_static/autograd_grad_req.mp4 create mode 100644 docs/python_docs/python/tutorials/packages/autograd/_static/autograd_gradient.png create mode 100644 docs/python_docs/python/tutorials/packages/autograd/_static/autograd_graph.mp4 create mode 100644 docs/python_docs/python/tutorials/packages/autograd/_static/autograd_head_grad.mp4 create mode 100644 docs/python_docs/python/tutorials/packages/gluon/image/info_gan.md create mode 100644 docs/static_site/src/pages/api/faq/multi_devices.md diff --git a/docs/python_docs/python/tutorials/index.rst b/docs/python_docs/python/tutorials/index.rst index c130ce607ea6..8e2362e4ea5f 100644 --- a/docs/python_docs/python/tutorials/index.rst +++ b/docs/python_docs/python/tutorials/index.rst @@ -92,7 +92,7 @@ Performance .. card:: :title: MKL-DNN - :link: performance/backend/mkldnn/mkldnn_quantization + :link: performance/backend/mkldnn/index.html How to get the most from your CPU by using Intel's MKL-DNN. diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow.png b/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow.png new file mode 100644 index 0000000000000000000000000000000000000000..804950f8b5bc4cf966cb490d98035ce20791b4db GIT binary patch literal 21635 zcmbq*1z42Z`tJbJ-3`)6OE*e~fRuoAgLHREw=~j?ba#W4fRuDgON*3*ca8g;dv5)o z=icYe-fo0F%=gWT_x;tHh}TLo=qMy85C{ZaPF7MC0)e%IKww@VA%LI2@16~VFHfAs z<RfuQ0*|G-SI`rU&c3b;sVxv1HjySN)UnL%ugTpVodU2Lq3 zpSqbjIa}G=J>_Hr|FAr@ba8PIWM}`+6WHvXEZ9SHzM(=OPa$%WFV#IVk5;`s)Mq_x|xcj#yI9Td=XQ zV+<{F0{vZU$E4@_Hl!sbx!(_6VXl98s^vt3`uDyCgN>1d7l}U7yUHO`piN<+*@26O z-AF{my1{=MOX3h=O<2)#pke;!$&xN=eW}H5SgbA2TjKFKpjSXwq2xGI&A^C*yXja* z{+g7K{F$}IcBKVdLqp?JMaAbbjdF~;-|0No4!;jdOnTqL^PhHH)Hv@cU07eQd9yPy zGplH5grw?wJ=2$x@^Vx~1qL*h;1UJjt`aZ(SgDc&LXO96rP4&KMStAil z={{wGS2VNVf3iDV?2do>^y!7Cr@)FG9|605^wQFjQCvhQF9hic?1#eP_ROWFw=DV{ zL=QAdR-;KvT z_=$nPVtB9gV~Ry5DhVPpzp3a^_WOx@(Q0O9wre7f|_f$~{h0Bpkxl_>j+ZEfLrR2(mQy^t$e=t_!s$h$k&u7%RTlt6N?t z(?tsBOV-t!jr02b;Exb;0T^+0RX)Rw9XB|A#Iq32JtG5)ftZS?Y`x>B*9En;Q}->k zwb*oYbTU5o7NQr#_nVmh968IAlZJkkrKMKi_HeM{MLOn6xBduTN@DgmxEU0}4_nA* z4UHApz;> zBsL2*Fq;lU;CY|Q%loZLvIzbTUi1+L5&xfcDxV+Yuk*uTpE{}b;iAV|OohkC}*vFt`W=-R@4$qcf(+cNj{g_+!{&%FA zN(@fU&f~7{oOj21cTx>vJuh~94_v-?-JOuS$~?oy)?QzAW#Wc$v5u#EPQw})QL>le z+VHGr8 zh^XYWWjzzNE}?WQ0}=WS-^BLI$c|t+aB{m|R|pHNzfay`s^gOg5b5`$i0!}8?+a#L zq>~9e+vH#YQwiN&Dl6Pi(1qu6(MQE2vadW{YT$?Qio+(w^zf>l2#6d?|Jy@{+5x*sGOrW*u2zX&6?(dW2CRCwldkQk(32*J7if`AZPI49wBIQv5GeoVZ$W$R>aJ&MyX=oY`ss_H$4^D-`4*8T zG0x&B6tl5cl*^SL@cHUsp|vscNP*@e{9 ze{Ph1HOrOB1x9zr{re5g(Ngqv)wJmGMr)w~X@7CBMD2$3n;vM%PyEpj19yl}Zp$1i z<`mc9amu7OpPQ!)Cz`!SG?%h%39ar%poQhjxekS)pg{~+oQQ6Q({$94X*E&t9> z;_g-NyJdf6SL5G|v%F8ctXEFfxwLFEPJ&0D*D?<36`8_ATMqZLrmKA|{}*9HFe^?; z??|z_dF+Jl(C%9m!zN&Ky;p8Z;k)@u#lTMG(wa?t=Y)60Ap=OoqA=dNeCMg+3l~;+HgOvrf`$z@Q%Oa&5Vt5|`z!f6Vv z`}eHk<`cn+-9J^eZ9n!P6#jYB7tgIV%4y7j*jhT%qtl6ivJwxs69>N>_#qj(Tc?7B z+m!pWb9;`^kPR3~{rMwzs-`U)kIRQzH%KjVg>l}1mIU3UmMaL{fkKDWo0dBfcEh-g z3Lo3$860&XXJAnhzIACK?{S@p_5IJ#aodSiYk~&KFESLUjIU_3>0hCwbfevg0<#gR z$`IMvF|OhivE02ML&}$5R)&(CoUGOOZq~@aepQ(R+3XAXR9f15v{*krq^_Z1G*hf7tDw+N6DRy0pZU$k z2t&>j$c>L^$K&tAb0Q{eZ1Mp;R}?SqjSg7E$1S2Io6p8E%M#uiPh2+o#ECVuaVeK& z7lxp%5x;pngRdYzA9iPF$Dw^ytHa~@J8SDOk=;z^uP@)eC4p>2bA5ebVqzj3DlN%P zFOGz?)>_M8xY64gA32v*Kj_vl8~^HR1Q|weqqo;t&OZHl?N+69AJ__Xj>vY3ii&4< z54%Ht-IU<3pzY!+3kRaIwe;NcvYlSA0X^l<|`XUAR6^WB;;kJ02s#nP+#j=i<UMod~?;p^UACk~$6l206>{6N-)|QX23T9hPZ#qJlmHC_Ak#e*q z!;NFL*t>(B;5@L+y%J;G3vHtKeBJ9}Hb3hB!5Cp8C&`-ny&~5uL6?W~{afd0Iqa;g 
z@Q|d`)UBiXi9w;$4&1w!xY#h3=%_x{G7Ie=;`!Mu|yxNUq z*d1p5qkup?Nn&HUdU#N~Zt8@C;-``oOD#tl=7>@C(wT$LKW58oEvKmW+hXYyQVhRl z3TU-D(d~|BgekCh;z5yKPY} z@dG(?gX2xN-0hq~s?77w5-B37qD5pb*T|x~Cz{(zRf#%0y&@6!jV~T@^1O0R$5v&i zF|>8E7Da?h*Z9Z~RiZe!Wy8%YI;ZoUs-xOHr1xNog};(mbId2T8`>U73$k9Je^gst zD%h$%Mwm9+XgjD~O-p%KEuxJJTK^PPw*B1MtHfAtVvlW8OtJ*LM7BR}HD2!9mgQVd zWLmnMtx=B!VzVZ<@l_Nna=&-BGKaGYpRGvL#*zNTI#XEWkv*5oG-~!@f>SY(zlO~? z3G^N-PhJdj1Wm2hGx&ksM|M~>y$YIQh6_L4hW{56OA8XV-7)l5{vf&JJ z{2W9ynF#dkmWzyEAc|_xZf$uct{ZP@DJ!*H)8HcnFe_xG{fK%vjs~ka0Dv*eNz`$Y zY`f`AXNWaIiEKfk+>wAeqK>zV@KkOu9gJAE@6ouMj1vn30~FWQ8UzK8SSvfxmJ%(E zv_Y#c#DxiVohXyK(^uHEXm|}MrsU1M-XiCN1QHho+gnchB~!o32YA!d()Cb*5~@m1OxB?nY^L~(seb5rz{R{s^CVy z#F44z<2RdTGl^hvI%w+A;#FpE1v=wo9LJhcGcerS#6|j%x8%MaJP@cAc=afoa8txU z`u7el#p^>ENWlasUqyTIw+d9Ier`TJJ&g>`my+UqEO>gAXW3!B;-wy#GcWUgr_!;k z2?miA9``TyEGnF<>2W=k{!0BY#loi~{N?NC{kCS)$`;#$sq@3$D@yl)PY*+RgUbt7 zd7VsN4a3h(YxkMmxkr94C3b>VtMUa~e1r5fbgCGVGfe48c#xOS>fMC}%Xbpq@JH*- zNu>5&oJ35?civp&dzUl3u+u?@l0(^+e}Nm-uWi!k6V3Yow8Ui#|74q%pu5m6BE{JE zsAQgg+-i5sdLK$vh*B{Q;tR~;2Vn)1)?f)MN$uotDg*2(ya zi(-yB(<&^IF)U%%DQy0n_v4x@KW@E7)TxMmR72nI6Xv>{`&cC>#&!8`6qui6gcQi$ zjVoPEpdHX%XmfPTmf%|UYsi&}L>%^_j{Ld(A4kTa4dqKqu^EGt*-5h4=vs66`}rm* zx@BwSD=0dAwncbJqJI$n3R#qUo~!DMBBxTa-=uw7a>bppQ(Rj)FTr5lU?osBt_v(B zHf}VRApm>41*@Nt?&t^K+3Gb7%m$&eNZNPe^<)0UVtG*D=e$wWVSH-GBDkEl3`R&4 zi2N+8BYElSnvV1PK5HyPPMGMdfk2U8SV9c@=Rt|+CEU&X^@!2PAu+l%8?~awO2NnJ zVwz>-6#?e1cJ6{$f>3XbKn+SMruPwG>H(~4gihRa`yLIxy&GxTk7S2+Ib76dBitG! z-{89%w>CTrw<<-IQVOy%&kQ>wmGE!0s^Ip9@~FN_ z&daY)+!g6()DOzyFtTO5RvT{YWQpE2@Ysp`=*=n=BOoVsy8lW; z9@9E_{DJa^^7W_v*?eY$^267E`1-i@#8?wQVP z6MzvEXrbD?8){eip?JUJU)X4J9N9Ag2ubt6?W^g6+CyvQ5xVp4RzOu`kpio2Pz&f- z9F(%**3+L0SlFin{JEnX)eRWb0(rQNtk#?C7Ib8Zn1=FSYjNto4tqjEI7L%ZZ{Csi zNbHXMT}P$&PC223IMJyW(D?Z}i-*h|R?s5Sc|B=o}>aUWZjVS|gEo z)QEC^rOYPk?mrp&63Q+@_jezW0P?YNuWu@NmX|1!_9LT1VMPm5vYCYHN!2<F$L- zCYyLRgPa}9K@x_}$Kb*UyS_+V`eKc1IYzj-@tusse9LN+LLkhBf?5_oRsY*&dh3U8 zjI#sB%_dueF`vrH#Ldj;-c-JIU6`lz*nk(z{+4OYWI(5t=e^>@cD0-JN!_zlcG3_r)4R`KPD6x+@Zj^Yrxe&WU)j7hiVO zXE*K$%QtX#nSti!as2FdV5hmYv=rH|d;NX^h-syz2jOx4-y;|tdf^GvCRmx7g?>2C zSLsCoMGv5Olks$(M1C*ox-yw-YmX{znPM(ZDUwlVzGH3;uXQ{8(@WH`SBb_X=5e!= zq?64SZ66Nn%xy>C56H_KIcooUDxIdoue$j7AN)S(DB^xFU8L3K@(j|mQ+E;)8BRZR zB?`ArB8}eQiyD4292RUEFHg`(z+8#K23Q%JJ6LQ7}uZ(~F0I z_3abDCrw9i#y)Z(GHX_S?(n>DI4DXr9*D#zA|mS1(A0y6qzSq*@!Bi|baV)>IE}Ju zwKz~gHs*BfrS$dn|1O?i%k=2oOVzzwknHq5DKE6Q{?VT^nPNuQdHv8|!ZmkMeiZEL zo;GRCwv|Apnw+0U)X=~cNqSy;qMV=_kB9W6!izkeqsoy+UWpQ&h~HNY%a;VKdcH0} zR=@7U^%aUi32&bJU$o6Tb0)}C4!64;?{r*Qm=bD@x_FlT(ddGq>|%OmhE;5ixjFc( z-JHBzU^ZwXX{kU^_KGu`@A*BUtYlLz5v*n>zH|04l2+Ikelr^16$M@a5Ki8+iXar<(%%2vzR^so@VYp8 z{RDuJgGIM_8@_u28$+RQ;5NcnVZ$wLDjrCcB%f@=v6A?;F*reP+tfsct}c~3us_eM=GUg_vg-9TGY-Vv9oJp7!WH$WzeTu)Lu@TJ>f zed=Be#&F>@KkIqNcdDo2y2n zko#`nNez9kxt%a@^Bv9(ddrKq5n^p4OsL=5)dQBd;T}vNPN*_^!{IHivcoljk4EafQn*pJShdeE`8+;cPu4}~t8tJ)p z3Srb|0CK1mx{o<+1*2ITbcJ9w2}HS6;ON=X{koohy4Jd{vS zh_aoaAy@qAD`P5iRo0HHqB@BdSiXj0vkzOm!rxx!4%WKUPJ_I;3E{B3**y! 
z`uRJKw{961Cuc4r%JlR!ORJJ&KF%sis3oJfE_yg)sgHeXWVb63<4@U}V;{^F0x@gc z@Kf~za>}xWB@@Fo<-P}Aq(+rGmV%lZj;a2q^6%!VJHeH;*9$ip+BE>_{^&`OiHa|=v>DM6n3IYN`?x7dU zptz)5g%1ABQ3@7<%f^Kt@p{e6>pyw<8mU@J$!llX?|-Wvz9C^L+R(p zJrp!u{Qc6IdW^>onH*l_vr2VlsFC5r?@N!}j}|qqhi?F+V2phK{ynTNP~&c~G_`r@>a*E(lYCzP(W-r+w(B=BNw4+1b1#o;5QXX`w-q5`e0 zYWakRynOqhPtj4fZ|WKF~4YZ#MXnj7yX*Whc=Fd2&`LwAfWkdtq$Y;{E@hlmSt6~!BWS< zpjW>dPkYWOS{GmNW<}rUd;mL+Wn^@;-z-%xbd;@qOgJ$%eE&5&xAjdgA`>m-Q0X#P zcw9e^_Dd#H*0va51*(r9@S(l7Gv|ubj-Q9DfM!MYgp-Rav1ythd1ak^aa^T3cWTn@ zVw{iiJjgO7JZaq=5Dc@Ud*%Q^+y6ieGe*O3QW)~9q*2thn#G{M3zxG?95?ZJ%Qgf6FM6n zx%xyKV_wI{z#WRl;=5hzzAe~UnK!Mx64NWXzZKUceM_i z;@`e~GX$FSu1T>{R-_Dt=acJo|35|(*S9_%50~>pztFK&jE~TXZjR=#xM5aapL|ZJ z+iq)B9wcc7$f0~4r5AFx`jIVD(=f-7Sq=v@zPz$`U*MY=uYhRF|8C&;dldVi1g1#v z!`(IJT}M76^i>^?8|RHz+guACi8J`@rGdNuWps4Yt-aRc+!z=R7~W$IRx?l@#ZNZ! zyiz<#{x?scvOPwDUPQ&>@AHan>p`yy-xYOh!4wb>FnLsvezzi&JqP(OEP|m5y>^1`-?P;a$h^KcYgOmf((^yJlH88q(&s)= zu{I26?K2(2wQ}rr=aKq#^iCIkCjp^%2zQMlu4Ra7HVsvk1qE z6r9R$oW22b{dR8)H|OTbTOFPAZ)(a$Co8RKTozwj9&x(Y;|2$%FSfvcl9H05Z*Pl= ziXZ`i{AK!HEqd%_xnrQ(0o!5##2{eKXc6;0wA+;j%2t^&an8CgI@6NlI~2(C`A*My zHy#{%dQzLEem6dNq>#>fUX{L;ns2=P_3J4!GxJnG6?L>x%(9;QYRC|UHyY@2U0oC) zY(c0b_TVy>BSxud;Cq?&nDBqfJ)~tX&(4N8@Hx3sDdK@5m#&tV?oZ9uAt7KA_tYJutQ~VX__PJ{@Cu5E&v1jr*wD}*i5U`(K?M&R zSm=$}TdJDBV1d)0X2M((qUfe^16y|*y6uX+g!f9qZjpPR;{NRDT_4(wpdt@Y6t@m%JpJ=VO3HquQCi0_ zBV%iegZx7^Ydn|io`ula&9hSMrGOv?hUlHF6M!`Mj>37VuNLSVq;7pghwGw${rdIy zE{(e32_=)T#F2E{S)=`3i5V~CJx;hWrxP~NrfUUT>K%UVgK7erLCs?LO&O=Ro;jVI zIM|?db#)7x&XcuAjP&U?_m#By&s!fP@F%zrhCc+Z=bT7I@HMhgnk;INOv8)T(?zG$ zfUS?A!$VCV-ot^>L}+eFQ%C=3jx(k5(DriRn6eCn3C4vnm6{g6t()>MCDmZT8mw0t zI>!Ybh$1VGUo4ejI|8mtLPCOt5`rUwH@aD+T9@#x{Ws23OUJk3&GKdH!^RV#k(j8G z1?wO7b!3&uhe4Ml**244I7yW*a(irZCh3D;a1aF-ThZ{oQg*%aqM65osaEu7f6{U0t89y&+RjUjluoZDxrE^9Y1Jn|FK>LhBr%8Th^$6X zfYp_cJ&TA2@;My8T{oPIIQ_VpT_s2zw)KiGVc<0F&|ebv#Bk}hn2 zjgOfz#RWQH`3 zbxc0>EW?vxm=~oPfQ$NiTydnrqN3U*UX;6zvLvYIJzR(yNM|qQ)u?zWf74G)VcPDo z3>%uMA*khcd!+>L|86G!6gFFIHHs|bA=6Y_T22o6eeY9=Wun^e|3oYx)IzA;jS{^V ztb$INFOG@zZ`3L@mn}n2n0!sEBN4AG9prj`-|DLuE??|zUwUmqSsL1Cy(Vpm=@4LP zkN^<3IgjcjS;5#?oasnQw3^_+VV=^SZ1zG21RN3&w%b=-%ZW<*PoPdMuP5=IY-iO&W~ua=?#f(Mmy>f1N&Dv3(9 zte6xwRTPk5VjK`zuh?v(LU$ufJ*(qL$m2Uz#vf_flJpMYg*de80>1Me3zjYQVKB#% z1>_#iP!yB;9_OmI%9{H)AS-SisOz$u2<-&`b>mC(P19m34ZD1gP&-G%!^5k2 z7~)?%O)fSa+>lPnUpty}yxqmZgS+|7&Cum8EU_H#;_lv0dNe@l?8{%(KqR`T-~98% zQ%KGPq7{dYJTF?7BwjE0&G$gFy_w(>-=7a;Rxn#}PsrfAP}$zzexaeUy}R3ITR%yF zhA_3Vf)+Ov1XgC(S6rwHEDvT?b*w8lP+<)zPXeBeE$N&&JJLZNBMeSTN-Bb4Fc@@M z{;aP?Ai-J3j=-0XH%&m$vVDmKOb*IX*Nu<26fGW1ec`|F?(!H>Kv>L{+yfv(kUJ#9 z;Kfe+6eM!5NhP_b`P)6shLX>64Wh_M{=BIUB(}oQ7w$J{r%C@Vmtuy8k545ew45^Z z611D#ygV4d<02^JRm0!tYTJVtCYZSqc-O$uHMGt3)%Xh;KDM*`U>WTgStO&i!egJa zHX|a4mKBY>G}v5nEET9MYdC{qg6K&ev^K&sQC3!Hwlq`R=#`a~jEsyNB}B(e?Ch8z zbD$z1frHBu6S-y z5qzjoZc7W15(E2jZI7s(Dg6C0y*y_qR*PH4`#&PNG&9Kej)8<4+Pa6_3HwvZ3ADYN z4!HoB^wOjg>CNX^k;K(SVw1WD<@G&xp?}iB)hof_wOynQwLsE$>!+Fdqv7 z@@%<5=o73g&@RE&iJK8p3=0-?!y5A(gi~-uT8ZWG zV66S4BS`}TN_lyC8?nl?tSt1@(SiQ0t#EHK5G-vPwEbU|e?Hr#;QFtEs2@Lm^Z?8$ z)Yle3*)$<{w%xB;QLcw`10ZRWVvZDOkA`eJk+o}PDTE@WVn{St(`e-8w7)PjJx#Rq zJpkQfME<)|^1LhgXeztRe}cFmyE&FEvg|Vk0u1NFG;te`cc+wBD__4VtpMb@&|rm2 zD<|&i$^$aEHjCB~mUTqp8;7S%TJ2&uOVGM{(Nb)yoe z*Q%<0s|AwJnEuPqjrmf874(ge#coRcJ~a3f36<_WJw1oV#|K*XvZJ8EVv2tHI@-{t zT3m5M@X+e!6$<4A$*CFc;~HEX#;bfOMQHlze+%Re-mie}ULfjzywv!qs!AHXgSiTA z=c6F{x~Jne%Y9XhA7=|vVPq{0TUHecbA#~35&KD;5C;60R0rSV-7?VaRhiW`EHOZcneB_X4R z_3uifXs1w(`iyVISg#Dre^+(9xO=d)vEdf8v$2tslN(8cMsYdv_HgK;a@o!dGHCKx zmyz$SU?i5cK$F{S*@~5rmX^*BF(~HA0{qIfStO*>;LEjLb!PVyhi35 
z_J^W_v=s4-^M9eLKc{#e@A}IM_Rp;}lE2-I$_9eI;7nA#6Y-KXj_oZrM%ehoVyifQ zmL9*$G5bKOwxh{N49inCO*p_xX3I5Art+ouFTbfd-tMHD{F8wxYLznrq3SdNXZmd4 zD{U5?CbWya2}H>D&dz3vj$LnG2r889t#}<)4v&sT4b?>8R4L%M>|bI%PFdxCrrF%# z^5y4Q?s`c(2lnjb>T1mH?rx!0Pfl`jNL?MLDX_h~7$r@yRi9$Xwq0ZCZb^^i@~}9Z zlTz6n@^FkZY_g+ve3G@;G2@;}A=cK`ZW6(#B0}riajIPn3%NgogLn7&^c}dC4!}1i z5>HhA8~N8nphkU%MAC9|iM2+F#ZRTzmqY0Be0FG(7pI@&sa(RXfuo~iei*kASjgk! z;~&h3f&DW#(;Ipn0i~l9)?!ES?A}U$yY-k{+E`9QgE7*Wx47}{mvMup zPJ@@Mqzj|B!A^L>Fp!vaYk8srFuuKtKL;47aHPXCZt(Cjzolhi&bkj2J3OhE0K-B~ zo;Feg51qMoFFBTRq{8~O<1sIMIwU1Y;C!^zj-MPf4dk$XEb%l&*JH0>e28UwDA47B z?>g4Y9NouXS96xPTbk~qQ%WfJ9*|n z($7y6Y%3e`7i#bze+ggFf=w5W1>4+J{hgtm24;J0(Qbt!sM1Dy3k(5}v>o_ugm_RS z_3!pcc&{w$MZ03CnQKpzrt-Vc&CL3jR*5_*L!Y z_Xo?wjf4aoov;b@&p_gUsV0$hoGV2FJ+V(r2)%H$$9~V#VH59>DOM6G@aoEqDAWjn zSBQO$P&~J}nXs(P*w7FH>URVMP7Dkm!-wQEyO($}?QjSLo!EUi2t&@poz>bZ(fIcO{xMhV@ z;MBg#$i(!%f3wEc5?A~w5n8&Cw3)WKWj!(P1|2k$8yN&LUpL>Eje^mjl8|!FkC!Q< z+13rltVC$yNJa?c?ajlM*47gK{$jwm9MiXzl*AoqzLR`T8{FgDuA$q?gR_*PA z7#o@z6h2K!-F7rnuB$7of(@%-dY~OPG&dJ~`ZSD|gJyd*$OD46nM`!)rptg(+NfcH zC>`UuEc6gYXcG-MB@h!61MZW2u6q1y8#Ig;HZ?n&yq|}qg7bYf*b=96l58;;NPZ8fH4P07TmtRO6f;nh=)ANf zlM2CP#;Uv^y!p|@0rHJhFaR(UcIovhB0*6VD=X{p$Ov?(0)|7MwBv~9wzd-Y8e+9o z)vwAVd&J?95FkM3QB$seyWPoh%6S_An)a0W&vj8|`d z{L%wJIuLfPKUQZaCwEUyNIsL7;8tqGy4gfTVOu6cwgn;l`|>4FwqO-!CYP2#au!CC zNxikTwP`i@Y48gf8N{p8b+L+<91^711E|b)l3SBt-*^f|gFv+dM9VWD6%dS#8plh< z6mSj(3k?CFJQP$^T#-zGnETh+cOgRpuZ~#5^ zr(RE+S|@+^WOj0a$1_Go5CSMUcGs46?~{fDp#|NU6miCT+jJaT7*xt{MsVP7v041TF<<6fu;GW-o-hd4S zdmX6QJH*ekC{I+idn8_5FHqcx_s{_{u^ZxXw z2%ro-n+`R6q}bXxVBB(JY)QKAW8cFW1(GqGStcXqFzr?k#}}5A^dDZPTe#3eFBjZz zWNK>4(GVXIHkO$D`S?o%i2j1SbY5K@PWhWk!J|WPCI={5Jm933t$>~}rBmh_v z)POml60r2VhexfkS*-Iw|EMbYnoz5YCOVz3j~|QR(^uDx@et9_CJE3knwoftLSddx zziKWl{5)6k6co&qloTWM_oyf+3Fa{r*mOB9D<6T>`EzGfMMnn}NO>Wsg!#qA2!Bs_ zUEFNN(J(L|E;ri`fxN+a=czBIvgj=>#Bea07%I>Eh|l$jfS ziui29yy(=m-=?SEJMg1vr;K;mxS`vq{Npcx<_wIz2nq~sJj{;_%n=F$w*TitUY_Rz zgy$jbJ}{8817-ieQu|_2y73?utk|t=?~p#~X6N7Nl}*{zcOkA-KqWrp#(} z&sRwS_*qt7UTZ#{PUOpwgysemm!IoYD?htXG$0^t`#mN^bZo+0ZI-d0V{&RL5TNf1 zXZ>ozfq?;FAP9u81it_%b!mBde4rC9cn+Sv z&}dBvcv~&7xS&e(LY>*D(dqAhgICA|W+67Q_y!31y*X0Fnglsx!|@RcT~*VGr#K3c z`MD(}hzYcS!wNlnA5&}v!WlqE0PqSVG+v`AqH`Q1!nwaXc;(Yis#Sl}6`eZ@&!8 zn8{ZG6bfb>XvJVn>Q~Rt<*|AVGGcj3q~_#cmduPyN^kuWum8xDCFU>sb8!9YNwMF8aZ1wPOs+uGXh##W1XU*&?+Yar_nf+ch?5=g=| zG6Nl&K{pm9fz)xqmlq;h?q#WYw&7sFL7)mVGCwV6bY!IV^5hGE1kj)K8cyEpzIpRT zm25ytvXmKw-v5J%Ctb-EQdoL31)OV~cI2VqEGS`t9$aJr8G8`8hw_+e{|9SGk9Bot z=(2q`IUuw^$G{Xm3urBfvE&Bl#(Z3Lh^k$s%4|T?&v9LjVw`xg@XN9k^*EQz;48)C~H4LCvu%{FKq3{qd2afOKI8I1z#q4|Nh zpsh+`hWo;s6Ado#1q2vzP4+tC5#W6@rF55USkGj7k)+sTSf__Dt2 z>SN+fM2RN37N@JW`+2o~b=V|%98aX|3otzaJ^I^Jb*jw($AKQs1frU@HZiRnfI=J- zq;!G=M_ba+G_qVCE6dq&7RnsYJ4k@`0S?sM4!ONTr%ANhr+xq%Y`88{eJ0GVnY50j z|G=e>15GFFHFf?JM@xJqSfb|-BzC?AFH2B7$a>r@+8nmf2r)xtOxtSy>KMZnRRUxf zWI*w5$`Zwc!moU=bU_>0-Mj+Eft=%{h&Cp)cY;^5?OZD|x%Ea_JBqat<*)7u%+d4$ zg*+?>b9Q|lQ*Rz5C&rjER{Iq}_Xe06jCEaoJxqWGZzQqzpQhJ!0Lv{4U~z(QtA_L2 z2EbiT7VUJknU+OAkTHYp3f4V{e$=o+=#K?L*x^t3uM&zOsg3&5@bRzh1D0V}*xff%KC+qes|nRe@c^kD4+Y33X(dq$o%Y(` zo`Ad`pO|RU1pD*WN3Kyit^#_n7Id@WpTStrv%aTC>aY10I(NmQOjm@KVr!zU%q^?5n?Sn z6!h};&a+C_)^q*`fKTkz`yv%>_qsiW9tSf;2JzG%t4tIXn%I*th|s8K%kdLJ`ObzZ@9Yye6B^YUg98pywAgdhpi7!d z)qav4Bxh)QF2uzHm+|kK2q+2+R;i@oN6vw!WJandprvD@A*g6+SyI#|r=}vNj>4BD zrc&3jL7Ca+YywWciLBuvC7KN(;g_37WVX!ge(5q_UtdVT{k>0pZAI3^DxP>QaLq!E zfK4@e^o-CpTR~FN1O$N`XTgDn#q7<%gGEN{6++pbF0wDBE zQ80k^PoxkgF z=>mI?Bm*N~WDW?q0kZ6M01xQRhxx84S`FRn)L-O;!q=se%jy{*4E4c!?5@J zN|S&YFCU@AAdKZ59zk)z75P*Tuh|zouFu#-C{Qol$f4jlID7J^4Gq`~{lDQzV1tf@ 
z>$2i6tqn4E`y%AJ{1i1bfXxSifSm=AEVBk}K1|VniLODa#1+O6D0AiY z19kbcu|q+@5;@1&tAfxHQl1Vmhcl7AMNbSLt1{Ho)Zw#{5zW9US8_NwI0!~l9axpG zHD>Ry5#z~6iM_?MfvgH z9>v!bjzw4(3p)CE{Zu!|R|^qGa(?@Ih8u8D&?cz4xMnIOlT%WF)}!6^yU>aR*g3Rv zobH~|cY|^X+!fd9>!*Btud$|v7chdM=Xzd^Q^EOL&YrKpkh3=*h7UHn5Z|cRMo%OQ zI_d|arS?Daxl=s}#xlS80TNeWZGs4iin{ufr%s}l($a%QQKto!92JuDt^jKf*V)7> ztKc;>Hui$^ZM}*>@yEj>yAoM2dz;FLedIq-VA{8l@*M5nTW{W`V&&Aahh|Keu7{9zaf%@DJ8%-;>>yn>rnT{Z(0_2%- zS665d3?yXVyh$Pc0*$v!Ph(iWAH_ur9}N4a38=o(inHOkQOJlDYqnaLaz9huc7*H^ z4r{5?ZmA7`Q?;;tcB5{ZJzZq}3l%)TDKbo=vejQh z%U8_KUz02B7$w<^7ywFSVm@#L9U(|30XP6`Pitxwhk&!-e>yq7nhW``^Mvgq4)Qy& zP=^~-%#J57o&(YjTAs?AH|B4?qV%1YPgz`(LPe z+zb?Rs`bH#DCxdtG%?DCvx}5D16s5A_R2fGvmQq$FOwc|!ow zPGPQ>?)+qqt6EPHG`VH^jR0i_jZc>?RX~IJZ)6W>-C(S5wSbic3e1BiSgkAPa*|2; zL@PExz=Q_m6q;k_+tERceX110|5X<%fAxz&22`3qx2#^jlJXhB+Xdc{`6M8TGS=3I zHHvU`EEzMI63ohguRv#+$?9A<^cfz?OAU>`1OEfq0m{ml0JM)cnt_^GU5zQ4Gl6DU z#{v|Ltr~rYkdP2ZQZek9Xeh_yXju%#f<^3$f49`652xehVspUznvnlq{`=xV}C`w$D$wR~ft@vJY|%AVZ%Wz%mLB z661^BjwlaWFE|oT!-E56z;32QLpnO*JJUDV?zTHV8}cT9p^*Z2QBTg#%78Ohd}r)W z8((Y%Dz{R}jIJ^PN(eM^9XG^P(bizU-t}jFZFNvu`^1y%&sT|(BIOySI9o$`8|8@J z?Cky%JVb*}t2~i~Qn63FIE065>u9Z!s552@$E$iju`tnxwuC-;^4T(z#6txq>LvQ~ z_DFb@2!}pe0?U%jqQ<61vuq24Z^VLG#zP)yVP8)#LSOB5Sv($hbBI;lwN;}gj^G_y z`~s^+hJz@2`GOMviGg~#4qIdUr4naq0ewp( zez^7!u0{SiglS*Rif7Z zTO-#V&xG2?SER@)cQ0M%E;F)JE;$tqBSJ3cU~Un~v3Lt}&@rY9UX#lZi8v}sPHxj0 z!%|D9B<*!6m#oFmkWCKnZ|D5=UO(@@d$xU^>+ktJ-{1H9H8E*vdiA1L4};olg2meI z-o^}V=<*Mai!02@QGf}U{{68(2BH{*T+p7kHP2W4_4s>?vYf&u=}j{C^(NZd+Q{_% z6njS5WeWYdA@fk80(X+jx3IjPsZ(*Z?`8uQTFRP zN*+kk=aHV!UImc#o~4V~hmuBi6tT+7QHU7~qq{FJctY&7;zfv`egtMQDOOr-CJUAk zS2%CfG{s?k2)jTVmB#aqc)i|T8X7?&N}vpTb5Ow)l&c{G=fBqJ7&QS3 znX-y=$*vuKMOGI%Q;Wj!Y?~mg#`w?0FL_tdWeSg_)#cJm)a3#7^1gW`ofR!wKCwAB zbu6T!qQaMcArk~V-g6q7n!y19DWC&+H&hpWkRvJZ#4vBcB7tf8$)*Ec4I7NeX~+qY@vDCKZ578$Gk(S^z1dJ9H^w9%{c=J@V=t*{v~V*Te{N9zS|9GX7}t8yI*x!A>i{s z3Yo5?XR~ntN2&79$7*DfkCr#j1=oAeB|IkXVZ+u<(?yLK)uE}CAP84Ay?sjJI+@#rBp62cKY)PmXZR;A<^ zcR_xl)S8=@v=jMS@)eS~X^3lK^Q33&?Qi6P(oivl|8}})CNZ}ME2!`Dd)we`B&)Pi z?Vok6C@OHECuaOs)USt&CgU&Lo11AO@746)bcyN-H}LuGE~vkn2A$#ZOnOoVk41=m zV_D%*QIo%wA46@r7JHZ1;~DEED*=VpZNTC!G(snFJm6p(ct<;J?SPSY--?g z>ip>5^+6lTT{FN&z@0#N`BgATJhK)Z(Zppim9`J%B<#JahAI6jw%6`xJU4je z%o!#>R@m=FxZT_#D;H(9DeY+FaBH3xE-QL@A>7wkjR&*{%h0ve->>K85x-9RfRmRK zi9{;8^8(qk5CUUK1SGt~DxON&nUK-g*oZw_uHGA#eIDm#?5i-A7p4RH@0>~mHY=o>Db=M3hR*$yS%5(jC!XeL z9)1rr(Luo^0Dh;JW`+S@*GcR5tNvh~3Q0fUm%r53b_LZxNLY-pu(TY{I+U=W4;b+V z3AHm3)B(1rR^E81LzNzIg_TuR*Tc({KQ)EN)g02HF%XgO88>QwXmWFP{adpV zFwejC% z`A!6&0vv<%8A|0f#uz# z^QHjqDzleXtWs-|n~&!S{N${oi8?G%Uw^+HV0KHx7bU79So7O=I3>AG1lS2MtFWoi zpq5ax`qHTcRiR*PM4OJ^zK;$5Pu7{iWkoFD0{=iU{hr>gKeHEc*QlrX9F;H21cy2f zEIF~ocY?>Oe%M)pbtfFh;lwla**NgOk*EdrTuLW$a64Zi6 z#zDZ$YmE0-!|*}cG6y?a>H+8x+x#)`L^h~(ALITRA+vv6;Kno!7ZeU;a%%#Oki+;vV16c)e0cyg6DSNV54!jbeJlp z3qPc8JwMhdivaMUIeK(GsA&262g0CVpj|yIo=`&cmf2(vOnn62h6Aoe*Z}N+k$Bz? 
z$cJEjl>pSD+noy>!vf5UxlmjH)05rGauQK6yqCF*pDedfQb8@;0BAhiYvE_k+-z$r zN!_@$iazawrjRJeP1bmDgXbbW?ut4%S0 zD`AK!5*^GwysQs$W<@w$6Wp+}k7;WD_7{CnR2G)~=jCyx^FLl9yAxZYij7o_RMfS?b$jXWc*>Af-c9{{s z>+X3zpYQSi9>3r5_Z-jhJd*o;zh3wCx~}s&&-1z;=;^4Do?$qHLZL|2uPI+gp|Cwr zD6A`l`0yuq-?zHqKe)FQ)eQ;ZE0EAW2EHdkU%PP|g(9Xx{>2)a4myTEyyT-|>SN&P z;B&{u`xeUG#s}@@>Eq^X%jS2>`?j;E2b-uc{3FQbuxp3is{(`Wlx8sGVtS`hU z6dOui`KqCR#@h5}-qVXX-< zF)@7V7wahDi<<~v3Vxcy)jFpNKYh$akAa`Yq(lg@=vF!nax|X&UZoeY#jSCh@a|sU zpT5I-(|;Zh6_V$C@IBqL+;djm-qF!+=#~04P0jEQ+t5!A?MIu2!amUS@+)b zyN{P+;o;%EDJp7jy!-2-*Mf52bBUs{uBT6*<~BAmq4G5z#Kbr)ix_pdj}#i!&4m#M zSP1a(p-^AHeY2TqynDYi8QEWkLM>{HnVH$v%)QO|wWf@jKYaAM=) zs3nwV%1%)y)@@mC9JGV2eo7D9XZYM^ISGX&b3wsQChWeEkN4(gOXDjaie|yHvt%G58+|Us&i`hH4#ZdB@40+XmJGk2{xKwxU_YYBy;k>e z!fJ7C^0~i3>3h3N2M>$+U|-+3ulucH7;Z>9bvM)Bz?514x#Iux9xWZJmYH+^Zg7sj z9TMF?b{d%3-(Z{SIkkU%yQTJuM!3$a@Ud1Xa+icHOC=09-wd&thnOc-@CHaLH`yY; zgNYZuP1xweo?kM)^~ezv~lKc%>J#8guaNszw=~M*5`O&V@qdfRyUPh zrS2$XsqUXAF<9@k2CN@t)Zc!hNO~&F$ ziH6tV)~dwO*{qFMJQJ}~Evxm!)KTWDCMe#xaqi>CkL7`f-itN6t7CSPZ=LzhpKq1h z$51E1sT`#6{Uyathl zZS!tjo-+g8g`4r!u48SB+tzz)6VGJt2=nM>GOa@18KkIsd9}ShfOJK2?tAXUwagR) z3x1D(6G<~`?X6zAcvb}#6&00-x6b`q8t9s=cD1{x2uJv%4?B9XW^0>OD!!J}Br$NF zA-(TW+B1iQsqdfC^^J|AZV$+vHLvx+Lm3(wwJvVc?{E8Gep6T|vo>4tzp~9Q5o$_( zZ~vDX?T#ffXI@a1^2@wUzN}vv(`)E#l_v+`T;(#MTv(ahkb68`OKt*^~^N^ zou2iha!Zb?N5?^@509x@CE%pAPdN%PXv`9~@MC2Sc+Efi5~?j~dEkW#E*jiA-X`lC zSTYpoP&K()bRq8Xwj?!~)*-sFdh}k{5D@u)2M~3Sxm|WYZR@N!zZ8;T#=8rnV z{&T9%k+O%%`b8DF-=Xa-H^;XUwrl;*v5LPXtLUU3%DQCc^*v^p<+Sy`N0G>U%I9*8 zZY18k%T%%%*B*fYJWwPbJl=_*IM^Mf5XU;UjVI-H9z6A5EWIv@gWQBhK-m`;|JoojCCEVi zC)LnWl!eXtIQvebBH0SH0PCXdkuw*&s~>HvJ-!PV4ebqlOu#NQo|Y{kwZq z9IuGq^8ZOv(yWa>O3oB%i>>-)+rS>$?e=%U8w`rrVkZ-!M4m_U2At%c=}ioo$2TJ_ zu{RQI6^1yIWlY{xET}rPAlcVzcJh2pM;RGi^n__nj0hBIeG5#pd25zm^jLn$ z^9}L-n{lre{HALt%#7xWFAMaaOM6C2B=~RE)$c1Q(BEDynU9@VR6x%?wh;K*ubcfo zY-)-Wsbh0*QNbBhS)@eM6?3W%e+!JrZkYoqu{^0F%G58O`6671t%FA%$6AOFRPiyE|DnJ8p)2TP%S_fX;0{BaC8nk9)%(nT_!R6l&vw z7yZ=>`gKK*AwRUb59X-Q%lZjA^gSC~UBA${g{`5fsc&W$YoK!p&nj>`(rfCK`Xh83 z5pQ;JF$n;*_%8H=7^jEFFC44ae-dkH*3LyPGMAeN;FochctQ2u9%G05&A}GX`J`@xAFXf%x%87_r*YW9>APbP%0PEukWAQAMj?zJP|PALE9@U ze|b7}hu`$D=}bP}!4U~>>@eA>$n81_k6I7%`J<4Q?Q?V%@{+>4csZ|Z3TI>P%-%n% zE4NEC6ZlJeaWaaOQBO}V1aM$9t{%8H$nLG`tdlHxH)FO?rSPDA`z zM|kwUrl40vz90T{{|BGNXm+^Iw?&TEd2uo@GK$!=;R;&T)5KEAr{@Fa?tOYe&A@=V z^xgO8TiI@sji)C^NH3sr))E(`XGD9}0(~`U*AtSnsa5TY^k(iBRs*e_Tiq?iuc1Y* zh2n3b$$vC8{pzcU&XakVGAA|_)2H$C=#SD*SzM%Zuw6YOW4Alc0&){?k_9<)5X6?zl)oN@x;5BGb(qZ*(&JVhphbq|7_*XoQh+&m*dO) zPP-lAu1KoCD=vWT=t&k`LtQ24|Gn-cQgZG}$9e}p?L8S>@3ofc%T)=>F$dE5nOfU9j z(}m^1kePVo?6+iCdB((a8%9vn1(_29OiZ@LyD8K9;Jn&$EX9LI-o?){_~$2c*XTDT zJ(*iEUMW$GNR9h3%<6akVAzA!kH}PRIyAVK;ds7I?X~Gvhx&Paddt9aNO;yWWzx#Z z_GgmTjL@rOcvKfL)b@Z1jgFCNvl}n0F4ld{Tb1D# z5Ez`CeC~rK{7zLBXFJImqTW+`E`1fc@8)+yjM|s|t`R+m1QiQ#f_;w+=V35cizVwVF z&Tw)u%pg^c5T!@^*=zJW$|Vdt|EuZNnitu)U^uyZ=oQrN?(TSVu!4_eQroq~B7xdU zq}UZ)=ipGhuQ%7HjTN|b_F1M)>**(S+oh0`+@^{NH}pFTv@}z>sp$na1`^op9GlV` zlm|WEQ8m@XTI!FsTneb2YnI)a$|v0DC0q=@oV@p6WRMc24%%hsVCaNYb~hd8pwXyn z(1WP#sZ6q1OWroYu=uBgDlT=5iUtiHf?wJVqj}IXYFKVRf6%X{l$Z8B(C=Rk6kM)P z{ByLQTT>%>eyrNJ3lP^|*b?k`veQ*BeYFlWpEYA5i^*z|$SPn|M^Ui`#P!Kod6}bn-NW)=hbJj3xV9ahB8z>EO zZ9WzePfoNbsUYmk#+K1+4}ogFW!#C>Cok7ZY(QPEZ#@rAiZ zy7Hldy@Zj9-Jd>noO8?w?G{*k0Pj9Gt?4$G-Ix1g)^u5!wtgF)Kogt6605|U#tAt z)P1~KgTLK1&A*YswK%zrWD-3Z?wq6D8(yntu8djUVg6vVrzpk+M9tW`Y}?CE{8$65 ztm&WeBc#~v9URCCKI4}!sD7u~jy0^EN;v$r)V}l!3x!mgjDQ~nB8>FVrq29)!7eye z(1Ja;V#h3f$8bj}ePBuRchw{ZdtKRMEw+W(xhanZ(hKhoIQpd*Nu-&`>ZUy~K}YKq zOh}j8_M$=YU_BA-bktixW}!#%=QAB4uAaF$ar&pIwzf9*i8%3zB(aw-xff;^YG;EO 
zb>$DafJ<_5aoO5xCtrMKm-yh@IJD*=8-FAtUQf|pLmBHl+7{Dw%Tp7gBeuU0HN3)< z^0GvBbgG+#<3y-owX^}fT2gB}UGM9^@ouTyx)r$cO;PWO%p zM1yY9H~4l}3brq9v+?rsIt(N`xrypJ3^}4#Cm$l@)jSkbT-4*okCB3CTH|&j>4>Q( zMPPM@QBMD zK0R-rdG9eTvo={xjP#By{&QFsw?9b$HJ8UDX!V{6!cbV!0~7PD_h~NBYXeK191I@N z$XNG`!U~-V<9j-X;x??G#3U13^|o>ye@{h}{@a&RM%1>hK8=cefu`MzJ7AN>G_Kn z3B|>NNWAzjCP@9>=YJ4`Utmc)KIt&?;t}NVt)jB_m+QeA_J$w$-eO%*QzL++c7Qq0 z5t}J*QX~v@dLMI}oKDab)i9kH6@_1CAq=6Z>dR(iWTdKOi`x3P0pFb|ws=RSdwCld zl?_FmmX=nZnUj)Kp`OCup*a>J;|ngHWajdRW|en~E!J}Hbz1=!D^0QHch?2FxebhJ zX=;l7en=L5{yIWFtccb5x?hoAe>=R@O2EpdprF8mvr>W)Rv)q{+t2$K<@obmg!qsC z#6TC!+}0&T!0-M9>-PP?GEDwl>;+gJ0ahR; zC%1*&CP(%-QeRBpO%keyUa0?M{M##!=zS`Rwq3gGo`a6KpAYjLp^qh&`+41?hw6yqk zSf?Fef{`+?aHy}szU-+4Q$$T2~M)L~w!JmGL*x8kqQ)3sjX-GQH`1Wp(Fw;}Y zpaw(j%D(l`=`*Ek87xirD&srUI%Tcoc z>DdKXoiivWlK8CcVrPPro7;y7BIhACFOpuz1nW%EaL3z}#a%zGj~49oqcVEFghie^;QQfPcNSa5rL%n z5*6tuT;Hch?`6?nyY6?0md4w>F3~hLrbDsG$=x?Cc3n+u(CE?PYMxc#Iz8{5z(}dG z0PL0~Bu^c?y2r*TJT@v7IQd`qFdCHsJEB6toM(?T2 zX1yZCjyWs|yVW)BH9Y8@6Ro6V_^RQp-7rpMO!rslT5H|O=eO_k`=BQL=I&76I<|dF zD%tyzmfL)Zd;OvLg5zO7dQ+chzTMZ3nN31l$<c5=B8z ztD#Xx3?--SNuYy7yqG|mURFAFed3ruM7f6wCmCPgdy_SlRt(&+*mBjv-b#L! z?d<#e{=Zl9y|z2fvR0IOpM>KSFWUJ2C=UNQUC!gCnB-@I!I>uhgyeI6pIg^kk z=D0`bk;1j>>Uf?~|8jK!cJu7(Io$@@2p_hQGZ^o;%q9s{?KbPPUE86t=7A~c6xd_# z#PhlhM&n#_?-rR3_K(M-CAEjM3;CJI2twLf{pFdE9{KZ9Lkq2mMW{e&9kz+peTj|c z(-Z%Azyf+?9tL=*r*xBBv()R|clO>XzA~|7q+1X%DkMf)LC=hG%@*P=S#%<`FccCz>yiPA@R;TR`JK;7b!(NqdRA6Rzkdf<_)XmMKbViUyClhx6XRQ5@chqJl)225 z;h$U2aTs-d$YcO_l>C|&O{J{`P&UXr|2fqKZ!}eAkztZcUD3ls_zW@eT{;ITq&`q> z%eGma{5;!uw9%lt64;Z%E&c7W!B(%;iKs(3C&v#+6Lp)ic)w+4Y z3u>0Ny*mPKhw0KjJM}_&N-lR-;*@2~Dc=~K!*S#i*M>!ij8;v!T|OV%UCeQ^PNUf3 zBDck3TWvN`mKZ~&N*F33!BDz6n)jozU(!hJHHjPZRPu+P(huoW<}#A753+BlWs)=+ ze{Uln$~|QW;EcGUtxbxu{@R^P;@&ie)o9N{c^V~bT8`p<;X?(4E&)B{HhA+>JNCe-J6yCUQqXgcuq{0@}T*@feqb%maxE zhk?ula)o0GMZ(L!mvg1}#_j&@Fbl{awd;iQ6-~+21+hH2J%Kci429S-L6zr2pdW+S zFBZ6M>qb%-sb*U{rEh9Vs68CeSNIw0Z}}~Ie&E%`9KDz*D{O4X&hg_(4)NuBla9(4 z?BW9RKgLcOUE53Uzb%N(b27o~9r$KcoDbPN#x}Ul4QprG(&xrukGRP1W?b~-Wdff% zS_FqOZ88d~U zO>W(F=;fW*K4d!kJ>f04puKYL!k%$>6ziz~(_}xrO-Iq|ZE|UI>OqWt# z3SlF7pUMk8Rc_PHJ~mor#X8bTF}U@bI5hvgiQcge0c2&2`Oc6cV@>8-oA2VcpnbZ8 znpR`XNtszkV-mL0SV$JF>Gu9PvR5AtC43dy&PzxILMPc_{g*(kAuT zp(r34?ix>_&?e{8)U%qK&ffHANANEuV6~*~&)!tRYC%B(l(NK?xfmy>B9i60!*q(1 zTG7VD5~%WrT;~*inJ%)p{YF~rnIv*gcHGp>>u3;TQ0OCNSr2;8q@$fz6xaaUpkZ=y zcmH-zN}p;~tnQY*JsT${PLlj@zMBCn+(_?HSjbR23u=Q1GaDtT9G3Q6QUmR=%~NbE zk|{%#M;dNS!6;JcnOMU%Fa0xVno?IFNzfU!Fq}G=&#hKtP}!PGMw9N{VS>bWGH< znMuQGzxeoAkqS>Z%AMbF|7`6Ske@mB(L|*;$*6X>OkU?r9Jr?CL>#H;3Rp z)VDtS+b5&xQix?xsFyI&Dc|h{r#lOI!MARx-n_{ou5ITON98+baQYuF0HWSO)^Hsy ze(+5?#))v^bGA*-*ww%nbCO1b_UjoghW93n>nDol^I7rqSg{keYv-O&Gttv_z0Zw) z^!)krWp)yx%VZIKTnl$n2XoaH|Lo6pA^n7iUFVe)E`XDr-fqf|?ceXN93%;0AxFAw z3NKM3c`JCB*}vpV+LxE4(#KehWut`$+s$n+*LNt58+SeXBO)a&$Mm7$QqiaSd8|xx zkFvP|X(PtpI#U~$nBlPQ@A;@_x7vW9@+c->L*e%_XCPD~)!GQ? 
z4T6d|8NU0lZ@ImWHGi+KM@ib+z6LDbs0s@h5XOw zwGfrQMi@{aYEP<~b9|q1@eIm{w5HU+7GS`b+esx_!k9L1xDPH|` zNO30eVOm-bKjYv&?ULvKvs4@gYJdFWLPOT+&Npr{b&>h0DitL4JX z?*#u@wW{!08M+p&s_gaIJiV|(#Se3_wb8vWr)RWeMpO|Wq421wEG)l>lluDlQYc@th0xq{F4UZRO$ij>y%oNgpGom6=(Am=JIm;qe{C zn0f*d7OO+7B`7@;6R9~~L?|vU77r>dE>;c-3M%{SXE{0OQrN3A*3(0w`1twTGA`ZT zI$X-yXTZep$wXLF3H}AXLP=e){wY$^(LJGLW(;h_~cM-hN;$bHnFI* z{tR6lUVW(@Jof16FISXt?m%5NxGh+BCl-fWeP-L5&Co~%D6j|$Ams9X?4~_=f~P(F z@X3=~v25zf+uVnLPB?=e)In^cP_lP6K6J4LyBU5w*-i?!{@$1N7w0N`#F}G}%IOYV z9rpTDLvgc|v{qMvU!@KDhnq#lpu!m%I zB`u$**XruK$?|;<@FK3Tp)AJ|rqdiIU$5*2yj+!$<)G02+7%iITPH`?_OUYl{?T5C zy`D0SEN~R1RoX_o6i~Ga^!V(3Tp(KV%y!E7&OA>~$BMY~@cBz@X$+Hem(a_0Htf$y zb9+@;e@JeB?`s9e5}P{J&7i|u7J=Ix2&bkpk>@}f7pPe*M`mBsVPViYN>92qdhXu7aR8cIm7t6A7Hv6nx!h=vq1Zqm_+O6s;0S2 z`qhaJG?U~6;no5@LPajOLjm0Vb~x)XDqYG`72G5@@2yvxCMLz;XV*#^DKI;Cyy$4p z*EX^e*A_b|`;5;l}26X(y^sa7uo#o5CSER@YhCS86@s6*8F6_P60_<>+B>7atZmw!~{rS z>N|Rbn2akxkjkdE&tIhL#1Fr!l_5$es&(Gv3}7}|j5Day87=;bPrW^3%Ci-E7S zjSH5f>4NW1(I&-D?w7b~1b% zGL@idc#PAvX}THsnp%!jm#(OZm7Ud8SHBt*ByWF1@>V(1#<+bZf?;BSB?QrsJJpUo zM;=-N&1Rx`0!G=b>m~TLv~_kl%!thcqc0g%qjR(DEL0oFd}6-?pC=R+LQ?1)(8+)F zG9l(S%{D_V7*K4#eSAVM=0F0jIsN|G#=Ex#yVR7Io3uv;QX zvhJ5I+UN2->c#>S~$#*CEu!Mo=$iZ3akZ%@bqT2%S#*RQ~16Ub*>$P4FW&@lHjIv1|!9$st zT88%3@w`_WNZMBT_)Xl*b8>2G5O_n_)WtgTswhcRrxzDIy_J=f2P|Zi+!gZ8omrsv zgGG~z05zM%JNxnsJBCgVzIw2fY(@vq7{rYvs zvjFj`4Yg#S7bBhLhrYHW7AR0~HX48FSoqIJ9;{lO#@Z#x(V@10eBz)dS?D-x6@Q3& z5XAw&y%>@!^_1am9G5zPHLl|zg`uG#D!GjIGx*_%)Z3lRnXo9|=Pyn~$^BG5+#K-x zOwf@#A`FbzxHF>W`oso4^zddetyWNysuRGTb9aT&LX0cJnG3zrvK(YxHKC>OZQO`< zC!ndjw^wrq?L^_9i1bTKbuR$ykhm&`Z@sYUA~jbs9iGT5?B<@|u&pWh5m#8KN)Yk* zF-@ICsWSO_4&dp_r`9ns7jd`79>FmI{|e4<-Wkk0&DQ$}rPbn&=keUSOG3p@y5v}S zIqs8#5-0;cmQuU;xF|wgY*6?*y;yJDxB-boh4NpUp-7vg5zf-DLm>3ye>|VaR|&<5 zM&*NfjF&gs=nH!Um@ARi-qzZnD$2Dss{Gwwlp`9{4H@9)(VsUZC6Oa8DmFGYLq2qH zcWyQ+$|7Lvq){=#Vy+F$otuD;I2Y2JC43I$ZIBsdUI1IGUYILpA$%5zzn}tfA2gVV zh{755Y09-nT8P-^ejUbV^QconTGdtAl>3bZ*4P~6d&6L^NmQREX4P=W%I8TeCuvBoj$KAl0nP6tqL+hE!4bnRAF+c8Q{ z6qCZc2xZ{?FHlCGYz3rI7dc9woBYiVZaccTjyGo9alm@M%FE-pa3Kk@&;pMzt(5j~ zx&O9pv1J1tOAKbugBY{ev_BKhrT*;FZJzJ_ zFM8eg9$(aE51j?NXz)J2IF|iH=u~3zww0Y*`Hcgt86aG-LqjU;zRaVM+gGp16rwqE zE}eo)eWA6!9T)K@e)#sn8}vqcg`mrz53%)%ejmt60&Aq|&bZWUAa#Cb0DomwRq~T3 zkp{YQv>-*Q#;a-h@WH`wN($@QJlOtxlU5Pc%;ef$^T+_*AuX$9jvGPornFSm*H;2Q z+)M79qB#gNh<^V0^XFUwglO&fHeFd*c!njZ>tF(0hlPbd9v+q4sKS5!`gPsGLR>}g zY0zRtWMt%hc{-GFsl@|8`nCu-JKpZfJ?3|kzE^bhFVXzGti4G^x2M9o*X=BrhstZ6 z6t1P?aSV(XM)p$YFXmW~ir`1)lUuIR%0^N!S0gqf#Utd2I zOg=(6U4G{;Tu{4v_Y!JjBWHQJM0>=tL$v=^dSQ(7b9}&Kvps8TZXh7>Oy70X$)|?f zN9=+c5p{-`%Pi!u!h|ef6_T^EW*6nb7TLM*6%m3F^=^B-^vBdZizbcxdST0ur{c5U z9t5LbT+rsBrl<%6Z$EAY3$DRr# z2t)e_jItoY*o~E1j#oL7Lz*@&yP5Fr_R?ORuCH(P_7Xfj+l3DgP$9!Qve3S`UAcW* z44`0zO*?*u-=y<4y(Fm;phmyuu!TD^=%w2EnVhHoZU?{Z&w4y#tAaiBHOd#H(;U(k zfsbxm`t|btd-;_--v>_1;65(^kA5tz0J{Iu(v@Lxi-48ae8R$=mA87)u#=0(IcO(W zhzDYY7U~`3692V|E>uLmkq1OhU#1)*3O%eX#V#u=%hH}_69t_RoIi#q=i@rUAn}`) z^hi}_z)3bTN`@~VyZq$^urN~hZ0?iy1jm7*pIk_`Tz9J>%yAD$AM3kKH^n6+&aIB8 zBn&Kt_kPoewJfDk(A_lAJGo1Utf{x6$9IN}n_NYoqn=fk_j5VzXWWfVH-nLxiM$^s z(4j(_!r|!^4f%?>J3%XBU|=9(`+?ASq8n{(vhuL_wqA-2I@e|i?Oq6847@EUhKQ&5Z~3T<2r#9Sw~$&@5%>@G3DNzg6(idb z%KFWl**tiqJ`oE2o5I+(Hbu^2ZUZN9L5$Dy_j(WcptBGDE*4*kfes4n?v$OqAQV`v z)fp@CbNQ-Mlap|MLIC5Qqg9HM{E_hG-1IXLrs*+G*9?3^yY>nWrW4U!IK*x4g+ce? 
z`w|IoCw^~Y;$Hb9?Wrxmv zRRet4WSP?V2T3D+Zgnb<%0O$G_mQEN%fQ+`4Ai6PSe|3+t@OHbQ9pP~ngUs>h6bRg zdb<>61yEVA27Q?Upi~Mnj#R&WD-N~^?r-&1RP+%C0ty^6>J7q4G0WB@9*+%?4XUTx zFb8wRt5@`K`;VSXd#Hhi$fa)kBmqAG{KZg2`OXuvZ)uc7t=3l;$T+^Dy;Gq*65^}C zfcUh*w&U#Vto|+$3b-_rotc3n z8m0rxdT0toQ?PlK+{kGD*J0EL;a*rGluJgnrxcW=MkF5gGSE*P$kW#P0vNmt+hGB} zK%t`l7bqYRXK187iQ@z^q^4$EMdByhitOy{ke3OzVgINB-N>f=2GFMuDib^@I~nn` zq)xzVd3pHI2STZo-Ux^K7zGAq(a7l_W+^w~`ArEo16ub=K_x<);|mj~r@(JiR#rl^ z&HFXLgJr-+{A1uqqbh{fPD-bI(ZtO_!Wu5e%*-rDmdPv^Aim~zFHB7m4dAQ3lv7BE z^h%v)A!Lr<6dKB3tl(Kuz$Y+1&wB~=Ns$l)#N5@ZqR}$FGWNr;WLlD9qK!`PLfAg6}u%$)`SUrQ-7cc4YvZ7a%e(NgMP zfRE&X&;mR0)y5t;C5!t<`UVEWxh0P}Y_EciCMR0by09<@x)nvjcm|0xDO{}%RqRqy zQiDpIqW>P5*g`<(AU|EMtc4&^v2!m6K;7&+>p$oH&boFAGECHmyKYzf9i7-HLMX5c z3wd@oOgM7 z>1>91 zB*ZD7qSxJ8;5PpsB(0eHqKCBT>eE8vPClpSaU(d@JBSzCJDFI z>s%}2vl1cmpTN=~`$6=G6}nokHr7jWkl+FCfQAkTT|KJ5j5vS+A@dsRZ-3Xy@EKC~ zKm7L8PR3Z~vL)f=TC0x7cVxDrJ;A_Yn?!4_>o?y?B&uYtuw0KY>siCY!|c7HOk`@< zYP6QH49K1!La9cUxLPvmi_Sx-e*{=6Af4_K#&Gi2fI<>Dws2R+%A`u&XWui!oE@U9 z1^s-KmHp}!t{SuN7xpAeu={5EFA$-wfaL=W$~ik{VgOZi{e>+opp$j_ya7t1Yf=5j zc?Wh}8Wj;U_jl7CkgVB(wJ5MU3$tt=xa;m-W-&B2W>7FOHs(-JIDL53v^#2E2Zpb5 zkLh}rC(!^-P**_M0Tdbz9=yh}Ipdo*6Mz2nM)gb^PH7e5$fSChZ23ThCzBD~(Ryol zm3_*P^f8F*rK>$7=T$ln=%&aQaau85!E43e<8U{rB9U$?8&vH>Bu zBNW+-Z#FT2!DihnYe4>p#Kh8<7UYzX+)vs7<-%IoPzXW58g#%86Oc4XHsr1_?zQ2m z#21v01K@|Vp?g7Ua zXtSmz7=10JzeVU;P;>CL6N9gY&ZkiR=J3L=9{f3Bxi4vMX2#b0^Pw{~IZ8r8V(aJ+ zfPyPJIyyks(--z6j-i!T zj2#zLB@pPgBcYumCX41=v0~KcLZ;v%THWaZN+|@FE!3>Di-WdkCf*q}#Z#$m#N}fd z4WuuSSq$$T9UK(LcG*XWvfsTs2O*3E*>8jB_T-)6CYX3~S`Mt&Yi=CQw%HzKd+$OV z9DVuxR2Z!R)4^4lu?m$!5R9N8A(L%@8&NyYIJAwQ=UpB1&8>&R9<{f(_ez1{nNv2x z01$;wU2KO;4g4l_W`e2cI^TwArBdHV;PP~L7s}Ad_p(m{ZWb`p;bwT0kj~#KPD%|; zhUF~3>GpIeRKdjBMH97&dE2gV`4P~oN=irp23#>SV+h%rtd5V5e?=y)NVr_@;Ul&z z|L2ln|1+@qY0zV$q`?$bd%gYBY=j;yNwC%NjZoU=EO0b_eY*?{EH(rWE(27<9jf(| zH5*lZ8c-3jLk6o`U8-#%GL&2rSO%@P5!m!zmfVKbx2E7XVog~&M_0_3>SZgLv@e|k z9x-t#DNX@_Uel^n;IwL|+_fI@2tcbf23YcC;d~-J2+*#R03l=g;x<1kQ2-I6YvR9h zM$mFNlE7pvkBfEXSAd+auSVvTy|1Lq6<9?jkW%OFD*~9wvq3Xr12~uP{0E+~!(oIPB-o3xX7U=mf5&OetjzxPstjVMI}?eR zIdh|(pgg%rnqP00%u$|0x*7ahuG(ttt`raVLY{n$x?0oDB(*Jt^;R=;b7+xK;FeA1 zRKq4T$w+Cc=ZUy-kzQQUY2SGOxP1#pUnn$(+rx6|5(3!0ei2(}z&W)noQvKy1=#59 zqRY-a`}<22TkMr~R&sEYevcl?y_52Q#zbo|bLlBG`m zFrf!@ZFXIW+LPM&iiD7ee0+S^#Kfqt_?wN*qp#>cW^tXY_ltocn9pr4%fW=2HaT8PnV7(;rlwsPM z9;4+@S6lGoJxj?yup8z!*S%i-rNo5an;0QT95x5?0X9YEsIN2=OgcW#J7i-j6nQD~UI1uA9=#!gE*SV*7vOB6BtYwuoS8|+#$*A2gmMzKBYk;n z4e}m9!3YZYJUohIMf9SMS3-FST~&aHjmnULJv?*AKrS` zm4_WA$?>9N_wbPTJ)zC9~$f|Y{s{OqGd9|Jl?m~O` zB2+xo{EzDsm0O@a9!UG%e0)_WSo?9x)QR_k)LtuNy1539bw_E-p;BxUA1 z1m1^H@12Wv9pkO;g)o3!ZW|l#ywW2ggdC2kDR%V~5g|TxDtLJSTf`OET9J;lM~`sg zI))bCkcnmdC|i|@py-sQRk;d&HgMzF*szCext@ts1byqJUN@mL9Ek3|zP0V=Db8k3 zWAKx@LLWjW*l|s?rT`f3t;4{Fw@0E}nwel^Q=I7(PsHnq6agm|w4jx_p@Pod-J|(io z^&dnGuCY1Ua37NCpG-OP3;BVxV^^m_vLmLpsfqce9wE4sI$gbi3LoUOgbo8pQL+N> z$`qId?cwuyVEgCy?lVbu2CxaB2jupdyzgD4{{WVHz3bP*K)A8qoN4NGhp{?f3N$Vv zkbk?Aaht*WLyf;g-yvH_WEP(OISORDrs4GahvBYEc65_8Dp#RJKMo2`as>y0Dp=E# zj-nLVp~Ql0eClwUMs zqqqVJ5#-ExSJ)^>BhgcSJ$-%R0eAh>b#>LiJCJmH2pj?HU&JZ7&myj(h3j6!|Ieo7 z*VE=>L|HueQB+p$GQy8cz9CIZjr&v-I9(Ru%@_#H3rutz%xJXH#I?I({{_N^P?k>R z1`{?+TaAqbkBZGd%(Wb%C7kMU~Jy(c9{Uu-=* zHrPvY0G@dns_l0{`9UpD@K!}e2F8%r{HY@WH)rXl-$67D;=weo~pb>+?m9hl#|1OYB@gq zRRN>xEUoK^vs|bagN>5jZ2W~@uO5qPATG9yLk3rXJqLXnp~&GyCSgE0_>(l50OTH& zW0saNO{#LEIx+buGHH0o;8K2i4-aYF51gHxu0VGWtXMqWPiS=@LzRQwN6I5(`;L^j zIM7t5voq6?%GDb-s9@E6)h-{VUO$PNUQ5|FF$%}xf;cC(-xLx(@?;UtDe6o)27{s4 zO@z5A^6@Zxxno1)&Y*OGWT>2@o=;35g~|e5oFQ`U!jLHM(xOEjOv2`Vntbf~$y-ig 
z*><#1>GU{%b^HF9nP*^Np!HcSpc{`ja`bJXk_%c(|IgcWz=+R8nyOPn$<`X8R7f7BC^ zubY*4NwsK%~Y44mYnP&b43 zay01Mnr7v;4nv;S8~}~c6pkb*_D}~WC%(1Nk`uIqsQuxl4~hH+xp3OfC= z#i{#n;>07SLm_y0{*O!O$FdogRLZ4{t@i&^cR4p0ATn+@MNKAH)?o zwn}HVx%NjpwmpF8(3{(%LWZc!}p@uGduDX4mkeKj91hOt<+vU|& zC1^Rcc&O{^>#-MMu*7if)PHVY8dcSn2zV`mnbAFt_q2H7+R5uaYfdHBi^qbMgB~38 z>mEH${)EDm$tm9kzFsJ#VdlD&i!?zLxITV<3sbe8Zwd;oLNC_+4D=C;20v2NKSNmf z398|6%wc-5<(v1*sw(O*Hfyj0UVp3&ubZJU;yR>NEE>rHq7BL_jN$-+ftZG&i3bkx zVS0Mp!h$V$sao75+nvN0evA~Mka+=Qs04DGSqU8yKe4jN3wB~&<(;FafA%nLZ}Bb+ zV8FX%2515e7WcOnc;T%%9FfR7Rya7W6Cz`x|G(cpE5GL-bD!m7QXNQg)}Ds3WNYd^ zwpKaRM^yL6A1%{p-=B}4BCqpVNx^V`Cj8$&`{#X0|NW>KAprqdt~-<(4i%157=OClo~fMgw9*?fZJ!AXaLSp#ejaW-S@EAYE1ky;PbUO!#PqEbWuJqXPUhuv zd+&R{A>|vsH7+&{1vk`3z1Q{1KYBnNgAId*DH-Oa?rhHV=)L}}{3Ff^I>em-xlFlx zJT|^Rxbk>wJ*L6haTW9{@L0VT(tFLZ`wd1byFY$x1N{bm-)pI#`COsl(-*RLZ2L1V znKt@wzu7zrl!wtGR)D-RQ@$zX=cR@L9y48vJgReL0V$FFP*CUVllgT z1DgY@E4Cp;T~eDCsO3gumY0{QT)Y6uQ3p%;;qgmPXTbMa{k;rmX=!QcB^~Ygxa|?Bp7|$FzEb`-k6@T9 zd9t44<^7)VH-soE%NCe12TGlegL!{stqLLH;iGG_-8i z`I`yx^k%_o$REi~udKYh(e+?TukKF5HNnF871EEa-ItNSX>Tf-c{aNyQ5@V_v#{j3 z*`U1N^1C7MPq$^4MSZYw>YhEzNxLo)7FX6%JvXuzlU_VehNNr$q=!rJp~iq5jaJ(S z>JO*$;q6iFbf^#~x-#BPe4ptbdrCMnI~$AGFcRED+;`YxEm{uBuI z8=O3`d$_&CkyZ(RLgt-64UHNU zwUU&D@rMsp1ypWrtu47Zb2!nZpN3cXk3M$*$o~+nufHeT>6$xQT396UN2_%)v<+<3qXsclFe!%A&T%BAa9WA^R`Rzco z{`0qGPUJ6tI%8&PiU>Oh2>e1qN`@q1Cu09OaL;0_9AHhVP?(@@wzRakXX>0~p%+}! zH0!D>yFq)#qLu8PrQc5df@rZCI`i1n_n)X#1#3sArU?EaL1sQaK2$DLS8?(2AQD%h z8f4zEox59C(HNSL!9_W5phuDDoYyU~uTQ$o~ivp02( z*#=hcEs=5ZZ$B_P1|1yM|4O|3zP_fntX}8fcx6oFlzDD9CaW7rGfEXA7LW?$hWa)i H_%Q!pWmgC) literal 0 HcmV?d00001 diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_forward_backward.png b/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_forward_backward.png new file mode 100644 index 0000000000000000000000000000000000000000..2e13fdb502c5e498486872a36eef11e1bc01fd95 GIT binary patch literal 63061 zcmeFZRalg5+cu1dN{Aqch|r7!x^0Psgsg>n7kr{e zRfYrpx#gfDDS}kkOSlI9K(mv2?SO=Y55B<1xn-6%2yPKVk`@zIbG@~haxYm&)9I#9 z!$NZ~?_?&Q@^x#h_3M&F5ljwyIgI){e!c-XsAsr5P0`^fIHFPNW(w~9+=W5bn z7Hf%BtpAJ=d=-9zf^F$`g9;Y=e?Fyy4H>muF6rU zAz;u$rUBRgd(du*NExvwVMs{-E(+o)zH-msDgAfY|6T1r=KNnH`L9dR8tB%^RcT@>=C``JzRJSaxVqZ8xh}f;bx-gEeT(De50dL|x*l8Q z_F*eHLTH1E-uqF`vtB3T`TAGu(OA}76XnjAr+dzoyl(r8&S^KOp#}%u6-+BgI!Kx zVTIu39F{VffxE$UgIBGOE(=q$GH2&5X2>3xN}<<5(rnqmN}r_rB;K5jg4zqMvJNVR zj067yBF70wKPl$!Q@WhrW;AN_JF=JFUU^^27_(4l6a&FmJlUVc3urg!r&KE;jaji`yRGrtbNIIynhc zgAPS?!P?Rio%31gbOglkslmPf4XrvJANiCVp_u^i>o}Zl2sbYhY@tnY0!+~u^1tFnT7W@$4u_02%9Oa*mDb_51F46d2*!F zcJQgX;f`R6rs~DSe)C%3$MbXdoBeP1%D@7P3Igx39{eip{TTvJ*br5G-)n%R< zM#Q^Zxpe-8+a-0xbw(Kmi;~SZgD#je;zT8{al40a)`z-%{Rge1Ck_1Hz{6V zw+`wDaYlqUI@p^w&K>}2Kq~cJ-(uq`?DSJ)WTbV|ZO_2p5dRP-{6uo5B<=bz`#Pkn ztLrL&K!%%fZzNXw`%l%(f^c^OXR5uda%=S0!`_+Kqg{=1WLh^MAuM~WJc^`Zir=PJ zjgLc#2^iO&*?)Wyc5{lQN+pv}XW@RPNt1LAJ6yZD-nrQ(8S&G=ene6VBaErd=)qTQpKLkK^k^u=sZ0 z35M=?uJBsxkr$;#YJP}meyNzsw^l?O?b3?&%Km&+QgGdDbNHM63gO92|N=e zIag}&m>Aew=Z@hTK5~C>cpF+ed$sLy9fm&v$9jCuNe*`68hUvT!8ehSp=igY&wkUm zK{+9jwaMA*3fj^y-mNY{tYxW^#82H6aT4^}bh!-1eo!#I<0Hhuc0Y3=m$#PI=`f-i z3NF)=OOYOva+M0dTJgCy6Z^VW)r_??_m*i^m?=mvo^r5E(XZR2aM-CPEp&rYFEw3k z&WNXyUDeXtZOeFW?{1N*AYN8hHdagfHEzmml*r``LTZX|Eo!n~btG+cAB|X3pGWU1o2n+HjtgF` zDMkoV&O{)^2}My)1hGN+@{IjULqj#ET2wA6$NZ=UXm7p1N-L4$0V}wz^*d&?JQrFu z;-lO22PWJvk8;>tgfH(VL-F=eO_mZ1)M;&}e#fhbcC!=h_Ne@c2=ANk@8XsCp#y77 z5wv_t%i(j0(~Nb@^f(j{UglPnkTHY!Rz9Zdx$&jw1xVz0ooT= zDJ#vS_C9Wo63wycXZySQ`Jv)T72DK z4m(oRQ(q|`F5Rdr+)h1LqxsxBaL_-4tp;;(XjW}j_kks#B-N!gk=*6s(SzQBc%{^J 
z?o(~dCg)#e`FNn;#?+Q9M5}D#KrnYw0D{8r&i)2OsJH45uZ9G#qSAy!Gd{Vm!JksV ze3b{Wp!9cpU`x`Au>Rm$zE?P?`<|;%Jz^v`bBdS7IEwqE9ozLMdN-;}W16~7F*p2| zByoIV8Ig<;(B|eTZ|Eof(_j8`U69=D?Oqnn741%BPYyAM2ah)K_(u145d~C^?F1 zw`-)#Qz7)@X2PT8a6z}qMa<|x=^)YhgoEUDx5y{fVchmPW-L12wUDPNizDu-1X&(l zwv~o{(%f2|jTcc^s*ce-ThA{#7S$}DVTo{BOcb*&CP6rxNQl{Qz2`2jk~1HabY1i6 zq;7@Zdo?Z~i4Vdk`?Y3A&vF5+eD-pg)B9LFPEB-$UK*-rSMDs&RKIV-T@)YMl|%+% z*sAw%8=c@<5g$l=X{Mz(68qC=v+^H3XtP_jwD?qGra8O55l$AJeOXscBV% z=)LZg4U}yxpHopEuV1s5J+r4jCC`sGR`jVdvciR_7yW10Qu-tQ&RE}M{}WHf&JOBj zJHO&yYTtJYW&M;l@GzwFTj|f?Ajs(9^z5tA>8PjS%%jf@H{!Z&Ple$&d4s9`I{F>g zu_?YkEhoinKnPh38Bt=8qF|4~y99fN7@7 zW|hO96E}o&`=|7_BQhC*a)vfn23#vvs;5|l-lWjeuTHs8$&EZ?na>huazBUeg~sQO z7#cefd4msHU6~CXw;e@E(Ih0(pcUsU>&fLDaW>go)sYXfiq=L!1Zhgc0pZm&*(~nUV`HqI|#j9F+y*M?+uKMtMKb9#% zANA5;G!A#>XWH>W+i9&-47*M444=7YxJXO+iOA1zB%tW>Mg3Arba}1RU$wzToS6`I zl5Hnae@Z9D5^;}ehJRWh=ppsT<^`t}$K@fnuxKF)0tX}Ik5^*LEcljJQ3rLjNne(w zHp?1bq;h9uxoL9cwnVUy*VqYaN-BKrgvnzRF>L;hhKms~lsJrGg4eDTd$JxYvWCNy z!4^C01~x=QJ1&hEx))O`0%p7}+n?BIALjW8;xs)iYN^nU%Z}oCEC1EL0YSf{gaAuG zx?hX%?XUjtN_b`7{VZdYGk^ri&1u?Tswjz7iIKY}V3~0^>Y=%5HfMbb>3qK`N4t)g zH^SVB?6dH#iByQL%0sjy*Q}ODGDzeYS=lZQj~1Ua%uwPEBnrR;qi8YfS*eF9Y@#3N zlfT?daX67aer-2Oxt`|X?N5NM+jw|}fcLY*SSU?q-Lnb51PkM99#HdQ6-tlp7`M|i z!`%JoEU-Dsm*8D@HsD})?Ix`d=8Fn6QfSaOOaz4KK~eLU%Muyg_pI*c8pT9=e&t>y z7tT`u!jj<=Yg~msB(Hgyj}3hmJu#p+#bst~^uwSx+N~f7Gc8WwK2}3u`h=ZKajfMUhhPVNvKS z$@Y!y>|GTo+CBNnb57D)Kq8+9g56&<{JZN%uZ02~o<~`N))z zkv%VpDPgJk#)4O%aceD|+m{l{uVkB+znVS~R&IzRrB(E$UjhbG&XRP)SWHn4VaS}r z%3{-;)JmeVodisi`DRI5nt-D7ks)nS%KBT25UsIEG61$x>b9(H4~Cj==CFKryY5Ko zbbO&u6*kUH-^rFwEYsP z&;iK_VA^NPE%W1xa?glcVT(WOck@o>WEqgB$kL6s>`%%%NRgB;KZ(B9%U%BgA27>+ zT}vl#cz0gsc&v`A=A@@Gqsd<7q@Q64($8JrsZ6PorIIz?b@$p?b&cT@yPN4Puds-=!MB?epPLm7W;EriXQw_lJ2$O-KZvTSlpDvf zfRBBDNYjtM6E{IHQ{=(4q~G@QcO3#cFvGA5-UDOt^ z+y`o+MCsC?yIgFu?!Al37$4hr-PWeIyIq&=#F)va3J2M`#Y~oj>I`Wl5dyBV9|bBG zf-5P?zjKtB9uRAjHiworB*`&%vf`U#1+JUZhm8A6t+ z78Tm?h~^R|i|j1SStLilrAm`lqGZVc2kOAc6I<^BsDY6QO9l<$-_^sc9f5HMc? zN2$pGDo8h7OYaUl+?8>DJ84~cmJvoUZqIWE2vrQX@ZM^gJBf<=>)#cF7%-zQvI5j5 z&?jJopl1CKkUM#D&y|hZK9LO<6V-9u0ii$j9S&LU;|zKVM%xBUF7LxHBq2W?lRP(c zZuxE^QvPN#r~B_nX^pZdt-34?|B{Kx&FLpJ^H2#UOg+fvI+Cx-{V>K!X-oJ%@|hG! 
zBw?k!@rX=<$Sm)K?mk?du63E3p&{JY-Eohbe%r-xyjj}U#vho1-JuR-Kb@4t2XhWwv9!i9?T>VFHC zja0C*GPZ40EiJ80Nx}1oo)q5EhJ%!y_();{><$xT3f!Md<0e*rRu(*fv#D2q zGFV+rje>C9A4|X-1D~-B2(AYjK*-tUdmuzQiHdz5kC5$#M1;uj!}#{4w6m*L-CLqe$ueJ8u(17Mxgw7fY1vaVcu zRq$lC+dq#%4CQN#5RhayfvA~2?Y5F)QP0HbFGPn%I;di76~%UBOAWq5cz%JizF;BkB+*%E&cCp<6>`S8@!Hz8roOkak8yZ zE3aFwv1j)Z0}T{Co29>W=_)7*m6|<=p$M{x>H1_p7(@AYunn$7TFeI|Lo&`hyCBx{;!ez>#ZICFI<#w#yGk*6n z`80uPD!KSK;^N}cx_v+Yi4%@kw{FZdc1FhuW|)d6YqBw>@O}raW)zA}P%ktBDXBa* zHgGewP1&H zU4Zmz?d0Ud+&=Xnh?NqVfYT-!pqQ#@Yj3)}4=oedR~K9T8KP80x_p!04E$5L?3)p# zk`0jH>r}_?{SD(ni6D#1vf9}oKKTAH{GIzR-+MUj_3kIz)}TtbeK0fkbkac-V7k7>Xowo_Ho#?7dW<9dN!$)u zH*xEZ=lD!|WuVMTdA~IJmB-bFewZ1=3L0*% zFBU$sLAhr<4|=?xUa1AM701NcjF*@=FU4rKF$8$+Fa8Yh>I$qj`1EJfeC5bMqKQsO zCjr?-4_m1j3(Bik<49{-cl+-BZ$STE;xO+|k2lJl$v_^%v62}f;c&YERnMSZn~Hzu z@*K?NseuDMVqo+d1uspE?8XV7opD}1Hu7XfPhnfdfn2;u*C zp2G&WqM{}t{;K)-j)lo)vVtMY&E&04jiW{7qqJBdof2c|sh1s+#BME$XCy-g&VM(U zw0{XCW!=#DXNV78`tT>#Sr`oQu#LRj=q)?lF97(NbBaAbFK<8CuAT4=C~N-7x4H^Q z8SteK)wd8IlmgJyg6XF7?Tzoz)@0@6LyMk?-MJq|=f(C568X8gLF0W7{;W0_UM!Lb z401NegSZg@p<-axZQ)9Eo_Hm_kF=}oGN2pf>>M14@O*r}=1deSN(HX?z;@$JOu6Al(5VYu~1m;3asY0&-pwD5=lH zYbzwMP@3vfZPL)wmx6AD7DsW&=F5NPH7E%HhyZ*F4YV?dB1?u5v%?P16IuA$WTzhT zW%D+Mfxe36<3KEu z7`jhELBVFPjlJz(o7H;=TKq;FSziGHGmWB<%=OTrD?>D}ZL{kuQRgL1@#6|tBeb+1 z2+{tx$K?X+l*N?P@&(3%9rqm+bq$ZPp1dSA)z8}cFxu{#K+g)BJ^X7;;q(Zs#}f7F zHkb+*vb?;!hPpc5P?3PwdCO*384(^H9a8c=VLyO zDx$Gxu7<@uWZ_fz*V$GHfyb|v@tr_48^bpb?)hL{_Vebx5Et>S8|%+T~ttUX7>fX3Hs@|RpnywtZ2`! z*f}|Cb+1nn6Q3g5A&m8@yo2IkB~?{pJ2y2>8=HCgg(aXWZDKgOhWlq>NU2dz6@4twIwqkyJcc+D7&^g zZi@KtdekB|khk*FePrxaa%8Q>a*q=m?P5bH@J*&pBM!S?LZIQq_h%QCD$n&d5TeQJ z=ocy$j`lj~i^{+%oO`KR#|-Xq&7yIhw~HU{ z)GKRs;c-Cm+AtXIr;X}P0)PHcee})Xhg|sxNFrOo%J_q{!ZoHniJyoSyWfoclWo*pth^$YmCET+6=EG3+n2omU~kmvav6U=Y9lU5Do8HPgaQY@Y+mq z0jS`(=(_msWC^s{VA+^5T3u6f@kqJUfVRIQ>VG5Ly8Ngr_|$AN1T1 zQj!*pErbc$fS#$3=(mTT0-|U-j>$Ro=8HtgN14)_DThAsI%^XX6AtSMwzS?)1gwTM zThq0pASf9gyPQ058)`NPqoe%QHh)g7_*sO1$t3D$SC#vbrRo^PKa@#m1u!n2zVOGu znhDA6)`w|5_J2}e6})eHbc92JLyzd(aO*5Hr8n1d0S!aN9fjkiW=VtNU5+WZY}+WX zoyc!p6tctAFnI?868|e$PR<8EcDnW4Mfe;g2tZv*;pw(S;GTZ>s~9|ksa zsF(3tY3h49xoR5M+Rip)Eht2PgmSe(sNX)+{~IH)&4DrplNU<{A!Cu{7IYn^-#7z0 z$#QdQ-t5PNCbb!aAe+SFWcf+*MTZ!2J4|0yqQx~MRA!vyIL`ueYDiI#=qEOT73=iu zlJxz;B8QW>6Qv+AEmo5{v4V1m4@Q5dwFnk2(yR>+Slt6`&;)fnnr1Ht$TX1*1K1Er;l`oh9!e6B4}2kx$w?FIGN}r z+eKdBVly-<8@aoL{#k&wt@rV zI%5x&5lD69DAAUBsjMa%YOebI(YZ>r2mT2J{_T_Jg%uTa-6BSTL5;+q&}z)Zcz1py%*e3U>n{xdx0Cxum6A?*3Q zQR_!M1OcehRaS|jRn0n`izc$*)%&uKY%^6gppeQ(u=B04ViT@ydsM31#-DMUSDj18 zum_qbQy?8c$4m9E>l=cY)?&p4Vvq5|TWG$Bwod1*3A{bW>TUWdh0kWgIm%SGh6z-s zS>1jl=L#g2h&c~y9Teos!yu|n79)15PcMz-o7JIJlsf)*s()-raZNiEs`kiG94nQ!V^)rN` z@&gaqPfRGfrEVPkazX!3_($Q&N;lZ+C>Msh_l%zP0f}|KGTxtev`+9Ir>=kDq7nX3 zB~@4R)OPN8ssg@V4;p2Hk|G9EhQ<|x#M};&%@Ccc4X+^im%?T{T}OYt*9u7EUGRi2 zvw-l>8bDDd5UQwo&bP8UC7JFyl~&1={w7S5kf~aiBA@aJ^$w%FNF(zXSOTYS)CRUH zYI1qZaCn{;p@hTd%PBjXt?5)VKe`77+Xwc^wpNIJX-QrnEo6Q)L^->&)ns@w)>y`}N&XZNC5Jh#&^VB9y7MmH}(j#pk!6hZdNL zaK1b8sg!7;KclE!e%4T<(FBzNvG}wyreok~zTKF?H^r1QJzd=`>R(E_B%_mw7dQn_js8acLWSTZOY7Y%jHrv)z5t*I61*?rzAG;x zjV2OJL!nOgq3L+qg>t?>P42VZ+F+(MUYqk4#YL{+z&>H#H)lwY(1fLC0_~DXG*?u6 zaXyNv_7y6zpyO(|^>_(oi7c{;AuNBRUEHSQpGmz3)mD>JlmUK3F#;`SCCz81C~!Ga ziHle!j~U4As&|_T+1&P`g~asO6Iyx$&~R(^W9=t3ka2^a1*t!uq(BQuSBQ; zXlxqV13%d{T)k`Y^u~;!>@(+i@9F5x>&!KjtQaWk>l+*3gJz%EHRUR@t{q_2H+VC} z&33Zfjj#!Q^o_|2(A&qUmhty*0^&@drXFpa6~!{bK>5*75ZguhTxcYF2~#WE}uWt;mj0xp;yPn z0GXe#IO4AD*+gU#S zG$bRdoG_y3)i~03d9QSSpUcd6v=B%mf=hMz(#!5e%J$Yr?rOSUCWV3C+wV z>ro<1#fLz@(0o!*YVhOzc%9op%~7?T*T37!2)5NRVp*VCJiWR}Catzh#>I08y>4gi 
z+w#7$M&)i2oNeA@txC6ZCxhZ)9$Hq}zOZV95lnedpUU`LyVNvmgHzOIsE^c0DoV0vwqe5BqsQ@` z?-MZf-e}ChY}ffG3d&|pie3e;!1f|TUQN@M=G``3adszo#NmEnsbyW-$~*CL%ir0e z*Q~y?ixQTaM0+o0LXQ}S+FPfQqEpTDH6)4x&eGH@9v|~j9~xi0O_nYf-dVRMck~M& z=ipZkLZ^Q$DVa)$B_cUh?{1tW*jD)KGb5K*!dD3`^0LVxUghKFOaxMC_Cb9ZBo@aw zVaXG3oL9+8pNGY>>!YU`oSS4iu2dH6c2ni+zO8&ME#A$YP$GIKk+Q$+F@yR32bwx% zGHBje3~iP%72`(wIRzF8In2XMvb8nljTq#v2_WA`;#yfT(!0FvK7TVbE!8_ohE^d9 zMi;b4L>?`jsjj7XY?Qn1F=1>;*EWjj_00#)PWznUHAAKz-ZmAZxJErk_8A;UNhal- zQ??8yqVvQ%g(>Xp?B0*tpG$_eI0_?T<<~mC1NcO)^Jow*HRvn%k&R=dTx2%ATWi)1 zD8<&VCjW>eP#~Ky?Mn?YV-N{o`kl7IgAx3UEB_Pi63R!qw2G3LO81Qc!+M>(AXC-Z z?W_79ve{{dSjy$p&K$EQ&hrUXMwaU?(2=Rbz)!mcx;2hJ!NEFwOB1L2iEw;cvOu_} z$>*P{r4~b3QlpOce{n3OHzJu4sbp0&7s-He8FVtOVT?hEie+DGuhd9#b#sB>n^D z(yWP`?Z=t|7W}DKVCws_$``fnUy6$dD0-t46leXj%5`zfZ{K@xf4jG1XMdP00$ysS zh&U`Dam+f)n^_(Lx=@;^uyW)eqh&sz+<6unxc3{c3Ozt1!Czllz79WJt-fB+a5QkS z^1sa&)-``4XRBLErkSLBLsUvVL-HIhQ-|^#guSnnPK}Yg!1|mDlfT=)qobp{#FoUP z?Y+zEed(%Jf(!GYV_f!m%<8mF(HVWDY`6u**W0blCBZ1u50d;V6bUgt{1rkGXBt)f zHCkvOs2yf}(`PuTM%50vVW=nXIT9g@9rc{v%*B#~3e&x>Ua4uUT-Su{lz6W#y)rNZ z`ZWh0^nn(rc6<6}m7BX;^xjqN$f~pGja~&(QMXL@{CV4oEXtv$uG>;f!z%{@@t9_P z@xi6m*1vvG4dkX+de)y@Dze|{4X1zL3NUL74_QS}N6TEa5z0(HaD$spTU3nT2H&DD z;5N*hY_L4aD~=V35*orNR@u_j^eCT2&QcOKu~&QCVD-?v@B$nqVx<&{$dyAPCXl2$ z5cD^c*Ha!p=o8!#^q0}{|68Ve>kHTqzG(#W*JPnR-&$CXrJ_}>Lmj)JS6l10;t4VA zb-oU66lRIzs7rytdJ24ym7M^Co}S)%r8mVH@Qs{@Ky9D+Djjv%@{CR=#X4(#vgzw1 z8L13rgK<$I;xdGi4q6}O`*P*xswRq_di@oVNGH)AnO-2R3Zkeeq-T!1TiuvimEK-) zr@dtxGCiy7vFzR4b@+sV5xq|AIc06#W_U^&qTT@5qcvo-Fa*a!tC2FSb$_Yr%CCQ% zW-$8H7*OelB7n5KgjyMnOA39qYVy0IOqRcY*;)Jk?_^5=8(^3;3%|}z78)oHgngsy z4W8Kyi^tcF*}rhAYSR1CP1QiQyg#_&vr73aNS}gtd9gnfoUOT-mm`-q>`Bo3K&_Cx zioxbaU&1}t7D~iR4%q}{kB%p+RNS(HM^-=$5zO08sr#-~`R_XukTSx?`8*EmWl^Iy z8+j|H$%4Dj&E9BK|Mao0IJxOYhE`#$_i626_>{E`6a^7U`tbu{{BShAdhz}?8|;pX zfk6-}OtQ-v9EK@ND;^9HJsK<%!0fU?-rr=@sp`Lp5O#Pm2@VD|np%UiRO9?y?ugbN zBxE;PU{aN;ecu;w%nOR=zchyNv+W%fU*K$7G^iOJ)sDXYacu_s)+a=-f2paVv>*9I z>20X&Wa_u~x3yvEW!>(x`@JX(J>z8Ly!(^p zx5<+hA+%h}8UibAl#dF96@FPYt3xsu37GX%?)rYP*mIuznw@;O=Uzd%tDn4vl8)ih zXd~lW@t!Y&N6MccF*PrQfRdwr^B;hDOS=hpB-j}Ckb{^b{y z%?;if7B5 zXB&-&Wm}zt@9}w$zWL|307A?BN(A64+9n~AF0Xsm?-jJ;aq2v_*s>{3q0S2<%g}H& z2-{)i1=K}fzZ1{JnL$HM?G)L1wLil~Q^DI3Gc_`^AB#WerC~85UKPKKO=Dj5hPfoOtZ3zZ z5F=fRoMv!fXrrgQLBx@F>qXrZvG`U^-I5B7U9(=Ty`U%9z3n{%JXP>9ukrq zHGzu9K*Vy@-M`%FG4k17TkPZLDT)m=HqN#O87aiy#xUxi4zsIkTr7Gq5nC3wb^7lq z1aIcgmd^CvY^K4M_iAC8z1pfV-SrrT{7!e^#xlvp#y@eAUK;o{3edkTCA|5}Zh<(V z^6BmRaPG3+;B(%yNKH_E(t*Gqozq9X(r)ywSJ%*3#=CbH<98e!N3>n>JBTaRCJIR> z*BI#C+6;~7Gr}i155;ps6aOfws~|Dxx|vz7(!}Ak4fSX=EZ^|9HPLKrlWEg0+=+=k zp;i!lhT%Q|4gp$?hft}b@hV3}DAg+)tGT&QfR~NN2gcWS=H)5fk zBaXKUT7F$8F8ljS5>j`-*yv~mC2X*1)~p#aufDVoZn|YyPAc&9501TISG;LUtV1DX zSDZ3BnoO&hVe$yWi#;ihoWs51cow$JL5UWjE_kk*KYeO;Q+ut-;+z#{{uTFzkm-G? 
zdg90<3F;>U7;A{5GlrvdNIJT@MAbfpo&)h}{gdO-w5oC@s4r1b(4@ZpEPeb40y2C# zaIbrc3kY%u)-GKe&2hfxoAyOd4_QAYO5%lFsP&2GWq9kappck*1vXcr&exJ@Wi|3x z7Z#_o68aC^UJ&qh6^wNe=Gt`x-i%&cWQ^=goAXmKp>hmV@8BC=xMtW@&1S((@379$ zPHg}};Jxl`(&&K3$usJ^-Ke!N0n}cKMP;K@uDuee?dzEVf4S#MdV1Tz>~3Sm=VYRW zTipp1B{0*8^uIscvG@qApMpHK|Jn=RtcJK&bv52>xTJO4q&Z*3&i=Na2z~7_6KrMb z+4F3?=;N{yXwhNivtu_%c3pUfK+#nW-Z23-clMCJLfi44=bU-tYfAVN&<4Q(GCaUf z_)hs>B*P{^xni4I9n{g3sf`ETqkHg%eZX+*j=LZCnoXWyM4cKQZ7v1}Z3&2^8_;X2 zX0~?!53nb?g|4?U9nJa+j>?8vKVvnk_*s#=cw+P8s_q;tRr5u8qpOlI`l+VbwT~qPVhtJ{B0@W(9zV{E@b~l;diSnGi0=M=f>~-Y zpk?AMi7aCK_wNYp{psTvR6q*Tnuu`tAxG8EW-M)W!pLXqJaYkQU1X;}231>Ji)|MD zmQ1a$xqCl+94kL~a$qLg;%4`R2Jm&NrptXm$BITB3eFXq1QPP|(beFR*){0VPwKSp zys{-NhwQJkEcLF6sf5%4BZ8eiq}Z}Tb4@s=d6iT{r4)S=X9p`ril%>mh=o)hut5vt z9_tc+A=xuK$kVEq6{YvK!=~fVJk&0>e)%)=TM&BX({9>sPgO^J3e@uWLf+rI$e_fv zIG8DG4Z1%g4jeD1svWM~iJ;;QyUiF4-yKFxCRRWR>x)=s3EyKpJQFR>HO2S1{fQDm z<&lwH(6whzpIRNQk9;V&@L)N;C@;7!wCEhT4t@fPoN&bA^nFL&`>>E1&7Rzfw&Z?9A_;Dl<{=K zm7H_6jS1Z?dPOwrS~2M{HGv%GahSFl$*rp@vSJM#PdBv6g93GRb(7gQtIWSG+?f5A zpuj3FIF$EhCv)@mYknLaEMK*UG<2nY=LZ^&mhz6lX8e6UyP2l^s%hq5Opk0~>n^j$ zs+W~&`iy$hn~k(>KrIgGy$UU$P}n%(AHmL-`-q_f6($AM&fBX0w_Ifu7_> zTKdlgg@wIX4sy{i#h7wL-`_7THfld#S-UghHdyAIT~ab=E4wkh7=nS$G2r(1JXpwAVOK?~ zZ+CvbvfPQKoh+OQ1>KS!HtO$Ckj}g~khi=3mf@oK^}y1E>~!10rxhoU&1qAHI9&58 zZ?*@Gc|M> zw1{gOW7Qd<`^dQz>5+I^*;E-+QSVgAi75-3{t{GOdEl6uxLH@56iS0#TIj*B)6>q? zYe-5W^}JOJ=D@{Jb*2E{8u#~SjAGN}ncrG0@-qs$>a z(a~ZX4Q(OM%mg?lFXKu+)OFx@2cxE^ zbacGf&TAP5ba)(;eys`Mao=8g4e{@y^e@UcJY219nX~#~% z?qKffb=s^oeDkVVfI`8!W0h64apcuPP+22;tt)BP);R%5X%=%)$oP)4RJgPqEsBak zuuMyaBGk3KonnmQm}2Y@nZ9Wm9+*pnCg?UpfkHV})A_@Bz6DKHm&;63&&|XbGj+0^ z8bw81sITUFniQ@g9_{=mY1M=AY*DtlfwsByj>Nl66_Hnhm3~SeGE!!uy^EJ&Nu}8W ztJTeVoo`pE#3}ErKlU2!-f)0W?R~WqcY_<_Xll7UFH-Z6_ftf}rx<(0lPmTd$E#P( zL&i`30*{=ENheRP*$#A6JPe!3QT}p?Je_Nq=1htm_6}!cRq|u{t0AnruJHK*Lz3Sf z^7SJizv>hg)YNEYhlEx=CO=?{t2;*R)WsTCe&hY(rh4(laatp{6u(Zd;msV^xsgF= ziQcexk?e{PUj&A>%&J|Lf!~V|hYDGSXlj#%&%qrV0um!brK(1Ths7p_t7Syq?*0;1 zwRCW!JRE)Pv0c4#sU%zed?1&{ZaSv>YpK!P`6fAOah4A+$Os4g1Lp%mhSsOMa}oC= z#@Wf;|A4gw(7zPB#wvDGlOp8c!z2yW94a_nR4w+3u;V$7AAiTK8;LpS@%#{7>jrLh z)L!Qpl7TH(#K|DOq*1!b2_IyNe#lah9l?+Ds`GSJgfu2z^e3ox5)*P$k29e=MUSc0 zqI*S~J7i%?kGvVu;s|1|-*s-GVDx7nQtQ@c>U{#_S|MV?D;l!C#QVjujQhV^xrDqC zn$$`i>pBG*8>X(|oj3*UfdtHAO}gs#>)Ci(w&F(W_>a6h-21klvD?ejSSDQ*<(wsf zEWbHNiFF2L$Ep{oP_*6z2X=g9%9&JAzpCt0<02F6JjO9VmsHb64^%t-!WvWwSj*{& z8kaFk?qN)eK~ExkM7X3Ivmdl>n*TIFNE6r2P(BqDViK>3=3Z}D?w28=1?dffN89!i z*xy5A3#pJF-!=G|uLxYNgua7-Blvuw)X?M_d^a z8`)!+o< zSXo1LJEN$=66Ib~7S*iNHTMuI2@EL+^%XRG$J|n(Dk_6}g+5{SQ<^I4yUU4?ox^KU z(An75bC$MISWAX+X=YAXH>$6{X!JY2{zXwI+Dc)2HNkG`znpZGvKmxrpjU!*lQtzA z$2Q_YKv*v_yyBdgqO%iJMqOXo&}Ffr&qI5zt=jr1U;C03vdznIE_APAamjtNxSibT zvl^B)P!oyFbB|;l(+~x%lbq`L@ojWB8Y=n3MT}Zka%JTGwd^$K?4%Mt85Be8RU72< zCIgTZBW%DKwlUE4S@UASVo|~q(UXPbTSp4=Ua*B}crKNat4-jM=lPa~qqpQm661>_ zmN#my84)v288S~anXRF+ozF%UAWv>g^R<7uO@8z!KloUoICRcDLh0bVRX%#Y)-ml& zC%7c9I%f?Fa}W>Ez1VM-2ZTv~V;Lu4HMyO{hYv?2H(bVT=q0K#x-SCnWy$LqX5h;k z%8M5dDJ%9*NBtFv<&gsfmY3^T@))Vot->*JXAKzi3^qTjLSX6D8C$#+Qps-892C7z z^STq)CmNPpSv{F1oCGW0Tlj34#2=#Qqsxq>knVIi~s(mPP6E}E9yWvz<_2Y#L2jOKu*mL< zAdRB72B%#j7O9#fC)w0qwf_5Ml0+9@ z|Hl87e?Ypiy6WA}4_R*A+5?T#Nafd>{?yyuMYEO+JNeLNqgr-Yj&m$O|cRZ~V4DRo`JE9R&VDHT~Q%{E}nK_A%;)3WO?`~>&CDay1LT_r66(;^`8IAP-)QB0@LUDv%>IsKQDe#?(m2Zk^kx(ww3((BV zl10&LR9=aSv|8C94P*e89Fp{b^v)R-b(+&g+huapa*Vb)4VNa(<-g&zRi`|*=tP%u zArc<*%}iLfrytkXPcqqly@Qedu5)5*ez9f?u>og>4ZZONzSvC`i?m9>=jH(UIp}n? 
zouB|T2g4!zv*1E9feHRbMvYD?5)<%h#I8_r_sA8y zqKDvg%n6u#ASN5g%%}6XuB&S(cQG?3|B=<+yylX_51yq00=kqO}^=Ij$T7#~DM4Tjh^o)>Q+*a#7+K1f} zmAP0GH)GGG7}v3qTBC>Em5u9K!}cvAv;P(uAYtH$!he2G)rFH?2xPUXFik!h37=Ho7BAtL5=`RxN3!85w$vD13f zu>4;X<5CBOnnX<%8ojn9=dOn+0`}qmHLg2wuJcL>jUQC%wvt1^CNdvg#qS~p<=U7+ ztqG2ftz(vWm{w{S2qC+ai1<2&*mT1-s;AlNN{}P>zP5_;?At0!n_3lTn9I+yF_*0J zMso!N*=tvtXunRsB#!Xe{axZ&5%I*$haFgZ6Mw<2o<+m3N*@ zZ%791aUubOEdsWp)mKC$%9e?oHWrUA*+MCcA*a1IBb6fA8cYg_`UhM{4y8g4i|TN_ zz(_+OyEZV%`SO8c4!NdRp9+Bk+LQoxT4WClO!}$QXp;sQ89QX|>GGhBNq0&%-9>k? zDay|CEdFXgeHUAxumrlZllV>WmvV{hHtSL>YlWP$n$8P8OXsg`I^t`W&vTX(M>$pL zCe!yf@=`?gEKGckT@xKH342MrUNvLU>iXZ)vWI2lt}RR(R+(G6J#gYoK<_YkU)qNH z7vDM3Cp2Nwb;QAp{4?GZO|gxTC3OqKGMmSH*b{R3l1eKCc|HzL0}>KSP~ ze%gbE_b5vWwe`UOp~5HMLjtxH>SOVDkd@?UQNGFmW}VGrGW(U!5)ybldswu@V9awW zj=VPQeYyp;R9POr{&{Ya6zuJeWo_N=@OwV$#2PE|zt>lN3~+sUXL2C}V<@0r2xrt8 z_Hqxzk)|=6(c%S0)2i)XX494Pj>l5rQ`_*c@DK9!LUvFS_f;O}y(!l~C@Rd+Cif3# zJ?IYs%wAFVZC-a5^&y|E>?pB%DL&8DODL^n>(F>(n5||l5C1Cx@yl08;T*1OnD;^& zQdUICkAs)`Q}g8ZHSE?QJgXAaHE0Y)aV{sR?A0-&pB9$0xjg3Re-JUuqqz^sS!JXj z_ve`e+EB~Ozql0*5ZH;~0L@Vj?&7MXBfU4OH@bT(9#J9E^^H5$wwx23Fl68moNSG0|4iV# zgMLQ&ZCW3>P5LEZ^>xj-VP476Sxh#UOH*5oA;Gd`-TU zS#a#-%f!B@Sc`1hS0CV3#f0?e!tvXGj^H#FYuXX=sb(@3G$NenR`BIoSw;CccP``1cq8 z!s5E6YGyEFKPJ#jvr@-VC0JnFyU)ef3@E63_EpduGvY577eAI$PkE$(n5KMc_p7~O zLwmNcxVr1JwFLDOQ`=1Tdh%W3a2J$o5r$&azdPX#tjvb7Ko^w!t5oD z64kh)V#U5BeE84nR3ylNOwanX%o3Txk|I-&G0WU`!%bB2_0lu{O#mtlea-yXM>|h> zjb+wsS2@eOy_moAC%fBPhx<@1Z4Gy=uoh${H^cO@ZkL|e%Ez0N6F@EiUMV&&8Q9&$ z{8Iqh<8Oda(EXm$N)?3{KY>fx!wv6(W+PEPCQ2`sa^{Q= zWa+XN1DU6LfPz5cOmdvUrF?D-qs5keRBC5RVOQ~<;v1q*8w1c5xch$tv8{ntdpt{) zSTFdm72{H^{wE%LL4G%Z-TD|E?lD}C#g7R!OCeMbSKqU)|30ZS+WaGJJxs=TSzYjN z^y(&Ec0c$5U2q*9o8egaG5NUs*k(1lbj^xqTq|rRpt0vZdw(}18NW}l_hNV&IB}T${^%Q(3|}CY z!5SrpkGaFGDoDA+V?}2JRUEG_)zdLwLT#E~v<>zi&ZiiMO!Nu`C||}DCj0fK$2b0! zM1LGg?AFh#g@!knDg5hnT193ShgO}%yL-8fEx1I`uAa);wtFANEFb?`BzPDoOgJXG z>~~xH@(lY$=aINTUxae5a=5&S$?`DFFx?(IupmYf ztoQunF-Ftsq3YYZ*PO3@TFS^&R-+39K9;G`8uU-NiJhO&Z*m$G3Z4Mwcp#U- zXSLds)ha?-jtDjYe;a!EF~JpVOKa+DT?DfQ)Y@VpDup?~BnoK-9S>|1sdC6X(*FA$ zoL@?TMPEi;jAsd*SH3OKt6XAXzQ4?Q{)7nowl^X=n`ZL4GE3Fh%7&mxhU9c)s>#HMPw>)z!z zyfMp6O~eiQHr!UlRz{+XJ@Y|L_V}zcZ6m4>A?xFor82^?2Xbf)<(-eK^ZJriXW2&N z@g=-j0`&^u2m3!sjTv59!T{j#_#88{)8t(MY!nGolo_AkC#eZ8mE=JaYw>35)H>&W=H6rB5;#ysfFZf9S$?OullBRbO%8C@t`LQ2&B}Zy~ zRuVddoL-A*v`?1ntBu4Cq=cb4za?B}(c!Qr$Fn0Iqny|N2uL~Ge5YppDrKJLo#)F~?>=F!O7g+# z=xx;wFi?(t(d8F;U9UdD;S2vV6;*8%OY5%RW>!A%EOZC#@`?pHFIne4h>2vfkCJ9? 
zZ#sOM2w@U16NoUC7P+64lQ(h-L$&l0~wdf$}Tf> z=-FJcvzOeWYE_J3USo0DVK-(uA){te3LD6V5-8#ot1z?Qv()MNOGArcbjQXGsIx9D z_imARXFpRA3hnp*MCvXR?EVJAr4#OOHG(6~9dZ!Be|!6sPwVjJKlEOIwRfIJ zY+y}kgQq2K!HUw4fTLV<`I4tW!h6a+hBLigi288qa!NPvlW4y7_a*^ZLRu0VK0_1pQT;26^> zmo}f_^2^(;7)fW=)Ml1jUFx?@*xs&Dlsv>MBzCNAt4qyY#$BhjtAc5s^)7ZSHz+yf zwv>41*ez~-rTn#(%TMOPdqnhdET%rHZ`S%jOom}|rL@>v0##?QMB(;Fm2|$HA~Eu^ zI+xMEPa7lgOT;s*jS|a42P1})QtUHh30vXm9j0&E6$d`w=JjG*^GA601qsu^D{GMg zetJ^D7m40D8=n%sCDU&TM&BGZXE_vbm{!jyhsf=?dL;tAI=EK}=>?RLJ{0bMSj0{u zAxB~J3Y3vbSxglV2s4nQXWNwb-8j5Ub;Noj?2>^K$eo9V=>Efc| zZ98MTr};S3Le`EmARh^ecYZe<*+e8X4OMls;EPc+Kg}6FoE@)5{lqm<+4*!|vRaAp z)8$=fXj9-oz`Cw+mv?wa=rW4^`zq^W&D*xQF}Yn$>pd!`@w4V7_18ARv)7RcW*tSe z9KGt^QgcNp2d16twl25bI}T#ATD+Uy&zo}7@wRj)cMaK+95(#O>5mwO;q1`9Y1U*U z>vQF=LC{~dhoK>E&xDvh9Noqy08eW0e=S#U{Ou@nowX#RKZPIk0C0(aFl|Ex?HMVp z4zlsIT0OcqCTHT(B635diJE(!@usO-&EP5}zy?SruH#65R@W z+3;Ie;s#?iXT6_WlpF-Z>KH=L|D zbe8`UK^Sy_TZCOIH%=ElV*vaREYhxkc`O1xxBLO6mv(ym&g2 z1e0zH+sRUf4cLjKDrQmfnPEAn)Zw{d?kk{r#gV*vcSTp{Lpk|8uX`FylYN#D8B`^2 z=eg@z<&fIs>r}S9g}42r$S5Pw#lW%E|WEqlM379_dH)Reo+hk zb*M2OJFlAsApL;T;T!&IQ=V^RF&7|UPA-UbU;NnW>dCCeZ3O8oJb&RX-F$-=FA-@1 zM0g&-NEu4xcc=t7tXA$|meoxdvzno0n!vT}eonkig?^{N1%;%$d-F@`nWGO|(Qw)K z$&sk@?8&52jn`InePpc;O3k=1fTwUVwJ=agFHqxU@o2eu)_?GrOm?jbm9~ zjQuBzdhuc!Ue9^ekM56yp_rOU?*`oke15y0@qpe>Y^f`rSNFIh(|Jn|fBeGZ4o+{( zDm`oJZoHbpM_K)z0v5N9Vp#-Le8|RU3qOsc;MI_%Um#xBrY4Oma@IbPd^OuJT~|16 z`88;%NTqbBpF0n5mDVhqqc!aZs%WQrUk)t@jFtS|m?xR(B+|eV5e%x(Yv;M>Q8+B% z0#-5zk-EMZL|pCg@!_XLgf@J-VkHri0XxUTjXJXh9%ITVS*(YaI<6!lTQJW%R`OwM zs?!a?xU=Ok4y4hrS<#V>hquodq4X#jNXe~u`)ACJDl&g&N>vcW(@(xV{uwc%bDMXj z<~j8*yO@hT=5d|`&8^iL*i_%i`wFV65n^X+9dRF4UUxH?LV5@kD*CkP7x%{|{xduK zztj@Opeo$O7w$>2su7^5Ajd~dYLHzP7f(U{eNbo(j8W2f5>tJobm#)N%GAnj00Gej zNVg4gBZ;cSoxX_N&eP=YMf4M4%FeUhMT(QMY~ji&KgkyaX9s(LG`?7RycL&1`T_cI zLebvnV!SYux9bCr`p+A$db}AT>+#}jn6JB64DH6io_*%-2W@#1dA4~iD@`k>Dpz|pw$pmN^AHZ}_<(9kKl zd|YMoa-E%PhCZyEpWG*MGH~g^M2%Is=;s>X<#PmxcBCSxr)B~-Oeq^Du(=H%9{m$G zbdT4buG1f{6E#-u6Xq3aCz&c$tZ({kGfTEXzW<(&+7Z1zX(U~HLsqShGa9s#O=Q+? 
zR%?i;T{5GDMB+IS$q?P~Kc?P=q&i^mUVc zO~bv(W~i3Zy6#$+!TW}`88WN8s@ycA-H@H2UtDglw%T%|A3v***u1yi1S*ZXpEqLpmQGZXY9F;fw>dk;(2I^?=~R|y!_wjF z*^)B9orF3yw=8k2-Ez;zcu!M_N#R*T96l{iuhU9a9VO}}F^yCrGi5MNuC9|2wKEjC zaKyesj1|g-*OX*XROX9@2mcCu#m98(h&V0!=`kGdNHUdF1mRp9M_~Z)p+nt^_;{i= z-@DymBqLUZOrDwe0<49l5p5g%7(#18{T58P?&(^D--Pfn^$#OvbS9Q}3MLz2)iDYkwo;m{zZ0V5<0+G9goNHVz zM>(w#)nthz^73k&O!QnCymWB!R=0`$tp_Qqa_$N<%a*Geb_zcX&Ap8QTaNJo>LuxG zEK+|V1o$T~SL~V_3S#W9L{h&At}RF%kok?)M!;V@z+frKQ2M)XJLx7FB8pG~Kqo!J z*uvyN;>}ypye2?SgDOU|+W4hcC`EUpyg6FjQo`U}@DX+9;|s^IT6=d&h7~^1E&2)_ zp_*!xW~!PB)tYdi_EWL00MD|{$NS)6kLC%UYe<7i6cJ?p4IQXPmJ;pm=7uqaIyoiM zxf${Fp>GjVdPuChTbNXx-IqV;-u#cdeIS&Bte5gT=DKxj?~P`nI;hEq?)9 zV*bij@VS+ce3WXD_HJs$Sxyh?Ch^BGKknsee}Si&3CQa+mhyuK*0H(IbZTxQQ_dB= zH7YDDBz~KV2^ZcXHTlI228>jGKt)-LfFUUQmRQ&p0L)y0Wl`};xJjP9U0GJFePy|j zZ+W{J;h=yZnJ-)s@P>g8o^1A>pYx8{i-Tt!CP8-x)4}Pp!+Cz^c9m>??PA|=o$G3;ROQ|kHBKZ33xnh1czoNx zPAO5)!%62N!@W^LCBLzDC6k^d<3sQ7wHQ9JOr9j(RNdA8+ZokYs(NN99Yf*P@a=V@ zX#&0=*gyvk$7+jw^EF5aP6954Ngvlu|C=Z{r!!Xa@#EG5kic2hT0JX7-anzR#GoAf z6ZS>cYGB(pN3o$30@(e!jpe|BQkz< zffV;?TJ~`LoD63e@3YC-+A-2Nn8xvf^W+TNRZo|GL})q zNA@OHrsL;BMX4PD!Bg}^x z7cvQ=J6&Mq{2=pP0|9jXm;xcewXB(26Qs#mca|8t)1(fKhf`q4bW@k&!2yTz z9o*8vd~vo1o&KZn)6cxD4B#`+AK_Pq0sZ8onw%{7r`Yf^a&l2rnWVCRpvb0ENELaa z-B=(bOBZ?Pc1UXV{Qju-2yH55`RO z)yaJKDaLwODkG~LrJ(zMa#{e0CV?3Ng7Z8#pVUnsDGSB;+W#Iou;-0q`?!<0PQWN5 zzSLqubKCyPRl#R1wH9$C7MX_MF&Sp-)KcO3hSQ^2&b6mgIoq<2!f6RjveJ&b>Gx;z zU)YY~$DTN1R?)P7Y*U2_@D>k<-}=LYNzH(R-S->%M+i*ng7{|ygJpVutpotwNNH&; z7Q90d_G8jwjK7+GU=; z-xQWi#M1>@b{2;MuE<(5e&dtSFSnR#=tg|bN+z3*$A+nhTu0{=&Z`O^GM(}>jnct7 zn+oad*O4>KhJXA9o$6xWqnvWKbI!2XWMzu~^*l+tZx{ul zl;xP(%P8x$J+CQ}HmyK~2=t4L()dJHw<+ScNeuFo zP`~|k&%VWzC{!6peOPi>ge>s*$tzAYiZd)m5kaPXU9l4=yaqGuJNC&Ky>;K{A7O3= zPe|E_w(SpnxZ1+JKtwzH;)g6o!1v7zxddw+!HpsBP&VmJ(vEf2qE=T`27xMU{(8P?_>_)tTPV=nBKsVo6^?juh+63A zqz4@CYt}gqmLh_~=e%oz+9Wh^Ri-`f_SZ6sE?V)lJZ}tt5cxh`CKg!~Ywa+9CFdgg zn@WM^xvz`)83jSAX*un4`B5ZbKlsg*SHX~fYuaOx@KkcIHZ1`l#0k^=nF|?_Z^@`O zO1ClIpa***qMuMt3u8W6#mka1NY_EQ+g_x%xD}xj5Zu-9qPjWbd>5uLv)1jkktV9S z>h|MzE0Y_6(hQ;Z1n{uejKbI+vxN#age9xfl&M+D%*(ZRE(a>5Vn&X3Sgtv>3-<@^ z8EfMivYDAqOd02kD#WWAs3Ow)}nB`lH3I%yq^0n-$5ci1rThCAAl5TB@ebx(t zbe3x{Hf1M;j1gEKlqEH;NvQ2rX()bKshmUQAsQO)$)IX3ou*`R4*5NpNfC^Hu#sM- zB1z$2D(@#^TlNg^@C5qu?sd@czM=b>d|toi&yERCeQO|P>k}a-nkt-&f7onDfME)- zHPgY!^IQYJ*!Oi2RCb^JQ3W#)TqA?P)8AD8pXNMc-zk<`Sz8&p3K>?>!p7A#3p4b<#p*SwFTHa8n;(vVGIlicpGpqRvb zlAC~5YZi{>>}Z)Y%!nLcz??2JC=^C?kxAyU!WZl-6r50-w_&WEi7L(y4I8;M*Z)(a zdb_=D*VZb@lkGaUZrxUW)N7vd5!TMfn<%jVl(-2_xv(tz-F2mH;`3ycgb<1zAFwfC zycPO#R#s%jg5X7@Us(&r3W+rdwd>rUcXfDro?>iIeYf)mq(vItEw_0pUJkd(CoM#_ilm4TLw;(#>fS{7v>Liq2^<#8_gUwWo2?8BaJRY=lRGJ z=OZa0?S^x{L#ziE9m8mvva=|Y<4%)mmG`S*t{=)m)X*9T)4sX`cK9^nEr{#Ph&|_!lYu0x)nXo{eMbV zxdO_itc!`^XG)-`OH#B>#*2B@9GfMHODfEW3LO~b5FvPWxGr|=xU(4Cn_*p`VY8s#2E!*BZk0yMK>}Y;QB|e;64PJzK*t?M>qUrpCBg0en#hofttcq`(G9 z*g_gbKn8y1=a`b&YFO?$>{x%SFi6>0L*?Maqr$PBj}7 z{ALk>PT>*F@gE371P**0?X1+&MG-jB1I7>1Qd3JBXM>$d3+46&xRNXL^|zwIe*=kM zxoxGDJt+NL`70F&4y0yaVqz+a!zly}KdAjA*K2V`Lz60&CY{QIC+wzTP5d|_tD1H< z)0l)}%H1}78S3#5fvm%RF?44>^a<9V&KCE-rLc_de@fvEdKH5fgH%GRL@v+kK|$$p zq2hIVQ_zlulsc}QKK=9*(j)O+yD^y3xYV+pv{;QL*$GvWJ*M!6`L?b2-n7)VDv?@U zpb*haf1o-FzCvE;Atl9mroxAi`zaOlWr6a#-#dQ7MH*{qTt?*PL!3WaO{tq_4Yvsw zvu=B>372s+is=}(#+WqzuvXEv_$e*>0cIW}Cdy$tVAE_w{Gf$)?IT1=UU3SL7*9N` z`UU=wEdjP@v3{dA#wIBPJ~#}^vw{txBF`xiEo+B1W{1u+p{nu`(2PiytXQYjvq6=r zu3&z!;E(Et9)a|{>gGE6X#G_NYOD1)n5pZ?{e|m(anu8G`Hc_((Ba(GaSpoXz4gy zdRH2jEg_=nySMb-dKEwHaz0J62%(LjBx&{@sLwh=eB6(;1=@AMm5vEm`JAIIBcc`Z zW@DC8X|?VrZ|D-gCD`y@wqf-a%dO73s*jALJp*fITb@I+*|CW@G_zjuTd8*je)5&Y 
z09y!r9#dJz9GIO??kFTl{!t;Y zqOJA6EC8124n>YnEp2ixdTvs)rmW9d3?o_-@joT z57<>>DBgx4KQr9?e$HLLYz~C4knj^3wQ`Nzib92>BdV+0%-H06)U7E!c1Rzc%|~qP zE!!(_U3Kkar#NFr^N%ctLgnb_=~qL^4~@8jFlwbla>;t9EWm_Bz3b0G9Vb42bycH_{Fw8T`sXtPpn?y@96tBj|&gomX3#7gLyXS`JGRUyZNn1DK zf1t+avwZ3gZ1>m|EG#`Nv!oMGGrc$~i;0-VY;G~DD`JS1RQFc)aoYBzstJLosB=nY zaqirgujwj-|9EdP%RBH@GRhBHE}`t=3x*04(*EY#6;duL=jv`CPjeERk`d2bq|jqd ziq}-F#ZbJn3(|Vica+uQMlx!05@Y}75FvIwUfQ{~ zBg@|e0)e6sjOK<0HRP{i{A~HciFvmyNX#)BTgaPs0J8C9T8SmMqT=^s8X*V+5sn7m zAMjB!Ju?IYl&Ci_u-3kFt&r)s(k-ZPt~tYlBN_YW}T47}#^(M@@dD&x^sU z(imh7ed=PHvMde0cxuR*R6fpdk;r}B*Qd{Q|3>AbNiu})Y$)S7SiFY90o^V93_Dkn z;a{YCFc*33#5W=3rcMQ$$N9%`q)nSSZ}LJqe6U2?^H$qUNq64VH}Lj_HMzzb#(A6) zvVwZ1c?UJ(d<=MeS3ewN=5d2HW9~L}g&|f^qg7)hJ}5@e@!JTMVD1M#s=giOTG$uK znSV&Vj*gD-`;AOGKXZwq16B}NW#mv5Xe>yLHs?mbn zR__TZO}kpBRt2d*jWV%iZ8o<}p+R}oOG2JjfpAUGVH!_Oy?c@cq2I^Io$xJGpZn2x z8Y52l@@a=8DjnS8_eZPM`Q)+o@!u(be<+XjGSid#N%~EYB(Gqs--nks-FfBL-%4Z# zRi2{CE&{Y1DI`@IRyBh^V(9#@?RJjSe^ud*(JCVHok8wt<1RZfN>z<)23gTa5?ld6 zVutf1E_>OBspnv%OROS4UsNWvDklO%)C7`Qk$1*4*QVR-*f;Y5E9iHQpsxrVayPK6 z$2y(!jTR}u<|1%B&9VGtL;70$RhlI5{0}#zw1EY{I3GU5jpI;F%%~MZM?A)-IvuF1 zX@=8xsHOxLs6RM+nkwv+UtF9&$4bt74SQ!kNt0g;@C z8<>;zig|Z<>Z0!MQSPuES+Q9AP2#8myTg0qLH5Q#i2u)u%46AScuf-gXEESH65)&x zjY9JeYGjozXxFDiJy%-6HjLKxA+No)PmD+S9LZFoRge^BbDxc6n=|>{#52d*oO$|{j*KTma?GV~zg?~OZJq_DM>=acb zhgas*(a>iqXgbe=7ZDf2v-EKLa$g8W{-Aq!!^-cJJkvCfFm)PZ3Z@CTH-a6f4Dfv3 zz1CF>hGw+#i_S9-F7FC%`xtP<3AFURFtiav+ zD}+Exu}MZHnHsQrg9UasIt21a$VF*Ynkc&uajAyrt<%iO@@{lB2z zb<(5**_3{Hr@3CuckZge{2t2{mdK_ayRxUdxdNx{SCV(LdA@P_pKhc7-A{B52ZwA4 zXPqAYVK3BvODvxcLva~y?(gZRvPcy9_SZ|q- z`wqf|BBc;X7JG(5FoGPkm44)8h3S{#287>E#CTKr+r~7Fr6^ug_%GwNDOhPU4Y7K5 zGCq{+;ekG$n-*6QMOtm5>KhZ3L)T}i%?U=+qBN51h!5-iu2&oWGLO<1ljtO>Roctn z@-xt??BZ3hW%>SFVpJg{1pktTpV=fJ0EB8Lxa)hOc(;A6OGXn%)l%OcHLoQvVNv8Z zSHez;+J|ZKo_sjO^2T^{FZGraey-i0W##1!4Z44o(A|U15PL~4VoBk{MLazCM7?8! 
zUWQ5#$G6_7TBP3cS052X+~CM?3_oHYXv+1na!oRywcUbpBiLr}W>CqY^{Pqpxp97Y z{y=5T@UU^Jntlrm`mC$i%!{P!nROl2_#4Pzr)JKVmX?RM$vb@+ZDY}^F)3<8slrua z>a^ke${e;z;CE0-VNeOyyWq6EyH$wlw}i}=U0Oz_(kVDi>(Ijq@UQRiw+YsLt^LfQ zIew7Kq=KNBp`#&k!bHXIW>mBc^xM?3qB4Iw$j1`aSsD&eF*Lx|RfK$&{y~(R=vKD2 z(6-0xWe6NrK3GOxJ{j0mW*sgPt3!rGK01F3!vOPhfA>LImm>WLcZSxJP*kd*$| zbFBe^7TPnYue8wLj4CkAiGZnR!sn$57oF7fv#t9upETE%j-5e*@qW$-6ZvLYo_%o= zv3~gn8b+sF_8ptwncy`DM^{8GAk}72!yND<5(TrZIjwJrsF0Lu6C@8MwV z!W--M{<@HmceeZ)%xQ3AUwN`1%IGtqq8$Aesl;%=@KBz?lai0Lr0$&pVHA7-> zz+Ec~M2%KFG7Sd0wxCj^Ozq$ko!iv1LzthA1s@r7HEL>d{>Q95y%OAy&zx-8CE&?Y z+fIuX)&2VOXR}oKi)qDq^}=IB@r8UJ#IV8mBW}miyTrAE_W4E)X{Fum9(sPBo%dK_ z7_X5U5d3#dKZ^+kOM$ZPE8TxJ(>;05z1h9CXsGa#-iAr?A5<*r@W;DdTkh+htHme* zDR{84aegamo{ll%=YkH&Zp=2q(G3R+>!sn5k+;i}96>zf+sZdM=5uS)`tLd=SwwS=obn$vhRVFuPy)n{6UtE z8341Cm@!OOiZcbBxt@UnzHc^E^=MH4hth0Y= zVp!ha0td8I`#6{Gm~ z%5<`fHy8wh$Bdjj>$P{ig$gd_hs!x2TcdO9C$sZk9 z7<#T~G!#!M5{KLFiN&N*Y1;~7B7TS?T?CgxdN^3kJ8;a$IiPvp`|hq$T{1$F;#19} zeeHQ@eueP#y-Pm$9b1t&VfJ2LVVEWAhepz3UYwsABJd-4DUL%!{qEoF{Jbw~KIJ~; zY`U|=KTuInRIG6E`M-_Nvu zsIae|XOk_~h)0`#EzBm=+bwb|Cz{jIzj$gaz?Cckr%oyqlm6iZYv3a@+;xAuk(2vl zoTFI;jva~xO3!2a)Om*ekfM163VqkRI&LgV*qr3*o~!)>(>`yr%EJ*2Qe{Y5EB03Z z@0&24Yn&7`HArq}l?EN11gmQoYjO%_yh%Y-m4Zy0|F_4rG%R41R3-GG7E+-|xW4bcn>tHji$F;yeVWizf^rvR8}U9ZF%p({%8Qg z72XwFT`p?UJ}0t1BmG^?i2f4ng(5U{Zquq~QVj5Y1?~u7UpP)axM%!nrJ2mlfaToq zToxJQqhepqy1^e3#pghz3<<~j4gYtubl z65+7oJIe@2(7QjiFPF%r?lukKNBs;NKw3Ww4aIJLH^%M$M||Ho%Hu>wG;nHinLLel znrf-gC7W?ZMC(wpqDHG*b|Y1if`F(Axy`Lej=)m=F<;2o7$SteGMTmDc zE|#WbqY#<1?O|Bx9LeW^<`^0|# zy~5EiKHD6Kn%`dqrx)7cy(g51YxH+SEkKeIl$Wy*alj zDOL&LNjG(K5IWMji;p#v=d?MXa0f|+)$o6HyRxKNs*y+4mz9x`nN3)~mA_4o|M7jS z%t67VK8Wn7uA~I)=%kh`yOcp2xkj@tPi!-@?HAw+1gUfl6!le(a|Qw5N5w;`Sl zGmUDco1^RjOU4@7^bIL%<`T+dVGb%kpmj}ZwXucsG?)TGddO^12v$bP;Yno(e$T^cPKOIIK8k%xW<&pGp0??&8FQh0cH zx{xS1mzlk@%5Jk1H2{Y!_8#wB7)Ov$zjDoq+a^(jlRuc(>$X}l9H~N5bdUn5eaK?7 z;+ak%|6WVU4A1bH-r@JLbUGx>^-aXeV3Q7-A;e&=Eqi1X=2waH5J0wddN7ZFZrU7z zDrhyFMbY(PpQ#nSA7#G`$_{Y2=qMeogw$W}4?ldgYF$&S3N|NrkWthq_BkleSOwSTI+s}{Q>ri`#6vn zAlLPqbB;Ok9Os~ul)EDYSq7zgHO}mY7C|%%mDq%X3RL#6)N00}$I5SU+lhi*$rHHr zNJTx#L93dWor+sw0@Imda#<0Z=heQqxpz&Zb|pWHyNe>M2p7t5y|2SPD;5q`ynaX! zGIY2(3--I7$vn|?l<@Dk9l#_R!vi_j)yi2$v*;^WOJo@7unVB?+o(Rm=cv7GfoMP?4ivgC1R$>DlgBr#d4Kr74O%g0h8-QadpA<7s`8DP*ag`x8ZFkSMK2rOfv z!lM@s%`trJu-PA(3}qe}XBm(9Z_@W#NHw;3QNsuPibn=FOckP8MkYMFCb@HK_;5bn z%~c5!IYnKdtY*>SKb5Bx%@delctrTBoW(|Xyu+|Mc13gqxtiyYaFy036orU>UyMx$ z=0bZSp;@{g;?YICVUMmWO(iRpCxemqmZ`{Sz-_~ttbE39F_ea(?@K?XY1G}& zD+w4wQZfcGr~Wzeby`Ol`Y|y+Zt#ccku3y(g@hZw3c_YP21B|NEEmhRCWukf(M|WW zs(L!4cq_qupj#r93@IYRR(@y6u*xHU7v8GDjxx?|gJ;&Xbl9)5ISuoqM7Sh}=~e-4 zjV&M@_BjShE&O9XGin+Avq4_wLgem*LLE1n`My3|@H3T!`$wi?8mA~P(TQj+)t7_a zU$l#y=`oLruA~oIo@8(ZSkbD?6|{QCyBt`DI4I&;^N*4)h)>Q{FL^ayD!rY<-`o7$G$5moRns z1t>%!2UvKcK{VG&6-<%j*J@HU*mjY=2ufqF$1F-Kw@67av|rB*qE$blo+h$OcHMHO zP+gYmeAIc8Eho$A=bNt~?Sh&oPqRb?PdgAJU5}U{db0d}B4)_4YA&3|+Ysiu!6r0WR+P+49^izdZ=F$HTnK%o)tn+^_wj@ffjV=~dq^aLJ<0UrZtJ|8DA z!3r?rrbq8xp5OG0H_(m|wtf|iS6sgpYDzrNY$@PI8 z?n?y;VKZ7BmNJUw4Gw5gcBxJERdAq$a~YcEM@MbRT!& zogMA%jcbFce-$7tjQ4VxMhrv%S@RTQAVL<8`rPK)N7RE4;fJ`ydWUjzc4N#9Jzexo zr^R`Tqt3W52LB-H-mBWS^wa&P0ITf#=~7D-&!?rPm5y6c5KQDeM>RY1J#B$WUWK}( z)fJ*G2N;0l%^co$dN9*0{RY+a)-)*yM>e~l+6u2fhIPhGm=XUIu{0SL*0kAR`Ax|J z3)RAy;{vbH49M(n#%oxO`X!e?feCbA9=RhJ;h}YSYta3myzBtNe%Ndtq#`56T#h&F<`5Q7FvlxSUz&^aOK}qa z{ta@!Eu6Q4ii^yexA7G)q;!_dF2s->t7Kw?7NDfv9uSYG5!G6z9uG!~5#LKp@Bk3{OOarGaBrR1TJ)k9^AiMvMTOtMF?*C-T3W^dHf(d2 zUc%4E{_NWDScH-i5)0#cu-Q2NIjr_k0nu;j`Li=+(oT%kmU|olAUYVOjGTRAbTd7! 
z6RLV@Zs1bsfpSg1*f>Md!*ydhC)3~@B8=xlEV>s=qzL4RfR_M`_95vu3M1sp&4}bX zAGtG$2Xh`1Tbd@(<;X|7JF7B2^1ff> z-`$_EGBPq&9k)Eki2~iI@Uvu`A3s(>;pTTim%d^Y?e-;llc5cTd$G9^m#G%=qFp(R z7-21$cu4&!@<%nIx6ipko%N`wJ0hn3K?Hev>TwcFdg`rdBXAPNZl6LXs#U&+^PR?s zCb%mTegL5u9n^keuc9n>oOSIyFFsw&8kd+ZBUAumf0y%PkG`U5&?0LQm&!GlRx#5X z-@3VEpoEX!2v7vVxa2(Dw?!qOS~bCFqfK9^$7O8Dj9gfj@I<*k)%JQBODDHQyBd8N zhVjKG8=E#jz-dH1R_4pzNn@VC1H{YIo!EZ3ovHG_U%8i4q-Q)f#hjnpI|j5456cwC z@;4f{gfd=6YT*+je3$$x*q#zctpxWbfOA(Y_x^6>*h}*WhsFt!5RSFMP>N9tfO)vVm$|nnUCym3m`N4>e3E~t|uCv z-eMJT(`D_p!E2S`xL0e&tfrw5#aCQ%Q&m5Sp_m^;8+#Bd)GpdzEFtis`hAg^z(ov1 zM@NUP*jf#tgGxRW7d+VqNw%RFZ{8GS3uJC(=7~f>>FS!AC^`W#M#!jdH{<#VFU(c| zn`#|QkW^Zsa&eZJ9oi~1te;@MRx?gO6F*A<2HV)p4XmY`b|k*au@|_z?6d& zZ?o5t{I$$mK9nkRk@S=tehL=U%%#PH3qHO6251Y)wX@(5aX!el24LmWQ2^|sy|P#J z{lNw(9kfo36PlJ5A{=(J{=mE)eBBYXQ z^U?Y0Zk#9AyM?2TFGKScnfLg0&X-~Z@76s$=U-Bpt*Q6?!Ir{Jh>fH=tDL^&B-1~b zr}Ct+*0~Q`-s9(@QLLVdf4f{%e&U*ujwdKcTTt(z9zL-jxw~h4kDHU0Ock+(M%_Yc z#1V3j-+@O8%HZeuYBz^A`>Q08iSIk8h8JxPXhzI+Q{xv3?>> zJKn{_p_Z0bbaXVanwlCIs1%E}4Iok6LhADJQ8hKutKo`UqJ;Uu0dqeB?9Ww0l+qeh zsplG-_pP3xsA`|8WHgeMCf(GBS#YB&OhcXlX$(^$o6^2)C8WkBo7`r%ohxbsGJOgC zpOtVX0u$b}zst-7k`R3s){|YFCw@jt{Iz*J%Hq@u>1WO>;9!CjEpGFDO{2{v$J^cO zaECMx>7%R0D*c<4UAju`BsNTsaB9fVlifYg$+aAvZ%qeP#~xfUSDF5vh&c<$)aUqa>8eRHSyK`(FN@d#*vutTHl&P*2S!be?({&^?AW% z3~SWrxDyB*LmehgGTB4z^IwB>s+6Tl_fj72e{?o8l?Y;Xphr@rt5(P^o}wHdGz(iw zlggF8F>UwT^|VkUXt8+Y`IgBxB5E(?;`tg99QzS5+p5CVdu(zDy%&8;yjF{Jg!k*R z>CMyFr_lL3c91(L1U2fRUB}3EBm#))iAlA0|LPVf(SsZ8Z7iD@?S*f1A8ufK3dg+TnJB5HR7)!)Ah8`s2 zgb+q~Z{U7^!o?E=#@mTZcMx)Y1;nKBGM~{#uBer_re+t34oXBKR;Av57c+N^nJBDQ zF#o)GIGXoJ#L1$w_3p!Tqo}CTFMI)_yqp3UmwvN(TGLfm)%4F7EM~WE7gx;2@kNx) ziqWwNMY#tlr%BEM>w7&|%S6M-Q^D01?!MfN>>F4L)?8DrINDXB37#@mX3{FHkv7`+hHj3k#i&hO3S{RGb*%Q1i|-XAP^sm$$h!npaXe5 zTVUPz9!oe4F8n>oL`pb%+>Cxh{K=QCs#pEV>zpn`po2(wd|Et0La6#^j9~I(@c>mP zojjWP$sc*9B`DaLx;kn@t-i(RVtiX-PD9sav5L3rHr`ewHv?-v2)0*^ecs5!sD*{4h@m;g1Ys!g-J1n!)xcv{Yaq(k!E=Z@=Q_%V+rZ3?$s2X=@b8T3`ll6l) z{V^wg1@b!{f%*-DqsfmRVR!7-rO?fy2!jYWd%CGh zEwci-QpK?uH4dTDw$fRF2JG<4ya(!woB)!g!khZ1+IihX&@DG`8H87ro!^WwGD zrK{A3eZFgwDGa@b1Z^%20*v{kn`>=*u!%Q0*-&mi0|Q+nbO6WQ&r0o(KBloKQ35&NkX)N$F`Jdb*6s zZdSItwy+DDHm{)K5F}r-{eMl#3R z;)1>N&X?M*`F!cL2;PY1eCcgvI?C%{x|QB{1sEe!Fd=>X-M}BO-Q@8rYeHCt?I?3`oD3lEfGwQwGEIr^hZ;FDtueJKI~vAvukO(XBss z&G}(r({OO8N$y2dTa7;IBQiV}IK4%Y1BxPX*sMY+N7AFTxGa8Te0;6G7f?117(^*| zdEeYRUE|L~9b5NE$S|Gq*Jy!mD%@%k<6}4C2(z2}Oh_EUlV)rqX5n#$_k`0;B zoHGVn0gL7WQ8E~~KGki6OI<4{=tWG!j7z={a(gwf^h*|XPDWhd&RHB(sIyS{XUgKm z-csPpNKLmF2QqE-M*f`q)2~J;$U(I)YjhX#O0b=nOLdgskr3^mv8!^^ZJsD%Rahba z0x^{IlZ5hj0&+8SGc>ZEGciJs=LV&brVXhqp=bYYm}xnt^9?Utoi0v)BDH%6FS)N# zgntVZANn=X0I^v~rfN<3JDh;EFYHy-ddQZ@)}!=>5gl&=&>vyFg#rdlu!Igkx8W-| zv`AsJ9HigSt05=+Gwn>leqi%C)Z5>Yt_9_=crERJIg&TjWOJgMF4d8hO+qA{Se#x3kuI&gCz9k*tcnaag ztUvkBb>+sKu9Xvr^NR$>$|TbP=UCF_(xSN6E+d6v0+Ot29V`>nF1n7IHV28NGg^dE z3q1>88$KzxpK#Ex79th;k@|LB_4C~rQ2kifVmO;{X-9j#BE$tJ`UjPZnwvY#{E3k7 z*bmo-W~_Rqu?0%t1z_%@%xZ`6?#P1>sA*{8ghSq$!vDZiHp8V8 zauF8xZ-664SPe(ssX7LTQ00p*8Fx`!6&=Mj3kp&UZm43?{~ze$T!G$ z^!FMz12nZ_pFiiG*VkuJO_bpuD4bEws$3WsD84GylX{<}xAFuUYJ3O*W*+JTz67K< zYT4K6;3mDmnLWp!UoDJDGuInuVk6I3mB^$66eX?w(2C_U!!$bMTJ#L@lCRnnSD5@z zxJ1ObAiedOo%{*e0cSVbXeYjtLPQxAxA`WR!dksS-d*l*fRkkbFer0LJP&iA2~B7o z^qU$1|3kjiI8gr&NE3XKg~R7_8lxCn5EH1^QzQ!O3v3|`ub&p2D=n|obNzNscb#}n zcQYS!k25thlm8I@$3V5hFgP|L!JOa+&BQrFrraf{BldZa%}`9(Q@N2tpu)z2-j9rI zCtNtG_*TB{-Xx^zOF@vvX^qO9a?jr&mVb?vv~S<;%;N`FQOjSpLnPh3M39M^Q;C8O8BtJdUBkL5$KNmDhvzvQLlsEV6pLswPeHhZFgsDnmxdG5z&pB` z@THjH6`!3ucIDQP=m{2(jWrxLtRChVMi2DSenr61rQ3}M;flBjY74SMF{EX4D 
z2S+j9{^_yJZ*`QAas_k<_GZcrtfJy$V@au%i;h`hBdZbMF9znfJ*bgo?M~yxMJm?N zGFF!>7pivj6`w|dFp2@_3%Xg`@3nG}%|ju7!4QCfYSLOcIg#B=zeFEI-Y94NM*sEE zD!tpNdYx^~B@}@G0u~Vw{`N>ZLG(bI`E@^a=`VA>ut`%hv!C5BAz`$+N!7ho;AIb@ zRH4l8e-m)oAs`??%5ROxd8~i|lD`FrR>|P6ZmYqZ<_Pp%5GW;yg#@UK5X3l{%9?y6 zERgsxds}*!j@mg}bSEUC#26XM_|)TkB#&jC1E}^I?~{7t*gn-o@f?vbi~EpD{tGKw z&~A`<=3yX>?NRf!!zgQ!7sg$=`>_iNNi+t;~H@ zxZKW(-~J_=AE!e0j*!5W)&C6_dKg8&uS*y9`@rXdq%~!BeE$cFpp@7YXmH!wlYaBENEqj~;ZVXJ@{L#uemMm&|b5d8J{b;kFc|dU{(@wO@R_ zBxREpS`cFJNJhZ@EaQPFk*|E@-hda=l|c1kgVipcn^SkpLc)XMl18==78WKN`VCt; z`P^rvm-q{jOCAlu+t15oCnw?FrHK#`Oj7#^R~4)+XbdTfQy~ie+my8lS@SW^{c#78 z?354vB&b9E5ghG&FQi`8Px}i7H4x`AfIIqh>ZKWX((HggfP$_F?q29sv$A&L=a*0u zi?&cIJcY~tEZNT)bKfQ)2fd+A^r&|kR@m_7pQ1KLr;kMPr}J4HwPFsowIdrac!{_l zQpW&xb9$~Yr>0l}`=goR@1EP1(CieH)m))@vbd5KcGqx=l;TbsuWlAI2%j}QQskcJ zbkX&etr;o(TUdKXM{^$pd>p69P-t0n0b-Dkbo2Pmgpc!wwL;!%xp~o@<{U{@qb%U$ zaf_|R24jaMEkaJxu&jf6Vm0VG+I6KBqD3zr+5E`!=}vG(KT|pv=-%ql@ zSIdDOyllq|$+2ig>8gN<=78ZsDg@8C_-_P9mQ6#s>#XrD$pS+{{s`cb8laRJY(M7@ zOCLw?u2o@d3E)7-dM7c2-!KsrO2M5TY?#mXn*bIK*qx3!iXZ|m@2iV*El&29zctR*UsYl1x0az2sP#|l}8 zbB3i69-(Q>PK0b$3u3ZKZ0B-r%5c02OZ7UaE@nebMujkN8XiScnL~_6Z@x+%+D0uH z5vwdS23Px)3{c@#icSVLlKd$oo|wfv{RM{NF$m(r1j1~%=%n_1MYV9~6MM}C7j&`# zrQySl*W47zT8*R^F05Ah>2pt9h0B&@Mb^fa6t61IHQNc$^CG4Wwd%AxPZ`woK# zL~*@2mCK6uoEE{+$y^45XTM^AM*FvK1Oy$;>llc7S2+AN;OM=GS5!o+aD4?;4MPK0mc=hI8FP`Hfp+Xk|q+xN)!u`YFN5V6rAv>H?x)~W_{=hHEL~qDPch$JPEjaQ*o8};4TuBB%cjfL`+x1McVd{ zG;3tNyd?CapjOu{9o~Lx;D;kKMrT4IX4)8&b&8OZGahn2)yQ?wL5PcsQ#VC)Uk3C^ zS*Na9I-XfamrxCcxl+l0cDNz{iN`k(lqF}erH#M$E)aO3P3L#4w}{cG+r9A;Hb-Eq z3lNc-7OY26kYWlZlC{Hp6})ve%sn$Q38X(~4Mh@1$)Yq|Q4^%0vKG9i`1S?HO!W*} zHlsZ0$|kqzKa$a#Fmr>^F2P=8H|PyO-seL6?{TkY^&Y zER1zoLNH;^hoAczL{$a0c^tB+iEB`09`Jj;D|PBkF4a9B>0#d#O}YRSIkPqLl*ucz zfdo3oKtzE*SZMf|cYF>dKb(Fz@BjxFQ4xc{B}{=TYqkJfS?a%*rP&rquOUsC4XGB&4Fcoan{#ryehqB1 z3Q$Ff=+yrFNmYDZ^N4kX>;t?dHdfR5!O+ZuX|E(^2h#I+7;#S!qs7E%zpm* z(%8atsuS;9MzpW$N$oZ19jor`%q$a?-Fq78uxZ3-8Mhh6<5Dp^sMZ)U`ntVAnePw= z3f4fRm&C)-^qJyEfH+h(DkQt2QEB6yE#W6DPmtGIbH*31ZG(VJm?< zKa5Bh6eK(uD;>mpoA39QR_o_n!l8jaJKw^njx^ZW(G_*$-hbS_C2U>9M z5logcoxe`E(XbXu)ca6U%YS0Uot(p?oGp442T@T_Ab!TR3#wJ2#L{{1wyxT&Y&6{` z-J+g>mR$mp73vzwwt1$c;k(=1bI6}v!E`Q1oNtpD7#Iz_FwLt2cdWSAI}Bg4M@Dax zl;K0;1fN|bBnVNbdZW_kt<>d)1pb>3i-A~eL{X$uMd=$RQdagaD_$8UOcmn9ZexEC z1QQ7=BzLPjB3DuL;aR35>xSpIM$(TK(b3Y0YCyrYfJ}`65!QS^a*&@bh}qBX**_w_ zv$9?&{3d$kqCHXYjpI&iVbwYENw&kCd(81M1O8KLq+hJP`0RBCX|~Ih4vxjI4-eZS z9Zq;mTfughub)z}uLj*BB;0o?Y4LtmdF59-WPKA^H-_6Av$tk$G`UVq3Yt~2F}DUL zw`$pV{iyhgq5OQ7g?Tq_x@Xf|`n28jQ5DHlhnJAaAB2+jt z3Jxq~_7CCAai?N_J*9Pq6|AR}2?vqjxN`EQVdZ=4o19=_8?f}}Hn#Up zfXast7g);HTg(Z4!7(HQF+zc=GKV(>4=C^QOIO=ZDC!XedovF8nDl+vWH21NFI8u| zT{w^EdK2G72gqf2b!%1B;xZwTdKTBww8jtfKC0q6=R}`PKM{*d#;z!`e%blOSA>Ni zKx4XUQva}_`w%v}rT{((YAeF3yxhI}W$J?{7&v!|yWIM=Ti0YnsfnlK)0OT-l>z=fjZGOeeWD^y>GYPCr7vZ^1!j#}*gR9fO2Q8HuQBpI~d5;j+D zqysW~LNhhXx?WpiCw4@Z4#}W}KC7pqy~YD!ZzggFB|r#PObD_RUE4(;%3wrH40SL5`_6YE$n}FgoSReFr~npI)dA5E zE{J))hSOrF?~496v)myPu6QWNqJ9V;Uk@0qoQm79=v=T{J7vg1)8eAKwtW!)ptVs> zmx0vxIpQ~RJBqd8vd(i$;O>zgzHotEc70_#Gqcl_3ui<1SBpz1QhWv!mQ=K1=%Vzq zI4+@<_frd-dfw$A+Z8qx==v`LvE!7?KP-%V#OownX0V`(z(*bk#l4yXC!K!wX0j|NTGZq=AS&A|Jl|!=I>#;*@xF&?jKe$v?@@%9?ZQCA7 z21Aqf0NV1HHRg|o9#Aqmaqsd32U@W~=hTCN>JrhE!akP5v=-%qV5ZUAc!)w7CN#V1 z-_m*99GVHbEkLXoWORL=#=5VI*tvi`L=9quFqGB=7SR;_<;$13HL4cJEHZ}9*~00q zWgR_pd~tpPM%}QpE2v|3o$-vsuv*0zNh-zW+jh}a_LZdXM$)gp-iqvk0@JIaCO8 z^io8SF5E2Zu~DxS=x{A9BC0KQW~tnV$fvrw$+}|_X**=dm_L=9)FKq@XK_jF>+Z+FSoi}6jtzKN=m7Kgi1|!V#+L}oH`DsYU){W3ZxQ>OQICuI zT{xm$`)i%=gcr^y+!X*(vK8Fq+_Eh^PbEk-vHw#9+fUtd63(U|_I~H+HlK7sZ2ri3 
zm5`^pkilhEZi}llzSqsqM6!wu4ha3WXhG6|Z(niu||w*zlNZrL(%y*w%JzwHhdbD?ec_HCD&`Ip{73 z+hRa8@+S}O-GCXu2TKtntOr*xYam?(g6vkE*N;tbqofFhQe-aWaA*} zUt6=tR(q7zS)$ThF04gW0>r|^JP&3QF@;^3b$p3*lI< zHc>@e(zv>;sYLcHQ?7LLbP{jU?U&7x6E%$?dqSM&kF5LpIUev~OI3UiJ~J)^ddDTJ z-`m^QH0M0D>gk0)e@Z2Y5b1kXPfEd56753;D2*Yjltjc{DzzZ>Rm1F2<+x5;SAB>O z4`G#QmYwh@^yr!x(cO8O%lelN^95WeFl_6xkk9eFUlgBbqpH@uJlLvjo{ zZDs_^$9wT?L$-_VfJY(p{TDA-ro@H#Kgl6uQg|*bAo!`L6X9gv1lefFu@$e!&x5-4lJg3cvn-i3p2G z6ujS<7`|)>YLsCC`%Z5#ZpF0qxPs%;kVa~3crH|w#Cir~ z&5cN)&4$J9tXoKGvYGpSJ;mtT`4N(570Pa$%S!#g{NND*pNF0}_YeOZDLp`Mh4I4u zwIH7XdEkd-%Bds}l10L02&{iF6Zxwx1z@j=^&gn(JlYx(qt0C_+*Qml3C6CT)w^KYTw;w=K!+SgXD&?s zi{TU;DUcW(qc)o7j{u9~4V?I-MzfBNjtb)ud@S5g!SAvxq@|@HGq)^33eiW1xN3&f z>vvmoj_h}s!1giGW<9H`(dd)bJhDByM|s=Ft!2`$;~klBj-6l2l&zK%gaWudn*-5M za~!|w8k{j`vOY6ywgMCaxB*ryo{GxV!MS0 zaYaZJ&3eyKV% zxtR6mf#yC1d52`wAl(;ii}o>6$?tq_)lia$(3_CCKC$tVbX=k{c~w%Ci@neva$6%+ zznvFyc%ZA6ISeg8i$(u)wF$wD*Z0qHK5IhwznahwcILpL?x^{$bM;GNNl;mbSV(nq zGtYB(H82>9%xPI!#It33z6GfW<>I;&u2JVD59G_fzN(zplU~zBH;1IHQ{hxm2Lvc> zbIf!&4l*%m(j!3>dZ2Ol`P4AeU--{mhdM$1y_TVT+S+LgHa&PefN9qiU^0m-J|hq% zfUyeo%xrAQ-98)S$g#-r%zMa~DG%A_G$-ZS2#S1|8ycP{pICb{K9@{syf*t~>8b#w zKQs_5L^qeKFS&@q82bARl2(JpI#Mb_?7tU~bkQ5%qzi{;co+zyf%h$zEHD zW}W_S72tdLod!^?l$1OIvr@)T(_T>TDNLF`kv{$F!peawAY6pJl@|_81|uh2U4R$ zAI*Yk?K$P;qsETM8QjOMFBP=45AAqyiUnJp@AsnZPm>>EEL35LYVh29An|hKT}3sn%lfx*uTO&e7?UBsK?Tv1)mM0=2^@03Bm3!HECQP zUOWk7tiip?1AN%jv$M1O5vC?4V!OM$8-L-yErKRzJs1JpUksi0< zn~srOXuKa~OqHpe<T`jMA4x5}no0kPq_;e`^~T zR1a)MhleB5@Qb;-H%&}Uy^anDHR|1WdV09J8x-T1a$dl=gF;o8l{`&D3T`^EU-6ax z@{OHI$;9wyZzRY+o0DoPOnj_*VVQ9+9=EVD|1+ZM#mI`@KO(aP)Ocg?G1If42zkGF zEz@$^Pt6nKdX0m)o5GPQQyj@Ul}0rA_?LChPnozb__gQNQrF0d0G~hQ`!sj;XsTvP z)}Ru4>_!uJ^CvaHJ_i`hz|qrl0F@cD-c$K}rwI+AkaL|te;3UeiI56Af;Ka3)psW2 zfHTH@&H&gM7wk@FK<@6eN=}4fA&6y{V|}u1Shmj)=dp|;Sh*$lS4ljZ{36#Qc3Ys1 z*~xxSPLhF^w~5`K{_16$8yhD6MZm~?IPM+ICG>uLCtmbeOsajGGn0%Yh1psrA;zfc zRtg`>`@STrVWOV(kxLr$y)2++B!nOUk9FhILwnwUPDt;iC2e3kLQ_6|H5(KnTn~Fo zc@-52wFZxc*EC%n=E!r^LkFSZm^y>w5Y*(6r{3crg8Fed1k9AM?%1tbolwB)Pa!W! zc}rDg6Y}d+iQm4+;4gtOFv|WQ!KGSOq4-yu(|7~uY#nG>Q8qx>n+3Qaw<-P(B_>8j zsLU`>8dOkMr|RnJ()4{dH;=O1gkb;kcdty8cX$-Nh-iRBWodNCj8ZfTXyJdB=FaDM z5T1oj=y~5W$$l>{)7?tSe(+(w6WfvfM-PDuP{s;Ta$SA-u>J=eNG?5up<8FjKPUAZ zhIvB$I%Ru2q%{8fkbgwu`Ma(SIK_7@-ncw}-vwUx&llhQb6&Q^wIA!*X#CfgY6@sf z>#`;cl>I9O;1U0Pfj59s%2ArF$bTOG_c!mz0SM;=r*{=(PX7Br|D8qo@85WJJ+~y@ zL)%{e{~mNA3W{<6yM(`c^}na^A6fceLHf@{`G5I3kCJ@Z=tXf}3nt$HSiso4bdQfN zFIVIKaS5L-0HBi%_aZs`+zA)Omk7tq!qUfk-@)5E;`wNYg^xc~QsepLQ9gfgS{~v4 zVNk{R>OafogH-eek+|Y2tz9?~5j{IQ#q!hRmC@u+rPIpvhe4XL^WmraZQi})XDrG; z{=)Njki7s6M+(6sS5gd+AvuVI9V~ALl`LDTEX? zdu+)6>rX)50zh--g!)bze-V9oxcy2qc*6U*&TD95(tFg1ycKj|7Qu6y?;vGoSJ_kU z^3&r1kJH`q&J0w=4<5S3Z?Hp3YgDmYY}}f+^cRlI=T$U542?{u=$pEsZRU-bXKn9o z?;MG|?>_i)G`+3Ym}|qiFpZ@0blccI*|JpnXl3|$A#LBo9lx6=Y(D0ISABc$?Az4E z#oq3bNN)Fm?&Y4{>+^dc0cW<$?pIi{>!33_%?p7^PKOfF@DywrnQ>bE};vit5i51Y1e_u zP=^bi14u3W%O+w$c3)UW)e(by$trm(i`lC2Aay6bH}Cx_lDbc1e}BJ?=#KqIG8!6b zPQ;y?rlFZ!>%AlPsp*;H;9{+Thcx+)_6@GmClPs&BLh-|ugeQ_R9Ki{4~0o0l7i~}V84E+ zAEq2U1^7LL-8+)c5gdob9d~&(wZ@N9L(8d;idU&;Pkx6bCG|>+XWC6Fr4dqKcB8#M z%{;}dehAH2FnL;H&Ftr7`G)HzC{jWQI59p*)JU+F_drqUe>$8Y_8wxL|A6PT;=L2f zG%VJzUojvCWt{Hr@Nq4OM8wL`_rL!tmnb8X6@li;GQM?6!#Xvb^YE8_I~;koxz?U~ z6E6J}`v3R%=QkBWK_FDeWtd8uZ`ZfT5@<&1b1c*`D|71k}p8uPj|40X}DWJfZnK=X^lt z(RMt2fk{I51$ZCyZ#ZRsNWu55f-UKnRe3q{3tOx^wh0ZI^`l`{u_g97)SJ^ymvhuj zCt=o7;2BTdtgC}eBbxGL7W;jyC*5>zt8aos)7k3!q{k*pJud89?iT5q{s_}akS42{ zI=8;Wk1TN>GPgQ*eAv_BwQPB~+(C(O61x2(%A52E9#?W+kVkt2OqXN*@Yj-$Ta2dQb;)%LnQwoy4SRt1(}m?N-*2(mm7Yp0T-7BkK3>+! 
z%DSzgZ7<H%!B zhS{rcqnt}8;r8BdG5@Hqcb^y!C1%hSV=PC4t3GR>23DpH;|13K|J4q$J2n@;0OrM5q+GSlfXLff zMe@8JG(yVpl3(BVCAinEKXN}xKH_jf+a{xSUv62)J4$T6?xM>jxL<+6;d*DRZF$~^ zI`awYJl~RPEKL-$FtU~V$a%yfo1royZ!3@Y!Sv(9`8ZlP?6=kq{L8W;!%2~oa z){~7GSs6O8;vitbsZvKz>&p(%Mh8?YtA#ZL* zJb4K0)^V1dnhY>jU*f;m7>)3-R}KY-bha^~VNov38cplmCz<;UsZ(s=#S zzilPacx3RPzr-|l1o!fclK1f>BI4u4&pZ6CJFqh*`Uqo^+NaT6CB~MW1$k*n$WxtjKk=p-ZD_YJn^7V?)R7OzeQNqPu8?=|N)!P=1^*FD9z ziPBTFH4%f;A(6HtOF|NT{Fa{iv5u_~G&q>{j!Vhn{AcvSBRp`|ffl|6}aT zU(&=;Gf*R)w&ol4o8|GoXRAa-D8pDimz2VOK_bb%21CyM20oi$08j$0Q`?*PP34~`%=1z+4A)zxTr7zaQyvN%{c*j* zwJH7Ps_V#^o`)qmZ`EK)OWc^_zIl@h^)AjUU@q&j=pec^6p9`exq{2#?O4U1LQTLw zCC4=-YEeC@?2Oir5pw-~RRk7cw)W+8bhO-SoV&LOA}~5Roe9+lJGzHNyoQ~K!FY4+40PYJ*Wwa*2VOL z3v#n>WQd0utaR)5)lFc7@M>+2O>(g166aLK2-ptZ^Zw2F-tgD^l|^!vp7Xd4N6fot z+f$iGEx_4u7ajWQ(&_Ld|2N%4!ej1Y&cZeim5=;=PYJ~J7rK@Xyu-*}TaF-=?dD0;_lYuQ;mW*wG>6W8#|`_C zI!1;f_Tsb&N}0*K>l=*f5VzhU89hB@ZT)2NSQnkg$^8#mF$D7=hsCec2xiV;Y1Wj? z)U2m?t4@P{Bq#xXYyS7ZW(}HC@*I03ZH!<#r67^Klzl0I{SCr=8TRCjo zQMZ3B9#|!WmB@PDj&qC?zkc<-`?$ZHl?rNPL^1t#;(S-7<)f#6rVZFGdg@TWuRNQS zvVBWju|48YwDeeH#wj);m!Hb;)+^C#dFNA@%G+7_TRu{vp5JBO$mE`*^osIUbXKzfr^W8k92{2JOsUvJRTJU!>yZ@ZpT<7w7|6~H?U(am+07dhvuQ-yRy z3VjSr;67KIKm}^F6oXA?bzp76hH$U#)x(@k=Y>PCq$csh<5kPZ>b1}Z3RDw+uK;yw zZc9!wdNK$ueoU#WUp?HmiEntjDXb|9-Po)ad-immZtZ~xZQ7qu_>!kNt+^O`w!P7g zq#sj#>aJn@6ii^ivmQl!@dN>bTW0@3n+ctH>ZtK<@km?Yy}Z?Ncg94|T*g?8VB%Ym zxzEX#@q5Bl$H>u`$KSCm=4M!m^jpMrPLTDavItj$DdWuhGb|~tSIbkfHQ{l`7D*^7 zjM&MTmdo19{K-C~T@)Ex>WWP<_V9@kHT?FcjRgZBYr$c=$PvJ6HfqA)JL$^2_Rb0r zQq`$benYYX@PjI8D55CKo=*>F&G#WfOr#zZ{>K8{aNpR!Sx+mr*4>1Ef24OZo5#YJ z90pN@2P00Sp?asB{=#wZE3TeK&-wYK_$IE2x0{Jjh$xUDqrr}5DfHg_yhMv9^7vOk ztVo$U%1HRdP2s0&&W5Z#>{!PVxrl%0h^MSK;6;w?t?yYNR#`C~!in+XdtW6qH)N$U*( zIp-##`-^0;c&+Y}cFTI?(&J$t@;wog=MUnJV^Kw_tro6nVE9hMvlhR%^6GMXMg2uN z>eq-<#PTnUK;z6|612d{seA8!WCfJTkGfKlW@g`)OPwvhE-qX%2Zw;j`W%{(=1M#= zI$UQ$Jr$HD%L_sc&kW#%UaWwJjwNfgdhp$wO+jVobC0fIrjTa)Ca-%e%AL`B8UKl& z3J^H%yKl{P%k`vuoR6CxmE*K>ZKTAg!vuZ3RL_jZ;_?akM_IbkDh;x=`-Qwf)SV*f z&C+ioxI#ymL=@5tG(WgDHL({Lq>`+~fB$LfGR~hgwl+8U3TuHAu)B4ARU+5WXT{zdXTFlyOmr)%O<*jWt9JIrG$wn~c3 zqUESx^7iqjFhe_)(9|`E+ikPafZ?Yz*_e8+1vk5|&BocebCIPvu<9WT=POBLhEeeG z#SWT@lxgSldi_l!h>%>)xWrOtB{NVpS*u6!9fGWq z1>Kw*-+tqr6(pJ>rM$A26A`5rYpMWbexyitD8cm>NImlF&*E1+uAo@sW{;)OxM*g8 z7XkgQ3EFeXHpo=B`;d;ss#cfK$psX4by23Q2;7OTU$ZEMP13`E@3~s&;Yx(^iSfBc zAXeloH8Q7`w+Mzsm1}$4o!3ey(v%jA2$DrxU`~AM17fP13q-u5gIJFKjW}n2!1h{* zZ_dVL{f;|!@e(yNLC(&k#CdNlZ!bmD^8?I`b+bt%(@*0%WUz5%7Y|N|bJt^NDrT=@C zRmb#;i!AHHA3MteoM;YrERx@|>+25lwEvG`x+arE$*CA_Q5$15)daiAeyLIvoF7k_ zYN(bDvRuNrRr3|Wlvn&nw=mQN!Ln?qRaFZn$)~3$^I} zWmtLq=Uvwg13wQCse?;F$3W@9be@=wBoiz|Uq>WSd%3+%X$T*2mL|ZvuM%>&*M+f> z!V;D#K?U|dNN~3MaPwhWXzDv&IUcJ%3xd*?(wTTfF#bwapks~K8J+)$n7dNub1aNO z)n&b^05Q=$-ub_YX*UNB?{CLpFG)_vPMbfLlr00Vh(%G>Zjo=Zvc7lSjH`+>`B8~P zx#345pM?$8YciaeRg(%IqdGO%HgVm66tG8$A?ZWPbl-cE~@q-oe3zCzT8i|IFbRSe8S`Y5e*S_aOZb52%I_c>k zKZ}BtXEvBAqBC^^vVKMsg-_al0gZ)8-f+Zmv@b-i9DB>UIA{p&T)j5(WPEUjv&^lV zz6~yI_%qC){q<WM1*iQ$N&J;&j+vgi|~EP4emqdS?UJxV`OVBuiHR)vIJ*Vm*-DOhl(elS&N!N-2ztL0#|$FhsAQVBBi-h zkq$1<3|cKdOGEG(51|mIU***P1yv?=Q?f#eD(Q@bWSSeUr=7OA2=^_sF#$6-toye{ z>NP((dPUJrY5=b{NBz`oNJBG`8s#aY^jHzj;ok;~56MrF^Et)!d6`KCz>1lD$wrFi z@EVF(h87h)tZn8;mwe5KHAu^*BC4*b* z%w&oBkNm6=AM(R@?0Hp{bbMu>-9eaV7WnK= z^QlD%Nd!8pS9n0F=(>PH_Da^m%k_r!D1W^o2*kDg%BM5gDkG9q9s(I3_X;4#zx5h7 zvThqsjnSjBX%o^N5lzm_(fE;UeWjX&DFY^PW-Lj4W<9%gyKY;wN9u(0sPm9f4Xvis zN|)i)xNREY5x%rh%z}Fz4k+9I0B8!c{eJrmeV8Vv=$X2l{oG~8is#_jQ#k*2z0nzh z>qQ>;1$nF6$oE04c;hY{5$%Ybif>+UnmBNmb`4+VnRSeZpZsvLn0%q%}7)9`w%Y|4hrz?zExh^KZ)^xa>ekh;Hkp 
zpw!RC>2bSu8ZOS1Od9lHqWj#XGoE!xFf98Ig|wv}3F2)I1M1~{S*?2W#Tldi`x06^ zh@CO!>XbBmhLzhs#r;IKeUb^@3-7O|YxoJ__rB25nWTbiN;z7((X-@sD~}vn|I80QRHWL zX5AeSBQ1hnhs~=(M=M!#dtDr4?-#JGue`kgwmc=%eB@gL6PLsl#N{Lw%fK&l`W8x& zi^S4Z1_edRdJ$CSfa{UG>NVPF(>->tJP;vyIL$hu&DA|m2s+!3?Ee}cPo>30F4c`# zn>`QamToJ}crZ*FTj5udLjnG<-X@Dd|Sm+z8NjmWig4A{-2 zj|V7^+%CtPM2(a=s7XYb6(-}UueJw#=>jQIRQMxyM6GdnZdS_nu94+v<@RsQ-GwfHozsR6TI@sF%F7`Guf#9aol55aCEw20- ztgX+0VhBkGAP}1)N7TUw74i!qH?-Hj-gM_2F4oY6wxULRoWGr-wgX+rI zRO5U+B}z|&FZ?za)(}5%0wfSJnK{)^~pXdhR^b`mI|hm`%iv^Uv=m z4kI+nzfaB!uGlxK<$~*Qb{m8_f6%rpQ)XHUl_Nrp#9IpfR8)2C)L12r$09J}%lspT1Daw$_41)69Z>Uid((T5BCgTw$caffq{oU90 zr(a%OREr)ZJs<|6%U4Y%wNWL8leYRc0-a#FL=6kF0n~5w%PTLPJo^wfnk3XiFH0?1 zZ(A$=#*Aq%H36HpdmDfz>F`_ObfPslC)WgzMw<0%0+qckm;4D+*<$|vkoHrZ8*%Hy z+SncXtX_2c3{3o)IV*t{Hh_wf(^Cc1aLy^ROp{ zVtbq+abBn7bL*C5b%BCjVje zqUDNXL|~F1f~_tj8TP?D|6Wj+iA0WDj^#om#60({Em%b(R{!;r-V7D=w=suO4$^~|Ihs+z#4&D7I0=0Y3 z3yE@kzw5oEFu=Rli)(MwiA!~lf(snUOv|}I(_;6oReGE9K;f#fb}nY4pY^*Vd-Ug1 zj82Rjb>y#&Z7g=PWs%IGAU{7qx~OAM*V_Wp#h&Ma;o=g096KayBi(MA*`$kI2R-&^ zIYiD${KOi>J6?=8b*x`ZrC=Y*?Cd`lftYh>g5V>?CZ||Ihcq8shB0Zp}|KB)=8qjr)e* zb3MMv>u1s0xhqe9e93!%G?ovL4YdcJ>yUVo%wj$&$_LM#wywV9{}V$k9?H=Skt~9^ z#%)34=7*3132Vmf=~t@ZjxT}1A!clkwIgd+e~3@JZdmik}05l|in zP6Emz+dO9J?muM67+yYefZhc z_UEu#fFeHL6mjMLB7VlU1M}gK<6S${e`Qi&*!>W|#^aPdZ`FRW0igp`^NFS|ejr== zhbHq{>8>kIY1_EP_4rL!!9)S z)c=W|KwW!Cf>`=5FkL(XaJ3y!z)c?-vzm_@t>p=c>ApaXW d{=dF@^#*^jV*=Osch5b*MKC>UQjT?v`wzAPIUoQ4 literal 0 HcmV?d00001 diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_grad_req.mp4 b/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_grad_req.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5f3662d02e50848b1a7115c7bc4cf64d47d915b0 GIT binary patch literal 71224 zcmeFZ1yCJJ7sq*VcXto&E*EzT?(XgoEI@Dw5Ih7YNN@-a!QCYg+}#Nhg0nX-$@}so z-`3XF?pAGmRdDazp6=f{J<~na|L!vl002lW+`OEuT^;QK0C2#!Z_q!Rv4<(Ey%Ps3 z004lsa4|Or01jvD&5YeZdCH-o9v_PrCH7jj=cMbC8D>f6D2`7&*|<1J8A(kYUCc>Y zIoU{E*myWONX>bTIeA&wK_{e{KpB`7lq94WIY>n`#6UUC%uPWj#2lTx?946PNZD9e z*csVaSh+!oR&H)ie9X+xo;_pour@Pyv@>>Ka&)m|{$_>A%FW&ml*iG@&Dznym5Zr$V%#BZUJN`H8VG{b2PODvhuO;v5*=&7~6Tdngdxp+4)#JSy|ah?ahH!=ANXk z?k1okZc-;#FHlj?r=g1(kd=uAR0#AzYH#gnZf5w6$O*&tW80_{7hgWb#VE^ zhN-o^vD-I4tR39UUF?iOnLz0#cJ405UWTTQ_D;rbpx>sTOXTKaZ0!I-fHJxmf6HUx zVr*~j3c6S(hE86fV{0=Y8|c{B%-HE$Eldnetc_j28L@UT|D#^d%&jf0+)O~{9G%P^ z3@sg_4Rp!@&rZUO=T0H|s6@L&M*v^#<4jjd5tI}=D$gy=$h<;(90FgQ zCZ71jBE6uVjvhQ*%(ri_o?3tvee}Iu49XK@zPA>kI`R`c5Qu>mc^GepAbzWgN0H%{ z=CZXs*<;K_UQW>|CjK^7T?%UTbsJF8%|e*NR<5uuH}+b@N1Cjg;N>YS6qumvHSC8c zy4Z)KQ{iTQMMtXyY$*_`@3Zmd{Rv7=+KP8>OzC{)SWPL4o+PLXtSWP>%x;W^VK<;U zy*{)IoIw{ap7W)Dp2;hce4Mb-4gJurF%5Z~^$NEC)dCKkM)Mg(i{9`CO5$-E0{B;6 zcms_jaMM!1q>s30r)Y5r2sh5nR&aLP35%QD3E^AKn(n;4>(ZV3{SDB)2^d=2lZfwi zy!Vdm`59!-g+ogzuG!jnu9PNl1GkUd=tq1Swcv+rnSF{|+{uImmdlKs>2-LL$3 zi9(vCq;CCbWlx!_p1T6KDJ0?s5GvD&?b+}6r%UYaXh!(1+lbWB|RD-`iI_Qv& zH+aH!9Lr|JpV-TC(;C|Oj0l(*d`O!fZRAX;8Dk1$j! 
zYL?#DG=r_eE>f>D2VdByOcI0X(HJwEG|z~gm$JTR8W^&IJL%s$AoUa zQNEI)Eo^V-6|M|Amy1G)$)gM|ZqOm}2oqf1nB9E$+!e|QS!hW)5@Yv;H4F|v46d#= zey#L&pd*|Mqsga!n^=QtZHU(1J{z2&J7KSp^A?<~VYwr$hcD(xR+3%R1quX3M`_6>8QzQho_*HacnFCRMh$_g;Y z)1j*cknaKjUc)8ydtj|HabBp|PC5jW`dY}bBl`<7Ee!MouH^uXNRjQwlB0>^e(XN6 zAVv$mMv76aiQ$Nf7mZipA0?a#UD}L`^PJUeY68@F>XM6z_%UXk!?;$)7VkCSbee3mnGHCjy(PRgtYh- zdt^oF0%n)Rwe^VO846+$CMsE{7pUn2d5?R(UM6vpyvko!T-Q6h$|zHO0p2xy(457Nr7Rfuw$FsqKdLGDSUu{eL8ZCwxl+%QSYAdi;?DarZsR)aE})_n;(PkyI<0LJ_Ms^00=#LYF}2 z&_psrCD0fselmVy^MAp9V}LBZ`vv>W5(rxg!rJ{E`^{Gnc2ENApV+-$us^&#_$T)B zzhS?7n}z%@*eu3hu;0DSV*Up-lfkAKoVa1sngz zh0Ln`1^dI>`d_f$l5j=Rn*R;^!`u14V86G7tm|K}KfJyDCwBGUu;0DSB>oHbySJIZ ze_%6({(()U{5R}(Z!<;zg8jeTZvWB1es~-DZ`l8}TNJRY|Fv7tF(v7)3P%y7LYF7@N9WtS;D|sT0a+m{`$ssL2r!9 zby|s!j0&381njx7tdpT+=_ZbHEN*X?1s|3q{tg~op#BH=6B9Hz&PxPy=+VIfKEuyP z!&p-#IaIFCGy6gx{EvZ?@mZ*Q@zH`^>uXM)i+59@g%qX+Zot>*v+jwtN+}e~0;bSwLi#EF9$a5%7_#7A)kAms;ha|-H zIq^=3O9t-%r*!MgtBPFlc_fiAi^@yt$n{MhpAXczzHS8I&gU0-i{^#LOq9h6?{tC} z3Es(a+n=$`x|jJXL=5^ccovbvbJ^3_260`vf5~5MVP-Qh7j@+P9HU7+rYxUdh@tDF zw;uK05v$=rgOV`8=xq^Yeo!)6s0sAdvO|PXc}Ek^TRXw$(Alq`9T(Wq*Qj!Vi#33 zCEcIl=o$NWJfqnG9Z0^|OhyVo;OBhX-O(TO0#&>!19C+(Z>#33zD>fjAp1pj_TtT~ zi-kkPEY_f+3+uKCxlLqUWXuWNxOLkpNeQ0TCp!Uwb!w#VJSMD=z0T%HGAO*OIz`w; z3A%4!^gDI4RwYBb4~)w+NX9?vMH$Pg8=VkC8&NN^WVQrL0<%W}t!X(S_wIG}I-g-{ zi<8iAz$i3}ps$LmoQK6MT~k~Zbi6)Hz$x5FvVSnQ>5rVElINNh6+Fu_+8aW|j$F5e zvpju1Y;g$1{FtSUh9DordiAJ8HIR@SzkTo|nA*O0Q6y#@t6*NUN56Hb&eNU@;Qa*0 zSNymc5ha?6G3gxpW0pJnK9Sbs;ofpRyWw%FI zCpfSuGY4PZ(p`+N-%pgo~_y(-iSrADRX>WKi zBJc`lZUyaNZprOnKutQjzPEa9e%=M|JoTdhqF2Yg%u)<+`V56Z@uOtqMz*^frz1%N zE|u%9*A2FD+ENn_bmBnuV?NT-L_hc{j@agtuPfFq*7FDOGdQ){S2yonj>;sl(YYkF z*Q;m;9S6WQR24mA5%Z{EEy3P~!Ilc*9QGp%jk`V>^z=eDsT&^eT5FY^3fAO9XcB8p zHVTI7XqFEW9pD!e(N`+F>^SsT+7*f~j?cv(B6BLnxL<}|-3@J5!pmTC7F%Eu+EIkr zrzVhFSLLkSSYK6!$}r=md=sHzuV&e~1}&3T#0ODG$s}?%bFq%OIDW4&c<|BUX`Pz- z(g;ijp+a-3gVJ3P(C9iN!+n_Uy^r_H$yedflc-_2GL944&;gf9kxhf$JTwl1<)H+8 zKE{fwzRZ$$WL6(gUm-m;qwsjef$QRNsw((}IpQ+9BT4GKjq$Xo1Oo%U2?qi5pe!HCpw>Be^e*oo%{y9bl++_s#|&bb1{KeD`_7hLza z!ypC!3J{xv!#nYqGa%D3ZNgyt%B3V_-C01{B?G}Jgg7SMAzLTOkIB*v3$p@W0HUP%TG1BSU7Dp*6I+ot%gl0t zuPI&m_fBy7L8iX7(4+fs!B}hFGFH~k$ZevE0nbh6{CW)G(KIUL7_x>4=G2XRGAauq zvW>yzs$;E}cm|w7ss+@nBdw_l^Y)h!4!;UOZdIVB(znbc4|nnx+ke8&ELX@-{+in) zk@4DXqkM>g@cgNc6oJ@9-(JXIFPo~<_TK69%KO(&H&}O-Uy4jubUUV&DSu^*JSZJFi8pE_w*ZTm48=-J=E7VY zb59AS$o0hZr1VD;9q3T4TSkc(U8%vpkLjr%M7SO&C3QPad*2!YTtG$ALg#M8)IYfD zNE#?0dBE8&eLDPwSTm}{u)Z!SLpSHhcHf zf@}c^#!Zgqcz`ha#(q=FmS)Ubh!CSu>TL=*hKrzX_f5!jkSwA6kF3jGCx+~et;gFu zuT&)fAh_)Pg7zCn0`Sjvv#l19EOD0HY~`0!}fAeP1PN9;r0})?arNN|FT9G;&$cUq^5p)G6A?4gteMdtl2qIF`1^%sdLqXZi3Ki?lI+;>Ua;8q8{=$Ts+86bM+%o zkY;k$o_>PoQ1cJvZ-dpwQ@Z8OuRobOTqMWE;#8Jb=v1m>8iX@+w=U%x+41^X$WAr**YqGJm9drH&$?3-hKnkj-E6T0`j!wk4L z#rx)yDwjN_;h)xDe;JnIA<~G3vUVvind@#?I=Iw5Ol`_AnSGMxC>B#z#mfg^^z_9d<%?&Q zCOKs`{4UxB)5#6dD3KPP%RFxLQ(vjr>gl;Rw>R#2z^3PG^!z6`7)<0G#Zqe8#x3az%^&6Z6F@#QW986#?>Jkh5)?EzTt*R2+^>Fq7~q z@g^J!0Lfux2=9vHDzak}@EAk=);>{R*(qz=y!T2^5_WiYn~$KA-j!QA2%#y87@EW{ zfvp|6=;II_Z?9xs&#I2DNj`*kj|c15Y1zVnYs~Kr){*hZFYBeMyB3nF3I)eHUhvkX&VM0DaanO|CMcA>H$UgtUvx=g99=8k>mz zX5Rhksp0j`Xxz0xnH5j7yVUdn2_|AtBA#87iwxns=iGZPm*s*vHgq3eo{vKL_yJ1|!%Xrdci1Ijk;{&Fp9hDkvfevwWid^bsOIRwo}PBK zA#>i|J1M+Cv!#2bRPdpN5zU*`e0@tzB-~XZT^2}|9sV$&VdiNix{85lr%i$(FpKxj`2SHx)3- z>>B>nY=2?5w(KnG{wg+CBy>?N?)^vbu}^6&!R&{Tw2DXtD7FZc1Ng5cMo+GE`(o7w z+6|%wlkY-A?+S_~G{ed5&P0}xM(PVRp(2!6Y<7gMr~QbEssfWx&c|1*xVcWox0mSy z@(9{7%%oc@HqPK)S~n#YTh{X1u+U3e>50=CqO@O%?%|D-;716FvQGFW)_UM+avHmm z2!y5wY?3J~Xq^-IM;wUK%bDW@+QDZbR7O%<>>0kz=??~4EORr+~3YG((WS@UVwI!^0LTsAXjl6=Zm<~vUG 
[... base85-encoded binary patch data omitted ...]

literal 0
HcmV?d00001

diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_gradient.png b/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_gradient.png
new file mode 100644
index 0000000000000000000000000000000000000000..793fef3658dc857d7058d93f96ec7721b323bab9
GIT binary patch
literal 39559
[... base85-encoded binary patch data omitted ...]

literal 0
HcmV?d00001

diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_graph.mp4 b/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_graph.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..0bf34617c39c593fe7205c0d7ef12477bbd6192a
GIT binary patch
literal 113606
[... base85-encoded binary patch data omitted ...]
zB5tvA*$l0FSjK#gKn~Z_nh#?gULgO~>ZP&@V^I7Ik1Qe}7`QQY+&<8x;%!(|rp>uY+=b^WEvml0H1n@G7Vx>MTb^yRl!8XJZD%&#wG=zVFj z1sXkh)FSwBq?oVUTBKa;gAE!Sv-&6RAL~=>Tba-M`pka(+=8d{k?g^ zJ4`|T_Dz5NF;Cr4P9GYIcSjCPzV(*){8-5}ZO@rKqdjQu$Tkn%BF0Cw+KEe82Y>@jk3zAc$-JDhi{Vl2`iT8WsTap?^0qsa+7 zJjKSmoBe(EMJOqr7M!1t^z&BOu#}nYXC$X61D9TQeBRtV5@8pXt!R?fgbxwnf+_wh zK_g>M0!N4vt(G_FYT;N;ay3b#e8XO7!;6y^m5=Fm!nao1H@;Hhqwm3sG$ULSK|b=u zOp*J!%Wm&&z^lv4-D}S>ePF^BTM(q6@s*}XVfP0O3?!e|xXJm@SxFXZhPt`g)@PhI zF|VXW6)JR^VkO&MBw> z1ybW{P9~Xa{3=6fFzfM@CV(wmm%T&Mcumy=vNre)>ZrXlHlz7hvYK=I+q$1j2jDKD{=bX{FUDc&IUUZ?2tx|81GR8=S8{3Q_Juzq+V`y z`gIK?U7bAea8H4<>t{RWfYE0Tq1^PMmo3+vZSC3%a+>O}$1y8?^TFu(@$G@!1lVV+ zA?VF7Evt}ZpP7(+Dg^6xy{W02UK2A~Na^c%v=8&PL147liV-_W&1_Ac5~YLES3Tf} z^=zd;ot)i~vg9Q9V7$nidpV<&yZ)JbjZBHGmjFf?`5S>E%}iLI&OM;}i5)*M=TYiE@Ut}@1ightpYoUE5fnt264FZQxJ&K`HD^rF)Qr2@< zt`MHPSVLySrhx6?h><)k#E^1Uh8^A(ynQxu z$Ym8m>3yztMtzcTgC#a#Suouj&OH*`qP9G)AP#^~^AX(-HLn4Y5u&)zqI5M4NynV( zx?xP3EPIgCEuM~2TegD9k>#&^@y3QylwEyj(9kJIwHHm}4MEcBVRq!obV&T&02?aiMa&EWO!sKp}?PrmM2vr1~t&qF!d6x)9bRio(ZcfO)<`8bw zz$}06S8vZ)Eg5BUx8=0HesOVH^XfcDrPx@(L39W>yvBAae$Aq5*MO>A3y!NF`!2ID z)u8IzMo7ywZR**LCwDFY1Ofj@xLeuem7o~rvOc%*d#L`f98eTSEd^8n#LO$w8QbU= zs#2WWHw_p>>_XTWq?q1lpkSA?5uOQ20lr3{yU&QXi$aF(hVm z2V&L_vud4t4VT~(Y92GUumkwpjhJsG#2>qAYQ~im8xCS$p%|?&$9$I7)_%I~_n#d| z9q%kZ-O-*fHXDYBzkKKuaaWCsGK3{^TBRGF0C+B?e|b)ELd1ALfEhn7wwUe4JcGiB zSr&gW_T}kCp%z-a6bc=OR9etRoSDogJXz1?UQW6C7t7ixZPN-aD)i_d#roLLJqFR% zk2Ykaaw|Q_9;#kF`IypnyGnnZvZV1n;oDjsGlP=1GOquOLqK)~XqvY?L_0%hN7D@F z#W0f#;!S-y*-%kLD{fb)7|3IdK`O72-{uhQuJEC}Os0)C5d)wmIrWWBTauShX?raL zKF6m-l%xvTHlSU3J5Dq5b!$*q8G(_w8i@E1auB_y(4_es12fh&NDw@1*eA);a5VnW zq9SeO#>?bP$7fXH_e!}=w$;XT`|J+#I24@$dRp5f#`q>v;IzW{TStP1rQSC=tPkAa zr#7!o0~t`$@K4Rr#N&K~rUeTrzpiMV2wvo6>bu2drOz5jEmL+Tku&5dJ^0@iyr@3~ z?@*?1|ESG?K&#waG=9E9G5>8`G`@~vN#0;F$vAh|i(a2jB`j92W592RuQVTVr@PE9UlJt0DzOu>qv{t5~|=9 zeQy_S7m-h$$V`VUT_`D2XrxH-D-{^ za{ILH=A-v?7eoAD2;#(dv!``cZMv-MDh}dtA)!-_5+*n@`FyuTzLNBGz9{xE^+r~D zD{&gmY{t}_2|`vCaS@DM~fvum&WX5^v`yB4;CLA4Br+5P&;XRi9V-Y4cIy3?ka zwgUfWNIAdP_KtV1v5OkBP=Qzj>`t_Tk{x;Q0mKwq$Y_}pVQJV%f>WQQQeIYd_K=O} za6MYeI(B=W!m&}Uldd$#Q`Qm3FLmKuRM60k<+h2SP>I^Nt{H)_hlDdT#HSZENTj@d z&>vJ6pSUwKge$QNeR;`1pYc|lg;!^k1fot^<&sAdy?@jxFpRcN8oVoarb#sm5oXHe zG6=3EixqKcZH#Q|!FFLTr1havJeVsV4q`8lB}6<#jW+}45Pr3T0(xpSlVv}%K!pc$ z%BQkrE@M#AmJ zGi4qT#io+B-VfhhQ8np{ej~b_CcqczkY2$#s)Tvz=lhji797J&<4d?n$IwWslc2O` z5l)V1MN zC)X5jCtsM1=f_3b-A18O0RKNq@C(_^jfpv8TNx z)Hmq!^bo3hhuP5w8z>zw9hHetPZhwY%5u=UFTCj11S_gwM=wE-<6pdgBA~Q07zoe$Fe*CL=MSr z0%xUfyV0#9$BPn6FLD{u#dcyF(aDVrXy4iG8hEL*v>WR>GQ;jfm(_49S)x9#IHUba z;{#d%eO)bf*%3HdDM5m1C(#ws^6^zfQ%^FLFC2hVC=kDRLC1yJ@iSPohtfq5P4)%7 zL}Qf=nu}}O0I`i+4=)0oXL`KHs+nYn+}R`i81Xm5{C;0rEnL*_Yn!_(c3cS0+G6Z94!Dufh3BjvEQ((nmS*7=!7- z>R91_U9~)#`ntZ2}R|wC9AYi_4p;Q%hGYJ`72Ezbsy2#g#c$F zq>6S<;qZO#jKp(3wrv&R3topq@7+_re28T8f{xVCpw&2nljDGw&TlXOL?V33{%Pvi z)wE?|Xz<*)vxIk+Y5fv2qQJok#ixGv!S8UcLHATXg5zjSfZv?Sdnk3;|;YgTSm{=(@TDt1N>X|4^Y=Kr<0h+*0%PeatQk>}8q~ zpH{|KInxTb$U7I#Xw+!U&>Af01u56GrrxBPQG&whh)Kgj1cu&5U2h?^z?Pl{d2wu* zJ#cL7f=AZFVp>uhnO4j4eat~5W>xP55*apYk$JM$J+%R{d>?}u2X$xS=!tCKuPV1o z>cD2e#Z%P;tSd+$>@Dp~`^~Y~ zP$cpNzbv>!DUnq5;TuCJz&ahAfE(|u({3qtWxg`TG_2;q))JXCQ%FHMezfqYZYUyK z1YZ}g5U8p(hj_Vl9a7*Ix>AX8eJ+lcPoWEo!{E=LL1hjZ6#dwC>(d%Vzd8}pA5t2E zQ7lK$D*<)lHc?me-VBFbC|^@6ml0dLyV9T+$G!=dGAI&0vXaL4w>KqwjU`L`4VEt` ztORn{G_P;tA>JRX0*s@Hgcwos9?b7%oQHT~^0a0asbHU3XMT|h8G>?5eqGv=!>Ox@ z6|9UN)4qr9X#UQfxl5f*4o+TRGc*){lNN%_9Ug3Q8qJ@jH%Ch1#PjGi^nppC`kX!j zd+&KwZx$dqlDAhtDnUZ5(~<-DEl;aeaaFpZAiY51K7Ea^*)*VECF|;2+FOx#O>aY3 
zG#o$76MR(!p|W_v;8HMifuWnS(am^#a#^^}yGSB0GhsS|%uu=Lewd1cLryVZ=)+gP z96F&c3%%qbk0yBvx@W$lR`9^Y_ALUuOtNb-oo6--POT!fcl>Tr@*4_-+`)UgbNpk` z>nRe9QpT@75s6=Y;EZoI;)f)KlzDgQVvMA~vJ@dikFxt|o=ZJ~<@jUdvewfu5r{hxwk%7S zLYRj)cC$NiB%y=ZU69d^!8hrj`Lx>%<5zN-MwyLQSdD7CyP_4)ysXOVW(u(VObLe5 z)%yN)dJ``uLTSO7;VnM<=k#s?FId06b5ZY&4<;N;?!hmaCt0tU&Rt&I${u+I=RIP; z?6q~USD#xQ9yw(0%qz@T+o&rWOi}qj@ns9_T%lBP$*#coa4X2aT#0KWQ84Gqx9aP- zV4Y$<4rp>uu_BE+=)4Io*I|HA*-eIrv` zJK%#zvy|AU1mfPUotfO}HcCDO(3?uStz{({+7-1Wk*aa(?98_7f%1xET!&>SXpW#2 zB6uS_j8v>hDUce*7hllKDq~ud>jo4QN5rkesUgTH+Z*esix-rQILy6JGpG}A+8m5P zqggq*bC%nejYKYk?T}2)z05P8Gf2<|ws;1kej`u?CswAm#rew6jn>Oeu#)FdX996c zNLAhM|LTZW@NuqJ3kNlFz^pKxFz7f7EQFZ5+wMd(S`yvB>$CI#R{C+KO)Ihn@hfb% z{!VvD>=V+eFjWRrSF=Ysb+Po-1oKE@BXEn|3xF9gtO0oaLu(%LdG-(h@}X5+OtQ7a zQ`1)QOu6~8zQAxzhK0G)BCX~$Lpw=gXCpgkok$Qc;SIbK^G{;*kc0peAU6e4A9#fU zB<;Uy^yqHwyi8x$9*b21LB}YAVFxSM<-_8qQlecxD{-hcMbzyPU3!sc0PkY^S*jk* z%eu-)=C>FA^f;ayoCs)bQ~HULx3UdcWxIM|xK+n&mXF@fGW%~gQ~q`y{&y97ND*L1 zK*IJG#UngX7<4gks>=usl>LxloL||^1pp9|fu)K5%O3izb_gChldmnl^)yH+^@)bB zEI_^NdA=8VB61JGPnR%U&bwd@$A~DkuZhtl16Bwu^zvt}5ISsjc1XteRSKB67(vDo z5nNk0uq4^jGw}d0&|S3EZ3$QPH`!_~*)-w2c_V7hF)7-=>+S>L{WuZ-H}H6Y^%4A+ z{_}gGC-B6v0bstrmH3}h!QXNH_V4efxk;ZgfBpi~5OoxQ1=ty2WB+0O{EWz{2k>I~ zdxMf%6M)4-n2D+@4>+c_Os` zbT~b+&JRY&~<(R=SdEYzqZaF?hEK0etKBnt@C?U zjX&gzo^DEh&jJC2^9MiY7jT{k{r?Q-hjsqO5B)K-ewB3t2@z4QIO`133D`y2h=_0B(8=U4RP{~xb+pg?}!bO8;Subabo2IVDN8lVlc zr}hYI{}#!3*F?$6ve#SmjP2T69gp7DdBdvio_zUK?T(B~ENY@(W*Y(y>dngQ=gh3K zWAF7tE+p2AhcO6Uy(sBCkI@Q*rR8{JEN=$1v6H|#9L7ytD1$7aWaB`Uy{m_P`1l!? z>xDj*hh`cq-kN*fp$3iVdRWe#KR2WxT!Jq$+}8IlYo=umZ-hCwm|R<->d&Ho#RQJ^ z(5{XQqO$4x6-GAMOm$IB7!^Bz;(e$LCS|DRdE#NTe_YCe^MpFKE?vd)W+MC4YkN4Xss-ppa;e-dRd=h6!vo z8MGO7kzCU;O=#5}lCAlwxgtj{Y864AxwmvKF~-#qL@SqGU*9US;fHFajqwsn@D(~l zVieq<8UCpvOwc4r64fO*qv^MPcb6yV`BAb`5 zB9J#pM2R-(LHod^X2_^9Fs|`Xf`GQ05zIw|zogIoM8pCwGWfvz;0>eXTlr(v&BkGk zuEh9>(!K|MV{e^~kQ3S0afD-2WWsK@y2b`u9L1}p3ifw#j7b}(5`irqS!1y!8hxqd zMq=x*m1#Y^uqBn_wh|_o3I5?`W0pD=8V3qdB_=ieXorPnL7jayT?$L@x^BPxOkedx)W&G1WFU5oO8%AH1COc=>$_fVEmczHa z26d0Qy$WP}d|Z4~h(R`6!Li(m(C>t5TW(lCSB>0ljw1iq-9`o4J6bJ7#Tan}3+~c( zEv=lXnGGacP<`DKhO0av)|CTmkP)tLT_0=4f5C4nBt!PhK)tkuWj!zAGI8X#{8O(} zH_DJz5ocF?EwtKDn&no==hsyVvgaa0TsSYany&eaomIFpP~L3iY~-xPxa5nBp_>-7 zAtr3Gw{$Jt85iHMfaAHBcySOXHC zvlabu%Omc6cQAPw?iY_59(NQHvQDIxo2iXSBbjw&0^T_t__8(P6;1?V=IfG=hBHDH z$ye;QA<+Ubga*yI3c_874_%<1*T@xNd~&%&w@A2?ncP8H3}4#!OUnRIfYj!}*b{O5 zP^%8US(7P_DygyZ2)o^xh|5yqhC8^;j{0m+g#7{1?o$j$bx^DmS8jmj!!~k4qmfQ84}0+AE;@p@?0BCnv0rPCqkVUInEK1Z;V!hpwU;^YkbDQ+IuYR&1>+#l#&*S*3ceKn|Elgd^Fk5pLZ zOqo>#=WN`67A4TjfoBr2Nhhsid(-VgQk7mSN=Um?S-wlbAi^&SK^4;wXk5ueEbF)) zQ9&J!PBA5XxMXocXJvj>Z-V>)$q2Te5_R2H*_B)cj*hy5zTlR&>-J$;Qh4~7v_7cv zvB39*{ppZYCO;nNE?y5ex>#O^q1!f0vL6B-<_UcdNW)ux-*e2Zt<;=Kfk}$h=tv9d zA}xIf=j|BJcm)((hs!conBsYZS!lgONku`yr-pV18od6;#2M(lRody5Zz8juiszbVLvN zj{ua~fR!-)Inro2x1aQGio(OsQc+Xzv=!?8oqIyc*U7*VayuxtJ$Ja8sDKe*!UX_; z;?KcHLUz^b-Gn}Sh#AS4udka;3cP3iB^Z?e04^P1VVb{-LxR?Z^#H7A3bsAqaUzsD zz?~v3Th(}hcQE|Vxa6ip9~sE8-{Gv%7Cfy%+2PU-70UDHh8B@yalnp1;r$NkPeDpN zU4sfA@4l>O`n|Ie${!E z{vrX|_thKxg^Dwq=!o5V*<2a?Zxz%3&miWXu+8gl6?C*IvfWiM?L`;D=e~y^`8t2o{DW2|M;)MC$qyK&Z z0)|lk8RtJp9c2LP0^3{DtTd5a-V^)?ed%8f^U= zoImXOBm?pnF6~Kw~N6w1x0knU>`D4aENs|omH#ooB^Cz$R599M!(GeidpT7qE z8t0$A=s%3lKYbqh$LIckz=QrJ&L8&tvj_brd;Y)gK>rfw_wo6Y1N|qQzmQ4#*MI)* zm;UviXsiD)-+#O}|Mj1Ln|tG5|M`F43iMwG`hC~*`#19cI~JgVKWF|px>>kiRJz&Y zEx`1;cr8bo>?)xB+q(SsAJ{hF?c4-Vg&6qvyO)}75Cp&d7yJWoEy?%ab*iK@4_x5~ znjUH=9#FO@02ooR#AbkD2gSRpuK+*~8) zSET=&Ygd*ZYggy=$ELIKpb>Xn0F)H)c=I$yy#f 
zImWTx)Czi-9vD8sPNsaFK2yHia%%uCTW*n#xrl->nrwaHe+yIRVkqfZd- zn)omy`@P~=DejBkrsr*>k2JSJBA?Zr*0E(&`#Q+^x*GcSI%yek51b8To`5S4^fgne zLNbDq;O0?dmnNCd?7kFzd$@7W)UfEb!Kpa$+!0`(x_w!S%9}@AzY2qY`WfG9Ol6;3f5 z@D6`^rv{>N%F4WaNlQdOaF5$y4DXE1BWi1xXsLg(*ZBz2I-0>xJEc}S!MxDxn^g}N?*Zk^-TDe?N9ro7{oWqppX+Bi9 z@@BRVJbasGBf~HLSOo(L_Kl%R-6O6$$vF6-FpkSS%jFddbqPU~2><}kV1IHP<>E7g zH96qW*>tJ8f_koz+`Qd!WC1Zo!citMH@A=&Oq1*hRCRo z0D-}SBK2M-WCUR4VpY8zAhH*SUIRifLq4f^=!=UN_zyomjM%pC2YbzGY294@qL4M zH;!c5gJ(HR-Rx_;XYm9Qb0le!Cc5+%d(3FDP0J3j>`sL}v392_i!g|~h=@(^r2Ejj-n&kvn!a&>Itq>VnKJ8V@<+E#$$o}8&T)@;)}8pw zpgy$;{+I2e3evC@3wa!it&7)OAw1mM`1F;t`u=Q@ealJJDEa+i@_b}*zFCU&F(wq z`%V`1O3SM)Z-KVk=gjH;o6iWyB2-11Nc{w1dk7rS+Z($z6G1bukv7!NRmwwhO92%H7 zmI)v-Y@WEeCV>XwK|gNF_;`wt0YY|~_Do&5q&~cD9<*yXv=&<5##7g(_ieu^SDpCB zVb9zTgIaK?U7@@eJqDOiI;Qmv+OKtsv`9Y|I(p1HQPW>LqxOe zHb!=hGN45=Lw|)WFDvolZ~P%lmw9zef&0asbCO;MgmW}-Fif8nm0P`FVXV>*PDc3H zQII#pPT#}xBQ9LAK*pU`%)-Tzw1uiKQI%bEP!{cV>!+eLu@zOSjYE%iDp5t8+*gMQ zSZ+p9s_hnXI`#C!M>N{6L zq|_Bstkb>0KV>DOtF~iqgQ68t=vXqggRoeE5*(1m#H<(;vz4>rWede$1HM3Nye+cD znc_)EF!v${pVqy!U zsFTMX*;%9mvQ@FnIL`oKRUf|i#$Ir{5k+L57HWh-A8j!l_H|9JOb5>?s+@FV?o$FV z_XQ>0o@azYhjD!@H9f({kBe6_6QaaL)TK?D!d+WfytggXV?q?tdDB9Z*1;#|VxD?B z=&OwP;HLlL)~bxs+rWLzWi70B5s#B^GW?~Mk&l2_2L!Qg2+Rv@rd7jzfa&Mz7|Uj1 z{j~GUfOpDpf{!D#7=cWd;^=^b7Qm(3Yf7#Vuj$)K<07K|u(Lf*^_1+OVGpB1PBlwM z?w#TE>P@CP_4UB&&|YwX{8q1Pt; zBI0dY_8uEYcB?AN=v%Yjb-Ty3)~G-f!wXl{Au-)-#_LQXPJYseR40DfW+ESAfTer- zj$QYnCAxBr$ATyUKb)vs`}3kC!HbR**{NLSpqVvi$$_`6osZahI!9ftRTU9lk_P1X zTAW3>p2ArESzTj|+Gp3?u{=rTNGFC@#2t2mqE!48a1hz3)$U>b{T52Vw37>EgZ@a^ zOf1Myl(lVfCw%ww;7ACT&;Y`FQyIwdeCR~9Z+=h*75v)A8;Eq5uzj%Iv#4(88T ziMr;7CQfCA*kvjJ6Vq^05m~4_Q%3ig0&0$2+?4!54v&8`QWV|Vp7?t8SqYdD8`Sv* z%_qOlM=RgV(QLVC!K=26lwimhyh_y{-yhfX(AU%KDxQ0Cr-#JUs#;k))yBjs=afob zdK27kX|O4F&Y-fl`-9ii2s4Rajy~JFlIz;gEnzyAWt(;*6(6$nd@u4W`(BjrWoVDj zvqO>8RQ37Kf}QAZz_3vA$Ac-dL|mJV(d9G!_3T6Whxhi(e4JJ2tStJ6O4%ULeHIUU zRn7~!ygrY{ImYu&j%KmdSK+VUSDsFMrd<}86Iukn)qi_Y1nN(Im3lEeO&8^KRD4}MI(P}L5wnCn>G5`##YN&2iL04Vm zP$stICI_<`%7MUknt}SpanwMOZ4Gr4h}fp47=bwYxld5-dp?^HsuSrCZdu@Ej(N+{Ib5PpQ`tql3t{Z?kZGz&YQ2r z>0zq6zf~R`2AGkH5{xrJvcy|+D#nr93*ibU?$(zC z^$w#g7|?0!n=B_aA4xyo*UT~}tV4JHzAL{bV>KYL(S*9H@kZYcQKocK&=XoD>fT(? 
zzav}Fu=|SUgx`)M^!iaJj5Z;-MtpO^ah;k1lg*@^qofLEyf4O;em%N5k5Vo(7}*XO z!UJWTO1h`@F#|kXsMpKc9Uu#JkOiNl2V3l*St0$Ra2RE|ID#%~Uv|$&dKxi>+M+sA z`nZV_5*uK9hYyMO91z?kIgG+B7 zBIEZH$VmQO=!}lZB4(foKFTG}LK)?8o!}MG?C^^l>)GJ}DBZeQ z0vJ0%bGRaw$6fF&A51JWO1s#q?I9{ry$CrR-mqWq;^`2WF9QG-0rT$H6zDESU?)k@ zH(=IQTYVi8)vfN%avElbUn&T*9Iouf>R&ZIyNJ;gqBEh;9cn6RSVP`HGm84=+H*oa zndbV{Kqyh_0L2O3gD)xrXbsl|^v>?D70RU_CkrE54tc;a0KQ#`ThgGpt5vFfqhloO zQP+I!xgTSx^{^`k(>H2D0bwc#mLFr>j&qTNDo3hM4pd5x@J<6%$^@j0{JMGs=i_#9 z$wbUNl8xQXJqbVu(;x)jria&D2}{Q+d1|Wc1W`a>W_Li&s>>hu`T~|NawG7R@4ig8 z?D#9d`3bx;v+$r9^X-izI`cmA?jcyrp(m{LH|CCScPlAQMX*SRY`=&=d5~q zlZv9D-QbXSGINE;RG&mNcGlTs(5;eA*AUI4%s zjOvY5AMYKOdDE`ioVs|M)mnZvLXTPIJ8@g7Xt_OoZj|>j{4p<7x6>OzE76}%hKJy& zPSqyT z-B*P#Aq^G!#a!5VX-?IdX9TfHPdIwAo>cP_Hty{Rh9qUG#b zg&o$&?VBWYf*ZckPkU~y*mOo-I#l9s0_-=2Mro;}hD5 z>UY~rbH2gcR*nxyM~-F_)adI?!Fp-pDESzqfq zDO}?Cc0hn9U0mH(C4rdgA2uk_AUfx0E7*z|@(obNpPUzIkz;9aP9Wg50w9?)1KDDA ziky^d=_TJ%`9jgJmvP6bzybHb>!5&omUe7Fp+=>Q1e9qX;o-oGvqG6ItT~_2{0Rw@O53wnN)KX;i=a!({USQ@$AHR|P_?#{N zgpLzlvlFkEyL&b#NAJ9PVhPSv+^+(V^u#PpZx)9)mGrLc^jc$EVS{8a*e*bsA?_oTQWC07k*xN@#{jr z>kzLMDR4#SQE5#f3hNZKasMyKdrdH33dyBh4NZtCkYlV=J!p5=^C$l1iyLi?;Y?1^RT@6uqxFTk{)4W1uRLlZ_e6TJC4mcr zEs@l&FXH;Bnx}jsLYPYe0k6kBmNB&MFxw+)vBzQ1uOgi|*dq{?RD+7o zcbtMb0ftcq)Vj!=JYz_hHw{44?kxVT!?wZa+Np+iEZ52A+BO}G=T+XOsLdJlE znTiO)6I14qGsA)x&Q>0z3O<)saZtAoo&kjLPck$BP=U`edxt`CamN) zJ81;FqoGz+eWw7H+ie=^bzD$fY$w0*pt;Tpb$sd2n4CQnMzcngQQrmb#OQ-Jb#?}J z{qjo-57tX&#T3G^-dEE+`MH8}`{IPX3p}6O3K+Y?RWelG7(<=U5jtz`J(!7;jQFjP8+c~Jjz@O5k)Q3GFXgvQ7a{=>+ zDSo8G9R3(k8BeIRvAC>vCYBYaLBcK5az5lhnNFM>-hK65EKKFP&^Vq6f{@Ip!+pxe zI04{ugwRLO{o611X6XLVY7u*M>q0jmdJhzf+NW5e#w1E)uHD4-;!J04;*REV1_d}~Ey5R%WT6pVb&PU_SpL_RR9g8~v%Q_4+v*3F3HVo3u+i3kDTUoAE0SrQ$+C}@4bTT zL$SmTg;b2Nay&6Tpl{q^2jS!4<&93r^>+qKxi6sZ!P(Fp?eH-8d#5+O=-Dy`qxkTl z&ML1t93OHc3RWHp#>y}je8x?)BHp-Gic_9&L5eM4Oh0a6MdpBH11ub<0l zVuT$BR)91s_8Ehx#Msbp9{K>NffkD~w)K^F@$t}m40WO)(7?FcNlPw?v3mA7n`>ps z^jYFI7XBA7>S0BJ4hAsLW=d$+i;u2DTXmwSDk$}b>)K&9YwWw$1w9d?vonT?!ov(& z0O0=Jl_BMGXm;tP+e^dU!A(oSY$7c>cXOroLrZ5`Vb5*ShTd1s`1zme)NLA@AIokb z#DyfX&g|2d4KcdVoFy*SbC2#gDL2+SFR#&FPhM10o? 
zJ~Sh*PTJZmmdB|a?lNCey5jHOCPIC1O3~QZo~kMhAJ@Y~2Q8b*sWw{8ESc$IIDVUM ztd4fTb$vmcOu4mY5KdY?9%r?wW7uF z%r>tO^T%5BAn|c3O_9n8jk_&!*Rn}i)Zr`2?GFPdT zGu~w<_G0DBXMSF_FXn8mo$rc2m7!A(6uqo99)VacT>vR*Ss1T`AwWNW6<^mVTGUkH z`?=xGW}Xu1^8Qx>c+<#olH?};ci#7?3=-H%K7k2HEK%8;&Ae0)8^?TUzPs73<=ijJJy zJ6TGt>D;Pr9Qxq4i37gA!~{d* zUQ|i3@RI2BW$UWf8$3&c$E1YC=mGqviM-F_qUA;Mql!oF5lZWBY&vsz$t-HxcjCgl zqadel**yp+8+`j8m-%7iiQ(NkuFz=ds8(+chKIhgJ+dhlASu$0Y_&I~$(GlJhmmh8 z-;<W44%?8lJAkA*jv7lj5Q%5pJ@EA#k;fz1>X?kuz@g?#bKKWP?ihpVp zY1$dK*=en1h7Ah?(%NZX$dbf3`1{x%O7e#jHzqgDYDRRQ5kZNw+vMPH(|u@rY&5S*(QbvEKeYF$YqxS$iAZR`WcfPNtVp0d10$7m8^5Bk*+CnCP7;`gUq9w{ThgZhL9h0PV0p#brHs#9Tx*M1ZKu^abJp~SqnZU z#>7mHHzFLA%iaggAY45V#oA24-jd>Jf=_EssE#QYqqo)S1x?E&6I6;U&?X4+QrK0I zkuSvNwA394GcKl`KqKKCqjiSC>dhZ$f|EV9Dh*)ue-DH9oeJ5HT3nWsn) z_;Sswy@{fvE@{cLi)K+y78LD67j`x zswNVtCK@u_-*Cr3ewy_O*8o#ok>+#Z3Mk9_VdjdJl z@bTJo45dHg2n|*`#Csv&bOFC~>0Z`Ba2#8Etr_q>QWB{Q&62odOejX^F-zy-{Rc`Y zlB#y@Iqg+s+nA(BGMQFaH*?+q9@-s8ZzrQqkM|ao1Q@OJM0sVe+e_*tA1kx{ zIJ~*yEe2O$v)V}Y=MAZ&>Kw!6@e)MTP6jPxT{G z5^?lfz)fvCkjBl_2P6xD4bE$-E^^N>xmU>9E+d4D4CP%7;1t?*W7wC~L*=M(dml^%5>WieyC+P1@8)5W&e{mk@IOL~Ywb?yIgcQ_r0-_Dx>115t5VvWF_#A|R z07RZ>?OCo>iMlyT?5zbXwlm=wM#IWSkx11kcQ0G(s#cTOpcGAunc;$Gib}V+p4L&L zDHoum(Q@dt>FjuhZxdhAE%nQ{B@lk)^dNr4teP{gSz!a?!rB1K99mJw+RP{YE{-px z-*APVotQM&n`Bv^a9RAA_HH)`#`^k5-~`)?DJZsLec*j*h;X;BM{3`J=M*QL{+e4{ zo&59%d&(M#P2Nv0yP!@iT$Of7tFJC6Qy9WPL3-r<(t#TV5dye+LC^f1Rt0(TgO_3- z$pT)qAyd%AI7X#Pkt-M5GJx3M!!%@fjbeiP6(MF>@A+XowivohzfnxM4yg87^|gUB z^b4_@t|G|-CGBkhsrdA!s$Z0E_6>qKkxaooI>G7Gvrz_jXI|1nNsx!;R$o;kIe7lz zAfh3j!k7)eAbO1 z*c$;-j8M|H*N|@~Ew!R#(waT*tOrD*mJj=g^TbUfz3lHXK&A2{23L;J~>8^w;Ga}%*m+x#=TY@hes2GL{3)A`P|Y5O=n9GRr^v4QSsC|z=zX<5!F;c8`u3<@ zmz=zt1Rc5I;S>ctv&;R0iRy&V0RR=fliPRO>5pdEC98vFqmy4hR4*ws1hA*;2eH43 zqM{0jBfP@kd4nC5x)xHG3i@ zmMjzAGN`t0zQT0Neo2Dj*a#h%31_lI11_@Ql(9o~Fiw91+v6e}f~JuO>Z?!4_uQi& zN;s(;GhsvI*s&*4@y6YfOTWDtuD{70NF<;Rq6^GK!r#56IKbTw zUH_Wd5OqJFmMFeFUqQXUXWGqocudB4hNzLdiQ4%TmQix#gu6r(Qb*+sBo&(VUFkC` z3xUcbWljBq4SItcQxO?eu@K4d$Uzj*D4mJzNSL0(U1sWt2pa2 zEIDRPJizES2$D`&CfVByGmsfnh}q%8=;I@%&_Tx3NXfFCvqP5x4akFR^ZuHw<=03% z5&>29DyNT-(636MMszU;u0FbD72CX2V3EFhIeH;7Of>3*GjK(v%`ECT>1~ABHfMvUOi|Uu%?)wRth6F|a2QRnXQX zA`G-LAcG76Xh=kW2!Hy43A0+*I)lbeI|5o)OIUO`B@CwGCcyixw#g+wL_~(;Y-e{Sv{*liiVH}@xY+n<|z$`Afib5B`oe`@ZZNNV4E_a~Ct zPt83!fcB(Tj6d;rpWygsw(gJKJw2ZLQ*%#52%ppnZ0-q%?8yaSbN@gY`_a3noZFw8 zdqS@LbMKyTY5&~ZKQO<(_wG;9UVm=xsfgf{T7g652_yE&g?}c%{ur5039vsk_k`*C zr{4WE`+(Q}GZB8lV+Brxr##l5n)~y_`>oa=6X6Lz^$Cvuv-1uy{$~omHhwY)$!epV zsOT*^LQ)Sy?%V4G*2WLw58yJV+SXbW&&uQc6DVZNbxsa6={8^S8Q*nX=GHW(sl!TX zf|-0|Umr*1MG03 zkm9n?P5pr;G{Zrn`70J7{Vg~=$#@$zPRV07&mVDfL~!e3+Ye&<04cXU4mX2}*PW|+ zMVpTvZwPNyTwY?oC02QM%8&NNCs>hwM~Tg!sjn^E%Vr)g$whZ=>l5ZnHqE?51AiJZ zk~czx*eS4Ua^INKPcvqFl#i}4ySgNU@{xVOZ`wZ!+i9f^nY-m84=~DeZqJU;u|kC_ zBL=#|eRz(ZCUPwaZ`D>|pooXb7Q3N73YEhrhApLycS*qI`MM97MQ3WZW#A}^2Sp4y z;yHFT5le!>E784_!P`7_;i`$}6ecfcK7wc$MF%NE>3%-Gooirg>Opi9^9i7D!$aEM z=$@Ljsc)c@*taX7Ei4bf>w8rGXq%mA1JB#L!xxi?fY4wLTRmpgyu)NmA|;vrJ{%_z z7WEw`wY1VC=L^D7OdfM$hG>_QRDfQJ*om|Ji8*<30)wxPs${`S%REJ?3Hb1<0*!h3H}gJ5cWq^MG#=?sEg;Og$O~L+rxt10EE;U~9uA`uYWE`7 z6vNv7vGw&MjPSVi##9DXbU0*M{dbXRttiu3QcdnWgC$Bh^%E)nN&dyS$Xv^;hPQWo zd?xa3EUzIToAHhnvLe#quN4b-0t8;8tCcssU#CIL;tB}mSzE|_W>}A&wVa!&TVV5i z(9YoyT@)Q^My!=%Z0L3^?bSV|b}YJI{Rbm13*tmpYx{y^Mi?ith=*b7H{C-nN$J%K zix8j!!D4&mmi{;$g|PP(l||;-OV2bO53Kbd(P&hAySbXU>|O$De&iSyF2}14HK!3BfF> zyOHslGa@0bWN%NUH}{Py+?8CiA*1ycksgt(K?&w9Itmrsnuhdqp}v}EVW*k#aCsPvM~4cOMZCVlZMB{K2r2xnV_O} zQEK!*)oHQl$@z=bgyz87dI&Z)`@vmOc^pP%!3JcF8JQIgRtPI+OhdfHBc$tV> 
z`Ov&ck9(mWyV4;ykrT#aqw!^$Eaby7&8~0t&b1N35kc-07Bg!=+Ip9srK4RIQWK0M z#~O~_I|T7+_6;YQOnxye(d4*ckL*sJ0w)?K-l9`K7jN91KJ}&$neKWJkWg{EKEKo2 z(P2r}^A$UwPsTGrDr*j)5gm}ps+Q#XAfi@(=IN_wn68aGLT`S#%AI`fR;AT(N*HQD zu^~|$9ceOTkM_}2Hy6C*SkN)}p(7nn^)%yDzraXl`*Q-`4)Y~%)$F?Ft(bXNH=ph6 zJP2}$!@Ym3Y(}o$0E4Y)aJ6zK3s=vH_&G|4iQ|*7leLPEH*vR zLYwf-ncyuSvl2`6)09kPO8pGiph%8Q9 zzKp5UcuS?=L>%Potr1r|6Sh1|(-R?IN><_owIMOMee*ne)r$1o(nQN92wpZKPKfY)d}c8pUPO=m=t(>~UcA3ntMkz{8*> z@Hk-J_JUXFIsRFY@Xzunsw~sEe5v^Daw^kDY*cPIWUXaLOW=8ciO@wa2SsI(T}j9J zSfsh=E(mKzj>l=<@1qAiAaciv%?>gsN=CTpda=e3-hD-W_Q7GzWtR{c>wmHLR$+N$ zUHj*QyL)hVg1fr~cXxNU;1b*&g1c*g;BLVsxCVE3hVIVy{oj5k-96XL*<5|X0nbC# z&8odut^KQA>t3mHA);NCzY?5p`H)n(@E{{-Z|oMZDCW>47#v6RJ$^dpIqRnU>4k3* z%PtISrLsXYpuzGe1<_9Zu;XG>7@Ok+BMQi-NR2Z^0$=3^ixd7!m;;dhsw0yBHLt2g z1MHfR76rGBuN(iES8q5l$qB$_&jFP`ieJF$aqPJJJYJGkiBR#@dy{GTOAeDkVgN$H zZ6{oKUvXp!SZ(-FWVyh5{ia=^CsG9LnhMUCtAZ0j$yt#fhNxL}5T3Z!*e3~b@kDIy zn=??*9A#-cHGXjR*P?o;m@vI(H|a6>TlF6Ol#Tn|*@=|+YyqW`?y#jj>=js)HsnIt z;smb*dkLK=jTRk8yHF~82ch#ko5JpWLIn4u_^c~YUv+i;$@1ToaMV&7r}u5|$vcqm zoC~Vb(|5dLjG$gdeI`1}rN+J>Y-6&JaLrW;O0^5tf&AV=jnhLzW+z&RimgxH{E zV(v_tqbwaaR2Xh#JRuD}NSb`3y<}8l@JziN>M2d=*~Oot6rMcj(|uRgZIL)eduZ_` z5DgKWQiEzgFMfLg$M$!a0NV@J&F{f9);eYU?ltK-v&|n|ry?8e`4pa`{Rz*}ON;I;g+*Atu$}%dwQN6a^1?RMLP2 z=i^}PFHN3%u{cjc*RSV{QF6j5spfebZ`+E4df@r|TL=u9Ln?iY0CUz{+>4;6c5wV; zo04WMOz7XqL-ZzwlsBr4F+)yvOkoGl?!}BFh*=>7z$vs!ZTdXUI0t_efz5!iN|}=i z$cl$kR0{^|%aGfPe5^2+s|Pog6GZZ;$hwln5XhZRxIhb*Xdc3Te{Il)b_rAI*~Vkt z{)TeE+UdUC+}syd9J&`Z(1?{w=wqd76gr|h4qECu?>eaz z{l*OqrQ6F0(tN2-HuIPx|7_2qc1QK)-9)qgCtG@^LGw$LeXfMKlOF=2ev)W5oypa< zQl6iz8{8w~PnnMEJD?OVqpmB}uY5T#wB2+vAW}&3dB93dL?Oz+-F$fh_B^O>)A)ho z-f$1!|28wSH%NHq@iWtW;;Yf({{zbbZ|8MXU|BJjzt{Th7xkRZ5MIsIijme2t|kz( zFu3Re7;lGo<0Jpk-RS2U`?tk6&(9Wd4ZAVpy_u{N!$vZ{9|r(^m9N|&S%}p=-w$x~ z1%M%sC$O(iK3LrmjZA+!tRR`fpa--5?EwGXH4X{b2o)5`bq$hjJLpXjFelAxPgSo- zd(kRYW7%M8w805G^%}u$WK$;qisS77B>&q2$=+3fT$Lm44FG}wh}u(vieSDBBvAwT)nUkmwTuzvm>cKt%g&xrhvy#7YW-}J8d8Ihlj*#26` z&xrhvpZ{9O?-2TLg!}`GejBVmJFWe-ke}1{_YCxxLjJ(he=X#nDD}U2|E*d7tw;WQ z*8G3ZBVZ{1{frJ1;#dtd=(&XLyf6`DHjdMA`>pf`(%9mos3T&s+(c#%) zZ|pO9Vpf=s=9|CiDWjZQQrIlm;+q-FuycE4-X_Se)1PV?^-wtS5GH;+<~ymbWm%bi z?}OL~6RkKH(xc_KcUcq1syA@Gpv@|8Xgm>F5XTed=g9y9``EkO2 zUPD}ja2ig1Wc}GUYgZqBAQaR1hZ5+`$DVHOZF+dQ{Yubw720K4Hww2}Ab7Viyu&uj z3*QHF=mFn*U$=qXDBC!@A=D(jt=k41cU5iMU2W0`)u0yE*Pabb2>eH3FC`|or!2QS z8H)wNX3?q}Zm3&wJ@Fv)SWH6jd1~UkGQpI6d-44Yaus?RiZ!>vD(#V&xLP4^{Q#`t zN8XFY4z1+sl~O0iqsQmoM8ts*ZdjN8EBMGBrgwC^hzht-PiSeunWAS$D(3kff|9!{ zW5ePJ$0Pm6s4@M!!LYTqyqQSTIH~@CgW+gqS-pPUlP4P! 
z$+|P0@2nD8OLiYbOqwbu6o> z=j&_S^_(2^kg$bAQ!u5>Eu&c%#-FF)5LJV7WNhwx>z39+F;uE=iqhGQRC4Rti1gm} z%EQA{Zhknp_siQb9eDuyNVpT(js4#6A#&94DH=|^b~9?9;rIhW8mL<`Q5@S8Uy>G$ zY3e>U8+PJ5c$e$|DF?fk*kk;iAyFwSezkw zO`rW0iHwi;oEyE|euUZEr_cEMXzS*E1M%+7A)^bvz__`~|HgkBmY3*T$Z zxRRo*xId0dTLi^RRL_8owz|a(`AVD+1CXWHmhBF>W%9vw;lVzfZt$)M!G~rLgfb_d zo#o~&T|aJ6&-Fb>T-v7n3JvFm&CjF9C-}2tw5nZ1BD|j>pzXpc0u0k_5bUIYUo=a5 zpOlHVUO+>6z=_QQxSnx#kHn7-J#M0yQ#6IvT)H?j=~KiJ@3DGzJ|TFrPOVTjA~{g{ z7oc+1`1KSS7%w+DK`;lY)*QJL@F0A>Nw8{&kk3tyPJ1hv4%F4jhTovn=~ zwqkRwLA{C$v906>c*gQN)6*rGhDZ-vs9bP?OaTneXQ3VM#oc4)9 zt12Q?FSegOgfoGP zK{Atk@C7AVIiXYhBuVay=QEat*I7qjzb~1_|GvVS^N`#lkVH&=fK-W&D<9z8`L=Ln z|K%c#M(lf1QE3D(1=s5gi<1H#rCQuK?n%NpddGtI@UrBX0FkcW*UW#Qp}$y|vHu@H z^aob~2{ZLwJa0o7{eUNzb8rESF`@6XMje##sSs#Xn*u7e-)4y#5di$wo&>=A-)!iQ z+7miSGZ!d7$Zr6@Nh6$tRMGgwrk(AkQDe+h?Z)BqsH zLbg5mdSkut;!-2NI2)hxoR!`T5A@&8O7dTz2m;YfFeri0*R!;0fi9UUpu`sd9RJ7n zez#Bk-z6tVSP)K)5|JAE$Z|tui=?eD0RFcQ{{(;OFMtGUhm%dP{`MW}U$Ud$bqczo z@bMRsW%N$n6_fpRiy-=GFYrGx|4)g;rXXqnpxHkJ{+T5Gg}{H_E|3#htQrbXY==;sK;cXMtKkT`BllKcI^-FobO^-Kue?n7#DeupO>KD=bDeoUh)tkIukg8wG z`x(7I0js~1_h)YP7xL);yIz!mTm}VJQGd%7AOLs%eLObUI3A8U6D4A@hBq*m?E4m= zx5@GgqV-F`f5h|8`SY&?{}&&M_qQ|mZ#<^5|F~=X;_2bvrR6_8MEt9Z`^UESzbg`cCYFDArucSo|E&Ojky!qS z-oF*#|16OHlvw_MPyvF+{VMNKXl~sh^jaJPyw=FQc1NG6K(raeJuEl)dP}b!*{?AO z>U(Pyqb;!*h&jTCx)bgjjv$T%q03TLe;lE}x%Wjuq$iz|LER}};@I5y_^bvP;uIi- z4FCig4?G*DChKGlW--z*@6~oSYt>o?V}TvOUwWr;gy!@gE4bKY(@X7>-U(H!Z}ZBw zgBeFnnrkVYX-cFgPzGJvH)QDcL_h=1829@+QBhYb^l*_V+l-+Y1>R-$RNRQc@q3da zea{?gl!+RRZ>ou+BMh+wYGNOvyp&yCn+EJw9ZYg!X@wO&uWLR(Q5N$?sZPhcyYlJX)ui11BrPo?0hp$ zdP6d@w(h0O24Vnl!ET90%MVJ-V_{>_9+ILvri2~+bYi&-nR?_hQtEB zSkerY9&E_+j#EH1rTe@C?~%RB+mVd|J7^V2enxS|`h~Wv*`;2qzbgF%a!;wbVHOF? z`Zgteg4HmGXDnkZs4h-K$vU6r8u$hnx@?>Al`Y0(Xi}Bc=k<7YTl+_ggpvP?1vN`r zfpK3|G|M^37D<=43HRI+6BqJ&2#^Yn544(S)g|qYq$=!{GfjXl;R#7h~kM?JE504TK@}4~6S$<1L zPfNG2$sEIwd$q0s>;OmaBim}X+hxrzAQ zp$H67X2&tp@ouda9$w4=Y{XZ3@~qk)I=d-K8Q5C!NSD}6B4|HHfAEoe9ERVyRlQ_f-n%w%>r_*sCR~+(*37 z&|{`L#zF}ExsEX1+SJ0~bLTTa5k~fb5-?@a{v3fhWwhg2c0h|`%VZSAL^~4}hTHRV z14*Agn4Z%RLK3(mxqyql)1(+ObXNm9D@>1c@lat%oB~Y7a+D!#jTWjZWOJ*&iWw$s zqK|a^%p7|?`qYLWgB$nf?KNwbpmoDoTQhb)EZ^fM*01rww5QBOdH2G$)37!kR|64S z*pE3AB8rHe^EpP7u&S@YP|o=N;c#^GlGIDMFpO^a`&~gvMMZm6gEAzjBrmS$ z+@FD{(FcaLo6VQ6_-qwTm5g?lwND$ERp=HyFb;&DcXRxr`Z`Fd+(`yC(Yc-*s;FRl;OfpC5M~R z)asxDU+_K)Nv0+uruG}cqSAfuLf(GoJ6>-j3RSO@C$?8O=;u#-gtAV0AIWnEG+3GP zzus#jaJ7fuF7W87151sYSoZHXcf5`}q!wbc)Ddym?Efvmp) z=|o=^%7iw7C8(~pi`dro+ztS!biP%K0>5Ake@+1iWroWNfH;VYDEj7QcA$k7{aN(xMY7xF6N|`z5^ai`& zlRyLT$?2kydDij~yWg!p@cPF{h)L$=i9}j4kTRD~mK*7P@gfsmDVU!bl zd;k)-hSn(xbrsg)d_g=;DyL{LM$(PeN~#8ct-Sr&o1N`{q73Y$l-Bdk*%mLFb|%c^ z)*FfJ8Sm)e`@DT+)CUu{&6QWjuNDHH*4f4yJKlR0OD`kvHqEA z{2$Ah{{vlMuFC69Ui)(BaO_a-39~`1Xey#GZ@B<~0Kd0mlKmO+0{uV>WBZ(%xssE& zmw1&H$&k!#M*;v!{zK@WFz??8{X544a{z!f&iIo*0RI1l;{o9%6JD8@SjZOx!+cf( zeZzH^wlUzC<() zWUA?hOoI!FUuCen6|nh%HEglnzD!_;_Wk%io-pNE(iJ#E;J(3}yLj(;RcYfM#;F&S znR%v5!9RM6y4${hCMaZt+j7a+=sjJ0w^^hrXd4#)Y!u*L>KCNzmSeJ+*EM@JGE6n0I<6tBJ`B+(2PosEP!_Eap4+Ts zH-{fk^Ot@eja#uBZ-a(X{UF;_I3>h@o#BDygzjG8Pf@I67|d`hqx|N)4tIzf43ZRP z)>#{C<1{E5=xp5UwjY$Xb|{y~^Y&)sXtoPyY=r(WKZStV=|to=m?w;$7les3o>g4F zF|u;?gVn`mW@s%kvTjf|AF!UM!lCc(M7F-^`myeSQ+iWZc%eNxGPRV! 
zoGoSjm~6c!5bvp?Vy;`9C%oIiK62UK-@7?ZgNP^;reyegm&6ty?xMY;gA(RBfhUqE zxh^jTvb|{zc$M#=G|vQ5Vze}2Qgj^6LYZ|R62YXU2GTLyCnYhBjSzcETd?P9x4wo=wCs!O2$563TPaZxu`uSgg zzAA&AGg%cn7An3PW_sCwW3Y$~P3pX*S5*N%pWPw88Ss=PRmlx4(=@&w31`DlopN~> zf9Vkx9eHYwIUQvd8r~qMk{9%2)0pqj-wKk|HfqQhU0Ejq&EkxMq;cbb;;TFtpY}C@ zWY#w~bJ&V~%X4Qh#@6Vaap;B%^tx(-g)L<<)U|z17Yg@>?#vyAv9sdu71H{zNud3#~*7f;qp42Iy^T^J&YX(nj%oa^OM(+=Ah zRRZn|Ih3Uabm`IeB~GvLWac7X*u=X*tdWx5J0xo6A`fXnX|lmW3FjwMhL9GxLA<*1 zop;3@!&-f~u+n{PO{o&AtJ+(f7$Z5!FJOET0W8UU+)Az!-(eH{ub_8}WGYaDc%w2C zYg5&xHQ?kn9M$)`dP(lLBeHNGpn_5(kQ&A_X%E#pJ*rN=Y$90hH>hW#bOfsb;rXo2 z29SVzL1>WhShU{QjVmBs*2SAp4)Wip-n9^A%=En89JWk;X*O1ARJZ&zfXUO*?9nta z!DBVV&fpTvLlJyrq#L^-Fr_6S{S-L+u>|I}aKOys4v=Alqu)4KOW*D*#fn={lMb{3 z?#krDFFNYH1gv{bYWeI9T_9PoD8eVkoTEBwypk&2lvUs33v%zvZTlX2qEAm7{&lqP z3Nua;HyhlzCB5Y)_k95YV}9&eb-GZspH8~4cYIl<6%X*5$`kSRv#>!dTuBJ$R-!da z>2AP&RFM$lHNLhOJ>BWF>65b>t*!ntqRn}W&ei8zgLBTxTHZKBzE*&&b^KzP{T#;i zZ1NS)XH3;6P+8;9gF>UvByCv^N!UfLJS@V(O?&n2v*y0%O1k+Z=yD&(Br24l2X8lx zb*$>R>)PDC(;V{%C^&pNLtt1KQ|X_^??K77ba^Sb=Zogs1;JaMOB6=H-{t$$1TaU0 z0gXIR+|qAu?g-|4v>m1Az%D_JEMH;jc!|t~?b1(<(n?)AbNEQUuxUNj?R)|#3*S;` z6dImyJAGT?n$-2xEWzdsDy)*}68bivk&Cc1Ic8=~$_Km^TBfyluIXf8zWwQbbOv25 z66Iai4l(9eN}R{{)sK?7<)!dp};xKac^{xk z3|6b6ei!$eE`>8x5!VT~Ml5|^7k|gUSiq-l>Yx;wxZPavF>C4ez7fFy^A(#5Z^2>I z%Q_(f_2GUfuS*wE=!ED|&8(NUhjKDw= zPG2Kc8w%R;oPOx*PPmtEERuyVzv8|dM&qCsG{oa27;b9A^O|Zc_Zp9e8s59bUlh8Q z1nFkp!Wfi^ypo?l zJk>~*Xfa@CMyR8GK*AonpFaQ=wGR_vY)H3GJF>qY!HECu?&j%7%I7^Js<4C0@F=qq z$n4_H^zjAq`#vMs)YR4Z%OUq1+UWNcgF4LgafsReh+(LBhng^Wl&INl3Qa0F8qV8+ zO>S0Brsn{tzBJfPbql@#%{kWjua>`7lhP<@+3|JV318j>$;EKZEpsACC zhnmP)FuY|*d*8{Jz(elKw#L6*)=VbC>^QA&&b!AXcy>Wt@n!z`5&q-^<0aSIrDJ2E z6g14wG@hb1>j-2T&d;x4G!`}Z1AGHXVm4iwE{kM6SE@$hFrGO~>jVe;6h#AJx3e`* z$$I`=b0)yDxMlt-sy&Si$cU4VH zd8j)b#$v#GNRQIqH1eJZTUdg97+4*`3m8y?>F(L~=qet(OBVS#u@?p%aw7#Yz+*V} zRh?Qy6KEP%U|xM}(3lm1`7>ye=I*Sh-p-8~3xXeOUtXvJ7amZ^C!AFTt^#w_T54w9 zvaFbd5?eEp{I7|Tugae4K?bP=Gn!HF?^dAqmly(@LeysGtMoWmPGMp*Pp*{tq)OIw ztNN+XH#53xIKoRT`6Crc3DWJ~S+luzp^`F)6vy4R3sWcU;?va_olM$DO1*XrX1fEY z%hZ^1ggRycvxTR+NVy2JGe{h+#mEQ%$N{O)Hp_1${5r8+uXiq@XG|)pj137TI}7%wOd7V_37x$Eib(t}j3egx%8W!$OL= z7#j^kRZ0!=9E;N|0uW~9L;p||HziOPV>MQAkYIsW_ej%L(ZTh#Wvz7VPFx{A+r?a@DaJZ#f^{T(WF zhFmG_S(PNPyhNU3GVEMUEwvq324%VpaH-LUeDKXy7(RoQ{s^zq@t3_poDEdrEa0w5 zYilOuZCV-7GlTC+vn1M&?(z`NxjWDFWU32`Bp)PDE{h(y%aZxji`tySmjZges?}`L z>6gf5K0&U}_ygwfbiLO!<_M;AwssrgYwuWuP3#sGQlb!3W-DU*`w%*Se!T7vh_Z7_ zAK{O{CY3JSzCkm9T%ZW(M_s<9k|M>Y0b?dkZew9aq88eN(rjUu5H>-zgF?O4eP4kj z5{qQSm}T)r$nOSbgtfl1b{~shU87FW)EVR*tqj?6>e4*=(Px6h>B9%vL0K4pdhY6H zEh_mU^B3<&6aD>tKQIM6Esfijlcq>o*jdz?`z^1m6X`?(S0b~frye7Rme01(8)J<_ z$6veBo3yTt%J?HzH$v>g$9Q-;XZ3Gq)u;v$n<0NIp2i>o0)~^dy>$$}TpY&l^s!9@A!qKR}%WDj4lb;z&X99BL>`MnqZz?N< z6|~^A^6Ha_SG%S{++#-zIJZsR^5nYJwCQR1iJF%*`1zTkzC0Tt zQSD1~Z8+9EGU8X(@2ypX%l50xj$`7l#{<`8hJnXU{nUeZ8> z1JNIsPl&Rv2-_d0Fw>|Pibth7r<{6mS|67$J;m3FY$|-iJA0v3C1Qh|Zq5@OhwBZT zL#XwlJ$GviL(ts`TH@b!QUd^xOxWVMhYxmcz7Z-GdOy(%xzXp2Is&g zVAkDN0*VWu0RNWji94O0uH65?vrwUaA!$ZcjO+EJ0UcJMBUo>4IvvgS&#pv;ZVAo8IWo(ck;MV zL9J^ml&B!wQ)jq@dJRQC6hlCOA&VqWszJAu84g+6CSw~!VVo0@@>*J;g zRz}6~wwGHRrvsCmaEtj+?Cd`vsGq(4+Cf{A0snQVD-m zv=Xz!fTk6hPDZ$id-n6ZuDnv7UDx+0j;MQPX(&UtAJwsC*~*1arU9^bQH&r zSqnL}-rMyZIss9fR_X9N8BXf5RlJ*07A=WTu-CQ)cq!8ge}ct@h^RRUJ(s?%v^^b+tpK`g%;M&S9dp#;PD5)^#Zc*R z9Bj`8V@94{y_3MU@ubE(1~_$tA$lwvsI=LP)9cZXKyfDD2Vn$NMB03zNz@pjjkOje##_2cT7Q%`l>O4H6%nXf%hhJ3JqPemq&%k!OuZ5}?c}fFbX*vjxGj#b&x1m>WNe-UYZk!I{(1>heST+0lH>(~b#_AK} zj)V#=idKDv7-W56D1tP($0;c@eG6gforJi>E>WiS(u8&FW`I}v z?NgtS{;0@l-gA5X^XWWiBNP8N9g2~smv%lxJp7Pk=)tMPV-ufzX7HH05h5NED97Vl 
zBdcjRlK1}P$I6dT1|P^reQ7I=C%+_jt92x!jA}}?RgLnrG@q_xj}Pe_raROmlk%+> zmUb~KeMAGQx_#hTftS^l#PsbQUtJ(xVURPi z11qxwB^j!%jG^jc)8yCa?|OiWGR5yyU#Wy0eJCwnMe4Jo(ZacZC2g9Ui4zmP!GX;!bwd6fl2Xf zw=Y{Dcd7GWs&NO_MZ|cmJv*%~p;-{(p>1MWt_S4Ob9Sc2BN<7yA&H61T*V54<;)&d1>8TZLY!F4=hT7TMNGxL#1)rkXKD!lZ z2MYq6i4{(JwQzXbyi*UzoW+tPlHs?Av?5XjWRr>$;Gq3=RPFQ+3!H{n3_!l5^iDBp zB3KEolDj%3akPK+b`a1(mU;Bl56Gj={z%6!>}I~#o^2!+s|M?vEgobF{>0~*R6WFJ z8Moh9MDtMYIqcWhSc!uuXFm!pQ|Qrlxql3m#5L&xY2N!7nIgEwG!ee)zi7eGPC`yG z2wm9w?AH9)q}e7Ot6#3@mTY~cp7=qQ`ga#INQ*?_Hy5*!%$zv{(HgbK#e2Ret$Qjl zB5nPaw#KH1d1ZJd?nB2DITgb$ z+orC_uajPnrHw{wajqgH2V{_KpoV=ig}mz?nNuyFjM-axGkf)1R4F6#jYZ_5+v`WZ z`|-ZCW!2}xo&_qFiUzpL&dyidoB!Q_vz`{#Zsin+NSBX>l5x#5RR=bEtCV#!6@>F` zF@wGiHn0^V_DRmHgsM2C71_sO8xTVg4o-z89D*%Ri}BibAM-}Jic)|LisaU!d9LQr zgr5T898?y;7sl|)rMP@wi}wgto*QUfOmc27#=zU`-#0bFh$45DMVmVsM5xelpj@Y2|rTg!4u@rED%*Fwq<(lF5rM5;W0R6th}>Xf=)W373x2E>AW7d>hdA!#iGp zg#t&bJvs9@UW{b$sxEF(EC|l5L}M^oQ!QlK&jyJWY)~^sVo$kAFnM1f6A)>tBwFqEK9!=t<~4!$l+Q=+LZBq4Om6TSm7!ld z!NRXcF2&JZ;9W$g$8W~!@tux%zjSnOR5a5{Eh2+*a_NSDagTvCq^~2H$9rO2aFr!< z1jS2Y{-{pzEcaZ9#W7T&7EqU~{w|_Is+eyAewUEDApd+#i7TM+8_0FdD>b7@N7oiZ zkSNl{gvG06R|-%P#~4QAeF@8k$+;WKS<-!4(&l7aAUv1EOidr^BqaCb9-In?qV(nN zB?OO+_BXrOc(haTny{G=Qj-GBZ1ZOx31i-w-8PA5JDDJ7Rh zgIXRE6^?FGmb++iR&n63$CyT5k1D40ehXhz0H9Mu3l**giCF9tW6m zJ|JTBAUqU-&XlA_j|@^=@Uh^&xaM)a3CGI|8x};;v(}fwPv?Odce%$Wbj|XuJ_FOO zEPe6y4i^4cYW&el9Hri+?C&~@&cKJN>cQoe5(DT2y9>NxKBiP`Z!XJRCcb+-tZ8UU zs3rq5JhH)u(11<@)x-H%!z_1F>Y1ra7LIn2h;da2l|Zb{XH9cj-r_{~uzC<$U941r z7Se4x0$O?P`mXH3T-65^=3ejO7 z=WIBw_cuogV>_6+qcQedZTrGKXFWv4RDv(oA<6EabBW-E7jq2|XVs*H3bU;qdo~2^ zMnl8us%Z;rUG{5su^#zoc$th{(Tw^~6Uo!{fHdwuHr|9=qQ0n1=0%n;Wq8l3m|zx} zS&&HxY}H=6L=_XZ7atT@9UnCgg}7bkpijx;zpiNdn2##zcnK7)7hfGK$pa9;s%JZH zOK-YctU@^O-aWr|>Jx$Di0tND!*NQ@gkL>?4#v)#?j~$YzQg1)^s$zmP3tUKJXBFX zM@(z-4pDyzQPGOjEwFSIVwcE;87Z7uBht&L;+-O~y=&>om3H5$7VHEdzKo>PKy8(g ziV0$7X@;wiN~3J|-c9D!?fJ5BT&Wtf%2$fBn2d|~iOkVk2a)?yw%7pU$ReFKhbd$b@28Q7s0LNA9ZA?zyxIxD9TVs(Ni}gA9HD*BjwT>Vpr^C!=5gx2OUGJOjSOqE|OKF7R zHCj{z84d~PCB6%dqxLD3W-i8f)RPLvX(Q{=FqNes_*#N*by$q?3P%FXNfMKOt4u`FKk<|Vz!=rHvHwxC`B^Ja}Yb3aDzUO_aX3p z7@Lb!{LNkz<)_rb6k7_e9J-OW#VN}bh?v21Dq@YA4Bv0Rx*%P?-E!f%{0RdkD<*l*%IP}XNeL#IDe%ya?f%%vCZwV5Eh?s{(*2Dn};!d#(x2T zZHZ>**!Cjo_9m{XXOGa_tUCK`>p$(p5$m2mZJxAeL5!Mq_iCxu=NMv03Qi`;>UXj^ z*dyl+i5tOFpS2YNgtUeV*mjB(9NIot(;WxX_Lfsuhl6;@@rsZWyJ#!!LUNqno|Hnku88^Dliis~1(t1|t5vq3>Jmlx0-1kcMuq z`a^)7q2O|x&_}lT;RrU89DSafd2H0%&^>w@slNY~4x#J2En^)V2xez{boB!X0P@qZ z(=HHx%Z#qv=Ld@vE~EK)QjBa`NCN%t#YNYbn?!?Nqu{ z^Kw2GYJ>^k+C{U}e8Ed+v}l2J!UTz7-^!q>=S%ZWzgH=`%{e+qcgV^51&SG)RzYIh zu-|(ZhOuEh+pK zbe!)jeb;K}*nV75N}9kHpY33;W*6CsQ3A!1<`+cXu0!4OwVf3qr}Y`V>S^nwqJ*ns z9LbW%jMteQx94wlq6+V>%vlV#+bcD7q)Lt=Po<#2>UY!-9BSM%d4@Ohxt)HoH;W}b zD9`gEjv7Y*>%bZ4d!ZCZo`v6VfiJW*13#37CrG^4!VW`#(Ty?qA@&+0(h)zp=I-c* zr52HR)fcnr+|mx;X$ti)ducl#NL#~KeiCx>?O+A#TRnN&%&<*yQH#CC6u*#CEzV%! 
zJm`k$-O_f`ZKtGXPIjbyw{dm^9dq{=3lMiXs-*~FkZCnv8bl7n1x!yV914`hWMi-mmawKJ zwe#C^4#HjbGwXP7=bjMTs8FfO7@fj~=z89q|Je6kWq^`IkopTk-*pvuS(%ba(Dz(S zHgN5Hh~u(4+iYDNUKgJrI~g;arQ|#}Iyua`BQ5Jv+cRs#Ba|rki|^h3AHIKQb5a#} z!Wx^rxatRo(*-y_Qsn*yG%MY|`Tp6fceX(&N$*6xtJZL_G~zeN>;V!ly1qlnwzYV) z#1GNGjx|qM5{SN2xJq4+52RV2A4_Z@buRampeQgfLO8~Q%IwRub0aL?nRb`6*A}%* zO9kssTBPK@VR1PKof{8#SXL-ST${IbqVtRTT6v;DwX8|n^-9z;D_szYhc%0l5fQ&q zoMYFI(Okedb9?Kn`XQ<~skydsm~1_T zBI$VIw|!*M&5I7g9yawkp;jqaj#;O%4pbN+l<+>M`CtPqXlp;Fh^ei8+jo$pW7DFM zLB zyXkzElERl9`dDkdJlGYsI;xAP{$<1^Id%I7SZ8dVv4M+mqLvk*KX-9L?+L?z`~MZgujqezT7+cIA%%wz zVTDA`q81y>uAM&M_bXl_0(EBeDt$?PtP!{@fVGf!7<-wRI2b6@df8YD8rj>28@2Hk z=ENbSCj|o(F#Z9_YNRRfmih5{{Uuj+q53Y7(!7Y;@HYsJhxQbUO$SxL_84n>dgS*n(753QP=Mi}} zpqlW+ZBCfv^-fHA5R=>gOXMm#+>P@HXx8{tLclBp*2nYC)_TeuNbrtyU~*V+xC(fi zX9q`U^b|(jfZhrJ2%%?nwRhygA!ZZFWmi)?8y?*E~xQ46@kN zNte{7I)II@?v1j!=ZfAV2xH29yztMLB*ngJq*i~K#Y1W-;{BG1nT^JfGnaWXQ=PhJ z6uC9>d(ggcao9H2Ozjsmx-;$HHAkqc+icI?5M*ir*nmLB}NF^^Q9(wJ1#qX zW{zJVI1P_ZRcCQ->hoLyda6~uFj5oHl)Sm3_&F7(J_V@ou#+-;8T#JOK~pZl6F0eC zpt_PCITe$U$Ja+7Rf0v6RYaV0{W?IUh&uT_yu9Y6&Qrn*ZZZGI+^ceY@J5p2&5 zC3@vf0F?&K4GMdJc)ko~p8=}rI1}kZulTK%Oqqjic}D;!@{3^NjT+eDqXlotzG9tP zf>b>1v^7AQdKj&QDji9{o81D}y$acq2`;8m4)1aj4RYc@|761!^|9B?UcmfZ5 z^J0m@KB@uxHSx&3v5;MW^o$KiUnM#GO7865+tzRU@-qvC942boa^d*OSP#&Olq#(1 zYS_b@@%_UJ7r?am(11VmkF%?wYiN1o)n2!|$pdOyiakv7ABKE#vPZ?E(jP!!C#0rT3ncYthO|g2IYonaoN0Y1JiyE zG=ji(PWy4VmKz!o>A^vL=};HA87uVU{lyY$Br19Ua0-obp$NSSBrEbtt`}4)!A?h8 z-Vo?U4TX-Yqru|V;2DbArcsO-NZ7=rV|nY-Q~G0waNl0ei0lbLB^;tK$}>O*Cv%V1 z6X`?E=Swpu!(7ysHgz$i_yDBQ_&sJ3n*Dp-pm?)g6X7&j z5VDyNCKEa_L~Ac^b;&(O5R9{Lz4Ojk5wIgndNekl>5@gK%^Z6kFiPXXaqH&*kk}Ce zCMn?1QgY`(1OTyf;QR@&bOy?R^K}sL_G*I?5`?nUA;N~Tvzfdymp(MAFQh2(?*tv= zmFn<87Db1L+|YXbhW%ZXD!XOc`CcK9Pi9e@BsnHel* zW@gDEib43@>r%wRDyGh57by_g>%c zADg6uNer4ZgIlyhIJfKUcgo-j(K0Jw2X@blEzcq;uRgJ(zyD&ZyBNfbCCb50C`3q4 z?pfZ@nP+JpUKgw>O4*j1+KlMfV-9I|La>)M_id~?g2cp3e|-bz$+ae%G(e{qgZan)Q^9GaYG!o42w*!Eka__BE zIRm*1pS~>*=HevUx^a# zi@z&6n#I3M{v^He`INb3OSAn0zLJwQE5%eAm`-hK@f*MeUGv31E>yu9fgyl(>z%zn zRvEq{D9|Vw{e3{|MCiNj%K#u&*z=A)flsJ`DcDt-o5vUuryg^->B2=4y$o+pf}qQ8 zCABo9+?=PpWQtUcw5Hp@>dU7G4>Y=%N_JKP$vlBXrNGXF&Ae7*#B<m+Eb@r2PM&^n6Mc)zQx`W(Lb$a8T0taZpCJm3EaEsVBm{isc` z4ThEp^0=jJ4#OCU@I(`j$7?GEH;#4kqVaq)#TfaSyb#+oewM(I1d%TAE9o0Ca2 zO9El^HLokD%MN$@*%(xbYmmMVVDH)DK&GfU4ESKb`4bpTqs$(|1et;aqQVd(4qKa2ZVgRHZ^=VOq1^j!K1ebwuX(RdUxu_I;|p$6r?F(VH9x6Ltq~ncH1uO z1Im-&fHfKe7-u)?|fgNHQVQRsb(-dZ7|eu|g< zHR%%?2DQlz=8!zTI(~gQA)7H5nat%R+v!nV;%y_9r-j~7%(vBH5WyOa8mYIqSBTsW ztD7tP7t?aRE&P1hYnRn-!im1)80ks5fo0b+8;2AfZSVW=DSNcxsMWngLw!*it9AEQ zu6mz$mcQOpdPWkrv-;+7YK6Cg5o5g!!*)&~_Ing>i|FKb_t8rR1tf?jVO$-okfVRD zl`B{idLg-D;`BSTd;Zb5JJjy`?S=%i4XH6QV~)1n1=9~*ERZpIFoOY`D<1NOyfQHL zvs9V(N55xic5BEygBXsg{@TX1CGzow2SBn$f zo~$cM8*Fg+=Gfq*H>o+P!eY9nOJlNU_55T$5_j|=t}-1fAfBamKQXZxDk>SeZ+}(t z2T6M4u+|E20ee)he@?)~u+9BM5G50-NW`^Pq=3V_vzUoFZc?cSeMK&`%cA*y7;=cP z$$9DMltX<&#|osPcQIgH;SZTbmE3zo%vqMn=S4GW5oI&;qOvh zEpCiz5_l)SsunNYFt$IT3w+}2Q3Dd_t0T1J$$}=L26u9&@92M;TwIt49vfr5(&Hb! 
zhP1FEx#WOyXBfCzDaI6jsXvGB_vUHoj9iy0!=JY%1#%n$2w;~ ziFJ?uQ%SLk7oWza5Zd!9B?n*D=Tl5HzoYhqbk*2sU+6aKbZ;s5T+_N_b3TgCIV{`M zXnMH@wv%R;MuZX2PkK_DepIy2uE6Xqk))v4*y$;!h`L-^n-G{$Vd@mtsLLP~DFYge zjvkI*GGbwN&zrmBr)V+@gIYB(_CU~Azn%8F*GXvvG zj){bN$Ybe3u8y7}_|F)Oz~SRk~G{^uJg@tg${Yd?Z6uCs5N(oR&@L1DrBs zD7PI;zd~SHardrv#nc0`*0=LORlZtFx&K&#(i21DMTaVkBgfwHM6?q$QfQy!*$V&A z*fM3yh+nX{T21RA_e8)5k-u%akafbN8=$X*?Dv&sBCW$v<|_v>+q!*mczxsRvBd{J zGizWEv)IFZ6olPzO^QW|bSd>y)kn zNg$nRj)sJE+5}xQg>`7JPZYs1pfIdQCOOKYlmZh>)NrWgH??rjeX5!5SLf&Mm zSbbbdX`Xexq+J%%!X!pnzC;bnBJoL&gq@>FgT<6xS3tKY%xb-@s2Eq`%(b%{*Lp(e{%E+V<-QkJN6nvpfnHef5`*$Dg+kGiNF(c4+eeGooBb@$Iy#e= z;Uh?;F@eA{nr)|_2p^<{KduGmyD7w!h8Il~Tq%}$(Sn|zOm5P~H>~3L6$we({CrTf z20f5)RPof@zpfn3OA1Af7Q@gydce*(YI@3F?0(*UZ8q?z8Y)qqmwwu0?Z?M(Va&}Z z!L91Noqe${2n2PFv`A&vrs9si9{soA3t8Qr=w~OaDFEaKRLoz2OX1GntGi^8NmRs9#y~~KR$X!6v z(Nf`-{m`)3;-8R(qI?Z;FQYML*WKpk1azMd14vF@&&W9X^cj#okuXUQ0M-!SkU*YP zjkSPyK6QuNQJJ;2M&_C70bYEW_E`Ne$kesh)5 z8Km&cznX!-6$e$F>{ZyJ+`?4d~<{eee-4Gp(g+H$YU|D`H z%HKZV&TlWcG5o2LXtB&5Q)s-uNhg+GdABALI@d!b5a%Kx;1`-d=6~vk@~-FupnoDn z-^4-xpj!w|0I(1O0KkpjuDt0a{;O>0n-zZ)E;LJ)fzg#5Psqz-tPLJ!i9>zUieSpLWQ>rZ>*KSfMUHh_Zt-JUz+=gxp`~u|9`>IUnU6Q z*XI628RFO8{fS`cuf6+Uh^nl&E@1rQ{mwt>-LLO({_(q z`BzA&x21yrD}DKom49EEyiVf(x@3R3>)RUoPr=Y%d-qqUlegynRWS6|!Tik;{@{4O zTJyVt=l}FOtH1EOU)a)r*1Ny`?hm8<<_P}-4fnU-{ktRl?RS6s-M>D~|F52R5Sf4C zcWDR?ba#{3X@y0q7h*TN)jh*@)^UZJpcG@LWnJbPoemm# zbTKas@rez&G8Db}uRr}D^(K#y%;#rp`8H!*{k8i^Ytn2xZ6*`k%E6Yp%`_N%wYcO| zG*z8bI@pz9wN-lKOFbR%^D}BG>M_Vxn;i#`rM29Szjb{)sN}xytJbdlxSJCzyNiLg zV5{;|Y@Y5j8}_b({q+dWu8Q2r8f994L|8v)VLN7pwaF(#l7P>C<2#?OinDzU(NBeI zWe4I*kX#7I%TVj`I-s**jBG+iquA4TdOhoxuO07q{39gEQM0zNc}*qGBTI9#r#RRS zA#_2yoXvuahItyDEWOxz+qBWRr{Lq&5t;IE5oyC|C8kDW)C_I0Tpz4MWOr_%Tc#n~R{HmJ~vdTDJh?+Y7+QOlzF1#a`j z*REiM{7**$%jA=2PSC`BavXhv^M@gfH0Q^ORL6|~m4*9AOG^$4gf^Pb&K}|)gL07E z+sado&IGBMBim+jDcgLTC0?U2s*avlp13?+_V^je@}@7<0~rk9pd7YkB_KTE(_0^6 zNJasMcDOZkqfXOvNX6T1Xk-Jyx6Rq)9p9E-k(WRq*=DE?cTUZFjmmDJ+UZvK<)=y+ zYsN-YQ6mB{IC~5hp^P@fNg{-(VH=e4aVcvtZ0hoLf_K<$0=Ut09DFF+%dFPu1{uZ% z!rH&C(t0MZ40eMT%q`;TD3aJ9*&0&!L|-Y;IV!SsQIJ8WX}=qIz`#@&jl2*R)Xd+8 zKI}6Si{ep#PdtQjHe^wN{1OhV!v50RQ`%}^hx-r*X9bpbvRHwWw$I4C>*$&AS{`c9 zbb&`_Xu0?aDb{G_f%Sy1jhSMGn8|&Y8O@;)MhxvsH%{h;00=BlMMQcv#K+>zkRH;J zyigE`?EuZ|%?Q9_U1_k`a~qbNa^LdNN^#8T~tqwf~d$fSrARMnf| zN1pmFCc#mM&cH2s-Y76(2m#5+dqaTt8DJiE-_gLV;A&?Rj`w5vmcrppj>E}z&73Oy zWT`F%9QK(s1IxRjEPZ4&bP(g32DDPMeG#v082Ampqj9|Fn5!6$X{#|tG(ds^OI(PN z&?*uJt;P{M0!Rsn3-w@v`MY7(c5hPadT)lyJtq*nKDGPvVczXddBOI{)OO{u5Q)PD zl=`wC76rT>KxHxa5^|f7bVhh-JZSt-+mXpQFJco%sGrb*kBNrCkdP*adxC;wBj{vb zKAkVY7?EdQIaI)kBqN38L`XbT-1SKbn*ZA;qf$(lWZFY7lg9b-<(ZPYl8a@50XFu% zRC);YXiGyt#t@mGkj2Vy=Smlw3e}La{8vxHCfYq%`DTlj)mX_?mxT2RS^tckG|%y+ zu$Dm_zwL6oR5U!4adAOdo6=Ts)JgLuYGeEMgs6r11NP9$G41h!@G3% zuyk~L3hi>bTpg3RL`u&)thHyElZRy5w?xnx0_~V$6w~7EtHtJq*E@s;-L8n&0XHAbGC~jt@0~QLeno{}r4RSU+*@Xv& zVSJ%S=ER@HEu*D~Q8xt^Vmc~`XX!igVd7)CYZNjzYFxx4tx%2&<#l>w!B|*sPBpGx z8*wS4wNZ80nKb)*5$!p?*dD@+*O|`vVgJXSgrVAoQ1SVM@_t~-v5tp#!gvcl$0^N= zDtg0ko%TZRTsjCw?*f7xj}PtxH9AatzdWDy&TN&%#Wu%nVtoBdgyqX*kSo5>e1>2h z5m}!Ruo?lppZ4?VjvDh8Ua|i%;H)%sJ0i5`d^Jxiy*NF{ZJ-q#_sn?LaN`4@WJcH4 z--*?y^5`$87bIUA*)86Uud7N*(EP%2DMb)I!tBQ8$? 
z?3o+<03g(Fb?|@F8hopR_CQ?Vo2rqd!uph9@ylsL@(r5=VEhwid<;^_I+(CZE+=Ud zm$lq(e?snk=6C0}Qz{66ZT+pzn_SmF!`RBe`y6Kh3xV=Zcotmg;J}KCpXGoO-paiR zA^sNA0-+fU$ciZ?16*6)(jOoI!JkdP8TVUc3xswzWC|&F@_gQRCyHVBvji~zTL}We z)c*)^q2(8{&VH5vFcQ5LA^BsB`>QQ#`O}xKWQm;-qXIw6fqMPg)^G9dFQ!OPc-qxt z8e)Cx_*nve|E&bxKLX!htbyPv4xVzvNByNGOTnMF_)8`-@gD==Uu@xTGTURBhj0S5 z`*l=)>QLeW{R)NuIVZo43S8>oXT+Zk{W5BJ{|Jfyt08|C8owFx&(QdP8S-0{{9g_E zEm;2VhWrKb{mqcyi0^N``(K9q7C--2Lw-wN{fQw0zf8?vrLo=&`DYsIe;M*yM(e*C z@(aTF=hVHe*gsn%f5k2){s&L(e~-xTxvu|a$nPnyKQZLj5&5eS{LPTx=-n#rUliuv4Eble z?0>A0-!o_bb!vV~p#Ap|`3tuC-}}h#^YS)3za+i?-bem(M1I$P_zU;_1-$q_+?T=P zmvq#hEY`pGkw4$!|K3Ob{b2O>dE}oX^8eI%MC{+0;-?R*BD6PdYltfp!36}7RFBzA zi-00O{;TDmPeR}rU>h`E|7R7j zbbIR}LnKkI=UKxWJ^ z&*PUh2-t}#n1Nv$`~Cbk#K>D)Z%6UJccp)e3I0Ca{QG(D_b(3rHpDP z>8}-D{*IFVOphe^k1}sS`Ca|F#-fHz+7S=(u4(^mv zF?~XY@R%S4jif3x{WP*n4$Y_nK4h+I z8GAZ9)6O4-E2$`Oc!9Qn9*YgCtSg4B?uzySHCF4=^1<*jc~YsT!1B7+vFIGP3VlI&3>f^5R%a1V5zpt>eHp z>PG}=D}K%0dBt_aFv{_^h$R~NbOuZ=;)hrMX*ZKT*#R!P>Z*YD{B{^Xyt9juiGT_; zIAX8S33n6Zlr_hPq2&gJ5$Z}^;N!cbC}=ub z=QwaHtE+SOJRm*QoYo)rnZ=?>%7gfu4143HP~?%dW#_KJ1JR$?|NoG?)2 zVB~`OnjAMTWlA#dhrkcs6L!@_A2NM}Se^aqPUyG*P{|UDgjKp0&6IF8^Sscr%50b+ zN5kJ9&-lx?LY-ZK@tZ_qfH3;&5ki!qk|z7ejz;dmW9YvPu>kqx_e$MjZMFY!;|^ub z@v}Q?j247SQ?2w|a3DDbM1ezP4#~tY+v+sR6| z_dPjA<936l2U*uoNDd;A8v|BdsFN?1H8Dt+7R|nIuVY4XdPKr!eO}PL5L2w!v^aKC9X_%y9+Q+d+PHFV~Uj%DLyh6SY2v7EnmCcq!407uiQ}n z2iARjv~VpMW!vRhi^s294Ys6?TnwRms5ic6N{)u=W+G9k(z7fD3QIyHJ)Oiqd?G`; z?F6NQC&TtIzqmw=6F)SYjZJ34oyv-*^-GQu?gH!>Vzdgdo|#vk2I=GF!c;Wvz9hlN zk;eDA*w;svz=^(#ycGD*n)jX`+WuY9@iEYHb@E5x`KxG1Gd)%K98VFbsZuSPymnb) zqT|U%do=RgeA!_lXYrP%Z`#uU1qcEfu`+AUNSYg)`OF|Ad$HI%HlhngRddfMr2#hh z&p?r}sdLZhk~JNkXbv5uq2MLGH#-AQYS>o7^_2zWJ30RLeE6}*1LQhT=nazWv&Wiz zH33{wH9-!XPt@}5PMUc3A|a@YBLjYb-Ed&co4d91Z0EhN?=Q8M=(XXGuEGshEjixV zskdOc*#Rf7MN}tI6K>2sOOsqXz|0ZRL*hV)K+Uai+Rmp54f%-Hmu@GUF*!w7NK$;d znj_9AI$p{!&oTFDidP1o+o05qd4 zp`e7Lm!zKzCo!oHXgr%((00S^`A2i0)Nxchkm#YED*T!XNDKgYh~Ci?q<^lR*2fqV zCwYG2hE$OUrV%Lm^6o~?YorvRaj~)MyTBW(L(kxcvjgh)mVra(dIC*pzOgL$T?2Qe z-h!Btm*H^9<%xSE#6cI0hgBqsWi=I~^K;Fzg!G8srR_F=w!KPQepWN@&O0Yxh!4Ru z+)nNLGH;m9$mLk2_{qjf(DoLz&Rh?s>WZQs>&!|A0M=yp}9D5Y>0kDeZ zw#JUS4+oYaaQ00z)oIfr3NXOTUlL6ZjT=G)Ji&77mU+GVN5vOU`Pco%Jj5nffz_Jq z)BLEj&*)c)UY%(dEH&`_ggoj!3YQ#bX65y~&MoXX(@*ey9KO1Vc7J>Mwt^~oK)#_V za7=k-dw^TJc`Cs1@QOcA9Ajj@kZnk%pDKK--GzAC$CrBB!$A5DHDbWK8#^wvMT=Zf zP=Q0S+Sf2(xh2wMsr80d=5e-w#2|~lnF{UrM`mzAoU)73-u#xBs-TQa%1&L;gU2 z!s1Xrg2t6VRrKHpveLdbTR1s@h(^8>@BUto<*)$5RjDn316L1atf&aibKId$ZG525 zcqiNZ7twvgx+$7r$7URl;Q_#tNECe*YRiTmP zic;O05N>WqK8Td2;Q!pM1^5wvS&40TzJLS#ZPo- zkVk7apoZBvbBJa|Z+zA1RKlIFQe&-Hc8Oz!BPcwH$ciG>!u#wAEx^z?fy!$d?edCi z27zEB1`EWF={>LL@o8fT0|ij~!t=g~&sh^IS}Byh5{m)rhD^9GVu<#P3$&v7=Cv)B z2q`x_KP;dkZE_t6!){lueqg{6Vkw~}m9z!z#NPDO(qJgoKk5~S!*V1-y;rGAm&%4G z2NIyruVYhz?fZO-71zWCK#$%k(l@c#Cl$V)X|||wV8!4=V^(4 zl-4=;H-N~LLj z!B#1|03@|=lAq>m)`C*He}SXa{=1@xPQ6tfa>7}jV2%Bv?EDfE{R|@vI-Cg>y0|mb zg-k-3e1umZ2=SqYkkT_Bs`sSx?RE;}dAgtgY;IFsM!3BP!*gCwmz`J3!r|cqR;3SQt~h%?;L;E zbE!=(q7CB=ElidS5hw^BLTeI+C5Wu=l@>N*;(neZ@_JtAj zAgHHUHmC<9P40b%cJUjjKG|VAt=#pfWO0R$OS>dM%hB5k*b@XOi0597}pzkbO5I|gM4`={kDmyyXS$VQP&JP0v)PSNn8R==j z$F{V3Pm8+d&+e<*n~Gv}025U)6ZT9d1+T^F8YbcsF9-JQJd0(ixY$_c;e5m&>^~#3 z=6oKN9egc+`mXKz~c-^DFg&SN?JI^tXPD5WtdPm?iYp^Nz zJjGTIYfTarSLP?}opONQ8Wpm_!AX#PNUR29arWbpE))au{@ZqH#CmOf+(cIDq*_zq zuq5rhHgkVKH0KnTE`_CGE2%K$6^9f5u3swZ8Fguz60Ts)F(FDurT_Mxi$%&nAvhk_&X_Z!Yv_6_S89cwMwN-iBa)cZhq0-lbgowM8ai;ZLq zHstpJHZq})!NFuY_9{~+TSN4PaH=F&r~W%IBY{kTv_Egoo|&J)fGbUj!uIiy&JFVjm#Vqy(`09{!5((dAyL0>Kk zodg`AI+h}I$;Zi%i60->&$z4HFBr 
zrCvdkp0oN#&puU1iteFE$>yemVs{MjZ547SeJz?Vw;`g`vLc03ghNe*Xd*ZBn%hzC z1XI-{gw#_mBq92IsbinQiahGTPz!YfXl}H(Vsg1yy6}vKOle;qaHaW?wBwrUU6lL{ zF^LUJlFp2%qqQ%k^9bI|$%AmLzA#^MMJ&09f?EK>E`P`j1X&r*M&{k~@FKl~kLo5T z0~5MwtaY3~l(4I?f*-7WVmQRf8|#%A`!3eyNJ00!`5gzuYCes+2Pf>uoffVxl=918 zSg!`dy@{80IITHQ!OgC+0Sv*3HUezE-Tu+7Udm>brPuThD&Zzn9yh4 z>%C+FAl*WuPJW)JyjWY3G$=6kUKtPQ1R^5C?xqVQvB5wkfxe235(%yM2O(hTay?y0 zIbb3kWz%8lC?{HwV(s?D?I|XP@`O|$zt;&0UI^n~W|Wc7n@UR48Hg+u&9B5@g?gE0 z&?WAym{3t|a~B+~oscK)vOJ3@+C)yWf7q;knNoh!G!TpXxE-zH%EL1Wp5;kQmVMsz z0R-d`Ait}VMUkU52I5bVsvM??vTBCf*mSC5zsNj^ClXBg-m<6xTNCBG%#3R)0KZ=t{bMO|U?E6>EE7mzrJP*hdFxww-JWkqP|Hxc%}2;hi4c#zv|}t7E8yO8+d6 zn~p>W1eq&AgC7g(J6s2kpbqXYF1m2tlIeZP ziX*0(d%_{F3Xr6d!l(73FH^^A$#)mIUx}dff=0;{UgA~=Z%fdO3V@@;(wOGJNF5vySiH_^*YABEu{j4YtPY+q+a}9k&MG}scqk1;FB9 z4|r**QH>q{*FD>k5DBa75<32D{O)UT?7B*mSXmrzyXcvFMzhaXqtindl!#4>fgFk_ zv=g6GH*!HDTlIi1zX-(H_=fP6yEvO1eB0%=Ff zSxioj1dW$S8c>QcC?kkpKxQIy6Nb(0wIP9)lXKn}Y{!SfMM;InzxX(GfcOGdpsZ)vw6^cJwg)^>v$bQO>8G!1+f_9hDb5CmfWOTubtg^mi8vr9q z3nPdW4r*a#;_TSXyVHz5E#6Lg&Utd7ogT;dQYDy+kzQ6>^eoy3!|niIBENWE$Hs7R9IeS?7D_~Sc+K=|>vtKvKw6?Mh`;w6>G{EF6KyRGk$!S~Z8-1YILh4Y zsW6K9p6D*^Y*x}US4m$aMN6}PM*JvV?7V9?VoaLFkthb8O{XzK?tH1>cUG0cesjt^rq1wr-N&q7x@M;Z| z+)bhs%iR9nxFi<#hOTUgCi1x)j|Gp8GCmYD3A;UO(@mfRsGZNwq=-36a`rn2CJa4j)t>ruY? z`r3Ynx!C?F+QRdi5tu9{HlViwX;Q3>v0-6|5evDTl(W4d6h5m@_ zc1va>wVj4pu6m#Y-iu{5O$G7WA1wpZS}4pxp&Qkp+rY*OuYuKl^TW7Dj$g4vu7g#kENSqXIuwsK7((dI1 z3rY#_DZv+P>m}ER4r7Xnr21Sn$VCiQ*a;E11-QhIgYoHA!RHC=V(GzDf|lc}d-=Y} z6hhk-I`4c{DxD8U(2adFrNTaFW717(doGSe(4znvjXPl2gdaqP@zEu@BMV$W-g`y2 znR>^^TG!MDhjRmSAsL0+(jbsr@`4#fSdZ^pf?x4b%lsTYV49fOKWrInccEuvWM4O% zfM)V?s8l_SI17T*`Uz$9ghenBh2J;oGxeruI&LnwNMc;P=_I&mjaHS;@!stPRv#(P zv{l_iu`16C`k0_)_V}t?r;hCD+D!ELnJ-Cx5>(*rJC?vP3@f*#txO!f7$!SjuxeJN z%oZ2THc+F_#635byvGZ^3~gnrf*d2llJvPBJMD6Id83vBdP;YbxC129G+CR4xkc`t z&52^%g@uPwh0UdXI*ILz_}E=W!4JLlKNq0#@*p~7POZPTXQ$3Iv81w3fw0Md%$8(b z_ze_9Ug%C+hTSNkO^a%W9tHO2jucc5Un@rG;TS)9mshwR9q1Gg?k7Z7^@ZPEQO2W z)*@DDs=@k^B}=i&g6SGTg&V)TNb2=<=H|WPCoq=c5Q&n{AChJ{l%tZIwh$Ddc#mXv zH5lX5Jq{`{+vOZ}lDfKbvA*lMAlZJ|@;B*`G(~^}Eg=d9K{#Y&aEWp`*R}2bUR|E> zL`q3dDwKJ_S6U{8W;dmoilC%8=hvxDB%11gKs(x$>sH`c7VLfF4ghS=&@XIBcnN{5 z>0$e-ovWmoaw@ElaMtRl>VL&^TAZ3T!R57ZRat3PHG$)i9SBMM5%dxSnGYHqGR%=; zdzt>GY~L6qcFs^*d5>yAe{3u(jDdDU*Rni=T#~99impG*ed(MvKF6;=(%Z4${!q&w z(AAmBOi@I&!EG;!SyZwV_5)7ByYSFCq>l$c(~W21xCo{~WLXUSy(AK> zx8Z(zCgYXh!OTkI#NkXD2kp#3fsqJ#u6|ylv1O!gUb20;*qQXZ_(csC)DIco;2%rY zkUR`y@Cnq$Ep>|JdCQU>dG)(aGx7O7&w#BC@ki0?i0p7!u6flv@q|U(X5K3isgC9I z>h`!+Z*jc$cxGn#;-h9lvRbHI38vlpg9=t}ippxDp<0r(MAI0nhbTAL+Lboo-quE* zR)k$yc1GN?bi!7x+{{gB9XtQsdlO&++`gC7FLr}^V6~oSU_wwKkdU3jt>jLD=c_v+<-G<+BApJqih;!&VI4EpgjCvj8vQBR%5KIO0>Sqeg)dKgYw zzv=u-0;m(`aW977hvY^xk6U!Z2cz#F_F>=YYeh+BGb#o0W60Nmn=)}%I0mtv`Dn{O zI+$UL(ZdnECt|8ANeN|C} zZPi)6Q_yYv(&fh_ASRDHw#R;9wcF1B8^`Oc0FNT#mt1u3!5~TC(-#v+1=`W$@+f}H z8~kIoS-B#24PPlt0oAV?q_BY|dXgaikf-2nqO9hj3ivw*0_S({T{ohx+Mq*+V-O9B z@eXC~E&A!JPf96N5fBapWlvDbz7WF9(-m72MPd(Inu|}#GvIivBcP6Y4lpaGC@6Om zWeHJrusKpXpcum(hYO9cC_gH{#(JgFQJLYdB(6fGCdTn$E2q_(%M(dQR41tTWEMwt zz<6+Q9f`$$lcQ+PzL4z?*+CV=gY-E5_6W%JzU*!55zX_=m54)XNGv2fjZdg@w6mbV z?YzIFV8cN)>`7?tajut?*K2L478CLAVM_Z*XyA~B`dE`{G?lV=CLy5;nEj^mzPe;r z3SuS`#495Je;z?i+vKZ30!Iv1Avc7ZZn2S{U82nW6yVw=!2VJ$|!{(jl1QsHLT6G^z>N7+QV6gg(Lup zeozw4P9jrFU1Pj31+QL1?hWN>#~J~Yz9b@R@`=Is44L_occ#gv!lQX!Kl8Q3@%ppP zr#Xe$!14nMv4gcFl7^~^yp51NmWn&HT$Vb7I zfmDmaRo@7RFF4Ud=Z`4Jlw_LzQ+!14>Vj&umhKuXo$v{FOA|&hY*V8ljtmN3w>6l! 
zIv1yM-aP@|>k)psf_Kh7o>jqC zFw!b#4pFcPsmbOPlSWiny3yk7D9a5g3nLR2*H-(kasNre6ljqrrsrKe$Bp#5&q(`v z6ir%qTNToeT$kK9t;mX5bz*Gyw1RR}T;N*p$77dv2GBJd!iqLit)LTVM)Dpci*yyM z-fKmB@6j~7B=y2gU+Z$V2V6rcC-0&diH#fgCsZ2p49J*$QOfYa6JVjE@GlnANan!- z1HemYREW*b9WVQ54w+0*xr@F$&~X9MGLyLX56C{wUL`TSIys`+)Z+@5n70}bM-;Ev zrc#!5)Eb7}Tesq9-$(%VFe%`=Ia@L~@J8~83mY6_umDEp9_IXmX@y7E_eB(pht~qz zMpTZU8Y_Cx8`v%V9_~2f0ef&kCQTGe5dLg_R)38-0dvriVi9Xo?ghQR200(sd zV(m916CbjaZE%*Pcry@bAmJyFS{eE8axWLOwF<=HJ*i<&szY zauC4sv1_|w4%wi${R;);7eFw4w^lGXBQ^Uv6g$qKSkT9w!6g2RhF2gn3&;- z*j!k7ja6$lweW3&o{nxIw}Xd!Oj=vGh`J8wZ{NEiA&y*KIealGHe5<9_>9%wMM9fw6Y-y_AvmE)bX|Ao8;i;Y1C@~-Pk0-9gAkZTYPh2- zk>76Rs$-}2d^kI=ripGOwWEiGl!P4oWjam{c3-LwPB5+;9|Ukw&%I!^$)O0>^ew$tW-EoIg%b*w z9Y5f7jWTgdiLQ4*cA0B2coNFmIGk4czdvg##S-8j!VDvM^wr|^W{Do}Ys=VrzsrO5 z4yQ7@o_=T%Yp5II+FFC+*g&9a>g3O|b3x)p_E&M#dV%HWZ5=x)tjSn^!Y~cpE$Rh2k}(WD*a7 z#kh)0SOlfwI0uM8h=e{&08C*?neqlOpb9vxkWH8weN-)pBCOnjE4T*rT_k-uK1{u^ z;#e&^Wj~b+FuZmAR2M10n^DPH{_R7b3wIeEPX*W`em-LDQiJx z>B{UZ#*}R?ci|HH4nrXnqilZUC&BfD;fLZRDA7qYi{#%4t{&pH1lG#+o78K*6WqfH zG6frfcg*oeKyl=8;ypqz9o#M*1WGck4vZ)I4^&{jBs|fes}s6W8C^ODju&2HLHmyWKrZ^Dea%ZSUm8XP)-tS7bw4AbqlNyrvhP`o7~~ z%M>v~!Gl=b_&#e$P2WY-1T_|z5>Q?jhnJ?ykc@4i6RBMHJ!a%fo5^rpf-NdR3|*3CJ%l!cY$Ej`WTV_cb(aT368}^5*Mz z8rUi1nPAyR*LH`8+se2Y9;%fML8b>!OW+SpDJv#EO6FJ~(@mg#lES@&r+zNZH!VP7 z&UJd?5S9v`C%KLVP$(PZ!Xr8jy;&+>Ty|Q|w%xPyWFtq~kZQsaVb}xtkK!`xe<8SD zEg-4L`+GG<0F3MFuvN+)kr0hwUm5Kl&&wURky+5Ydb*iDGO|F( z6s|xnuT{;ks?TPB$*UzoN|oYo6V4%2P}sn7*VXxkXF z00-s?(Z=^LBqYYWN3?jb5zTWb_J&_eay%qmOyEJWSq^t9AjZd0jiyZUd%a0z9Jxy7 zveR`n4j2Cv#_whsKK0&mexqX9^CSGmXnuirGqTR}gN*gpZ8nZ|sH6rdm`~`^2IH~C z7i;>jUMR4KJE{?j@3p6NG!TRbKBePaJAK2@K6$i!%tn!&2};RlSb`1(P;MC2cJmZA zJ3LCEW;HF^bMmC<*RO@84dle#NF-c!6kJjHjB4m`u?sx5dg%q!zA1Mc7(6VSkKOvp z-!edMieB^n3Uf7AAv&wWMOA@Dt27{ap%Be5JA~AU#sxb0V#ZF^muzw49l`LT#Pn2V zkLIVq^tC5`3&cw6_L{`gMFaCSMO({2;X46R)J)H-E*V7ZD~&P{aLQA<*hV!G|B){; zS#xXk_>&WVd|~Wz$RRXRKoK5dT@TYy@JE-@X=%$hrN4(rMeF3EUK<6D!Ahqtbr9Z_) zT!lS3BJcLCY~L5WqDCe5J!_!5WAqE#Y4(ks+hRE^#-@N=15#nc%k!C9<>nSV$ECwl z3-+D__makBRls~r8x4aOIJN>-M5%aCt-G$9+C)$jK5&k4u-h3fD?v%xWylQ0g>8+@ z%XCj4&*F`WCuN-!<(S-L=zs!q#Ghf8%Q{LbL(YF%8r;?+;IW~ytiYNp&E&%59aVGeW0;5UX1&cX|wN$UQ%W2cWpH zw8TJR^SL~rV;l5gP<~9n4Z$wzw-+5LpKPnFtIL(V@pjosae={&Z9z!Kmxly7&XFyROySp3Gu^bcjKE37d8OgC z6S6Bzf81c;*Bn*Z79{stP!_uVeDAl7@2~9V*@^P=%vWXc_1b%mzL9v~8v~<+od#C* z{Ir6MJFOf1>sRak_Sfl-GJ$V!p>OBR--^DPxA{oPm-pK^g_UjEF8}nL=ryJO&Tgj_ z_TA0p0q%t{_3cb-o@YJuu1f{eyy^)8q4Lzm#XjTFK>*yRhx12*U9<2PaIi# zINtHQ_m2ou87#YY>uK%BbXRT(Wb~izHCTJxeSyCH=XojrmhR)6Khr_@l{#yCxdkK{ zrM90f(03`RaMCZY;*Axgdc3o?BmIW{*Dmo5iPmhPWn z@gPu>eObHy3ft%QgZ-9=c#AH^%+8+V>OVKe*2C{$gaiBdF98R%S{B-;+_)MT@4iQP zdwuu8^sMC5hW)e5dRVb(Vi7yrC7zr3MdzZLs!Iu>Y+hwi$J=gu&U>AkB5hf}x3r*I zzt^pzuP!uS%vy8BC$^VxtlfVh?o?WK@U=OydHc3~=y*2o+GjC(d-G3CJe$;zeyFd= zDV^AmHsL_wgyZeY#@EfP6P0kLHSscTOx5bRlB3(^5UAZ$Jo1zYH`9G@@Xn;}@Vl2a z{Z5pc2HjfHx?oFHwzIAsk!v_>*I4Tsg76OiHFD3K+pbf+56(#5b==wGtW%wLad)#@ zM48km>xYeZn5LPVdnpH z*q`)i@s0-KPpdVO+V0^6!a;wG`*54^DV@5Cp~!GfT}OiqJXWgsmKrjvr9 zz9pA}0IoZsn~Y(6x_%Bhz?J2;c|(1=l3w7Rtxwlsr)sknKeoH)uasNxE!n&6f4ElG_|c`<9q=!W`qMa;m->L2mxO3s~% zlL}>Xl*7P3?$b43px?<=i8a(*w1rd^59WP0r$$3sw^4k`$bs{ zD%PkXTT}Y28Pb}{vLj`~yL83YjNMY#P^}I0CJbs!hY#|JZilymyV|%x62$p9?aU zkf%QO*#x0Ej`07NyrgXePY^}bLCE;LFhv?;4^dPY%O?zKVl4-e2p(}J3;B`ZkQgr$ z$}3jwx&@Dv8S|pTFb@WlkByEJu*kG+twb6kfOM`zDp5=d=1JwsLTw?&pjxN=!AN@; zIu4!$;1N0G*;GFa2f%+ZVGp%>Xz&QuKqE0_4a6sFTq~Jf%|)K5E1K}3(J*^;$qEf( zRK(9s*V#o-W}KKS5kri42=n$p1YduRxC=G65CkhC z^kG>|9WoD0kvuOJ!7>USS>&-A{GjC|4m3U_4yYEie$eZKwh!bwpzR}x18pBk9N_cC 
z*+&3ffjg~-B<>14Ug$i~=Op@^NUeKX|HySnuRjt8`kX}KK<`f^4v(}C@KEub#_2IQ z(fcgD&w_g3_YaEof-L=g1pR#kz0Z>CfZjhy9A1ijp!ab!#d2dr>)}h(L;9RVpOdI{ z_)_d3t$(!sz0Uea@8iFFABUG0 zwf4g@b5%P~jXQGaFv;dusm zoR4S8V_VO%zGvOP=Um^YWEel%eZ&{Bhga4KM<9jG$M#k2JPWo3EMo_RXLnrVui(0V z)$ELVf3z|IH?;exe~7U{=W!h%kLORt`4a6ZKEpX;0nz-^Ea|S}arSW?*ek?En z>nOZ%o#PsR8S5P9O7*$MBi#+sh?I19cc&mA2uet|G)RjAlG5Fs3erf!eF5tr=R5y< z*ZS7I>wae~m6vDFGc$Ykp4s!8z4t2s03b4QaJ4bDx3&ZTKmk`jfq$8Go%ES3ZCIHA z006X+ouMHB(D2pLK-U3CQv?ZddHH5a^mF~nxJ1=U+A*SWlFcm_W_DI0IwE~*J3}HS zHfAC_W)3!1B12AHHcmzsV1oobkbvQ#yr?)GE0N$MVIZY}p+2xd*xJU`!qCWph?$X* zg^rn#=>hPeiGza;7XyQ{vopPusez%jg{~F7wVg4;l@xjt2TKbejkS$~skN0o7m>cM zo~}MG6Oo;v5ibjofuWv-wZ1to6Bi>FBayC^u7#_;Aupo~3m2mc6B7%Or6I40p$n0{ zqaN_n10oxHSKzC_f3@ulc$w%KfiD67A+j`eF*MM=;>ZMiL)%W*%Gi*XiJeH_#Ln7M zR~z^$6On_Rp@oI1J@6BU3x|Qe1Mq{sttBrbPz7BBH)|_HUS=j*CMF^yU3&*@8+&t8 zn=6VN2DUcZ)<#D5h7P=R%tQ_*cEC66dD)39tgX#;O@O~>|9HkkWN%@r57gzy14be% zyK5Qxrk1)6S9+LQIT+em=mLp=&-E-E?Q~tW^{p*!bRB@d>jO>XV5e(p1!Msvw9~z! zF|yOOG_(gAOHbRz75LfIfR`EgS=T_<=BgKZ+IptC_E$no?F_H`>#`yiCwxy{R@K+*xeM2ileMbjgR>mte?R2k9YG-I~ z0({+0U;AfwuiEYOdG+m#h%EJhcD?Er@E2ZY7J5b^+pEs-GSagHKiXU|zW%A}!pqJH zd|>ZjXv51&WNHH(B;bSqjveSnQDeVU1%U7?ykvT%KyO&Y+ITsK3diRVUydPXyFEAzH&kC0F$u z1VHqKjJIrAw)sTJBlMnkFy zor!MoeYlFwS)WN0Y*?1<4EZTv7?wUNg0JG0kUEO>Mv8yK8_LG*4Vi~rhl=1>4~4o6 zb2Z=asxgcgV4vVJR@Jn}m~cGe2R#ciUJM~xllaVFq0O?nPp0SzY(F!O&k}o8yQFTu|I0J{LxZ5~l~h1QCNK zv3*nu+6gTW_SCVvVS*S; z^QfMSS2`h4@-W$PP=)#}54>)U?oer#eJax9Oo1hX(POmJ-0Cl9{_*n66OPzg@QA;dTEn$I#`AchiHX$ze}f1OTo zJ84?@obDMW@VLZk=6qsV*%hEGOu;zfaPSFlk$yl+O&d@DKF)-($C0a!%i07b`&^#G z<7z0^?O4GvK?>6Q_+|dhmy5}`739P3xOb&NABUC7#Yo3AUW8C_6V96+Ck%@Rrm0R2 z^|6SKn}tewp%2-?G%q_GLBE|e)l3D%PcdyV#5yC%`;79l-{rL zoNI~sAxF9!j*X3`0)sX~=F>4m-^6`OX3JgH|c=zj}7BijDqqeX z(pf^P@TEVdTE$7p)Ps3qh5QXx5(zex?A9F=&zDiYn5J-3B=mh&7{q+V4-5z5(*!1M zd=+U6X_i#m$qG&yP$5E_8d1~tOtqD09=|e*!&H7d-6o9sEWMjFdWoKr9c@57$RNGS zvTx&9#LLyq^99_u*^;jN*^bgHBH6EGLUk@(Usc=4s8DBv*(^lz*q{ZRuo4t6>0|Xw=C9QcJgyuXE;52-GxYpj=mXukUjg*adfV`pws#@ zJaJGWM(CrnIj<~OKL1xw)4e^>-|LEWo0^IBxl!w5X~s4cOAfB$ zByN`Hb zVlRX30qi+M#JSz-)lcsqkPXpT0nG;2rl{Zo6>q zvM#JI)+1S%#vJkh0fk3<(8Y0)OTtJ`cS1+e0M91(_{YL(+z3n~Ndyx}Erc#Fo|ORT zKJ+0W>#wO=2L|c9e1AUTQv%YDktGin0VG2dOcrtp@CE?N>tGOlPJMxokU)}k9mFpI zm=Yr-WyAu)4ZvpzAj$e>z-J)%M^ipeuJ{uOCYVU(3Yx^rKxb_yowg0LaCAeesTV;bJZ`-SML!BCcgW7&fj&O_?`332&NqW z;{09b(?2-R{XOR!os$WE=X|4cGVg89$^5rDCy@R<=Np}qg?{J!eb!yi=$l!0qx0X) z=f9bC-*pbUgY&=h`8V%Q`FB2FTjSsPe21s}8=rrh8~+#MGpK}M;$P!41cRyn>f*(w z09xx*U3&(pA7B$zU5yX$iDRarsJ973@$$(b1)PI(+j0z<5an8K-zg~My|Ww_JI9iZ zowr5bdInsAAAuDNM>K^ldEr-iQyltD!jfM`jwAxCLP|#Rh4Omqucr4PnN%!Po*n z7j*Cxp&YV_BWb@|N<)AARcvl6@ioOox!mG_%3v&IppN;v0fQ=uS3|~vbxR>en5F7~ z{KJ|m(k_TpdGr7Q1bOp_#(|F0jyw}-qQO$jub@Xh5!@eQf?&%yIfkMN7)icW<8REU zvQ&i+``fGYpJ*Xe0hAIh!jwo%0`*h9cVlsB1Mc(bbfQ<2aiV%A6b#$Y1sYS>z8X?; zKQDCv#p8#O^A~?M1U8PNtBJoKl=pCZ^8j+HwsUee=^Io60IOtY8hgWHz9yH;)%$Hz zxY^<$tw6m3fp#9kGxwzOntPnWb6VpK-BDYTVOgIgIFJc*F4aaQAf zAItDS4vfVT^O(4!tTu8)#U_w^g*ehZAqo_D1|dgJL^^9O1=}*S3<@LA*8|0CP+Qn~ z%pe#6rdsv6mt*wg#U)WfGTAUrza+0Q3HMO?VNtsTOSbXLljnmWNhcv5vg-=U%b zMt9BGp{F~}nLAbn?l}PJ%7Xac^KB>;s_w}uZ={~eHBl-H&OQ{$9v={AAOYU@DnUj@46S4Aa<$ZVBGg48ZpRgk3Sm1xdcnaIUml7~4G8Yc*+yoo8|t z0H9x`R+!(@_g}hIzShvB_Y84eS_@2=8Yq{}&y!pD<$FgSp{lEsi+?)R1V1qm4-WN>#8<0T#HIeHTCLJggP;sE_??B;C+0Zu~y9N!nWPk7THF~%u z+ZOoNKf?&1-0v{rPq{Z^d;=+N$^G8vYix11+`leje*jS6-1-hO{*-&u-Bi-*r}a&LZ-tt_$h}e;DEIeB@WZcupVQZH1KRyQ1~smS8n{zaQt5%zK)x}+4%+r-8sI0>%%}l`WqkqHNOA)FK~Ujw)4OK zd$kn*>%Tu!qMvC8(2xG16!+h=1~dGv|58CjRd-3C;OIudP`s7bK`a1F_25orm-%N}V@*&9W~(U?A9bjtcIfP+@E+p}+OGsrn%jhy9WodY7wqpUyiS)W 
z5JyKz!mK{K$i#uL73in3jv-!q%cz%M(@6S&!0d6gAn4CXS3x20S4%ECys(V4OJsa~ zhZv0Z@al`O7zWfsm<_s2L!0Hoh)Z<;I+V+0;CwF&<0kxA+~V<8)C;tCNy6dF^rdxp zLwZ-A3}RLBzILE1g4@EkA?tdV4#X8o0g`lI z=o}~YlJI<`_R|W>4r{#p7geP%)z17FamZ9{;R!-#^xmw|hwnbvWx1pJkt5}Zx*>0D zm+9zV*=0MCBX&e}obFLWl9Y88iVH#wb~Q6_Ah0B3@uJ6jv6BK!+!brmp7mFT-KN38s~I9qyn9q8_G(>P!=8?w zb0*m6uN;hW&0EbnKDJof`qW}yuFppP@7P|sT->m&wCEG zd|vXDWap{pt6Ut7xJ$o9v21HFsxiqhS&K(LhDRCt+sXN&7>%s(s6_JXM!1q=`N7Ehczbu3k(3ll$AQxmFUT*R2XJ4VOA6kxo>7(Pxg z51i1%!Jqsr5O zKTe-1Uy-WS3Mxc`1`jbJR&946TGEQdqS(yChGo%@4Rjg5-al8OM$Q+Vmte=-!yyl^ zrl1r6N0zEuF#b{JTWl-@eeb*?Ws1O=2@ii5*mmf>WHcJY*MlRw!YC}b;}6bAmzAYy zXR{ZGQN->Js4ju{Es4*i*=;atB-|2f-0|s$@kaV6E<$OmI;?Oak&KCMH!^oJx?sA5 zmTk|baP~BpK^9BBH=Zb}99~M%F13sefmMh*+9N9Qv}*2oGS1cWDD(T0B6DIDezuoG z9cS|?oasV~U1W6H4R+tIstA%l`Fvj@b6HJbd4mzwgZ)4Wjvh%O0t zn1UZG-NYOqKJQ>4ebF5Tk>My31?Tfmd6Ubh-TU-CnEtHm3mJ;AtsZkV5g5=my%ZKW zP~PZ6q@ayWZ&Ex^nPx>rgLf*}OxOKjhTfoG+FG6}rnB|!GMi(>g5NO#4VN>8^zfN--y;3a&Sw}eZ5Xi&A^l1qN(9%w5E%EULF^J z{!ocy1B#n1pR?KIArzdoTKLXNi!xyNJTxWE(V0U}+ooCNNEG{;_d!%ij9JrfhP+Nu z8Gky>$2&?Nr<&pJTlY#Z=?bAP-;*KE=IixNt$muJw>D67{QW!Xq%7i7Uqo9;!%l=G z%P4Kfg`Py{`E0TDC68`U(u9L}NgqXa!}X95Seom5+wybKJhMxRz%0~wjpw3uVn*Tf z@g+iw<1U{4*vHpztosXH680${zJ>=AKQk+|o~oNrgsn(hnOg9EE}?Yw5I{BzHl4*z zoO^swKQC1HaRj@di^tiu+NeU*b6jps5yf72Gz-_4Cu3!-#B*fOz18CbyioXfwGA_0 zt^pla))1C!)Wmwh;kUnM2%X6#%|pJPpY&TVTh1y#Sj*N_HiLCtO(I*ra0X*mZ;u`dvZmENbV2)|}@atGJ-&}c-2L{+vS_!;AMO*#3X=tB7KSWnh1;! zR;_Bb%ichyIN z7<}|zFnZCp?XHnxMCJ2sFZExm-CSh^0d+_S0V=~T{$3z3mhjO~AzO^k=Z&Z?y>#$7 zhINS3x(Js6NuK^NQ$+McPaUBen35{GGsopkTA}TI@)Zz``a`u7bi`Yxx+&=hI~<5h z_NdDK%SsVEA}q3gleZ5az3v+)xe$r(*YE22fJ15n#ZZ7M6rkij6YL&jTwnXR6pP<; zBU}PdFn?&agARw(i{oy=mWPgzBw;WEV*WB#RAp7j@bU(P{JT2tzZol9a{9F=oPy^+ z0{l2M00t5wnSc@W z*YyJp@Z5i1|9H;7dM?;2Kyk>vXCyN=a(E4J;Tc=LcK87WKmg(bitrtag2u|oOdk+O zWM%Zvy7K{e1`DFr^*tx^D0jQ#&Mj(U_DD-PUJw8Rro92&&H$}&1jr_2_*Xt~;Gy7vctGoXzeE1G|MUaBz<3}8@ZX>KM?U=F z&m$jz-QrLG_%lBK%ny&_;LrT{$cI1k<0Bsc|Nqa;j{v6sdn7H|Or00jK@ z0K6k`0pi?%5sw!z9F7yQU3qi<5~ab>JJVFNG|{&b^y@<2-p_D z_QCr>RDe4XfN%kX1t1`Q@PD}fJN&$WXA=Mj=LZ4H3M>!M|Fb;6c%U0dfN*3`z}*-? zz`D=?2$&woCtzJj0Yn5KY5)Q240HqY195?M2D<;1AGq1z|0w>S`GL>-Ej=W#Y=Qvd z0T2)i_+gh8K)^BrNghDJz6>OwANZjcSPo!$fdototOpT*fZ;#u1*{Wr`~ce$IEH{} zfqq~-FdfkSGoBdW1`;q2u)IXS5zzlL9(e!h|C#2e|7V(?{{L(|EBKK~#2 zp@sqF0geeEZ&-d18Q=~~2W$@@|ABP}#{cBsZ|(aZ_8(3_zWldR9|enC2%M9F z>l<*+`QJ6JfjH=|9Sm&%ci;sIASM7AI2t+V1MXT@#^3J%CkXNHT44Z9GJAbnTT5WX n&x_vdQT2?x(7^@>h;O9-Ju?s#L{{I)gq@3lg^7WgiRu3V;op2G literal 0 HcmV?d00001 diff --git a/docs/python_docs/python/tutorials/packages/autograd/index.md b/docs/python_docs/python/tutorials/packages/autograd/index.md index 586794a696ba..6623e15c1e2b 100644 --- a/docs/python_docs/python/tutorials/packages/autograd/index.md +++ b/docs/python_docs/python/tutorials/packages/autograd/index.md @@ -23,7 +23,7 @@ Gradients are fundamental to the process of training neural networks, and tell us how to change the parameters of the network to improve its performance. -![autograd-gradient](/_static/autograd/autograd_gradient.png) +![autograd-gradient](_static/autograd_gradient.png) ### Long Answer: @@ -39,7 +39,7 @@ Assuming we've calculated the gradient of each parameter with respect to the los We differentiate. [MXNet Gluon](/api/python/docs/tutorials/packages/gluon/index.html) uses Reverse Mode Automatic Differentiation (`autograd`) to backprogate gradients from the loss metric to the network parameters. 
-![forward-backward](/_static/autograd/autograd_forward_backward.png) +![forward-backward](_static/autograd_forward_backward.png) ### Long Answer: @@ -56,7 +56,7 @@ Stage 2. Work backwards through this record and evaluate the partial derivatives

@@ -173,7 +173,7 @@ net[0].weight.grad_req = 'null'

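The hunk above sits in the tutorial section where `net[0].weight.grad_req` is set to `'null'`, which tells `autograd` not to compute or store a gradient for that parameter. A rough sketch of the effect (assuming a small `Sequential` network; the code below is illustrative and not part of this patch):

```python
import mxnet as mx
from mxnet import autograd, gluon, nd

net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(4), gluon.nn.Dense(1))
net.initialize()
net(nd.ones((1, 3)))                 # trigger deferred shape inference/allocation
net[0].weight.grad_req = 'null'      # skip this parameter's gradient entirely

x = nd.random.uniform(shape=(8, 3))
with autograd.record():
    loss = net(x).sum()
loss.backward()
print(net[1].weight.grad())          # populated as usual; net[0].weight gets no gradient
```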
@@ -217,7 +217,7 @@ def f(x): We can plot the resultant function for $x$ between 0 and 1, and we should recognise certain functions in segments of $x$. Starting with a quadratic curve from 0 to 1/2, we have a cubic curve from 1/2 to 2/3, a quartic from 2/3 to 3/4 and finally a flatline. -![control-flow](/_static/autograd/autograd_control_flow.png) +![control-flow](_static/autograd_control_flow.png) Using `autograd`, let's now find the gradient of this arbritrary function. We don't have a vectorized function in this case, because of the control flow, so let's also create a function to calculate the gradient using `autograd`. @@ -234,7 +234,7 @@ grads = [get_grad(f, x).asscalar() for x in xs] print(grads) ``` -![flow-grad](/_static/autograd/autograd_control_flow_grad.png) +![flow-grad](_static/autograd_control_flow_grad.png) We can calculate the gradients by hand in this situation (since it's a toy example), and for the four segments discussed before we'd expect $2x$, $3x^2$, $4x^3$ and 0. As a spot check, for $x=0.6$ the hand calculated gradient would be $3x^2=1.08$, which equals `1.08` as computed by `autograd`. @@ -247,7 +247,7 @@ Most of the time `autograd` will be aware of the complete computational graph, a

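The workflow the patched tutorial describes boils down to three calls: `attach_grad()` to allocate gradient storage, `autograd.record()` to record the computation, and `backward()` to run the reverse pass. A minimal sketch (the toy function here is illustrative, not taken from the tutorial):

```python
from mxnet import nd, autograd

x = nd.array([0.25, 0.5, 0.75])
x.attach_grad()                  # allocate space for dy/dx
with autograd.record():          # stage 1: record the operations
    y = (x ** 3).sum()
y.backward()                     # stage 2: walk the record backwards
print(x.grad)                    # 3 * x**2 -> [0.1875 0.75 1.6875]
```

With control flow inside the recorded function, as in the `f(x)` example above, the same two calls apply; `autograd` differentiates the branch that was actually executed.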
diff --git a/docs/python_docs/python/tutorials/packages/gluon/image/index.rst b/docs/python_docs/python/tutorials/packages/gluon/image/index.rst index b6bf3dc7834d..27cb4c3182b2 100644 --- a/docs/python_docs/python/tutorials/packages/gluon/image/index.rst +++ b/docs/python_docs/python/tutorials/packages/gluon/image/index.rst @@ -39,16 +39,6 @@ Basic Image Tutorials How to use pretrained models to recognize what is in an image. -Advanced Image Tutorials ---------------------- - -.. container:: cards - - .. card:: - :title: Image similarity search with InfoGAN - :link: https://mxnet.apache.org/versions/master/tutorials/gluon/info_gan.html - - Implementing an InfoGAN based on the MXNet Gluon. GluonCV Toolkit Tutorials ------------------------- diff --git a/docs/python_docs/python/tutorials/packages/gluon/image/info_gan.md b/docs/python_docs/python/tutorials/packages/gluon/image/info_gan.md new file mode 100644 index 000000000000..646cf728e5ef --- /dev/null +++ b/docs/python_docs/python/tutorials/packages/gluon/image/info_gan.md @@ -0,0 +1,454 @@ + + + + + + + + + + + + + + + + + + +# Image similarity search with InfoGAN + +This notebook shows how to implement an InfoGAN based on Gluon. InfoGAN is an extension of GANs, where the generator input is split in 2 parts: random noise and a latent code (see [InfoGAN Paper](https://arxiv.org/pdf/1606.03657.pdf)). +The codes are made meaningful by maximizing the mutual information between code and generator output. InfoGAN learns a disentangled representation in a completely unsupervised manner. It can be used for many applications such as image similarity search. This notebook uses the DCGAN example from the [Straight Dope Book](https://gluon.mxnet.io/chapter14_generative-adversarial-networks/dcgan.html) and extends it to create an InfoGAN. + + +```python +from __future__ import print_function +from datetime import datetime +import logging +import multiprocessing +import os +import sys +import tarfile +import time + +import numpy as np +from matplotlib import pyplot as plt +from mxboard import SummaryWriter +import mxnet as mx +from mxnet import gluon +from mxnet import ndarray as nd +from mxnet.gluon import nn, utils +from mxnet import autograd + +``` + +The latent code vector can contain several variables, which can be categorical and/or continuous. We set `n_continuous` to 2 and `n_categories` to 10. + + +```python +batch_size = 64 +z_dim = 100 +n_continuous = 2 +n_categories = 10 +ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu() +``` + +Some functions to load and normalize images. 
+ + +```python +lfw_url = 'http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz' +data_path = 'lfw_dataset' +if not os.path.exists(data_path): + os.makedirs(data_path) + data_file = utils.download(lfw_url) + with tarfile.open(data_file) as tar: + tar.extractall(path=data_path) + +``` + + +```python +def transform(data, width=64, height=64): + data = mx.image.imresize(data, width, height) + data = nd.transpose(data, (2,0,1)) + data = data.astype(np.float32)/127.5 - 1 + if data.shape[0] == 1: + data = nd.tile(data, (3, 1, 1)) + return data.reshape((1,) + data.shape) +``` + + +```python +def get_files(data_dir): + images = [] + filenames = [] + for path, _, fnames in os.walk(data_dir): + for fname in fnames: + if not fname.endswith('.jpg'): + continue + img = os.path.join(path, fname) + img_arr = mx.image.imread(img) + img_arr = transform(img_arr) + images.append(img_arr) + filenames.append(path + "/" + fname) + return images, filenames +``` + +Load the dataset `lfw_dataset` which contains images of celebrities. + + +```python +data_dir = 'lfw_dataset' +images, filenames = get_files(data_dir) +split = int(len(images)*0.8) +test_images = images[split:] +test_filenames = filenames[split:] +train_images = images[:split] +train_filenames = filenames[:split] + +train_data = gluon.data.ArrayDataset(nd.concatenate(train_images)) +train_dataloader = gluon.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, last_batch='rollover', num_workers=multiprocessing.cpu_count()-1) +``` + +## Generator +Define the Generator model. Architecture is taken from the DCGAN implementation in [Straight Dope Book](https://gluon.mxnet.io/chapter14_generative-adversarial-networks/dcgan.html). The Generator consist of 4 layers where each layer involves a strided convolution, batch normalization, and rectified nonlinearity. It takes as input random noise and the latent code and produces an `(64,64,3)` output image. + + +```python +class Generator(gluon.HybridBlock): + def __init__(self, **kwargs): + super(Generator, self).__init__(**kwargs) + with self.name_scope(): + self.prev = nn.HybridSequential() + self.prev.add(nn.Dense(1024, use_bias=False), nn.BatchNorm(), nn.Activation(activation='relu')) + self.G = nn.HybridSequential() + + self.G.add(nn.Conv2DTranspose(64 * 8, 4, 1, 0, use_bias=False)) + self.G.add(nn.BatchNorm()) + self.G.add(nn.Activation('relu')) + self.G.add(nn.Conv2DTranspose(64 * 4, 4, 2, 1, use_bias=False)) + self.G.add(nn.BatchNorm()) + self.G.add(nn.Activation('relu')) + self.G.add(nn.Conv2DTranspose(64 * 2, 4, 2, 1, use_bias=False)) + self.G.add(nn.BatchNorm()) + self.G.add(nn.Activation('relu')) + self.G.add(nn.Conv2DTranspose(64, 4, 2, 1, use_bias=False)) + self.G.add(nn.BatchNorm()) + self.G.add(nn.Activation('relu')) + self.G.add(nn.Conv2DTranspose(3, 4, 2, 1, use_bias=False)) + self.G.add(nn.Activation('tanh')) + + def hybrid_forward(self, F, x): + x = self.prev(x) + x = F.reshape(x, (0, -1, 1, 1)) + return self.G(x) +``` + +## Discriminator +Define the Discriminator and Q model. The Q model shares many layers with the Discriminator. Its task is to estimate the code `c` for a given fake image. It is used to maximize the lower bound to the mutual information. 
+ + +```python +class Discriminator(gluon.HybridBlock): + def __init__(self, **kwargs): + super(Discriminator, self).__init__(**kwargs) + with self.name_scope(): + self.D = nn.HybridSequential() + self.D.add(nn.Conv2D(64, 4, 2, 1, use_bias=False)) + self.D.add(nn.LeakyReLU(0.2)) + self.D.add(nn.Conv2D(64 * 2, 4, 2, 1, use_bias=False)) + self.D.add(nn.BatchNorm()) + self.D.add(nn.LeakyReLU(0.2)) + self.D.add(nn.Conv2D(64 * 4, 4, 2, 1, use_bias=False)) + self.D.add(nn.BatchNorm()) + self.D.add(nn.LeakyReLU(0.2)) + self.D.add(nn.Conv2D(64 * 8, 4, 2, 1, use_bias=False)) + self.D.add(nn.BatchNorm()) + self.D.add(nn.LeakyReLU(0.2)) + + self.D.add(nn.Dense(1024, use_bias=False), nn.BatchNorm(), nn.Activation(activation='relu')) + + self.prob = nn.Dense(1) + self.feat = nn.HybridSequential() + self.feat.add(nn.Dense(128, use_bias=False), nn.BatchNorm(), nn.Activation(activation='relu')) + self.category_prob = nn.Dense(n_categories) + self.continuous_mean = nn.Dense(n_continuous) + self.Q = nn.HybridSequential() + self.Q.add(self.feat, self.category_prob, self.continuous_mean) + + def hybrid_forward(self, F, x): + x = self.D(x) + prob = self.prob(x) + feat = self.feat(x) + category_prob = self.category_prob(feat) + continuous_mean = self.continuous_mean(feat) + + return prob, category_prob, continuous_mean +``` + +The InfoGAN has the following layout. + + +Discriminator and Generator are the same as in the DCGAN example. On top of the Disciminator is the Q model, which is estimating the code `c` for given fake images. The Generator's input is random noise and the latent code `c`. + +## Training Loop +Initialize Generator and Discriminator and define correspoing trainer function. + + +```python +generator = Generator() +generator.hybridize() +generator.initialize(mx.init.Normal(0.002), ctx=ctx) + +discriminator = Discriminator() +discriminator.hybridize() +discriminator.initialize(mx.init.Normal(0.002), ctx=ctx) + +lr = 0.0001 +beta = 0.5 + +g_trainer = gluon.Trainer(generator.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta}) +d_trainer = gluon.Trainer(discriminator.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta}) +q_trainer = gluon.Trainer(discriminator.Q.collect_params(), 'adam', {'learning_rate': lr, 'beta1': beta}) +``` + +Create vectors with real (=1) and fake labels (=0). + + +```python +real_label = nd.ones((batch_size,), ctx=ctx) +fake_label = nd.zeros((batch_size,),ctx=ctx) +``` + +Load a pretrained model. + + +```python +if os.path.isfile('infogan_d_latest.params') and os.path.isfile('infogan_g_latest.params'): + discriminator.load_parameters('infogan_d_latest.params', ctx=ctx, allow_missing=True, ignore_extra=True) + generator.load_parameters('infogan_g_latest.params', ctx=ctx, allow_missing=True, ignore_extra=True) +``` +There are 2 differences between InfoGAN and DCGAN: the extra latent code and the Q network to estimate the code. +The latent code is part of the Generator input and it contains mutliple variables (continuous, categorical) that can represent different distributions. In order to make sure that the Generator uses the latent code, mutual information is introduced into the GAN loss term. Mutual information measures how much X is known given Y or vice versa. 
It is defined as: + +![gif](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/info_gan/entropy.gif) + +The InfoGAN loss is: + +![gif](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/info_gan/loss.gif) + +where `V(D,G)` is the GAN loss and the mutual information `I(c, G(z, c))` goes in as regularization. The goal is to reach high mutual information, in order to learn meaningful codes for the data. + + +Define the loss functions. `SoftmaxCrossEntropyLoss` for the categorical code, `L2Loss` for the continious code and `SigmoidBinaryCrossEntropyLoss` for the normal GAN loss. + + +```python +loss1 = gluon.loss.SigmoidBinaryCrossEntropyLoss() +loss2 = gluon.loss.L2Loss() +loss3 = gluon.loss.SoftmaxCrossEntropyLoss() +``` + +This function samples `c`, `z`, and concatenates them to create the generator input. + + +```python +def create_generator_input(): + + #create random noise + z = nd.random_normal(0, 1, shape=(batch_size, z_dim), ctx=ctx) + label = nd.array(np.random.randint(n_categories, size=batch_size)).as_in_context(ctx) + c1 = nd.one_hot(label, depth=n_categories).as_in_context(ctx) + c2 = nd.random.uniform(-1, 1, shape=(batch_size, n_continuous)).as_in_context(ctx) + + # concatenate random noise with c which will be the input of the generator + return nd.concat(z, c1, c2, dim=1), label, c2 +``` + +Define the training loop. +1. The discriminator receives `real_data` and `loss1` measures how many real images have been identified as real +2. The discriminator receives `fake_image` from the Generator and `loss1` measures how many fake images have been identified as fake +3. Update Discriminator. Currently, it is updated every second iteration in order to avoid that the Discriminator becomes too strong. You may want to change that. +4. The updated discriminator receives `fake_image` and `loss1` measures how many fake images have been been identified as real, `loss2` measures the difference between the sampled continuous latent code `c` and the output of the Q model and `loss3` measures the difference between the sampled categorical latent code `c` and the output of the Q model. +4. 
Update Generator and Q + + +```python +with SummaryWriter(logdir='./logs/') as sw: + + epochs = 1 + counter = 0 + for epoch in range(epochs): + print("Epoch", epoch) + starttime = time.time() + + d_error_epoch = nd.zeros((1,), ctx=ctx) + g_error_epoch = nd.zeros((1,), ctx=ctx) + + for idx, data in enumerate(train_dataloader): + + #get real data and generator input + real_data = data.as_in_context(ctx) + g_input, label, c2 = create_generator_input() + + + #Update discriminator: Input real data and fake data + with autograd.record(): + output_real,_,_ = discriminator(real_data) + d_error_real = loss1(output_real, real_label) + + # create fake image and input it to discriminator + fake_image = generator(g_input) + output_fake,_,_ = discriminator(fake_image.detach()) + d_error_fake = loss1(output_fake, fake_label) + + # total discriminator error + d_error = d_error_real + d_error_fake + + d_error_epoch += d_error.mean() + + #Update D every second iteration + if (counter+1) % 2 == 0: + d_error.backward() + d_trainer.step(batch_size) + + #Update generator: Input random noise and latent code vector + with autograd.record(): + fake_image = generator(g_input) + output_fake, category_prob, continuous_mean = discriminator(fake_image) + g_error = loss1(output_fake, real_label) + loss3(category_prob, label) + loss2(c2, continuous_mean) + + g_error.backward() + g_error_epoch += g_error.mean() + + g_trainer.step(batch_size) + q_trainer.step(batch_size) + + # logging + if idx % 10 == 0: + count = idx + 1 + logging.info('speed: {} samples/s'.format(batch_size / (time.time() - starttime))) + logging.info('discriminator loss = %f, generator loss = %f at iter %d epoch %d' + %(d_error_epoch.asscalar()/count,g_error_epoch.asscalar()/count, count, epoch)) + + g_input,_,_ = create_generator_input() + + # create some fake image for logging in MXBoard + fake_image = generator(g_input) + + sw.add_scalar(tag='Loss_D', value={'test':d_error_epoch.asscalar()/count}, global_step=counter) + sw.add_scalar(tag='Loss_G', value={'test':d_error_epoch.asscalar()/count}, global_step=counter) + sw.add_image(tag='data_image', image=((fake_image[0]+ 1.0) * 127.5).astype(np.uint8) , global_step=counter) + sw.flush() + + discriminator.save_parameters("infogan_d_latest.params") + generator.save_parameters("infogan_g_latest.params") +``` + +## Image similarity +Once the InfoGAN is trained, we can use the Discriminator to do an image similarity search. The idea is that the network learned meaningful features from the images based on the mutual information e.g. pose of people in an image. + +Load the trained discriminator and retrieve one of its last layers. + + +```python +discriminator = Discriminator() +discriminator.load_parameters("infogan_d_latest.params", ctx=ctx, ignore_extra=True) + +discriminator = discriminator.D[:11] +print (discriminator) + +discriminator.hybridize() +``` + +Nearest neighbor function, which takes a matrix of features and an input feature vector. It returns the 3 closest features. + + +```python +def get_knn(features, input_vector, k=3): + dist = (nd.square(features - input_vector).sum(axis=1))/features.shape[0] + indices = dist.asnumpy().argsort()[:k] + return [(index, dist[index].asscalar()) for index in indices] +``` + +A helper function to visualize image data. 
+ + +```python +def visualize(img_array): + plt.imshow(((img_array.asnumpy().transpose(1, 2, 0) + 1.0) * 127.5).astype(np.uint8)) + plt.axis('off') +``` + +Take some images from the test data, obtain its feature vector from `discriminator.D[:11]` and plot images of the corresponding closest vectors in the feature space. + + +```python +feature_size = 8192 + +features = nd.zeros((len(test_images), feature_size), ctx=ctx) + +for idx, image in enumerate(test_images): + + feature = discriminator(nd.array(image, ctx=ctx)) + feature = feature.reshape(feature_size,) + features[idx,:] = feature.copyto(ctx) + + +for image in test_images[:100]: + + feature = discriminator(mx.nd.array(image, ctx=ctx)) + feature = feature.reshape((feature_size,)) + image = image.reshape((3,64,64)) + + + indices = get_knn(features, feature, k=10) + fig = plt.figure(figsize=(15,12)) + plt.subplot(1,10,1) + + visualize(image) + for i in range(2,9): + if indices[i-1][1] < 1.5: + plt.subplot(1,10,i) + sim = test_images[indices[i-1][0]].reshape(3,64,64) + visualize(sim) + plt.show() + plt.clf() +``` +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/info_gan/output.png) + +## How the Generator learns +We trained the Generator for a couple of epochs and stored a couple of fake images per epoch. Check the video. + ![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/info_gan/infogan.gif) + + +The following function computes the TSNE on the feature matrix and stores the result in a json-file. This file can be loaded with [TSNEViewer](https://ml4a.github.io/guides/ImageTSNEViewer/) + + +```python +import json + +from sklearn.manifold import TSNE +from scipy.spatial import distance + +tsne = TSNE(n_components=2, learning_rate=150, perplexity=30, verbose=2).fit_transform(features.asnumpy()) + +# save data to json +data = [] +counter = 0 +for i,f in enumerate(test_filenames): + + point = [float((tsne[i,k] - np.min(tsne[:,k]))/(np.max(tsne[:,k]) - np.min(tsne[:,k]))) for k in range(2) ] + data.append({"path": os.path.abspath(os.path.join(os.getcwd(),f)), "point": point}) + +with open("imagetsne.json", 'w') as outfile: + json.dump(data, outfile) +``` + +Load the file with TSNEViewer. You can now inspect whether similiar looking images are grouped nearby or not. + + + + \ No newline at end of file diff --git a/docs/python_docs/python/tutorials/packages/gluon/index.rst b/docs/python_docs/python/tutorials/packages/gluon/index.rst index e2bdb1856953..d97279465287 100644 --- a/docs/python_docs/python/tutorials/packages/gluon/index.rst +++ b/docs/python_docs/python/tutorials/packages/gluon/index.rst @@ -72,21 +72,21 @@ Data .. card:: :title: Data Loading - :link: data.html + :link: data/data.html How to load data for training. .. card:: - :title: Image Augmentation - :link: image-augmentation.html + :title: Data Augmentation + :link: data/data_augmentation.md - Boost your training dataset with image augmentation. + A guide to data augmentation. .. card:: - :title: Data Augmentation + :title: Image Augmentation :link: image/image-augmentation.html - A guide to data augmentation. + Boost your training dataset with image augmentation. .. card:: :title: Gluon Datasets and DataLoader @@ -167,7 +167,7 @@ Training .. card:: :title: Autograd API - :link: /api/python/docs/tutorials/packages/autograd/index.html + :link: ../autograd/index.html How to use Automatic Differentiation with the Autograd API. @@ -194,19 +194,15 @@ Advanced Topics Building custom operators with numpy. -.. 
-<-- tutorial missing --> .. card:: :title: Custom Loss - :link: custom-loss/custom-loss.html + :link: loss/custom-loss.md A guide to implementing custom losses. -.. - .. card:: :title: Gotchas using NumPy in Apache MXNet - :link: ../ndarray/gotchas_numpy_in_mxnet.html + :link: ../ndarray/gotchas_numpy_in_mxnet.md Common misconceptions when using NumPy in Apache MXNet. diff --git a/docs/python_docs/python/tutorials/packages/index.rst b/docs/python_docs/python/tutorials/packages/index.rst index c24ebb8c04eb..9fdd75c837f7 100644 --- a/docs/python_docs/python/tutorials/packages/index.rst +++ b/docs/python_docs/python/tutorials/packages/index.rst @@ -46,13 +46,13 @@ Shared APIs .. card:: :title: Symbol API - :link: symbol/index.html + :link: ../../api/symbol/ - How to use MXNet's Symbol API. + MXNet Symbol API has been depricated. API documentation is still available for reference. .. card:: :title: Autograd API - :link: autograd/autograd.html + :link: autograd/index.html How to use Automatic Differentiation with the Autograd API. diff --git a/docs/python_docs/python/tutorials/performance/backend/index.rst b/docs/python_docs/python/tutorials/performance/backend/index.rst index 1113948e30d0..c4cfaf635701 100644 --- a/docs/python_docs/python/tutorials/performance/backend/index.rst +++ b/docs/python_docs/python/tutorials/performance/backend/index.rst @@ -47,7 +47,7 @@ The following tutorials will help you learn how to use backend tools to boost pe .. card:: :title: MXNet Operator Profiler - :link: profilder.html + :link: profiler.html Use the profiler to monitor the performance of individual operators .. diff --git a/docs/python_docs/python/tutorials/performance/index.rst b/docs/python_docs/python/tutorials/performance/index.rst index 78943a87dc16..e547ecd297ed 100644 --- a/docs/python_docs/python/tutorials/performance/index.rst +++ b/docs/python_docs/python/tutorials/performance/index.rst @@ -26,7 +26,7 @@ Essential .. card:: :title: Improving Performance - :link: https://mxnet.apache.org/api/faq/perf + :link: /api/faq/perf How to get the best performance from MXNet. @@ -38,8 +38,7 @@ Essential .. card:: :title: Tuning NumPy Operations - :link: https://mxnet.apache.org/versions/master/tutorials/gluon/gotchas_numpy_in_mxnet.html - + :link: ../packages/ndarray/gotchas_numpy_in_mxnet.html Gotchas using NumPy in MXNet. Compression @@ -49,13 +48,13 @@ Compression .. card:: :title: Compression: float16 - :link: compression/float16.html + :link: /api/faq/float16 How to use float16 in your model to boost training speed. .. card:: :title: Gradient Compression - :link: compression/gradient_compression.html + :link: /api/faq/gradient_compression How to use gradient compression to reduce communication bandwidth and increase speed. .. @@ -74,7 +73,7 @@ Accelerated Backend .. card:: :title: TensorRT - :link: backend/tensorRt.html + :link: backend/tensorrt/tensorrt.html How to use NVIDIA's TensorRT to boost inference performance. @@ -82,7 +81,7 @@ Accelerated Backend TBD Content .. card:: :title: MKL-DNN - :link: backend/mkl-dnn.html + :link: backend/mkldnn/mkldnn_readme How to get the most from your CPU by using Intel's MKL-DNN. @@ -100,19 +99,19 @@ Distributed Training .. card:: :title: Distributed Training Using the KVStore API - :link: https://mxnet.apache.org/versions/master/faq/distributed_training.html + :link: /api/faq/distributed_training.html How to use the KVStore API to use multiple GPUs when training a model. .. 
card:: :title: Training with Multiple GPUs Using Model Parallelism - :link: https://mxnet.apache.org/versions/master/faq/model_parallel_lstm.html + :link: /api/faq/model_parallel_lstm.html An overview of using multiple GPUs when training an LSTM. .. card:: :title: Data Parallelism in MXNet - :link: https://mxnet.apache.org/versions/master/faq/multi_devices.html + :link: /api/faq/multi_devices.html An overview of distributed training strategies. diff --git a/docs/static_site/src/pages/api/faq/multi_devices.md b/docs/static_site/src/pages/api/faq/multi_devices.md new file mode 100644 index 000000000000..d8bc81cb1106 --- /dev/null +++ b/docs/static_site/src/pages/api/faq/multi_devices.md @@ -0,0 +1,217 @@ +--- +layout: page_category +title: Data Parallelism with Multiple CPU/GPUs on MXNet +category: faq +faq_c: Model +question: How do I run MXNet on Multiple CPU/GPUs with data parallelism? +permalink: /api/faq/multi_device +--- + + + + + + + + + + + + + + + + + +# Run MXNet on Multiple CPU/GPUs with Data Parallelism + +_MXNet_ supports training with multiple CPUs and GPUs, which may be located on different physical machines. + +## Data Parallelism vs Model Parallelism + +By default, _MXNet_ uses data parallelism to partition the workload over multiple +devices. +Assume there are *n* devices. +Then each one will receive a copy of the complete model +and train it on *1/n* of the data. +The results such as gradients and +updated model are communicated across these devices. + +MXNet also supports model parallelism. +In this approach, each device holds onto only part of the model. +This proves useful when the model is too large to fit onto a single device. +As an example, see the following [tutorial](./model_parallel_lstm.md) +which shows how to use model parallelism for training a multi-layer LSTM model. +In this tutorial, we'll focus on data parallelism. + +## Multiple GPUs within a Single Machine + +### Workload Partitioning + +By default, _MXNet_ partitions a data batch evenly among the available GPUs. +Assume a batch size *b* and assume there are *k* GPUs, then in one iteration +each GPU will perform forward and backward on *b/k* examples. The +gradients are then summed over all GPUs before updating the model. + +### How to Use + +> To use GPUs, we need to compile MXNet with GPU support. For +> example, set `USE_CUDA=1` in `config.mk` before `make`. (see +> [MXNet installation guide](/get_started) for more options). + +If a machine has one or more GPU cards installed, +then each card is labeled by a number starting from 0. +To use a particular GPU, one can either +specify the context `context` in code +or pass `--gpus` at the command line. +For example, to use GPU 0 and 2 in python, +one can typically create a module with +```python +import mxnet as mx +module = mx.module.Module(context=[mx.gpu(0), mx.gpu(2)], ...) +``` +while if the program accepts a `--gpus` flag (as seen in +[example/image-classification](https://github.com/dmlc/mxnet/tree/master/example/image-classification)), +then we can try +```bash +python train_mnist.py --gpus 0,2 ... +``` + +### Advanced Usage +If the available GPUs are not all equally powerful, +we can partition the workload accordingly. +For example, if GPU 0 is 3 times faster than GPU 2, +then we might use the workload option `work_load_list=[3, 1]`, +see [Module](/api/python/docs/api/module/index.html) +for more details. + +Training with multiple GPUs should yield the same results +as training on a single GPU if all other hyper-parameters are the same. 
+In practice, the results may exhibit small differences, +owing to the randomness of I/O (random order or other augmentations), +weight initialization with different seeds, and CUDNN. + +We can control on which devices the gradient is aggregated +and on which device the model is updated via [`KVStore`](/api/python/docs/api/kvstore/index.html), +the _MXNet_ module that supports data communication. +One can either use `mx.kvstore.create(type)` to get an instance +or use the program flag `--kv-store type`. + +There are two commonly used types, + +- `local`: all gradients are copied to CPU memory and weights are updated there. +- `device`: both gradient aggregation and weight updates are run on GPUs. +With this setting, the `KVStore` also attempts to use GPU peer-to-peer communication, +potentially accelerating the communication. +Note that this option may result in higher GPU memory usage. + +When using a large number of GPUs, e.g. >=4, we suggest using `device` for better performance. + +## Distributed Training with Multiple Machines + +`KVStore` also supports a number of options for running on multiple machines. + +- `dist_sync` behaves similarly to `local` but exhibits one major difference. + With `dist_sync`, `batch-size` now means the batch size used on each machine. + So if there are *n* machines and we use batch size *b*, + then `dist_sync` behaves like `local` with batch size *n\*b*. +- `dist_device_sync` is similar to `dist_sync`. The difference between them is that + `dist_device_sync` aggregates gradients and updates weight on GPUs + while `dist_sync` does so on CPU memory. +- `dist_async` performs asynchronous updates. + The weight is updated whenever gradients are received from any machine. + The update is atomic, i.e., no two updates happen on the same weight at the same time. + However, the order is not guaranteed. + +### How to Launch a Job + +> To use distributed training, we need to compile with `USE_DIST_KVSTORE=1` +> (see [MXNet installation guide](/get_started) for more options). + +Launching a distributed job is a bit different from running on a single +machine. MXNet provides +[tools/launch.py](https://github.com/dmlc/mxnet/blob/master/tools/launch.py) to +start a job by using `ssh`, `mpi`, `sge`, or `yarn`. + +An easy way to set up a cluster of EC2 instances for distributed deep learning +is using an [AWS CloudFormation template](https://github.com/awslabs/deeplearning-cfn). +If you do not have a cluster, you can check the repository before you continue. + +Assume we are at the directory `mxnet/example/image-classification` +and want to train LeNet to classify MNIST images, as demonstrated here: +[train_mnist.py](https://github.com/dmlc/mxnet/blob/master/example/image-classification/train_mnist.py). + +On a single machine, we can run: + +```bash +python train_mnist.py --network lenet +``` + +Now, say we are given two ssh-able machines and _MXNet_ is installed on both machines. +We want to train LeNet on these two machines. +First, we save the IPs (or hostname) of these two machines in file `hosts`, e.g. + +```bash +$ cat hosts +172.30.0.172 +172.30.0.171 +``` + +Next, if the mxnet folder is accessible from both machines, e.g. on a +[network filesystem](https://help.ubuntu.com/lts/serverguide/network-file-system.html), +then we can run: + +```bash +python ../../tools/launch.py -n 2 --launcher ssh -H hosts python train_mnist.py --network lenet --kv-store dist_sync +``` + +Note that here we + +- use `launch.py` to submit the job. 
+- provide launcher, `ssh` if all machines are ssh-able, `mpi` if `mpirun` is + available, `sge` for Sun Grid Engine, and `yarn` for Apache Yarn. +- `-n` number of worker nodes to run on +- `-H` the host file which is required by `ssh` and `mpi` +- `--kv-store` use either `dist_sync` or `dist_async` + + +### Synchronize Directory + +Now consider if the mxnet folder is not accessible. +We can first copy the `MXNet` library to this folder by +```bash +cp -r ../../python/mxnet . +cp -r ../../lib/libmxnet.so mxnet +``` + +then ask `launch.py` to synchronize the current directory to all machines' + `/tmp/mxnet` directory with `--sync-dst-dir` + +```bash +python ../../tools/launch.py -n 2 -H hosts --sync-dst-dir /tmp/mxnet \ + python train_mnist.py --network lenet --kv-store dist_sync +``` + +### Use a Particular Network Interface + +_MXNet_ often chooses the first available network interface. +But for machines that have multiple interfaces, +we can specify which network interface to use for data +communication by the environment variable `DMLC_INTERFACE`. +For example, to use the interface `eth0`, we can + +``` +export DMLC_INTERFACE=eth0; python ../../tools/launch.py ... +``` + +### Debug Connection + +Set`PS_VERBOSE=1` to see the debug logging, e.g +``` +export PS_VERBOSE=1; python ../../tools/launch.py ... +``` + +### More + +- See more launch options by `python ../../tools/launch.py -h` +- See more options of [ps-lite](http://ps-lite.readthedocs.org/en/latest/how_to.html) \ No newline at end of file From 21e320a9850df210cb054629ffa32b47aea0863e Mon Sep 17 00:00:00 2001 From: Xingjian Shi Date: Mon, 9 Dec 2019 10:41:28 -0800 Subject: [PATCH 13/62] [Bugfix] [Numpy] Add `kAddTo` and kNullOp to Transpose (#16979) * update Check for repeated axes enable addto to transpose fix fix fix fix remove unused ndim Update pseudo2DTranspose_op-inl.cuh Update pseudo2DTranspose_op-inl.cuh Update pseudo2DTranspose_op-inl.cuh fix Update pseudo2DTranspose_op-inl.cuh try to fix Update pseudo2DTranspose_op-inl.cuh Update pseudo2DTranspose_op-inl.cuh Update pseudo2DTranspose_op-inl.cuh fix Update np_matrix_op.cc Update test_numpy_op.py update test case fix implementation fix bug update fix bug Update pseudo2DTranspose_op-inl.cuh fix fix Update test_numpy_op.py * Fix bug * fix docstring * try to address comment * no need to change this line * Fix bug * address comments * address comment --- src/operator/numpy/np_matrix_op-inl.h | 16 +- src/operator/numpy/np_matrix_op.cc | 12 +- src/operator/tensor/matrix_op-inl.h | 106 ++++++++---- src/operator/tensor/matrix_op.cc | 5 +- .../tensor/pseudo2DTranspose_op-inl.cuh | 160 +++++++----------- tests/python/unittest/test_numpy_op.py | 84 +++++---- 6 files changed, 206 insertions(+), 177 deletions(-) diff --git a/src/operator/numpy/np_matrix_op-inl.h b/src/operator/numpy/np_matrix_op-inl.h index fee534315b77..37af908042c9 100644 --- a/src/operator/numpy/np_matrix_op-inl.h +++ b/src/operator/numpy/np_matrix_op-inl.h @@ -124,16 +124,22 @@ void NumpyTranspose(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { const NumpyTransposeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; + if (req[0] == kNullOp) return; + CHECK(req[0] == kWriteTo || req[0] == kAddTo) + << "Transpose only supports kWriteTo, kNullOp and kAddTo"; + mxnet::TShape axes; if (ndim_is_known(param.axes)) { - mxnet::TShape axes = common::CanonicalizeAxes(param.axes); - TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], 
axes); + axes = common::CanonicalizeAxes(param.axes); } else { - mxnet::TShape axes(inputs[0].ndim(), -1); + axes = mxnet::TShape(inputs[0].ndim(), -1); for (int i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; } - TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); + } + if (req[0] == kAddTo) { + TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); + } else { + TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); } } diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc index e496202a0b41..05b7e948e8a0 100644 --- a/src/operator/numpy/np_matrix_op.cc +++ b/src/operator/numpy/np_matrix_op.cc @@ -24,6 +24,7 @@ */ #include +#include #include "./np_matrix_op-inl.h" #include "../nn/concat-inl.h" @@ -67,8 +68,13 @@ bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape ret(ndim, -1); if (ndim_is_known(param.axes)) { - CHECK_EQ(ndim, param.axes.ndim()); + CHECK_EQ(ndim, param.axes.ndim()) + << "The number of axes does not match the dimension of the tensor. axes = " + << param.axes << ", input tensor shape = " << shp; mxnet::TShape axes = common::CanonicalizeAxes(param.axes); + std::set axes_set(axes.begin(), axes.end()); + CHECK_EQ(axes_set.size(), axes.ndim()) << "Repeated axis in transpose. param.axes = " + << param.axes; if (ndim_is_known(shp)) { for (int i = 0; i < ndim; ++i) { ret[i] = shp[axes[i]]; @@ -117,9 +123,9 @@ NNVM_REGISTER_OP(_np_transpose) } std::ostringstream os; os << axes; - return MakeNonlossGradNode("transpose", n, ograds, {}, {{"axes", os.str()}}); + return MakeNonlossGradNode("_np_transpose", n, ograds, {}, {{"axes", os.str()}}); } else { - return MakeNonlossGradNode("transpose", n, ograds, {}, + return MakeNonlossGradNode("_np_transpose", n, ograds, {}, std::unordered_map()); } }) diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 0fee2a26c0ed..4bd059ae81df 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -269,8 +269,10 @@ struct TransposeParam : public dmlc::Parameter { * \param out output tensor * \param row shape of dim 0 of input * \param col shape of dim 1 of input + * \tparam DType Data type + * \tparam is_addto */ -template +template MSHADOW_XINLINE void Transpose2D(const DType *in, DType *out, index_t row, index_t col) { // ensure cache line hits and prevent cache miss for any configuration // L1 cache size to be utilized = 32kb = 2^15 @@ -282,7 +284,7 @@ MSHADOW_XINLINE void Transpose2D(const DType *in, DType *out, index_t row, index // Block-size - 2^5 v 2^5 (32 v 32) with potential 4 pragma for loop unrolled // blocksize * blocksize * num_threads = cache_size / dtype_size // Instead of explicit unroll, let compiler figure out optimal unroll factor - index_t blocksize = 32; + const index_t blocksize = 32; // collapse 2 parallelizes 2 for loops // inner 2 for loops aren't parallelized to prevent cache miss @@ -299,14 +301,25 @@ MSHADOW_XINLINE void Transpose2D(const DType *in, DType *out, index_t row, index // transpose the block for (index_t a = j; (a < blocksize + j) && (a < col); ++a) { for (index_t b = i; (b < blocksize + i) && (b < row); ++b) { - out[a * row + b] = in[b * col + a]; + if (!is_addto) { + out[a * row + b] = in[b * col + a]; + } else { + out[a * row + b] += in[b * col + a]; + } } } } } } -template +inline bool IsIdentityTranspose(const TShape& axes) { + for (dim_t i = 0; i < axes.ndim(); i++) { + if (axes[i] != i) return false; + } + return true; +} + +template void TransposeImpl(RunContext 
ctx, const TBlob& src, const TBlob& ret, @@ -323,62 +336,79 @@ void TransposeImpl(RunContext ctx, // Example: (0, 2, 3, 1) or (0, 3, 1, 2), but not (0, 2, 1, 3). if (isPseudo2DTranspose(axes)) { MSHADOW_TYPE_SWITCH(ret.type_flag_, DType, { - transpose_pseudo2D(ret, src, axes, s); + transpose_pseudo2D(ret, src, axes, s); }); return; } #endif + // Special handle the identity case + if (IsIdentityTranspose(axes)) { + MSHADOW_TYPE_SWITCH(ret.type_flag_, DType, { + Tensor in = src.get_with_shape(mshadow::Shape1(src.Size()), s); + Tensor out = ret.get_with_shape(mshadow::Shape1(ret.Size()), s); + if (!is_addto) { + // Use memcpy to accelerate the speed + Copy(out, in, s); + } else { + mxnet_op::Kernel, xpu>::Launch( + s, ret.Size(), out.dptr_, in.dptr_); + } + }); + return; + } + // Handle the general transpose case MSHADOW_TYPE_SWITCH(ret.type_flag_, DType, { switch (axes.ndim()) { - case 0: { - Tensor in = src.get_with_shape(mshadow::Shape1(1), s); - Tensor out = ret.get_with_shape(mshadow::Shape1(1), s); - Copy(out, in, s); - break; - } - case 1: { - Tensor in = src.get(s); - Tensor out = ret.get(s); - Copy(out, in, s); - break; - } case 2: { - mshadow::Tensor in = src.FlatTo2D(s); - mshadow::Tensor out = ret.FlatTo2D(s); - - if (axes[0] == 1 && axes[1] == 0) { - if (ctx.get_ctx().dev_mask() == cpu::kDevMask) { - Transpose2D(in.dptr_, out.dptr_, in.shape_[0], in.shape_[1]); - } else { - out = in.T(); - } + Tensor in = src.get(s); + Tensor out = ret.get(s); + if (ctx.get_ctx().dev_mask() == cpu::kDevMask) { + Transpose2D(in.dptr_, out.dptr_, in.shape_[0], in.shape_[1]); } else { - Copy(out, in, s); + LOG(FATAL) << "Not Implemented. We should never reach here because the 2D case " + "in GPU has been covered by transpose_pseudo2D." + " Report an issue in Github."; } break; } case 3: { Tensor in = src.get(s); Tensor out = ret.get(s); - out = transpose(in, axes.get<3>()); + if (!is_addto) { + out = transpose(in, axes.get<3>()); + } else { + out += transpose(in, axes.get<3>()); + } break; } case 4: { Tensor in = src.get(s); Tensor out = ret.get(s); - out = transpose(in, axes.get<4>()); + if (!is_addto) { + out = transpose(in, axes.get<4>()); + } else { + out += transpose(in, axes.get<4>()); + } break; } case 5: { Tensor in = src.get(s); Tensor out = ret.get(s); - out = transpose(in, axes.get<5>()); + if (!is_addto) { + out = transpose(in, axes.get<5>()); + } else { + out += transpose(in, axes.get<5>()); + } break; } case 6: { Tensor in = src.get(s); Tensor out = ret.get(s); - out = transpose(in, axes.get<6>()); + if (!is_addto) { + out = transpose(in, axes.get<6>()); + } else { + out += transpose(in, axes.get<6>()); + } break; } default: @@ -399,15 +429,21 @@ void Transpose(const nnvm::NodeAttrs& attrs, return; } const TransposeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(req[0], kWriteTo) << "Transpose does not support kWriteInplace and kAddTo"; + CHECK(req[0] == kWriteTo || req[0] == kAddTo) + << "Transpose only supports kNullOp, kWriteTo and kAddTo"; + mxnet::TShape axes; if (param.axes.ndim() == 0) { - mxnet::TShape axes(inputs[0].ndim(), -1); + axes = mxnet::TShape(inputs[0].ndim(), -1); for (int i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; } - TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); } else { - TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], param.axes); + axes = common::CanonicalizeAxes(param.axes); + } + if (req[0] == kAddTo) { + TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); + } else { + TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], 
axes); } } diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 5b702fbaa2d6..15b954f11c1d 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -283,11 +283,12 @@ static void TransposeComputeExCPU(const nnvm::NodeAttrs& attrs, return; } const TransposeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(req[0], kWriteTo) << "Transpose does not support kWriteInplace and kAddTo"; + CHECK(req[0] == kWriteTo || req[0] == kAddTo) << + "Transpose only supports kNullOp, kWriteTo and kAddTo"; CHECK_EQ(inputs.size(), 1U); CHECK_EQ(outputs.size(), 1U); - if (SupportMKLDNNTranspose(param, inputs[0])) { + if (SupportMKLDNNTranspose(param, inputs[0]) && req[0] == kWriteTo) { MKLDNNTransposeForward(attrs, ctx, inputs[0], req[0], outputs[0]); return; } diff --git a/src/operator/tensor/pseudo2DTranspose_op-inl.cuh b/src/operator/tensor/pseudo2DTranspose_op-inl.cuh index 5b7cf04daef4..b3ca9fbfa0c9 100644 --- a/src/operator/tensor/pseudo2DTranspose_op-inl.cuh +++ b/src/operator/tensor/pseudo2DTranspose_op-inl.cuh @@ -39,22 +39,31 @@ namespace mxnet { namespace op { namespace cuda { - -template +/*! + * \brief The `transpose_pseudo2D` based on chosen vectorized types. It transposes an array of + * shape (k, m, n) to (k, n, m) + * \param out Pointer to output memory. + * \param inp Pointer to input memory. + * \param m First of tensor dimensions. + * \param n Second of tensor dimensions. + * \param nIterY The number of iterations in the y-dim of the thread to cover all rows. (1-->m) + * \param nIterZ The number of iterations in the z-dim of the thread to cover all rows. (1-->k) + * \tparam DType Data type + * \tparam CType The type to load the data. + * \tparam is_addto Whether to perform out += transpose(data) or out = transpose(data) + */ +template __global__ void transpose_pseudo2D(DType* out, DType* inp, const index_t m, const index_t n, const index_t nIterY, const index_t nIterZ) { - const index_t TSR = sizeof(CType)/sizeof(DType); // TypeSizeRatio + // Calculate the TypeSizeRatio + const index_t TSR = sizeof(CType) / sizeof(DType) > 0 ? 
sizeof(CType) / sizeof(DType) : 1; const index_t chunked_n = n/TSR; const index_t chunked_m = m/TSR; - union transp_t { - CType valChunk; - DType values[TSR]; - }; - - __shared__ DType d_shm[1024*TSR*TSR]; - CType* c_shm = reinterpret_cast(d_shm); + extern __shared__ char buf[]; + DType* d_shm = reinterpret_cast(buf); + CType* c_shm = reinterpret_cast(buf); CType* cInp = reinterpret_cast(inp); CType* cOut = reinterpret_cast(out); @@ -78,23 +87,34 @@ __global__ void transpose_pseudo2D(DType* out, DType* inp, } __syncthreads(); - // read from shared to registers - transp_t tmp[TSR]; + // read from shared to local registers + CType tmp[TSR]; #pragma unroll for (index_t i = 0; i < TSR; i++) { + DType* tmp_dptr = reinterpret_cast(&tmp[i]); #pragma unroll for (int j = 0; j < TSR; j++) { index_t shmIdx = (TSR*threadIdx.y + j)*blockDim.x*TSR + TSR*threadIdx.x + i; - tmp[i].values[j] = d_shm[shmIdx]; + tmp_dptr[j] = d_shm[shmIdx]; } } __syncthreads(); // write back to global output - offset = blockIdx_z*m*chunked_n + blockIdx.x*blockDim.x*TSR*chunked_m + blockIdx_y*blockDim.y; + offset = blockIdx_z*m*chunked_n + blockIdx.x*blockDim.x*TSR*chunked_m + + blockIdx_y*blockDim.y; #pragma unroll for (index_t i = 0; i < TSR; i++) { - cOut[offset + (TSR*threadIdx.x + i)*chunked_m + threadIdx.y] = tmp[i].valChunk; + if (is_addto) { + DType* tmp_dptr = reinterpret_cast(&tmp[i]); + #pragma unroll + for (int j = 0; j < TSR; j++) { + out[TSR * (offset + (TSR*threadIdx.x + i)*chunked_m + threadIdx.y) + j] + += tmp_dptr[j]; + } + } else { + cOut[offset + (TSR*threadIdx.x + i)*chunked_m + threadIdx.y] = tmp[i]; + } } } } @@ -107,7 +127,6 @@ __global__ void transpose_pseudo2D(DType* out, DType* inp, /*! * \brief Calls proper version of kernel `transpose_pseudo2D` * basing on chosen type sizes. - * \param dTypeSize Size of data type. * \param cTypeSize Size of type that should be use to copy. * \param grid Grid dimensions for the kernel. * \param block Block dimensions for the kernel. @@ -116,92 +135,39 @@ __global__ void transpose_pseudo2D(DType* out, DType* inp, * \param inp Pointer to input memory. * \param m First of tensor dimensions. * \param n Second of tensor dimensions. + * \tparam DType Data type + * \tparam is_addto Whether to trigger add the transpose result to the output tensor. 
*/ -inline void call_transpose_pseudo2D(index_t dTypeSize, index_t cTypeSize, - dim3 grid, dim3 block, cudaStream_t stream, - void* out, void* inp, const index_t m, const index_t n, - const index_t nIterY, const index_t nIterZ) { - switch (dTypeSize) { - case (1): { - uint8_t* d_outPtr = reinterpret_cast(out); - uint8_t* d_inpPtr = reinterpret_cast(inp); - switch (cTypeSize) { - case (1): - cuda::transpose_pseudo2D<<>> +template +inline void call_transpose_pseudo2D(index_t cTypeSize, + dim3 grid, dim3 block, cudaStream_t stream, + DType* d_outPtr, DType* d_inpPtr, + const index_t m, const index_t n, + const index_t nIterY, const index_t nIterZ) { + const int nshared = 1024 * cTypeSize / sizeof(DType) * cTypeSize; + switch (cTypeSize) { + case (1): + cuda::transpose_pseudo2D<<>> (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); break; - case (2): - cuda::transpose_pseudo2D<<>> + case (2): + cuda::transpose_pseudo2D<<>> (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); break; - case (4): - cuda::transpose_pseudo2D<<>> + case (4): + cuda::transpose_pseudo2D<<>> (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); break; - case (8): - // case guarded against in function getBestCopyTypeSize - LOG(FATAL) << "cuda::transpose_pseudo2D would take too much shared memory"; - default: - LOG(FATAL) << "Unsupported type combination"; - } - break; - } - case (2): { - uint16_t* d_outPtr = reinterpret_cast(out); - uint16_t* d_inpPtr = reinterpret_cast(inp); - switch (cTypeSize) { - case (2): - cuda::transpose_pseudo2D<<>> + case (8): + cuda::transpose_pseudo2D<<>> (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); break; - case (4): - cuda::transpose_pseudo2D<<>> - (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); - break; - case (8): - cuda::transpose_pseudo2D<<>> - (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); - break; - default: - LOG(FATAL) << "Unsupported type combination"; - } - break; - } - case (4): { - uint32_t* d_outPtr = reinterpret_cast(out); - uint32_t* d_inpPtr = reinterpret_cast(inp); - switch (cTypeSize) { - case (4): - cuda::transpose_pseudo2D<<>> - (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); - break; - case (8): - cuda::transpose_pseudo2D<<>> - (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); - break; - default: - LOG(FATAL) << "Unsupported type combination"; - } - break; - } - case (8): { - uint64_t* d_outPtr = reinterpret_cast(out); - uint64_t* d_inpPtr = reinterpret_cast(inp); - switch (cTypeSize) { - case (8): - cuda::transpose_pseudo2D<<>> - (d_outPtr, d_inpPtr, m, n, nIterY, nIterZ); - break; - default: - LOG(FATAL) << "Unsupported type combination"; - } - break; - } - default: - LOG(FATAL) << "Unsupported type combination"; + default: + LOG(FATAL) << "Unsupported type combination. " << "Copy type size = " << cTypeSize; } auto cuErr = cudaPeekAtLastError(); - CHECK_EQ(cuErr, cudaSuccess) << "Transpose kernel failure: " << cudaGetErrorString(cuErr) << ". " + CHECK_EQ(cuErr, cudaSuccess) << "TransposePseudo2D kernel failure: " + << cudaGetErrorString(cuErr) << ". " << "block: (" << block.x << "," << block.y << "," << block.z << ")" << " grid: (" << grid.x << "," << grid.y << "," << grid.z << ")"; } @@ -225,7 +191,6 @@ inline bool isPseudo2DTranspose(const TShape& params) { return n_swpDims == 2; } - struct pseudo2DSizes { index_t leadDimS; index_t M; @@ -306,15 +271,14 @@ inline std::pair calculateKernelParams(pseudo2DSizes sizes, const in * \param outBlob Tensor blob to store result. * \param inpBlob Tensor blob with input data. * \param params Parameters (axes) of the transpose. 
+ * \param is_addto Whether to add the transpose result to the outBlob * \param s Pointer to GPU stream. */ -template +template void transpose_pseudo2D(const TBlob& outBlob, const TBlob& inpBlob, const TShape& params, mshadow::Stream* s) { const TShape& shape = inpBlob.shape_; CHECK_EQ(shape.ndim(), params.ndim()); - auto ndim = params.ndim(); - auto sizes = getPackedTransposeDimensions(shape, params); index_t cTypeSize = getBestCopyTypeSize(sizeof(DType), sizes.M, sizes.N); @@ -337,8 +301,10 @@ void transpose_pseudo2D(const TBlob& outBlob, const TBlob& inpBlob, } cudaStream_t stream = mshadow::Stream::GetStream(s); - call_transpose_pseudo2D(sizeof(DType), cTypeSize, grid, block, stream, - outBlob.dptr_, inpBlob.dptr_, sizes.M, sizes.N, nIterY, nIterZ); + call_transpose_pseudo2D + (cTypeSize, grid, block, stream, + outBlob.dptr(), inpBlob.dptr(), + sizes.M, sizes.N, nIterY, nIterZ); } } // namespace op diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 0947b6112158..07942505464c 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -1489,7 +1489,9 @@ def np_transpose_grad(out_shape, dtype, axes=None): if axes is None or axes == (): return _np.transpose(ograd, axes) np_axes = _np.array(list(axes)) - return _np.transpose(ograd, tuple(list(_np.argsort(np_axes)))) + transpose_axes = _np.zeros_like(np_axes) + transpose_axes[np_axes] = _np.arange(len(np_axes)) + return _np.transpose(ograd, tuple(list(transpose_axes))) class TestTranspose(HybridBlock): def __init__(self, axes=None): @@ -1498,45 +1500,57 @@ def __init__(self, axes=None): def hybrid_forward(self, F, a): return F.np.transpose(a, self.axes) + test_workloads = [[(), [(), None]], + [(2,), [(0,), None]], + [(0, 2), [(0, 1), (1, 0)]], + [(5, 10), [(0, 1), (1, 0), None]], + [(8, 2, 3), [(2, 0, 1), (0, 2, 1), (0, 1, 2), (2, 1, 0), (-1, 1, 0), None]], + [(8, 2, 16), [(0, 2, 1), (2, 0, 1), (0, 1, 2), (2, 1, 0), (-1, -2, -3)]], + [(8, 3, 4, 8), [(0, 2, 3, 1), (1, 2, 3, 0), (0, 3, 2, 1)]], + [(8, 3, 2, 3, 8), [(0, 1, 3, 2, 4), (0, 1, 2, 3, 4), (4, 0, 1, 2, 3)]], + [(3, 4, 3, 4, 3, 2), [(0, 1, 3, 2, 4, 5), (2, 3, 4, 1, 0, 5), None]]] for hybridize in [True, False]: - for dtype in [_np.int32, _np.float32]: - for ndim in range(7): - shape = rand_shape_nd(ndim, dim=5, allow_zero_size=True) - axeses = [None] - if ndim == 0: - axeses += [()] - else: - axes = [i for i in range(ndim)] - axeses.append(tuple(axes)) - random.shuffle(axes) - axeses.append(tuple(axes)) - axeses.append([i - len(axes) for i in axes]) - for axes in axeses: - test_trans = TestTranspose(axes) - if hybridize: - test_trans.hybridize() - x = rand_ndarray(shape).as_np_ndarray() - x = x.astype(dtype) - x.attach_grad() - np_out = _np.transpose(x.asnumpy(), axes) - with mx.autograd.record(): - mx_out = test_trans(x) - assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) - mx_out.backward() - np_backward = np_transpose_grad(np_out.shape, dtype, axes) - assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False) - - mx_out = x.transpose(axes) - np_out = x.asnumpy().transpose(axes) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) + for dtype in [_np.float32, _np.float16, _np.int32]: + for data_shape, axes_workload in test_workloads: + for axes in axes_workload: + for grad_req in ['write', 'add']: + test_trans = TestTranspose(axes) + if hybridize: + 
test_trans.hybridize() + x = np.random.normal(0, 1, data_shape).astype(dtype) + x = x.astype(dtype) + x.attach_grad(grad_req=grad_req) + if grad_req == 'add': + x.grad[()] = np.random.normal(0, 1, x.grad.shape).astype(x.grad.dtype) + x_grad_np = x.grad.asnumpy() + np_out = _np.transpose(x.asnumpy(), axes) + with mx.autograd.record(): + mx_out = test_trans(x) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) + mx_out.backward() + np_backward = np_transpose_grad(np_out.shape, dtype, axes) + if grad_req == 'add': + assert_almost_equal(x.grad.asnumpy(), np_backward + x_grad_np, + rtol=1e-3, atol=1e-5, use_broadcast=False) + else: + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False) - if isinstance(axes, (list, tuple)): - mx_out = x.transpose(*axes) - np_out = x.asnumpy().transpose(*axes) + mx_out = x.transpose(axes) + np_out = x.asnumpy().transpose(axes) assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) + if isinstance(axes, (list, tuple)): + mx_out = x.transpose(*axes) + np_out = x.asnumpy().transpose(*axes) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) + # Test for error raising + dat = np.random.normal(0, 1, (3, 4, 5), dtype=np.float32) + assert_raises(MXNetError, lambda: dat.transpose((0, 0, 1))) + assert_raises(MXNetError, lambda: dat.transpose((0, 1, 3))) + + @with_seed() @use_np From 4b7262be310e8781ffac53d8d4e0da86c7da3617 Mon Sep 17 00:00:00 2001 From: dw_sjtu <46704444+sjtuWangDing@users.noreply.github.com> Date: Tue, 10 Dec 2019 03:01:11 +0800 Subject: [PATCH 14/62] set rtol = 1e-2 and atol = 1e-4 when dtype == np.float32 in test_numpy_op.py:test_np_linalg_solve (#17025) --- python/mxnet/symbol/numpy/linalg.py | 1 + .../python/unittest/test_numpy_interoperability.py | 4 ++-- tests/python/unittest/test_numpy_op.py | 13 ++++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/python/mxnet/symbol/numpy/linalg.py b/python/mxnet/symbol/numpy/linalg.py index 1aaf4b990e31..6df175ebfc4b 100644 --- a/python/mxnet/symbol/numpy/linalg.py +++ b/python/mxnet/symbol/numpy/linalg.py @@ -340,6 +340,7 @@ def slogdet(a): """ return _npi.slogdet(a) + def solve(a, b): r""" Solve a linear matrix equation, or system of linear scalar equations. 
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 6b5efa0c96b0..d1047c80b7b4 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -320,8 +320,8 @@ def _add_workload_linalg_inv(): def _add_workload_linalg_solve(): - shapes = [(0,0), (1,1), (5,5), (20,20), (3,5,5), (3,0,0), (2,20,20), (0,20,20), (2,3,20,20)] - nrhs = (0, 1, 2, 10) + shapes = [(0,0), (1,1), (5,5), (6,6), (3,5,5), (3,0,0), (2,5,5), (0,5,5), (2,3,4,4)] + nrhs = (0, 1, 2, 3) dtypes = (np.float32, np.float64) for dtype, shape in itertools.product(dtypes, shapes): a = _np.random.rand(*shape) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 07942505464c..20482e8241da 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -3666,19 +3666,22 @@ def get_grad_b(A, X): (0, 0), (1, 1), (3, 3), - (20, 20), - (3, 20, 20), + (4, 4), + (3, 2, 2), (1, 0, 0), (0, 1, 1), (0, 5, 3, 3), (5, 0, 0, 0), - (2, 3, 10, 10) + (2, 2, 5, 5) ] - nrhs = (-1, 0, 1, 2, 5) + nrhs = (-1, 0, 1, 2, 3) dtypes = ['float32', 'float64'] for hybridize, shape, dtype, nrh in itertools.product([False, True], shapes, dtypes, nrhs): - rtol = 1e-3 + rtol = 1e-3 atol = 1e-5 + if dtype == 'float32': + rtol = 1e-2 + atol = 1e-4 test_solve = TestSolve() if hybridize: test_solve.hybridize() From 5f5b83f267b2bff2bbfea947deed9c6565e975a7 Mon Sep 17 00:00:00 2001 From: alicia <32725332+Alicia1529@users.noreply.github.com> Date: Tue, 10 Dec 2019 03:02:17 +0800 Subject: [PATCH 15/62] skip quantized conv flaky case (#16866) (#16774) * Fix quantized concat when inputs are mixed int8 and uint8 Change-Id: I4da04bf4502425134a466823fb5f73da2d7a419b * skip flaky test * trigger ci [Numpy] add op full_like, c++ impl, fix zeros_like, ones_like type inference (#16804) From 1e1d774edfe1020e17af59bac43df6a6acb274f3 Mon Sep 17 00:00:00 2001 From: Jake Lee Date: Mon, 9 Dec 2019 14:41:33 -0800 Subject: [PATCH 16/62] Revert downgrade cublas version as Nvidia-docker have fixed this (#17026) --- ci/docker/Dockerfile.build.centos7_gpu | 4 --- ci/docker/Dockerfile.build.ubuntu_base_gpu | 4 --- ci/docker/Dockerfile.build.ubuntu_build_cuda | 4 --- ci/docker/Dockerfile.build.ubuntu_gpu_cu101 | 4 --- ci/docker/Dockerfile.build.ubuntu_nightly_gpu | 4 --- ci/docker/install/centos7_cublas.sh | 27 ------------------- ci/docker/install/ubuntu_cublas.sh | 26 ------------------ 7 files changed, 73 deletions(-) delete mode 100755 ci/docker/install/centos7_cublas.sh delete mode 100755 ci/docker/install/ubuntu_cublas.sh diff --git a/ci/docker/Dockerfile.build.centos7_gpu b/ci/docker/Dockerfile.build.centos7_gpu index a784e872083d..7e49e88b3a52 100644 --- a/ci/docker/Dockerfile.build.centos7_gpu +++ b/ci/docker/Dockerfile.build.centos7_gpu @@ -33,10 +33,6 @@ ENV CUDNN_VERSION=7.6.0.64 COPY install/centos7_cudnn.sh /work/ RUN /work/centos7_cudnn.sh -# hotfix nvidia-docker image come with wrong version of libcublas -COPY install/centos7_cublas.sh /work/ -RUN /work/centos7_cublas.sh - ARG USER_ID=0 COPY install/centos7_adduser.sh /work/ RUN /work/centos7_adduser.sh diff --git a/ci/docker/Dockerfile.build.ubuntu_base_gpu b/ci/docker/Dockerfile.build.ubuntu_base_gpu index daf6fa342d43..94e6437e578b 100644 --- a/ci/docker/Dockerfile.build.ubuntu_base_gpu +++ b/ci/docker/Dockerfile.build.ubuntu_base_gpu @@ -29,10 +29,6 @@ ENV CUDNN_VERSION=7.6.0.64 COPY install/ubuntu_cudnn.sh /work/ 
RUN /work/ubuntu_cudnn.sh -# hotfix nvidia-docker image come with wrong version of libcublas -COPY install/ubuntu_cublas.sh /work/ -RUN /work/ubuntu_cublas.sh - ARG USER_ID=0 ARG GROUP_ID=0 COPY install/ubuntu_adduser.sh /work/ diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda b/ci/docker/Dockerfile.build.ubuntu_build_cuda index ecc2b0f1498a..ce6d0007875e 100644 --- a/ci/docker/Dockerfile.build.ubuntu_build_cuda +++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda @@ -54,10 +54,6 @@ RUN /work/ubuntu_cudnn.sh COPY install/ubuntu_nvidia.sh /work/ RUN /work/ubuntu_nvidia.sh -# hotfix nvidia-docker image come with wrong version of libcublas -COPY install/ubuntu_cublas.sh /work/ -RUN /work/ubuntu_cublas.sh - # Keep this at the end since this command is not cachable ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 index d005312ffe5b..7e0f8d93ed37 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 @@ -69,10 +69,6 @@ ENV CUDNN_VERSION=7.5.1.10 COPY install/ubuntu_cudnn.sh /work/ RUN /work/ubuntu_cudnn.sh -# hotfix nvidia-docker image come with wrong version of libcublas -COPY install/ubuntu_cublas.sh /work/ -RUN /work/ubuntu_cublas.sh - # Always last ARG USER_ID=0 ARG GROUP_ID=0 diff --git a/ci/docker/Dockerfile.build.ubuntu_nightly_gpu b/ci/docker/Dockerfile.build.ubuntu_nightly_gpu index 9a6781685031..5e812c433b43 100644 --- a/ci/docker/Dockerfile.build.ubuntu_nightly_gpu +++ b/ci/docker/Dockerfile.build.ubuntu_nightly_gpu @@ -72,10 +72,6 @@ ENV CUDNN_VERSION=7.6.0.64 COPY install/ubuntu_cudnn.sh /work/ RUN /work/ubuntu_cudnn.sh -# hotfix nvidia-docker image come with wrong version of libcublas -COPY install/ubuntu_cublas.sh /work/ -RUN /work/ubuntu_cublas.sh - ARG USER_ID=0 ARG GROUP_ID=0 COPY install/ubuntu_adduser.sh /work/ diff --git a/ci/docker/install/centos7_cublas.sh b/ci/docker/install/centos7_cublas.sh deleted file mode 100755 index 0e0f91fc441f..000000000000 --- a/ci/docker/install/centos7_cublas.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex - -# fix nvidia docker image come with wrong version of libcublas -yum -y downgrade libcublas-devel-10.2.1.243-1.x86_64 -yum -y downgrade libcublas10-10.2.1.243-1.x86_64 \ No newline at end of file diff --git a/ci/docker/install/ubuntu_cublas.sh b/ci/docker/install/ubuntu_cublas.sh deleted file mode 100755 index eb6c4f145fae..000000000000 --- a/ci/docker/install/ubuntu_cublas.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -apt update -# fix nvidia docker image come with wrong version of libcublas -apt install -y --allow-downgrades libcublas-dev=10.2.1.243-1 -apt install -y --allow-downgrades libcublas10=10.2.1.243-1 From 986a90217b1dfbbe5213a35a6d6dc6b82001c060 Mon Sep 17 00:00:00 2001 From: liuzh91 Date: Tue, 10 Dec 2019 07:13:24 +0800 Subject: [PATCH 17/62] introduce gradient update handler to the base estimator (#16900) * introduce gradient update handler to the base estimator * Modify the gradient update handler to include the batch size * Remove unrelated gradient update handler. * Modify gradient update handler to take the current batch size. * Remove white space to avoid the sanity check failure * add small tweak to the handler code * Modify the documentation of priority parameter of relevant handlers. * small modification on the documentation. * Add small modification on the documentation. 
* Remove unnecessary list check --- .../gluon/contrib/estimator/estimator.py | 8 +-- .../gluon/contrib/estimator/event_handler.py | 54 ++++++++++++++++--- tests/python/unittest/test_gluon_estimator.py | 7 +-- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index 54a0b165016e..ab7018f58e1f 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -24,7 +24,7 @@ import sys import warnings -from .event_handler import MetricHandler, ValidationHandler, LoggingHandler, StoppingHandler +from .event_handler import MetricHandler, ValidationHandler, LoggingHandler, StoppingHandler, GradientUpdateHandler from .event_handler import TrainBegin, EpochBegin, BatchBegin, BatchEnd, EpochEnd, TrainEnd from .event_handler import _check_event_handlers from .utils import _check_metrics, _suggest_metric_for_loss, _check_handler_metric_ref @@ -307,8 +307,6 @@ def fit_batch(self, train_batch, batch_axis=0): for l in loss: l.backward() - self.trainer.step(batch_size) - return data, label, pred, loss def fit(self, train_data, @@ -360,6 +358,7 @@ def fit(self, train_data, self.max_epoch = epochs self.max_batch = batches + self.batch_axis = batch_axis # provide default handlers event_handlers = self._prepare_default_handlers(val_data, event_handlers) @@ -414,6 +413,9 @@ def _prepare_default_handlers(self, val_data, event_handlers): # no need to add to default handler check as StoppingHandler does not use metrics added_default_handlers.append(StoppingHandler(self.max_epoch, self.max_batch)) + if not any(isinstance(handler, GradientUpdateHandler) for handler in event_handlers): + added_default_handlers.append(GradientUpdateHandler()) + if not any(isinstance(handler, MetricHandler) for handler in event_handlers): added_default_handlers.append(MetricHandler(train_metrics=self.train_metrics)) diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py index 53ba07dc836a..64777608bef0 100644 --- a/python/mxnet/gluon/contrib/estimator/event_handler.py +++ b/python/mxnet/gluon/contrib/estimator/event_handler.py @@ -31,7 +31,7 @@ __all__ = ['TrainBegin', 'TrainEnd', 'EpochBegin', 'EpochEnd', 'BatchBegin', 'BatchEnd', 'StoppingHandler', 'MetricHandler', 'ValidationHandler', - 'LoggingHandler', 'CheckpointHandler', 'EarlyStoppingHandler'] + 'LoggingHandler', 'CheckpointHandler', 'EarlyStoppingHandler', 'GradientUpdateHandler'] class EventHandler(object): @@ -130,13 +130,16 @@ class MetricHandler(EpochBegin, BatchEnd): ---------- train_metrics : List of EvalMetrics Training metrics to be updated at batch end. + priority : scalar + Priority level of the MetricHandler. Priority level is sorted in ascending + order. The lower the number is, the higher priority level the handler is. """ - def __init__(self, train_metrics): + def __init__(self, train_metrics, priority=-1000): self.train_metrics = _check_metrics(train_metrics) # order to be called among all callbacks # metrics need to be calculated before other callbacks can access them - self.priority = -np.Inf + self.priority = priority def epoch_begin(self, estimator, *args, **kwargs): for metric in self.train_metrics: @@ -176,6 +179,10 @@ class ValidationHandler(TrainBegin, BatchEnd, EpochEnd): batch_period : int, default None How often to run validation at batch end, by default :py:class:`ValidationHandler` does not validate at batch end. 
+ priority: scalar, default -1000 + Priority level of the ValidationHandler. Priority level is sorted in + ascending order. The lower the number is, the higher priority level the + handler is. """ def __init__(self, @@ -183,7 +190,8 @@ def __init__(self, eval_fn, val_metrics=None, epoch_period=1, - batch_period=None): + batch_period=None, + priority=-1000): self.val_data = val_data self.eval_fn = eval_fn self.epoch_period = epoch_period @@ -193,7 +201,7 @@ def __init__(self, self.current_epoch = 0 # order to be called among all callbacks # validation metrics need to be calculated before other callbacks can access them - self.priority = -np.Inf + self.priority = priority def train_begin(self, estimator, *args, **kwargs): # reset epoch and batch counter @@ -235,11 +243,16 @@ class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, Bat Training metrics to be logged, logged at batch end, epoch end, train end. val_metrics : list of EvalMetrics Validation metrics to be logged, logged at epoch end, train end. + priority : scalar, default np.Inf + Priority level of the LoggingHandler. Priority level is sorted in + ascending order. The lower the number is, the higher priority level the + handler is. """ def __init__(self, log_interval='epoch', train_metrics=None, - val_metrics=None): + val_metrics=None, + priority=np.Inf): super(LoggingHandler, self).__init__() if not isinstance(log_interval, int) and log_interval != 'epoch': raise ValueError("log_interval must be either an integer or string 'epoch'") @@ -250,7 +263,7 @@ def __init__(self, log_interval='epoch', self.processed_samples = 0 # logging handler need to be called at last to make sure all states are updated # it will also shut down logging at train end - self.priority = np.Inf + self.priority = priority self.log_interval = log_interval def train_begin(self, estimator, *args, **kwargs): @@ -704,3 +717,30 @@ def train_end(self, estimator, *args, **kwargs): estimator.logger.info('[Epoch %d] EarlyStoppingHanlder: ' 'early stopping due to %s not improving', self.stopped_epoch, self.monitor.get()[0]) + +class GradientUpdateHandler(BatchEnd): + """Gradient Update Handler that apply gradients on network weights + + :py:class:`GradientUpdateHandler` takes the priority level. It updates weight parameters + at the end of each batch + + Parameters + ---------- + priority : scalar, default -2000 + priority level of the gradient update handler. Priority level is sorted in ascending + order. The lower the number is, the higher priority level the handler is. 
+ ---------- + """ + def __init__(self, priority=-2000): + self.priority = priority + + def batch_end(self, estimator, *args, **kwargs): + loss = kwargs['loss'] + batch_size = 0 + if not isinstance(loss, list): + loss = [loss] + if isinstance(loss, list): + for l in loss: + batch_size += l.shape[estimator.batch_axis] + + estimator.trainer.step(batch_size) diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index cf913a6161c0..21f949a0bba6 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -367,6 +367,7 @@ def test_default_handlers(): val_metrics = est.val_metrics early_stopping = EarlyStoppingHandler(monitor=val_metrics[0]) handlers = est._prepare_default_handlers(val_data=None, event_handlers=[early_stopping]) - assert len(handlers) == 4 - assert isinstance(handlers[0], MetricHandler) - assert isinstance(handlers[3], LoggingHandler) + assert len(handlers) == 5 + assert isinstance(handlers[0], GradientUpdateHandler) + assert isinstance(handlers[1], MetricHandler) + assert isinstance(handlers[4], LoggingHandler) From 248acfa655729f40b19476d61f0484a9d72f438e Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Mon, 9 Dec 2019 16:52:02 -0800 Subject: [PATCH 18/62] Multi Precision Lamb Update operator (#16885) * multi-precision lamb update operator * removing multi-tensor code from lamb * doing operation beta^t outside of kernel call * removing unecessary functions from PyLAMB --- python/mxnet/optimizer/optimizer.py | 59 ++++++--- src/operator/optimizer_op-inl.h | 159 +++++++++++++++++++++++- src/operator/optimizer_op.cc | 90 +++++++++++++- src/operator/optimizer_op.cu | 5 + tests/python/gpu/test_operator_gpu.py | 1 + tests/python/unittest/test_optimizer.py | 14 ++- 6 files changed, 304 insertions(+), 24 deletions(-) diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py index 00d130bb5534..b2509d8b4d8a 100644 --- a/python/mxnet/optimizer/optimizer.py +++ b/python/mxnet/optimizer/optimizer.py @@ -34,7 +34,8 @@ multi_sgd_update, multi_sgd_mom_update, multi_mp_sgd_update, multi_mp_sgd_mom_update, preloaded_multi_sgd_update, preloaded_multi_sgd_mom_update, preloaded_multi_mp_sgd_update, - preloaded_multi_mp_sgd_mom_update, lamb_update_phase1, lamb_update_phase2) + preloaded_multi_mp_sgd_mom_update, lamb_update_phase1, lamb_update_phase2, + mp_lamb_update_phase1, mp_lamb_update_phase2) from ..ndarray import sparse from ..random import normal from ..util import is_np_array @@ -1262,11 +1263,10 @@ def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-6, def create_state(self, index, weight): stype = weight.stype - dtype = weight.dtype - return (zeros(weight.shape, weight.context, dtype=dtype, stype=stype), - zeros(weight.shape, weight.context, dtype=dtype, stype=stype)) + return (zeros(weight.shape, weight.context, dtype=numpy.float32, stype=stype), + zeros(weight.shape, weight.context, dtype=numpy.float32, stype=stype)) - def update(self, index, weight, grad, state): + def _update_impl(self, index, weight, grad, state, multi_precision=False): assert(isinstance(weight, NDArray)) assert(isinstance(grad, NDArray)) self._update_count(index) @@ -1277,19 +1277,46 @@ def update(self, index, weight, grad, state): kwargs = {'beta1': self.beta1, 'beta2': self.beta2, 'epsilon': self.epsilon, 'bias_correction': self.bias_correction, 't': t, 'rescale_grad': self.rescale_grad} - mean, var = state + if self.clip_gradient: 
kwargs['clip_gradient'] = self.clip_gradient - g = lamb_update_phase1(weight, grad, mean, var, wd=wd, **kwargs) - - kwargs = {} - if self.lower_bound: - kwargs['lower_bound'] = self.lower_bound - if self.upper_bound: - kwargs['upper_bound'] = self.upper_bound - r_1 = weight.norm() - r_2 = g.norm() - lamb_update_phase2(weight, g, r_1, r_2, lr=lr, out=weight, **kwargs) + + if multi_precision: + mean, var = state[1] + weight32 = state[0] + g = mp_lamb_update_phase1(weight, grad, mean, var, weight32, wd=wd, **kwargs) + + kwargs = {} + if self.lower_bound: + kwargs['lower_bound'] = self.lower_bound + if self.upper_bound: + kwargs['upper_bound'] = self.upper_bound + r_1 = weight32.norm() + r_2 = g.norm() + mp_lamb_update_phase2(weight, g, r_1, r_2, weight32, lr=lr, out=weight, **kwargs) + else: + mean, var = state + g = lamb_update_phase1(weight, grad, mean, var, wd=wd, **kwargs) + + kwargs = {} + if self.lower_bound: + kwargs['lower_bound'] = self.lower_bound + if self.upper_bound: + kwargs['upper_bound'] = self.upper_bound + r_1 = weight.norm() + r_2 = g.norm() + lamb_update_phase2(weight, g, r_1, r_2, lr=lr, out=weight, **kwargs) + + def update(self, index, weight, grad, state): + self._update_impl(index, weight, grad, state, multi_precision=False) + + def update_multi_precision(self, index, weight, grad, state): + if not isinstance(index, (tuple, list)): + use_multi_precision = self.multi_precision and weight.dtype == numpy.float16 + else: + use_multi_precision = self.multi_precision and weight[0].dtype == numpy.float16 + self._update_impl(index, weight, grad, state, + multi_precision=use_multi_precision) # pylint: enable=line-too-long diff --git a/src/operator/optimizer_op-inl.h b/src/operator/optimizer_op-inl.h index 146e411b447c..2df574c46909 100644 --- a/src/operator/optimizer_op-inl.h +++ b/src/operator/optimizer_op-inl.h @@ -1751,6 +1751,164 @@ inline void LambUpdatePhaseTwo(const nnvm::NodeAttrs& attrs, }); } +template +inline bool MPLambPhaseOneType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), static_cast(total_in)) << " in operator " << attrs.name; + CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << attrs.name; + for (int i = 0; i < n_in; ++i) { + TYPE_ASSIGN_CHECK(*in_attrs, i, mshadow::kFloat16); + } + for (int i = n_in; i < total_in; ++i) { + TYPE_ASSIGN_CHECK(*in_attrs, i, mshadow::kFloat32); + } + for (int i = 0; i < n_out; ++i) { + TYPE_ASSIGN_CHECK(*out_attrs, i, mshadow::kFloat32); + } + return true; +} + +struct MPLambUpdatePhaseOneKernel { + template + MSHADOW_XINLINE static void Map(int i, float* out_data, + float* mean_data, float* var_data, const DType* weight_data, + const DType* grad_data, const float* weight32_data, + const float clip_gradient, const float rescale_grad, + const float beta1_t, const float beta1, + const float beta2_t, const float beta2, + const float wd, const float epsilon, const int t, + bool bias_correction, const OpReqType req) { + using namespace mshadow_op; + + float grad_rescaled = grad_data[i] * rescale_grad; + if (clip_gradient >= 0.f) { + grad_rescaled = clip::Map(grad_rescaled, clip_gradient); + } + + mean_data[i] = beta1 * mean_data[i] + (1.f - beta1) * grad_rescaled; + var_data[i] = beta2 * var_data[i] + (1.f - beta2) * grad_rescaled * grad_rescaled; + + float g = mean_data[i] / (square_root::Map(var_data[i]) + epsilon) + wd * weight32_data[i]; + + if (bias_correction) { + float mean_hat = mean_data[i] / (1. 
- beta1_t); + float var_hat = var_data[i] / (1 - beta2_t); + g = mean_hat / (square_root::Map(var_hat) + epsilon) + wd * weight32_data[i]; + } + KERNEL_ASSIGN(out_data[i], req, g); + } +}; + +template +inline void MPLambUpdatePhaseOne(const nnvm::NodeAttrs& attrs, + const OpContext &ctx, + const std::vector &inputs, + const std::vector &req, + const std::vector &outputs) { + using namespace mxnet_op; + const LambUpdatePhaseOneParam& param = nnvm::get(attrs.parsed); + Stream* s = ctx.get_stream(); + MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, { + float beta1_t = std::pow(param.beta1, param.t); + float beta2_t = std::pow(param.beta2, param.t); + Tensor weight = inputs[0].FlatTo2D(s); + Tensor grad = inputs[1].FlatTo2D(s); + Tensor mean = inputs[2].FlatTo2D(s); + Tensor var = inputs[3].FlatTo2D(s); + Tensor weight32 = inputs[4].FlatTo2D(s); + Tensor out = outputs[0].FlatTo2D(s); + + Kernel::Launch(s, weight.shape_.Size(), + out.dptr_, mean.dptr_, var.dptr_, weight.dptr_, grad.dptr_, weight32.dptr_, + param.clip_gradient, param.rescale_grad, beta1_t, param.beta1, beta2_t, param.beta2, + param.wd, param.epsilon, param.t, param.bias_correction, req[0]); + }); +} + +inline bool MPLambUpdatePhaseTwoShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { + CHECK_EQ(in_attrs->size(), 5U); + CHECK_EQ(out_attrs->size(), 1U); + + mxnet::TShape expected_out(in_attrs->at(0).ndim(), -1); + + mxnet::TShape& weight_shape = in_attrs->at(0); + mxnet::TShape& g_shape = in_attrs->at(1); + mxnet::TShape& weight32_shape = in_attrs->at(4); + CHECK_EQ(weight_shape.ndim(), g_shape.ndim()) + << "total no. of dimensions for weights and g must match"; + CHECK_EQ(weight_shape.ndim(), weight32_shape.ndim()) + << "total no. of dimensions for weights and g must match"; + for (int i=0; i < weight_shape.ndim(); ++i) { + CHECK_EQ(weight_shape[i], g_shape[i]) + << "weight and g dimension size mismatch at " << i << "-th index"; + CHECK_EQ(weight_shape[i], weight32_shape[i]) + << "weight and g dimension size mismatch at " << i << "-th index"; + } + mxnet::TShape& r1_shape = in_attrs->at(2); + mxnet::TShape& r2_shape = in_attrs->at(3); + CHECK_EQ(r1_shape[0], 1U) << "r1 shape incorrect"; + CHECK_EQ(r2_shape[0], 1U) << "r2 shape incorrect"; + for (int i=0; i < expected_out.ndim(); ++i) { + expected_out[i] = weight_shape[i]; + } + + SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); + return shape_is_known(expected_out); +} + +struct MPLambUpdatePhaseTwoKernel { + template + MSHADOW_XINLINE static void Map(int i, DType* out_data, + const DType* weight_data, const float* g, + const float* r1, const float* r2, const float* weight32_data, + float lr, const float lower_bound, + const float upper_bound, const OpReqType req) { + using namespace mshadow_op; + + float new_r1 = r1[0]; + if (lower_bound >= 0) { + new_r1 = maximum::Map(new_r1, lower_bound); + } + if (upper_bound >= 0) { + new_r1 = minimum::Map(new_r1, upper_bound); + } + if (new_r1 == 0.0f || r2[0] == 0.0f) { + lr = lr * 1.0f; + } else { + lr = lr * new_r1 / r2[0]; + } + + KERNEL_ASSIGN(out_data[i], req, weight32_data[i] - lr * g[i]); + } +}; + +template +inline void MPLambUpdatePhaseTwo(const nnvm::NodeAttrs& attrs, + const OpContext &ctx, + const std::vector &inputs, + const std::vector &req, + const std::vector &outputs) { + using namespace mxnet_op; + const LambUpdatePhaseTwoParam& param = nnvm::get(attrs.parsed); + Stream* s = ctx.get_stream(); + MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, { + Tensor weight 
= inputs[0].FlatTo2D(s); + Tensor g = inputs[1].FlatTo2D(s); + Tensor r1 = inputs[2].FlatTo2D(s); + Tensor r2 = inputs[3].FlatTo2D(s); + Tensor weight32 = inputs[4].FlatTo2D(s); + Tensor out = outputs[0].FlatTo2D(s); + + Kernel::Launch(s, weight.shape_.Size(), + out.dptr_, weight.dptr_, g.dptr_, r1.dptr_, r2.dptr_, weight32.dptr_, + param.lr, param.lower_bound, + param.upper_bound, req[0]); + }); +} + // This RMSProp code follows the version in // http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) // by Alex Graves, 2013. @@ -2493,5 +2651,4 @@ inline void AdagradUpdateEx(const nnvm::NodeAttrs& attrs, } // namespace op } // namespace mxnet - #endif // MXNET_OPERATOR_OPTIMIZER_OP_INL_H_ diff --git a/src/operator/optimizer_op.cc b/src/operator/optimizer_op.cc index 9cf32778b15c..93e1267cc8c7 100644 --- a/src/operator/optimizer_op.cc +++ b/src/operator/optimizer_op.cc @@ -947,7 +947,7 @@ Link to paper: https://arxiv.org/pdf/1904.00962.pdf var_hat = var / (1 - beta2^t); g = mean_hat / (var_hat^(1/2) + epsilon) + wd * weight; else - g = mean / (var_data^(1/2) + epsilon) + wd * weight_data[i]; + g = mean / (var_data^(1/2) + epsilon) + wd * weight; \end{gather*} )code" ADD_FILELINE) @@ -1002,5 +1002,93 @@ Link to paper: https://arxiv.org/pdf/1904.00962.pdf .add_argument("r2", "NDArray-or-Symbol", "r2") .add_arguments(LambUpdatePhaseTwoParam::__FIELDS__()); +NNVM_REGISTER_OP(mp_lamb_update_phase1) +.describe(R"code(Mixed Precision version of Phase I of lamb update +it performs the following operations and returns g:. + + Link to paper: https://arxiv.org/pdf/1904.00962.pdf + + .. math:: + \begin{gather*} + grad32 = grad(float16) * rescale_grad + if (grad < -clip_gradient) + then + grad = -clip_gradient + if (grad > clip_gradient) + then + grad = clip_gradient + + mean = beta1 * mean + (1 - beta1) * grad; + variance = beta2 * variance + (1. - beta2) * grad ^ 2; + + if (bias_correction) + then + mean_hat = mean / (1. - beta1^t); + var_hat = var / (1 - beta2^t); + g = mean_hat / (var_hat^(1/2) + epsilon) + wd * weight32; + else + g = mean / (var_data^(1/2) + epsilon) + wd * weight32; + \end{gather*} + + )code" ADD_FILELINE) +.set_num_inputs(5) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", ElemwiseShape<5, 1>) +.set_attr("FInferType", MPLambPhaseOneType<2, 1, 5>) +.set_attr("FCompute", MPLambUpdatePhaseOne) +.set_attr("FMutateInputs", + [](const nnvm::NodeAttrs& attrs) { + return std::vector{2, 3}; + }) +.add_argument("weight", "NDArray-or-Symbol", "Weight") +.add_argument("grad", "NDArray-or-Symbol", "Gradient") +.add_argument("mean", "NDArray-or-Symbol", "Moving mean") +.add_argument("var", "NDArray-or-Symbol", "Moving variance") +.add_argument("weight32", "NDArray-or-Symbol", "Weight32") +.add_arguments(LambUpdatePhaseOneParam::__FIELDS__()); + +NNVM_REGISTER_OP(mp_lamb_update_phase2) +.describe(R"code(Mixed Precision version Phase II of lamb update +it performs the following operations and updates grad. + + Link to paper: https://arxiv.org/pdf/1904.00962.pdf + + .. 
math:: + \begin{gather*} + if (lower_bound >= 0) + then + r1 = max(r1, lower_bound) + if (upper_bound >= 0) + then + r1 = max(r1, upper_bound) + + if (r1 == 0 or r2 == 0) + then + lr = lr + else + lr = lr * (r1/r2) + weight32 = weight32 - lr * g + weight(float16) = weight32 + \end{gather*} + + )code" ADD_FILELINE) +.set_num_inputs(5) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", MPLambUpdatePhaseTwoShape) +.set_attr("FInferType", MP_InferType<1, 1, 5>) +.set_attr("FCompute", MPLambUpdatePhaseTwo) +.set_attr("FMutateInputs", + [](const nnvm::NodeAttrs& attrs) { + return std::vector{4}; + }) +.add_argument("weight", "NDArray-or-Symbol", "Weight") +.add_argument("g", "NDArray-or-Symbol", "Output of mp_lamb_update_phase 1") +.add_argument("r1", "NDArray-or-Symbol", "r1") +.add_argument("r2", "NDArray-or-Symbol", "r2") +.add_argument("weight32", "NDArray-or-Symbol", "Weight32") +.add_arguments(LambUpdatePhaseTwoParam::__FIELDS__()); + } // namespace op } // namespace mxnet diff --git a/src/operator/optimizer_op.cu b/src/operator/optimizer_op.cu index a602b649b63d..6920cb06e482 100644 --- a/src/operator/optimizer_op.cu +++ b/src/operator/optimizer_op.cu @@ -283,6 +283,11 @@ NNVM_REGISTER_OP(lamb_update_phase1) NNVM_REGISTER_OP(lamb_update_phase2) .set_attr("FCompute", LambUpdatePhaseTwo); +NNVM_REGISTER_OP(mp_lamb_update_phase1) +.set_attr("FCompute", MPLambUpdatePhaseOne); + +NNVM_REGISTER_OP(mp_lamb_update_phase2) +.set_attr("FCompute", MPLambUpdatePhaseTwo); } // namespace op } // namespace mxnet diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index efa55d2c1cde..114fab770efd 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -422,6 +422,7 @@ def test_preloaded_multi_sgd(): shapes = [np.random.randint(1, maxdim + 1, size=maxndim) for i in range(nparam)] check_preloaded_multi_sgd(dtype, shapes, momentum, use_master_weights) + @with_seed() def test_batchnorm_with_type(): ctx_list_v1_2D = [ diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py index 4dbf25128c5c..811923000411 100644 --- a/tests/python/unittest/test_optimizer.py +++ b/tests/python/unittest/test_optimizer.py @@ -443,20 +443,20 @@ def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-6, def create_state(self, index, weight): stype = weight.stype - return (mx.nd.zeros(weight.shape, weight.context, dtype=weight.dtype, stype=stype), - mx.nd.zeros(weight.shape, weight.context, dtype=weight.dtype, stype=stype)) + return (mx.nd.zeros(weight.shape, weight.context, dtype=np.float32, stype=stype), + mx.nd.zeros(weight.shape, weight.context, dtype=np.float32, stype=stype)) + def update(self, index, weight, grad, state): self._update_count(index) lr = self._get_lr(index) wd = self._get_wd(index) t = self._index_update_count[index] - + mean, var = state grad *= self.rescale_grad if self.clip_gradient is not None: grad = mx.nd.clip(grad, -self.clip_gradient, self.clip_gradient) - mean, var = state mean[:] = self.beta1 * mean + (1. - self.beta1) * grad var[:] = self.beta2 * var + (1. - self.beta2) * mx.nd.square(grad) @@ -472,6 +472,7 @@ def update(self, index, weight, grad, state): var_hat = var / (1. - mx.nd.power(self.beta2, t)) g = mean_hat / (mx.nd.sqrt(var_hat) + self.epsilon) + wd * weight + r2 = g.norm() # calculate lamb_trust_ratio r = 1. if r1 == 0. or r2 == 0. 
else r1 / r2 @@ -479,8 +480,6 @@ def update(self, index, weight, grad, state): # update weight weight[:] -= lr * g - def update_multi_precision(self, index, weight, grad, state): - self.update(index, weight, grad, state) @with_seed() def test_lamb(): @@ -495,7 +494,10 @@ def test_lamb(): ub_options = [{}, {'upper_bound': None}, {'upper_bound': 10}] for params in itertools.product(cg_options, rg_options, wd_options, bc_options, lb_options, ub_options): kwarg = {k: v for param in params for k, v in param.items()} + kwarg['multi_precision'] = False compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, np.float32) + kwarg['multi_precision'] = True + compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, np.float16, rtol=1e-3, atol=1e-3) #SGLD From 59535dbe7e0a135ad0c9378940d4bb3442df4ace Mon Sep 17 00:00:00 2001 From: Minghao Liu <40382964+Tommliu@users.noreply.github.com> Date: Tue, 10 Dec 2019 09:52:39 +0800 Subject: [PATCH 19/62] Op_Diagonal [Numpy] (#16989) * numpy diagonal * diagonal fix --- python/mxnet/_numpy_op_doc.py | 54 ++++- python/mxnet/numpy_dispatch_protocol.py | 1 + src/operator/numpy/np_matrix_op-inl.h | 218 ++++++++++++++++++ src/operator/numpy/np_matrix_op.cc | 23 ++ src/operator/numpy/np_matrix_op.cu | 6 + .../unittest/test_numpy_interoperability.py | 18 ++ tests/python/unittest/test_numpy_op.py | 79 +++++++ 7 files changed, 398 insertions(+), 1 deletion(-) diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py index 7679d21f0e59..0d0e3b64491b 100644 --- a/python/mxnet/_numpy_op_doc.py +++ b/python/mxnet/_numpy_op_doc.py @@ -1053,6 +1053,58 @@ def _np_diag(array, k=0): pass +def _np_diagonal(a, offset=0, axis1=0, axis2=1): + """ + If a is 2-D, returns the diagonal of a with the given offset, i.e., the collection of elements of + the form a[i, i+offset]. If a has more than two dimensions, then the axes specified by axis1 and + axis2 are used to determine the 2-D sub-array whose diagonal is returned. The shape of the + resulting array can be determined by removing axis1 and axis2 and appending an index to the + right equal to the size of the resulting diagonals. + + Parameters + ---------- + a : Symbol + Input data from which diagonal are taken. + offset: int, Optional + Offset of the diagonal from the main diagonal + axis1: int, Optional + Axis to be used as the first axis of the 2-D sub-arrays + axis2: int, Optional + Axis to be used as the second axis of the 2-D sub-arrays + + Returns + ------- + out : Symbol + Output result + + Raises + ------- + ValueError: If the dimension of a is less than 2. + + Examples + -------- + >>> a = np.arange(4).reshape(2,2) + >>> a + array([[0, 1], + [2, 3]]) + >>> np.diagonal(a) + array([0, 3]) + >>> np.diagonal(a, 1) + array([1]) + + >>> a = np.arange(8).reshape(2,2,2) + >>>a + array([[[0, 1], + [2, 3]], + [[4, 5], + [6, 7]]]) + >>> np.diagonal(a, 0, 0, 1) + array([[0, 6], + [1, 7]]) + """ + pass + + def _np_diagflat(array, k=0): """ Create a two-dimensional array with the flattened input as a diagonal. 
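The offset/axis semantics spelled out in the `_np_diagonal` docstring above reduce to a simple output-shape rule: remove axis1 and axis2 from the input shape and append the diagonal length, which is min(h, w) after shrinking w by a positive offset or h by a negative one (the same rule the C++ NumpyDiagonalShapeImpl below implements). The sketch that follows checks that rule against stock NumPy purely as an illustration; the helper name diagonal_out_shape is invented for this sketch and is not part of the patch.

import numpy as onp

def diagonal_out_shape(ishape, offset=0, axis1=0, axis2=1):
    # Diagonal length in the (axis1, axis2) plane, shortened by the offset.
    h, w = ishape[axis1], ishape[axis2]
    if offset > 0:
        w -= offset
    elif offset < 0:
        h += offset
    s = max(min(h, w), 0)
    # Drop axis1/axis2 and append the diagonal length as the trailing axis.
    rest = [d for i, d in enumerate(ishape) if i not in (axis1, axis2)]
    return tuple(rest) + (s,)

a = onp.arange(2 * 4 * 4).reshape(2, 4, 4)
assert onp.diagonal(a, offset=1, axis1=1, axis2=2).shape == diagonal_out_shape(a.shape, 1, 1, 2)
assert onp.diagonal(a, offset=-2, axis1=1, axis2=2).shape == diagonal_out_shape(a.shape, -2, 1, 2)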
@@ -1086,4 +1138,4 @@ def _np_diagflat(array, k=0): [0, 0, 2], [0, 0, 0]]) """ - pass \ No newline at end of file + pass diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index c8b11d85b000..1a238ec2c7c7 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -95,6 +95,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'copy', 'cumsum', 'diag', + 'diagonal', 'diagflat', 'dot', 'expand_dims', diff --git a/src/operator/numpy/np_matrix_op-inl.h b/src/operator/numpy/np_matrix_op-inl.h index 37af908042c9..3abf5627f20f 100644 --- a/src/operator/numpy/np_matrix_op-inl.h +++ b/src/operator/numpy/np_matrix_op-inl.h @@ -1156,6 +1156,224 @@ void NumpyDiagOpBackward(const nnvm::NodeAttrs &attrs, in_data.Size(), param.k, s, req[0]); } +struct NumpyDiagonalParam : public dmlc::Parameter { + int offset; + int32_t axis1; + int32_t axis2; + DMLC_DECLARE_PARAMETER(NumpyDiagonalParam) { + DMLC_DECLARE_FIELD(offset) + .set_default(0) + .describe("Diagonal in question. The default is 0. " + "Use k>0 for diagonals above the main diagonal, " + "and k<0 for diagonals below the main diagonal. " + "If input has shape (S0 S1) k must be between -S0 and S1"); + DMLC_DECLARE_FIELD(axis1) + .set_default(0) + .describe("The first axis of the sub-arrays of interest. " + "Ignored when the input is a 1-D array."); + DMLC_DECLARE_FIELD(axis2) + .set_default(1) + .describe("The second axis of the sub-arrays of interest. " + "Ignored when the input is a 1-D array."); + } +}; + +inline mxnet::TShape NumpyDiagonalShapeImpl(const mxnet::TShape& ishape, const int k, + const int32_t axis1, const int32_t axis2) { + int32_t x1 = CheckAxis(axis1, ishape.ndim()); + int32_t x2 = CheckAxis(axis2, ishape.ndim()); + + CHECK_NE(x1, x2) << "axis1 and axis2 cannot refer to the same axis " << x1; + + auto h = ishape[x1]; + auto w = ishape[x2]; + if (k > 0) { + w -= k; + } else if (k < 0) { + h += k; + } + auto s = std::min(h, w); + if (s < 0) s = 0; + if (x1 > x2) std::swap(x1, x2); + + int32_t n_dim = ishape.ndim() - 1; + mxnet::TShape oshape(n_dim, -1); + + // remove axis1 and axis2 and append the new axis to the end + uint32_t idx = 0; + for (int i = 0; i <= n_dim; ++i) { + if (i != x1 && i != x2) { + oshape[idx++] = ishape[i]; + } + } + oshape[n_dim - 1] = s; + return oshape; +} + +inline bool NumpyDiagonalOpShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + const mxnet::TShape& ishape = (*in_attrs)[0]; + CHECK_GE(ishape.ndim(), 2) << "Input array should be at least 2d"; + if (!mxnet::ndim_is_known(ishape)) { + return false; + } + + const NumpyDiagonalParam& param = nnvm::get(attrs.parsed); + mxnet::TShape oshape = NumpyDiagonalShapeImpl(ishape, param.offset, param.axis1, + param.axis2); + if (shape_is_none(oshape)) { + LOG(FATAL) << "Diagonal does not exist."; + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); + return shape_is_known(out_attrs->at(0)); +} + +inline bool NumpyDiagonalOpType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]); + TYPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]); + return (*out_attrs)[0] != -1; +} + +template +struct diag_n { + template + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* a, + mshadow::Shape oshape, + 
mshadow::Shape ishape, + index_t stride, index_t offset, + index_t base) { + using namespace mxnet_op; + index_t idx = i / base; + index_t j = ravel(unravel(idx, oshape), ishape) + offset + stride * (i - idx * base); + if (back) { + KERNEL_ASSIGN(out[j], req, a[i]); + } else { + KERNEL_ASSIGN(out[i], req, a[j]); + } + } +}; + +template +void NumpyDiagonalOpImpl(const TBlob& in_data, + const TBlob& out_data, + const mxnet::TShape& ishape, + const mxnet::TShape& oshape, + index_t dsize, + const NumpyDiagonalParam& param, + mxnet_op::Stream *s, + const std::vector& req) { + using namespace mxnet_op; + using namespace mshadow; + uint32_t x1 = CheckAxis(param.axis1, ishape.ndim()); + uint32_t x2 = CheckAxis(param.axis2, ishape.ndim()); + uint32_t idim = ishape.ndim(), odim = oshape.ndim(); + uint32_t minx = x1, maxx = x2; + if (minx > maxx) std::swap(minx, maxx); + + index_t oleading = 1, + obody = 1, + otrailing = 1; + for (uint32_t i = 0; i < minx; ++i) { + oleading *= ishape[i]; + } + for (uint32_t i = minx + 1; i < maxx; ++i) { + obody *= ishape[i]; + } + for (uint32_t i = maxx + 1; i < idim; ++i) { + otrailing *= ishape[i]; + } + + index_t ileading = oleading, + ibody = obody * ishape[minx], + itrailing = otrailing * ishape[maxx]; + + index_t stride1 = itrailing * obody, + stride2 = otrailing; + // stride1 + stride2 is the stride for iterating over the diagonal + + if (x1 == maxx) std::swap(stride1, stride2); + index_t offset; + int k = param.offset; + if (k > 0) { + offset = stride2 * k; + } else if (k < 0) { + offset = stride1 * -k; + } else { + offset = 0; + } // the extra index offset introduced by k + + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + if (back && req[0] != kAddTo && req[0] != kNullOp) { + out_data.FlatTo1D(s) = 0; + } + if (ileading == 1) { + Kernel, xpu>::Launch(s, dsize, out_data.dptr(), + in_data.dptr(), Shape2(obody, otrailing), Shape2(ibody, itrailing), + stride1 + stride2, offset, oshape[odim - 1]); + } else { + Kernel, xpu>::Launch(s, dsize, out_data.dptr(), + in_data.dptr(), Shape3(oleading, obody, otrailing), + Shape3(ileading, ibody, itrailing), stride1 + stride2, offset, oshape[odim - 1]); + } + }); + }); +} + +template +void NumpyDiagonalOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + using namespace mshadow; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + CHECK_EQ(req[0], kWriteTo); + Stream *s = ctx.get_stream(); + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; + const NumpyDiagonalParam& param = nnvm::get(attrs.parsed); + + NumpyDiagonalOpImpl(in_data, out_data, ishape, oshape, + out_data.Size(), param, s, req); +} + +template +void NumpyDiagonalOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + using namespace mshadow; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + Stream *s = ctx.get_stream(); + + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; + const NumpyDiagonalParam& param = nnvm::get(attrs.parsed); + + NumpyDiagonalOpImpl(in_data, 
out_data, oshape, ishape, + in_data.Size(), param, s, req); +} + struct NumpyDiagflatParam : public dmlc::Parameter { int k; DMLC_DECLARE_PARAMETER(NumpyDiagflatParam) { diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc index 05b7e948e8a0..0a85887e2d4f 100644 --- a/src/operator/numpy/np_matrix_op.cc +++ b/src/operator/numpy/np_matrix_op.cc @@ -38,6 +38,7 @@ DMLC_REGISTER_PARAMETER(NumpyRot90Param); DMLC_REGISTER_PARAMETER(NumpyReshapeParam); DMLC_REGISTER_PARAMETER(NumpyXReshapeParam); DMLC_REGISTER_PARAMETER(NumpyDiagParam); +DMLC_REGISTER_PARAMETER(NumpyDiagonalParam); DMLC_REGISTER_PARAMETER(NumpyDiagflatParam); @@ -1332,6 +1333,28 @@ NNVM_REGISTER_OP(_backward_np_diag) .set_attr("TIsBackward", true) .set_attr("FCompute", NumpyDiagOpBackward); +NNVM_REGISTER_OP(_np_diagonal) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FInferShape", NumpyDiagonalOpShape) +.set_attr("FInferType", NumpyDiagonalOpType) +.set_attr("FCompute", NumpyDiagonalOpForward) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_diagonal"}) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(NumpyDiagonalParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_np_diagonal) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("TIsBackward", true) +.set_attr("FCompute", NumpyDiagonalOpBackward); + NNVM_REGISTER_OP(_np_diagflat) .set_attr_parser(ParamParser) .set_num_inputs(1) diff --git a/src/operator/numpy/np_matrix_op.cu b/src/operator/numpy/np_matrix_op.cu index 6f292ab95802..10ff0eac2c29 100644 --- a/src/operator/numpy/np_matrix_op.cu +++ b/src/operator/numpy/np_matrix_op.cu @@ -124,6 +124,12 @@ NNVM_REGISTER_OP(_np_diag) NNVM_REGISTER_OP(_backward_np_diag) .set_attr("FCompute", NumpyDiagOpBackward); +NNVM_REGISTER_OP(_np_diagonal) +.set_attr("FCompute", NumpyDiagonalOpForward); + +NNVM_REGISTER_OP(_backward_np_diagonal) +.set_attr("FCompute", NumpyDiagonalOpBackward); + NNVM_REGISTER_OP(_np_diagflat) .set_attr("FCompute", NumpyDiagflatOpForward); diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index d1047c80b7b4..4c4e8b90eca9 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -97,6 +97,23 @@ def get_mat(n): OpArgMngr.add_workload('diag', vals_f, k=-2) +def _add_workload_diagonal(): + A = np.arange(12).reshape((3, 4)) + B = np.arange(8).reshape((2,2,2)) + + OpArgMngr.add_workload('diagonal', A) + OpArgMngr.add_workload('diagonal', A, offset=0) + OpArgMngr.add_workload('diagonal', A, offset=-1) + OpArgMngr.add_workload('diagonal', A, offset=1) + OpArgMngr.add_workload('diagonal', B, offset=0) + OpArgMngr.add_workload('diagonal', B, offset=1) + OpArgMngr.add_workload('diagonal', B, offset=-1) + OpArgMngr.add_workload('diagonal', B, 0, 1, 2) + OpArgMngr.add_workload('diagonal', B, 0, 0, 1) + OpArgMngr.add_workload('diagonal', B, offset=1, axis1=0, axis2=2) + OpArgMngr.add_workload('diagonal', B, 0, 2, 1) + + def _add_workload_concatenate(array_pool): OpArgMngr.add_workload('concatenate', [array_pool['4x1'], array_pool['4x1']]) OpArgMngr.add_workload('concatenate', [array_pool['4x1'], array_pool['4x1']], axis=1) @@ -1364,6 +1381,7 @@ def _prepare_workloads(): _add_workload_ravel() _add_workload_unravel_index() _add_workload_diag() + 
_add_workload_diagonal() _add_workload_diagflat() _add_workload_dot() _add_workload_expand_dims() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 20482e8241da..078e37fc4146 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -5158,6 +5158,85 @@ def hybrid_forward(self, F, a): assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + +@with_seed() +@use_np +def test_np_diagonal(): + class TestDiagonal(HybridBlock): + def __init__(self, k=0, axis1=0, axis2=1): + super(TestDiagonal, self).__init__() + self._k = k + self._axis1 = axis1 + self._axis2 = axis2 + + def hybrid_forward(self, F, a): + return F.np.diagonal(a, self._k, self._axis1, self._axis2) + + configs = [ + [(1, 5), (0, 1)], [(2, 2),(0, 1)], + [(2, 5), (0, 1)], [(5, 5), (0, 1)], + [(2, 2, 2), (0, 1)], [(2, 4, 4), (0, 2)], + [(3, 3, 3), (1, 2)], [(4, 8, 8), (1, 2)], + [(4, 4, 4, 4), (1, 2)], [(5, 6, 7, 8), (2, 3)], + [(6, 7, 8, 9, 10), (3, 4)] + ] + dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64] + offsets = [0, 2, 4, 6] + combination = itertools.product([False, True], configs, dtypes, offsets) + for hybridize, config, dtype, k in combination: + rtol = 1e-2 if dtype == np.float16 else 1e-3 + atol = 1e-4 if dtype == np.float16 else 1e-5 + shape = config[0] + axis = config[1] + axis1 = axis[0] + axis2 = axis[1] + x = np.random.uniform(-5.0, 5.0, size=shape).astype(dtype) + x.attach_grad() + test_diagonal = TestDiagonal(k, axis1, axis2) + if hybridize: + test_diagonal.hybridize() + np_out = _np.diagonal(x.asnumpy(), offset=k, axis1=axis[0], axis2=axis[1]) + with mx.autograd.record(): + mx_out = test_diagonal(x) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + + # check backward function + mx_out.backward() + size_out = np_out.size + shape_out = np_out.shape + ndim = len(shape) + h = shape[axis1] + w = shape[axis2] + np_backward_slice = _np.zeros((h, w)) + np_backward = _np.zeros(shape) + if k > 0: + w -= k + else: + h += k + s = min(w, h) + if s > 0: + if k >= 0: + for i in range(s): + np_backward_slice[0+i][k+i] = 1 + else: + for i in range(s): + np_backward_slice[-k+i][0+i] = 1 + ileading = int(size_out/s) + array_temp = _np.array([np_backward_slice for i in range(ileading)]) + array_temp = array_temp.reshape(shape_out[:-1] + (shape[axis1], shape[axis2])) + axis_idx = [i for i in range(ndim-2)] + axis_idx[axis1:axis1] = [ndim - 2] + axis_idx[axis2:axis2] = [ndim - 1] + np_backward = _np.transpose(array_temp, tuple(axis_idx)) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=rtol, atol=atol) + + # Test imperative once again + mx_out = np.diagonal(x, k, axis[0], axis[1]) + np_out = _np.diagonal(x.asnumpy(), offset=k, axis1=axis[0], axis2=axis[1]) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + + @with_seed() @use_np def test_np_nan_to_num(): From 8645b9a4a220940bdccba3aeee577eb645a86b34 Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 10 Dec 2019 02:15:55 +0000 Subject: [PATCH 20/62] Fix CUDNN detection for CMake build (#17019) --- CMakeLists.txt | 10 ++++++---- cmake/BuildTVM.cmake | 12 ------------ cmake/FirstClassLangCuda.cmake | 28 ---------------------------- cmake/Modules/FindCUDNN.cmake | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 44 deletions(-) create mode 100644 cmake/Modules/FindCUDNN.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 
aec5f21131d1..8a8a3d5c512e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -502,13 +502,15 @@ add_subdirectory(${GTEST_ROOT}) find_package(GTest REQUIRED) # cudnn detection -if(USE_CUDNN AND USE_CUDA) - detect_cuDNN() - if(HAVE_CUDNN) +if(USE_CUDNN) + find_package(CUDNN) + if(CUDNN_FOUND) add_definitions(-DUSE_CUDNN) include_directories(SYSTEM ${CUDNN_INCLUDE}) list(APPEND mxnet_LINKER_LIBS ${CUDNN_LIBRARY}) - add_definitions(-DMSHADOW_USE_CUDNN=1) + add_definitions(-DMSHADOW_USE_CUDNN=1) + else() + set(USE_CUDNN OFF) endif() endif() diff --git a/cmake/BuildTVM.cmake b/cmake/BuildTVM.cmake index 7fcf706ffb7c..4bb749552f01 100644 --- a/cmake/BuildTVM.cmake +++ b/cmake/BuildTVM.cmake @@ -98,18 +98,6 @@ set(USE_RANDOM OFF) # Whether use NNPack set(USE_NNPACK OFF) -# Whether use CuDNN -if(USE_CUDNN AND USE_CUDA) - detect_cuDNN() - if(HAVE_CUDNN) - set(USE_CUDNN ON) - else() - set(USE_CUDNN OFF) - endif() -else() - set(USE_CUDNN OFF) -endif() - # Whether use cuBLAS set(USE_CUBLAS OFF) diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake index 8d79c2b63ad9..0eca1aff78d4 100644 --- a/cmake/FirstClassLangCuda.cmake +++ b/cmake/FirstClassLangCuda.cmake @@ -23,34 +23,6 @@ if(USE_CXX14_IF_AVAILABLE) check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14) endif() -################################################################################################ -# Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution. -# That's why not FindcuDNN.cmake file, but just the macro -# Usage: -# detect_cuDNN() -function(detect_cuDNN) - set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder") - - find_path(CUDNN_INCLUDE cudnn.h - PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} - DOC "Path to cuDNN include directory." ) - - - find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a - PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} - PATH_SUFFIXES lib lib/x64 - DOC "Path to cuDNN library.") - - if(CUDNN_INCLUDE AND CUDNN_LIBRARY) - set(HAVE_CUDNN TRUE PARENT_SCOPE) - set(CUDNN_FOUND TRUE PARENT_SCOPE) - - mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT) - message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})") - endif() -endfunction() - - ################################################################################################ # A function for automatic detection of GPUs installed (if autodetection is enabled) diff --git a/cmake/Modules/FindCUDNN.cmake b/cmake/Modules/FindCUDNN.cmake new file mode 100644 index 000000000000..a8fda5c87d9a --- /dev/null +++ b/cmake/Modules/FindCUDNN.cmake @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +include(FindPackageHandleStandardArgs) + +set(CUDNN_ROOT "/usr/local/cuda/include" CACHE PATH "cuDNN root folder") + +find_path(CUDNN_INCLUDE cudnn.h + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} + DOC "Path to cuDNN include directory." ) + +find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} + PATH_SUFFIXES lib lib/x64 cuda/lib cuda/lib64 lib/x64 + DOC "Path to cuDNN library.") + +find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY CUDNN_INCLUDE) + +mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE CUDNN_LIBRARY) From bf3b4dcc3f12aa54246de32af392787647b5f839 Mon Sep 17 00:00:00 2001 From: Zach Kimberg Date: Mon, 9 Dec 2019 20:23:47 -0600 Subject: [PATCH 21/62] Merge make/pip and make/maven configurations (#17027) --- ci/docker/runtime_functions.sh | 2 +- ci/publish/python/build.sh | 2 +- ci/publish/scala/build.sh | 2 +- make/maven/maven_darwin_mkl.mk | 184 ----------------- make/maven/maven_linux_cu90mkl.mk | 187 ------------------ make/maven/maven_linux_cu92mkl.mk | 187 ------------------ make/maven/maven_linux_mkl.mk | 183 ----------------- .../darwin_cpu.mk} | 0 .../darwin_mkl.mk} | 0 .../linux_cpu.mk} | 0 .../linux_cu100.mk} | 0 .../linux_cu100mkl.mk} | 0 .../linux_cu101.mk} | 0 .../linux_cu101mkl.mk} | 0 .../linux_cu75.mk} | 0 .../linux_cu75mkl.mk} | 0 .../linux_cu80.mk} | 0 .../linux_cu80mkl.mk} | 0 .../linux_cu90.mk} | 0 .../linux_cu90mkl.mk} | 0 .../linux_cu91.mk} | 0 .../linux_cu91mkl.mk} | 0 .../linux_cu92.mk} | 0 .../linux_cu92mkl.mk} | 0 .../linux_mkl.mk} | 0 tools/dependencies/README.md | 6 +- tools/staticbuild/README.md | 7 +- tools/staticbuild/build.sh | 3 +- tools/staticbuild/build_lib.sh | 2 +- 29 files changed, 11 insertions(+), 754 deletions(-) delete mode 100644 make/maven/maven_darwin_mkl.mk delete mode 100644 make/maven/maven_linux_cu90mkl.mk delete mode 100644 make/maven/maven_linux_cu92mkl.mk delete mode 100644 make/maven/maven_linux_mkl.mk rename make/{pip/pip_darwin_cpu.mk => staticbuild/darwin_cpu.mk} (100%) rename make/{pip/pip_darwin_mkl.mk => staticbuild/darwin_mkl.mk} (100%) rename make/{pip/pip_linux_cpu.mk => staticbuild/linux_cpu.mk} (100%) rename make/{pip/pip_linux_cu100.mk => staticbuild/linux_cu100.mk} (100%) rename make/{pip/pip_linux_cu100mkl.mk => staticbuild/linux_cu100mkl.mk} (100%) rename make/{pip/pip_linux_cu101.mk => staticbuild/linux_cu101.mk} (100%) rename make/{pip/pip_linux_cu101mkl.mk => staticbuild/linux_cu101mkl.mk} (100%) rename make/{pip/pip_linux_cu75.mk => staticbuild/linux_cu75.mk} (100%) rename make/{pip/pip_linux_cu75mkl.mk => staticbuild/linux_cu75mkl.mk} (100%) rename make/{pip/pip_linux_cu80.mk => staticbuild/linux_cu80.mk} (100%) rename make/{pip/pip_linux_cu80mkl.mk => staticbuild/linux_cu80mkl.mk} (100%) rename make/{pip/pip_linux_cu90.mk => staticbuild/linux_cu90.mk} (100%) rename make/{pip/pip_linux_cu90mkl.mk => staticbuild/linux_cu90mkl.mk} (100%) rename make/{pip/pip_linux_cu91.mk => staticbuild/linux_cu91.mk} (100%) rename make/{pip/pip_linux_cu91mkl.mk => staticbuild/linux_cu91mkl.mk} (100%) rename make/{pip/pip_linux_cu92.mk => staticbuild/linux_cu92.mk} (100%) rename make/{pip/pip_linux_cu92mkl.mk => staticbuild/linux_cu92mkl.mk} (100%) rename make/{pip/pip_linux_mkl.mk => staticbuild/linux_mkl.mk} (100%) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 78d541e0ad5f..9f56a6708ea4 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1977,7 +1977,7 @@ 
build_static_libmxnet() { set -ex pushd . local mxnet_variant=${1:?"This function requires a python command as the first argument"} - source tools/staticbuild/build.sh ${mxnet_variant} pip + source tools/staticbuild/build.sh ${mxnet_variant} popd } diff --git a/ci/publish/python/build.sh b/ci/publish/python/build.sh index 61549896c0b7..976963c93eef 100755 --- a/ci/publish/python/build.sh +++ b/ci/publish/python/build.sh @@ -18,7 +18,7 @@ set -ex -source tools/staticbuild/build.sh $mxnet_variant pip +source tools/staticbuild/build.sh $mxnet_variant set -ex diff --git a/ci/publish/scala/build.sh b/ci/publish/scala/build.sh index 30ee20e732f0..77bb34178c5c 100755 --- a/ci/publish/scala/build.sh +++ b/ci/publish/scala/build.sh @@ -22,7 +22,7 @@ set -ex # MAVEN_PUBLISH_OS_TYPE: linux-x86_64-cpu|linux-x86_64-gpu|osx-x86_64-cpu # export MAVEN_PUBLISH_OS_TYPE=linux-x86_64-cpu -source tools/staticbuild/build.sh $mxnet_variant maven +source tools/staticbuild/build.sh $mxnet_variant set -ex diff --git a/make/maven/maven_darwin_mkl.mk b/make/maven/maven_darwin_mkl.mk deleted file mode 100644 index 9bf3fc46ce0b..000000000000 --- a/make/maven/maven_darwin_mkl.mk +++ /dev/null @@ -1,184 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------------------------- -# Template configuration for compiling mxnet for making maven package -#------------------------------------------------------------------------------- - -#--------------------- -# choice of compiler -#-------------------- - -export CC = gcc -export CXX = g++ -export NVCC = nvcc - -# whether compile with options for MXNet developer -DEV = 0 - -# whether compile with debug -DEBUG = 0 - - -# whether to turn on signal handler (e.g. 
segfault logger) -USE_SIGNAL_HANDLER = 1 - -# the additional link flags you want to add -ADD_LDFLAGS += -L$(DEPS_PATH)/lib -lpng -ltiff -lz -framework CoreFoundation -framework Security -Wl,-exported_symbols_list,$(CURDIR)/make/config/libmxnet.sym,-rpath,'$${ORIGIN}',-dead_strip - -# the additional compile flags you want to add -ADD_CFLAGS += -I$(DEPS_PATH)/include -ffunction-sections -fdata-sections - -#--------------------------------------------- -# matrix computation libraries for CPU/GPU -#--------------------------------------------- - -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -USE_BLAS=apple - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 1 -# Add OpenCV include path, in which the directory `opencv2` exists -USE_OPENCV_INC_PATH = NONE -# Add OpenCV shared library path, in which the shared library exists -USE_OPENCV_LIB_PATH = NONE - -# whether use CUDA during compile -USE_CUDA = 0 - -# add the path to CUDA library to link and compile flag -# if you have already add them to environment variable, leave it as NONE -# USE_CUDA_PATH = /usr/local/cuda -USE_CUDA_PATH = NONE - -# whether use CuDNN R3 library -USE_CUDNN = 0 - -# CUDA architecture setting: going with all of them. -# For CUDA < 6.0, comment the *_50 lines for compatibility. -# CUDA_ARCH := - -# whether use cuda runtime compiling for writing kernels in native language (i.e. Python) -ENABLE_CUDA_RTC = 0 - -# use openmp for parallelization -USE_OPENMP = 0 -USE_OPERATOR_TUNING = 1 -USE_LIBJPEG_TURBO = 1 - - -# MKL ML Library for Intel CPU/Xeon Phi -# Please refer to MKL_README.md for details - -# whether use MKL-DNN library -USE_MKLDNN = 1 - -# MKL ML Library folder, need to be root for /usr/local -# Change to User Home directory for standard user -# For USE_BLAS!=mkl only -MKLML_ROOT=/usr/local - -# whether use MKL2017 library -USE_MKL2017 = 0 - -# whether use MKL2017 experimental feature for high performance -# Prerequisite USE_MKL2017=1 -USE_MKL2017_EXPERIMENTAL = 0 - -# whether use NNPACK library -USE_NNPACK = 0 - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = $(DEPS_PATH)/lib - -# add path to intel library, you may need it for MKL, if you did not add the path -# to environment variable -USE_INTEL_PATH = NONE - -# If use MKL, choose static link automatically to allow python wrapper -ifeq ($(USE_BLAS), mkl) -USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE -endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -ARCH := $(shell uname -a) -ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64)) - USE_SSE=0 -else - USE_SSE=1 -endif - -#---------------------------- -# distributed computing -#---------------------------- - -# whether or not to enable multi-machine supporting -USE_DIST_KVSTORE = 1 - -# whether or not allow to read and write HDFS directly. If yes, then hadoop is -# required -USE_HDFS = 0 - -# path to libjvm.so. required if USE_HDFS=1 -LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server - -# whether or not allow to read and write AWS S3 directly. 
If yes, then -# libcurl4-openssl-dev is required, it can be installed on Ubuntu by -# sudo apt-get install -y libcurl4-openssl-dev -USE_S3 = 1 - -#---------------------------- -# additional operators -#---------------------------- - -# path to folders containing projects specific operators that you don't want to put in src/operators -EXTRA_OPERATORS = - - -#---------------------------- -# plugins -#---------------------------- - -# whether to use caffe integration. This requires installing caffe. -# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH -# CAFFE_PATH = $(HOME)/caffe -# MXNET_PLUGINS += plugin/caffe/caffe.mk - -# whether to use torch integration. This requires installing torch. -# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH -# TORCH_PATH = $(HOME)/torch -# MXNET_PLUGINS += plugin/torch/torch.mk - -# WARPCTC_PATH = $(HOME)/warp-ctc -# MXNET_PLUGINS += plugin/warpctc/warpctc.mk - -# whether to use sframe integration. This requires build sframe -# git@github.com:dato-code/SFrame.git -# SFRAME_PATH = $(HOME)/SFrame -# MXNET_PLUGINS += plugin/sframe/plugin.mk diff --git a/make/maven/maven_linux_cu90mkl.mk b/make/maven/maven_linux_cu90mkl.mk deleted file mode 100644 index e8caf73f186e..000000000000 --- a/make/maven/maven_linux_cu90mkl.mk +++ /dev/null @@ -1,187 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------------------------- -# Template configuration for compiling mxnet for making maven package -#------------------------------------------------------------------------------- - -#--------------------- -# choice of compiler -#-------------------- - -export CC = gcc -export CXX = g++ -export NVCC = nvcc - -# whether compile with options for MXNet developer -DEV = 0 - -# whether compile with debug -DEBUG = 0 - -# whether to turn on signal handler (e.g. 
segfault logger) -USE_SIGNAL_HANDLER = 1 - -# the additional link flags you want to add -ADD_LDFLAGS += -L$(DEPS_PATH)/lib $(DEPS_PATH)/lib/libculibos.a -lpng -ltiff -ljpeg -lz -ldl -lgfortran -Wl,--version-script=$(CURDIR)/make/config/libmxnet.ver,-rpath,'$${ORIGIN}',--gc-sections - -# the additional compile flags you want to add -ADD_CFLAGS += -I$(DEPS_PATH)/include -ffunction-sections -fdata-sections - -#--------------------------------------------- -# matrix computation libraries for CPU/GPU -#--------------------------------------------- - -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -USE_BLAS=openblas - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 1 -# Add OpenCV include path, in which the directory `opencv2` exists -USE_OPENCV_INC_PATH = NONE -# Add OpenCV shared library path, in which the shared library exists -USE_OPENCV_LIB_PATH = NONE - -# whether use CUDA during compile -USE_CUDA = 1 - -# add the path to CUDA library to link and compile flag -# if you have already add them to environment variable, leave it as NONE -# USE_CUDA_PATH = /usr/local/cuda -USE_CUDA_PATH = $(DEPS_PATH)/usr/local/cuda-9.0 - -# whether to use CuDNN library -USE_CUDNN = 1 - -# whether to use NCCL library -USE_NCCL = 1 - -# CUDA architecture setting: going with all of them. -# For CUDA < 6.0, comment the *_50 lines for compatibility. -# CUDA_ARCH := - -# whether use cuda runtime compiling for writing kernels in native language (i.e. Python) -USE_NVTX=1 -ENABLE_CUDA_RTC = 1 - -# use openmp for parallelization -USE_OPENMP = 1 -USE_OPERATOR_TUNING = 1 -USE_LIBJPEG_TURBO = 1 - -# whether use MKL-DNN library -USE_MKLDNN = 1 - - -# MKL ML Library for Intel CPU/Xeon Phi -# Please refer to MKL_README.md for details - -# MKL ML Library folder, need to be root for /usr/local -# Change to User Home directory for standard user -# For USE_BLAS!=mkl only -MKLML_ROOT=/usr/local - -# whether use MKL2017 library -USE_MKL2017 = 0 - -# whether use MKL2017 experimental feature for high performance -# Prerequisite USE_MKL2017=1 -USE_MKL2017_EXPERIMENTAL = 0 - -# whether use NNPACK library -USE_NNPACK = 0 - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = $(DEPS_PATH)/lib - -# add path to intel library, you may need it for MKL, if you did not add the path -# to environment variable -USE_INTEL_PATH = NONE - -# If use MKL, choose static link automatically to allow python wrapper -ifeq ($(USE_BLAS), mkl) -USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE -endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -ARCH := $(shell uname -a) -ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64)) - USE_SSE=0 -else - USE_SSE=1 -endif - -#---------------------------- -# distributed computing -#---------------------------- - -# whether or not to enable multi-machine supporting -USE_DIST_KVSTORE = 1 - -# whether or not allow to read and write HDFS directly. If yes, then hadoop is -# required -USE_HDFS = 0 - -# path to libjvm.so. required if USE_HDFS=1 -LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server - -# whether or not allow to read and write AWS S3 directly. 
If yes, then -# libcurl4-openssl-dev is required, it can be installed on Ubuntu by -# sudo apt-get install -y libcurl4-openssl-dev -USE_S3 = 1 - -#---------------------------- -# additional operators -#---------------------------- - -# path to folders containing projects specific operators that you don't want to put in src/operators -EXTRA_OPERATORS = - - -#---------------------------- -# plugins -#---------------------------- - -# whether to use caffe integration. This requires installing caffe. -# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH -# CAFFE_PATH = $(HOME)/caffe -# MXNET_PLUGINS += plugin/caffe/caffe.mk - -# whether to use torch integration. This requires installing torch. -# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH -# TORCH_PATH = $(HOME)/torch -# MXNET_PLUGINS += plugin/torch/torch.mk - -# WARPCTC_PATH = $(HOME)/warp-ctc -# MXNET_PLUGINS += plugin/warpctc/warpctc.mk - -# whether to use sframe integration. This requires build sframe -# git@github.com:dato-code/SFrame.git -# SFRAME_PATH = $(HOME)/SFrame -# MXNET_PLUGINS += plugin/sframe/plugin.mk diff --git a/make/maven/maven_linux_cu92mkl.mk b/make/maven/maven_linux_cu92mkl.mk deleted file mode 100644 index 930341e71cb1..000000000000 --- a/make/maven/maven_linux_cu92mkl.mk +++ /dev/null @@ -1,187 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------------------------- -# Template configuration for compiling mxnet for making maven package -#------------------------------------------------------------------------------- - -#--------------------- -# choice of compiler -#-------------------- - -export CC = gcc -export CXX = g++ -export NVCC = nvcc - -# whether compile with options for MXNet developer -DEV = 0 - -# whether compile with debug -DEBUG = 0 - -# whether to turn on signal handler (e.g. 
segfault logger) -USE_SIGNAL_HANDLER = 1 - -# the additional link flags you want to add -ADD_LDFLAGS += -L$(DEPS_PATH)/lib $(DEPS_PATH)/lib/libculibos.a -lpng -ltiff -ljpeg -lz -ldl -lgfortran -Wl,--version-script=$(CURDIR)/make/config/libmxnet.ver,-rpath,'$${ORIGIN}',--gc-sections - -# the additional compile flags you want to add -ADD_CFLAGS += -I$(DEPS_PATH)/include -ffunction-sections -fdata-sections - -#--------------------------------------------- -# matrix computation libraries for CPU/GPU -#--------------------------------------------- - -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -USE_BLAS=openblas - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 1 -# Add OpenCV include path, in which the directory `opencv2` exists -USE_OPENCV_INC_PATH = NONE -# Add OpenCV shared library path, in which the shared library exists -USE_OPENCV_LIB_PATH = NONE - -# whether use CUDA during compile -USE_CUDA = 1 - -# add the path to CUDA library to link and compile flag -# if you have already add them to environment variable, leave it as NONE -# USE_CUDA_PATH = /usr/local/cuda -USE_CUDA_PATH = $(DEPS_PATH)/usr/local/cuda-9.2 - -# whether to use CuDNN library -USE_CUDNN = 1 - -# whether to use NCCL library -USE_NCCL = 1 - -# CUDA architecture setting: going with all of them. -# For CUDA < 6.0, comment the *_50 lines for compatibility. -# CUDA_ARCH := - -# whether use cuda runtime compiling for writing kernels in native language (i.e. Python) -USE_NVTX=1 -ENABLE_CUDA_RTC = 1 - -# use openmp for parallelization -USE_OPENMP = 1 -USE_OPERATOR_TUNING = 1 -USE_LIBJPEG_TURBO = 1 - -# whether use MKL-DNN library -USE_MKLDNN = 1 - - -# MKL ML Library for Intel CPU/Xeon Phi -# Please refer to MKL_README.md for details - -# MKL ML Library folder, need to be root for /usr/local -# Change to User Home directory for standard user -# For USE_BLAS!=mkl only -MKLML_ROOT=/usr/local - -# whether use MKL2017 library -USE_MKL2017 = 0 - -# whether use MKL2017 experimental feature for high performance -# Prerequisite USE_MKL2017=1 -USE_MKL2017_EXPERIMENTAL = 0 - -# whether use NNPACK library -USE_NNPACK = 0 - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = $(DEPS_PATH)/lib - -# add path to intel library, you may need it for MKL, if you did not add the path -# to environment variable -USE_INTEL_PATH = NONE - -# If use MKL, choose static link automatically to allow python wrapper -ifeq ($(USE_BLAS), mkl) -USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE -endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -ARCH := $(shell uname -a) -ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64)) - USE_SSE=0 -else - USE_SSE=1 -endif - -#---------------------------- -# distributed computing -#---------------------------- - -# whether or not to enable multi-machine supporting -USE_DIST_KVSTORE = 1 - -# whether or not allow to read and write HDFS directly. If yes, then hadoop is -# required -USE_HDFS = 0 - -# path to libjvm.so. required if USE_HDFS=1 -LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server - -# whether or not allow to read and write AWS S3 directly. 
If yes, then -# libcurl4-openssl-dev is required, it can be installed on Ubuntu by -# sudo apt-get install -y libcurl4-openssl-dev -USE_S3 = 1 - -#---------------------------- -# additional operators -#---------------------------- - -# path to folders containing projects specific operators that you don't want to put in src/operators -EXTRA_OPERATORS = - - -#---------------------------- -# plugins -#---------------------------- - -# whether to use caffe integration. This requires installing caffe. -# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH -# CAFFE_PATH = $(HOME)/caffe -# MXNET_PLUGINS += plugin/caffe/caffe.mk - -# whether to use torch integration. This requires installing torch. -# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH -# TORCH_PATH = $(HOME)/torch -# MXNET_PLUGINS += plugin/torch/torch.mk - -# WARPCTC_PATH = $(HOME)/warp-ctc -# MXNET_PLUGINS += plugin/warpctc/warpctc.mk - -# whether to use sframe integration. This requires build sframe -# git@github.com:dato-code/SFrame.git -# SFRAME_PATH = $(HOME)/SFrame -# MXNET_PLUGINS += plugin/sframe/plugin.mk diff --git a/make/maven/maven_linux_mkl.mk b/make/maven/maven_linux_mkl.mk deleted file mode 100644 index 10aee5f35a46..000000000000 --- a/make/maven/maven_linux_mkl.mk +++ /dev/null @@ -1,183 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------------------------- -# Template configuration for compiling mxnet for making maven package -#------------------------------------------------------------------------------- - -#--------------------- -# choice of compiler -#-------------------- - -export CC = gcc -export CXX = g++ -export NVCC = nvcc - -# whether compile with options for MXNet developer -DEV = 0 - -# whether compile with debug -DEBUG = 0 - -# whether to turn on signal handler (e.g. 
segfault logger) -USE_SIGNAL_HANDLER = 1 - -# the additional link flags you want to add -ADD_LDFLAGS += -L$(DEPS_PATH)/lib -lpng -ltiff -ljpeg -lz -lgfortran -ldl -Wl,--version-script=$(CURDIR)/make/config/libmxnet.ver,-rpath,'$${ORIGIN}',--gc-sections - -# the additional compile flags you want to add -ADD_CFLAGS += -I$(DEPS_PATH)/include -ffunction-sections -fdata-sections - -#--------------------------------------------- -# matrix computation libraries for CPU/GPU -#--------------------------------------------- - -# choose the version of blas you want to use -# can be: mkl, blas, atlas, openblas -# in default use atlas for linux while apple for osx -USE_BLAS=openblas - -# whether use opencv during compilation -# you can disable it, however, you will not able to use -# imbin iterator -USE_OPENCV = 1 -# Add OpenCV include path, in which the directory `opencv2` exists -USE_OPENCV_INC_PATH = NONE -# Add OpenCV shared library path, in which the shared library exists -USE_OPENCV_LIB_PATH = NONE - -# whether use CUDA during compile -USE_CUDA = 0 - -# add the path to CUDA library to link and compile flag -# if you have already add them to environment variable, leave it as NONE -# USE_CUDA_PATH = /usr/local/cuda -USE_CUDA_PATH = NONE - -# whether use CuDNN R3 library -USE_CUDNN = 0 - -# CUDA architecture setting: going with all of them. -# For CUDA < 6.0, comment the *_50 lines for compatibility. -# CUDA_ARCH := - -# whether use cuda runtime compiling for writing kernels in native language (i.e. Python) -ENABLE_CUDA_RTC = 0 - -# use openmp for parallelization -USE_OPENMP = 1 -USE_OPERATOR_TUNING = 1 -USE_LIBJPEG_TURBO = 1 - -# whether use MKL-DNN library -USE_MKLDNN = 1 - - -# MKL ML Library for Intel CPU/Xeon Phi -# Please refer to MKL_README.md for details - -# MKL ML Library folder, need to be root for /usr/local -# Change to User Home directory for standard user -# For USE_BLAS!=mkl only -MKLML_ROOT=/usr/local - -# whether use MKL2017 library -USE_MKL2017 = 0 - -# whether use MKL2017 experimental feature for high performance -# Prerequisite USE_MKL2017=1 -USE_MKL2017_EXPERIMENTAL = 0 - -# whether use NNPACK library -USE_NNPACK = 0 - -# whether use lapack during compilation -# only effective when compiled with blas versions openblas/apple/atlas/mkl -USE_LAPACK = 1 - -# path to lapack library in case of a non-standard installation -USE_LAPACK_PATH = $(DEPS_PATH)/lib - -# add path to intel library, you may need it for MKL, if you did not add the path -# to environment variable -USE_INTEL_PATH = NONE - -# If use MKL, choose static link automatically to allow python wrapper -ifeq ($(USE_BLAS), mkl) -USE_STATIC_MKL = 1 -else -USE_STATIC_MKL = NONE -endif - -#---------------------------- -# Settings for power and arm arch -#---------------------------- -ARCH := $(shell uname -a) -ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64)) - USE_SSE=0 -else - USE_SSE=1 -endif - -#---------------------------- -# distributed computing -#---------------------------- - -# whether or not to enable multi-machine supporting -USE_DIST_KVSTORE = 1 - -# whether or not allow to read and write HDFS directly. If yes, then hadoop is -# required -USE_HDFS = 0 - -# path to libjvm.so. required if USE_HDFS=1 -LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server - -# whether or not allow to read and write AWS S3 directly. 
If yes, then -# libcurl4-openssl-dev is required, it can be installed on Ubuntu by -# sudo apt-get install -y libcurl4-openssl-dev -USE_S3 = 1 - -#---------------------------- -# additional operators -#---------------------------- - -# path to folders containing projects specific operators that you don't want to put in src/operators -EXTRA_OPERATORS = - - -#---------------------------- -# plugins -#---------------------------- - -# whether to use caffe integration. This requires installing caffe. -# You also need to add CAFFE_PATH/build/lib to your LD_LIBRARY_PATH -# CAFFE_PATH = $(HOME)/caffe -# MXNET_PLUGINS += plugin/caffe/caffe.mk - -# whether to use torch integration. This requires installing torch. -# You also need to add TORCH_PATH/install/lib to your LD_LIBRARY_PATH -# TORCH_PATH = $(HOME)/torch -# MXNET_PLUGINS += plugin/torch/torch.mk - -# WARPCTC_PATH = $(HOME)/warp-ctc -# MXNET_PLUGINS += plugin/warpctc/warpctc.mk - -# whether to use sframe integration. This requires build sframe -# git@github.com:dato-code/SFrame.git -# SFRAME_PATH = $(HOME)/SFrame -# MXNET_PLUGINS += plugin/sframe/plugin.mk diff --git a/make/pip/pip_darwin_cpu.mk b/make/staticbuild/darwin_cpu.mk similarity index 100% rename from make/pip/pip_darwin_cpu.mk rename to make/staticbuild/darwin_cpu.mk diff --git a/make/pip/pip_darwin_mkl.mk b/make/staticbuild/darwin_mkl.mk similarity index 100% rename from make/pip/pip_darwin_mkl.mk rename to make/staticbuild/darwin_mkl.mk diff --git a/make/pip/pip_linux_cpu.mk b/make/staticbuild/linux_cpu.mk similarity index 100% rename from make/pip/pip_linux_cpu.mk rename to make/staticbuild/linux_cpu.mk diff --git a/make/pip/pip_linux_cu100.mk b/make/staticbuild/linux_cu100.mk similarity index 100% rename from make/pip/pip_linux_cu100.mk rename to make/staticbuild/linux_cu100.mk diff --git a/make/pip/pip_linux_cu100mkl.mk b/make/staticbuild/linux_cu100mkl.mk similarity index 100% rename from make/pip/pip_linux_cu100mkl.mk rename to make/staticbuild/linux_cu100mkl.mk diff --git a/make/pip/pip_linux_cu101.mk b/make/staticbuild/linux_cu101.mk similarity index 100% rename from make/pip/pip_linux_cu101.mk rename to make/staticbuild/linux_cu101.mk diff --git a/make/pip/pip_linux_cu101mkl.mk b/make/staticbuild/linux_cu101mkl.mk similarity index 100% rename from make/pip/pip_linux_cu101mkl.mk rename to make/staticbuild/linux_cu101mkl.mk diff --git a/make/pip/pip_linux_cu75.mk b/make/staticbuild/linux_cu75.mk similarity index 100% rename from make/pip/pip_linux_cu75.mk rename to make/staticbuild/linux_cu75.mk diff --git a/make/pip/pip_linux_cu75mkl.mk b/make/staticbuild/linux_cu75mkl.mk similarity index 100% rename from make/pip/pip_linux_cu75mkl.mk rename to make/staticbuild/linux_cu75mkl.mk diff --git a/make/pip/pip_linux_cu80.mk b/make/staticbuild/linux_cu80.mk similarity index 100% rename from make/pip/pip_linux_cu80.mk rename to make/staticbuild/linux_cu80.mk diff --git a/make/pip/pip_linux_cu80mkl.mk b/make/staticbuild/linux_cu80mkl.mk similarity index 100% rename from make/pip/pip_linux_cu80mkl.mk rename to make/staticbuild/linux_cu80mkl.mk diff --git a/make/pip/pip_linux_cu90.mk b/make/staticbuild/linux_cu90.mk similarity index 100% rename from make/pip/pip_linux_cu90.mk rename to make/staticbuild/linux_cu90.mk diff --git a/make/pip/pip_linux_cu90mkl.mk b/make/staticbuild/linux_cu90mkl.mk similarity index 100% rename from make/pip/pip_linux_cu90mkl.mk rename to make/staticbuild/linux_cu90mkl.mk diff --git a/make/pip/pip_linux_cu91.mk b/make/staticbuild/linux_cu91.mk similarity 
index 100% rename from make/pip/pip_linux_cu91.mk rename to make/staticbuild/linux_cu91.mk diff --git a/make/pip/pip_linux_cu91mkl.mk b/make/staticbuild/linux_cu91mkl.mk similarity index 100% rename from make/pip/pip_linux_cu91mkl.mk rename to make/staticbuild/linux_cu91mkl.mk diff --git a/make/pip/pip_linux_cu92.mk b/make/staticbuild/linux_cu92.mk similarity index 100% rename from make/pip/pip_linux_cu92.mk rename to make/staticbuild/linux_cu92.mk diff --git a/make/pip/pip_linux_cu92mkl.mk b/make/staticbuild/linux_cu92mkl.mk similarity index 100% rename from make/pip/pip_linux_cu92mkl.mk rename to make/staticbuild/linux_cu92mkl.mk diff --git a/make/pip/pip_linux_mkl.mk b/make/staticbuild/linux_mkl.mk similarity index 100% rename from make/pip/pip_linux_mkl.mk rename to make/staticbuild/linux_mkl.mk diff --git a/tools/dependencies/README.md b/tools/dependencies/README.md index 024fedf16f73..ec1e80088895 100644 --- a/tools/dependencies/README.md +++ b/tools/dependencies/README.md @@ -190,7 +190,7 @@ cd incubator-mxnet # http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ # Build PyPi package -tools/staticbuild/build.sh cu100mkl pip +tools/staticbuild/build.sh cu100mkl # Wait for 10 - 30 mins, you will find libmxnet.so under the incubator-mxnet/lib @@ -279,7 +279,7 @@ sudo apt-get install -y git \ 2. Build PyPi package ``` # Update the dependency under tools/dependencies, then -tools/staticbuild/build.sh mkl pip +tools/staticbuild/build.sh mkl # Wait for 10 - 30 mins, you will find libmxnet.so under the incubator-mxnet/lib @@ -328,7 +328,7 @@ sudo apt-get install -y git \ 2. Build PyPi package ``` # Update the dependency under tools/dependencies, then -tools/staticbuild/build.sh mkl pip +tools/staticbuild/build.sh mkl # Wait for 10 - 30 mins, you will find libmxnet.so under the incubator-mxnet/lib diff --git a/tools/staticbuild/README.md b/tools/staticbuild/README.md index b861cc3308c6..b48043b1ef87 100644 --- a/tools/staticbuild/README.md +++ b/tools/staticbuild/README.md @@ -23,11 +23,11 @@ This folder contains the core script used to build the static library. This READ This script is a wrapper around `build_lib.sh. It simplifies the things by automatically identifing the system version, number of cores, and all environment variable settings. Here are examples you can run with this script: ``` -tools/staticbuild/build.sh cu92 maven +tools/staticbuild/build.sh cu92 ``` -This would build the mxnet package based on CUDA9.2 and Maven (Scala) build setttings. +This would build the mxnet package based on CUDA9.2. ``` -tools/staticbuild/build.sh mkl pip +tools/staticbuild/build.sh mkl ``` This would build the mxnet package based on MKLDNN and and pypi configuration settings. @@ -37,7 +37,6 @@ As the result, users would have a complete static dependencies in `/staticdeps` This script clones the most up-to-date master and builds the MXNet backend with a static library. In order to run the static library, you must set the following environment variables: - `DEPS_PATH` Path to your static dependencies -- `STATIC_BUILD_TARGET` Either `pip` or `maven` as your publish platform - `PLATFORM` linux, darwin - `VARIANT` cpu, cu*, cu*mkl, mkl diff --git a/tools/staticbuild/build.sh b/tools/staticbuild/build.sh index 7207cf062180..2d1ac43e4a1b 100755 --- a/tools/staticbuild/build.sh +++ b/tools/staticbuild/build.sh @@ -18,13 +18,12 @@ # under the License. 
if [ $# -lt 1 ]; then - >&2 echo "Usage: build.sh " + >&2 echo "Usage: build.sh " fi export CURDIR=$PWD export DEPS_PATH=$PWD/staticdeps export VARIANT=$(echo $1 | tr '[:upper:]' '[:lower:]') -export STATIC_BUILD_TARGET=$(echo $2 | tr '[:upper:]' '[:lower:]') export PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') if [[ $VARIANT == darwin* ]]; then diff --git a/tools/staticbuild/build_lib.sh b/tools/staticbuild/build_lib.sh index 4935d67b9316..f4fbf399ce86 100755 --- a/tools/staticbuild/build_lib.sh +++ b/tools/staticbuild/build_lib.sh @@ -18,7 +18,7 @@ # under the License. # This script builds the libraries of mxnet. -make_config=make/${STATIC_BUILD_TARGET}/${STATIC_BUILD_TARGET}_${PLATFORM}_${VARIANT}.mk +make_config=make/staticbuild/${PLATFORM}_${VARIANT}.mk if [[ ! -f $make_config ]]; then >&2 echo "Couldn't find make config $make_config for the current settings." exit 1 From 9f5b8bca2e972d30ba22ab4a7287d55277af865e Mon Sep 17 00:00:00 2001 From: Przemyslaw Tredak Date: Mon, 9 Dec 2019 20:21:09 -0800 Subject: [PATCH 22/62] Workaround problem with fusion in CUDA 9 (#17028) --- src/operator/fusion/fused_op-inl.h | 232 +++++++++++++++-------------- src/operator/fusion/fused_op.cu | 5 +- 2 files changed, 123 insertions(+), 114 deletions(-) diff --git a/src/operator/fusion/fused_op-inl.h b/src/operator/fusion/fused_op-inl.h index e86ce7682ad8..7373cd07400a 100644 --- a/src/operator/fusion/fused_op-inl.h +++ b/src/operator/fusion/fused_op-inl.h @@ -256,22 +256,22 @@ struct LoadType { }; template -inline typename LoadType::Type load(const DType input) { +__device__ inline typename LoadType::Type load(const DType input) { return input; } template <> -inline float load(const half input) { +__device__ inline float load(const half input) { return __half2float(input); } template -inline DType1 store(const DType2 input, DType1* ref) { +__device__ inline DType1 store(const DType2 input, DType1* ref) { return input; } template -inline half store(const DType input, half* ref) { +__device__ inline half store(const DType input, half* ref) { return __float2half(input); } @@ -297,12 +297,12 @@ struct VectorConfig<32> { }; template -inline DType add_elem(const DType& x, const DType& y) { +__device__ inline DType add_elem(const DType& x, const DType& y) { return x + y; } template <> -inline half add_elem(const half& x, const half& y) { +__device__ inline half add_elem(const half& x, const half& y) { return __float2half(__half2float(x) + __half2float(y)); } @@ -310,14 +310,14 @@ template union VectorType { typename VectorConfig::IndexType y; DType x[nvec]; - VectorType () {}; - VectorType (const VectorType& y2) { + __device__ VectorType () {}; + __device__ VectorType (const VectorType& y2) { y = y2.y; } - VectorType (const decltype(y) &y2) { + __device__ VectorType (const decltype(y) &y2) { y = y2; } - inline VectorType& operator+=(const VectorType& rhs) { + __device__ inline VectorType& operator+=(const VectorType& rhs) { #pragma unroll for (int i = 0; i < nvec; ++i) { x[i] = add_elem(x[i], rhs.x[i]); @@ -330,13 +330,13 @@ template struct Shape { int x[ndim]; size_t size; - inline const int& operator [](const int i) const { + __device__ inline const int& operator [](const int i) const { return x[i]; } - inline int& operator [](const int i) { + __device__ inline int& operator [](const int i) { return x[i]; } - inline void set(const int def) { + __device__ inline void set(const int def) { #pragma unroll for (int i = 0; i < ndim; i++) { x[i] = def; @@ -350,7 +350,8 @@ struct Shape<0> { }; template -inline 
VectorType load_index(const DType * input, int i, const Shape &shape) { +__device__ inline VectorType load_index(const DType * input, int i, + const Shape &shape) { if (i < shape.size) { const auto* vector_input = reinterpret_cast< const typename VectorConfig::IndexType *>( @@ -364,7 +365,8 @@ inline VectorType load_index(const DType * input, int i, const Shap } template -inline VectorType global_load_index(const DType * input, int i, const Shape &shape) { +__device__ inline VectorType global_load_index(const DType * input, int i, + const Shape &shape) { if (i < shape.size) { const auto* vector_input = reinterpret_cast< const typename VectorConfig::IndexType *>( @@ -378,7 +380,9 @@ inline VectorType global_load_index(const DType * input, int i, con } template -inline VectorType load_slice(const DType * input, const Shape& shape, Shape begin, Shape end, int offset) { +__device__ inline VectorType load_slice(const DType * input, const Shape& shape, + Shape begin, Shape end, + int offset) { int idx[nvec]; Shape ref_strides; @@ -417,7 +421,11 @@ inline VectorType load_slice(const DType * input, const Shape } template -inline VectorType fast_load_slice(const DType * input, const Shape& shape, Shape begin, Shape end, int offset) { +__device__ inline VectorType fast_load_slice(const DType * input, + const Shape& shape, + Shape begin, + Shape end, + int offset) { int idx = 0; Shape ref_strides; @@ -447,7 +455,7 @@ inline VectorType fast_load_slice(const DType * input, const Shape< } template -inline void store_index(const VectorType value, int i, +__device__ inline void store_index(const VectorType value, int i, DType * output, const Shape& shape) { if (i < (shape.size + nvec - 1) / nvec) { auto vector_output = reinterpret_cast< @@ -457,7 +465,7 @@ inline void store_index(const VectorType value, int i, } template -inline void store_add_index(const VectorType value, int i, +__device__ inline void store_add_index(const VectorType value, int i, DType * output, const Shape& shape) { if (i < (shape.size + nvec - 1) / nvec) { auto vector_output = reinterpret_cast< @@ -469,116 +477,116 @@ inline void store_add_index(const VectorType value, int i, } template -inline DType identity(const DType val) { +__device__ inline DType identity(const DType val) { return val; } template -inline DType add(const DType a, const DType2 b) { +__device__ inline DType add(const DType a, const DType2 b) { return a + b; } template -inline DType sub(const DType a, const DType2 b) { +__device__ inline DType sub(const DType a, const DType2 b) { return a - b; } template -inline DType mul(const DType a, const DType2 b) { +__device__ inline DType mul(const DType a, const DType2 b) { return a * b; } template -inline DType div(const DType a, const DType2 b) { +__device__ inline DType div(const DType a, const DType2 b) { return a / b; } template -inline DType rdiv(const DType a, const DType2 b) { +__device__ inline DType rdiv(const DType a, const DType2 b) { return b / a; } template -inline DType power(const DType a, const DType2 b) { +__device__ inline DType power(const DType a, const DType2 b) { return powf(a, b); } template -inline DType rpow(const DType a, const DType2 b) { +__device__ inline DType rpow(const DType a, const DType2 b) { return powf(b, a); } template -inline DType max(const DType a, const DType2 b) { +__device__ inline DType max(const DType a, const DType2 b) { return a > b ? 
a : b; } template -inline DType min(const DType a, const DType2 b) { +__device__ inline DType min(const DType a, const DType2 b) { return a < b ? a : b; } template -inline DType hypot(const DType a, const DType2 b) { +__device__ inline DType hypot(const DType a, const DType2 b) { return hypotf(a, b); } template -inline typename LoadType::Type cast(const DType val) { +__device__ inline typename LoadType::Type cast(const DType val) { return static_cast::Type>(val); } // activations template -inline DType relu(const DType val) { +__device__ inline DType relu(const DType val) { return val > 0 ? val : 0; } template -inline DType sigmoid(const DType val) { +__device__ inline DType sigmoid(const DType val) { return 1.f/(1 + expf(-val)); } template -inline DType softrelu(const DType val) { +__device__ inline DType softrelu(const DType val) { return logf(1 + expf(val)); } template -inline DType softsign(const DType val) { +__device__ inline DType softsign(const DType val) { return val / (1 + fabsf(val)); } // exp and log template -inline DType exp(const DType val) { +__device__ inline DType exp(const DType val) { return expf(val); } template -inline DType expm1(const DType val) { +__device__ inline DType expm1(const DType val) { return expm1f(val); } template -inline DType log(const DType val) { +__device__ inline DType log(const DType val) { return logf(val); } template -inline DType log10(const DType val) { +__device__ inline DType log10(const DType val) { return log10f(val); } template -inline DType log2(const DType val) { +__device__ inline DType log2(const DType val) { return log2f(val); } template -inline DType log1p(const DType val) { +__device__ inline DType log1p(const DType val) { return log1pf(val); } @@ -587,197 +595,197 @@ inline DType log1p(const DType val) { constexpr double pi = 3.14159265358979323846; template -inline DType degrees(const DType val) { +__device__ inline DType degrees(const DType val) { return (val / pi) * 180; } template -inline DType radians(const DType val) { +__device__ inline DType radians(const DType val) { return (val / 180.0) * pi; } template -inline DType sin(const DType val) { +__device__ inline DType sin(const DType val) { return sinf(val); } template -inline DType cos(const DType val) { +__device__ inline DType cos(const DType val) { return cosf(val); } template -inline DType tan(const DType val) { +__device__ inline DType tan(const DType val) { return tanf(val); } template -inline DType arcsin(const DType val) { +__device__ inline DType arcsin(const DType val) { return asinf(val); } template -inline DType arccos(const DType val) { +__device__ inline DType arccos(const DType val) { return acosf(val); } template -inline DType arctan(const DType val) { +__device__ inline DType arctan(const DType val) { return atanf(val); } template -inline DType sinh(const DType val) { +__device__ inline DType sinh(const DType val) { return sinhf(val); } template -inline DType cosh(const DType val) { +__device__ inline DType cosh(const DType val) { return coshf(val); } template -inline DType tanh(const DType val) { +__device__ inline DType tanh(const DType val) { return tanhf(val); } template -inline DType arcsinh(const DType val) { +__device__ inline DType arcsinh(const DType val) { return asinhf(val); } template -inline DType arccosh(const DType val) { +__device__ inline DType arccosh(const DType val) { return acoshf(val); } template -inline DType arctanh(const DType val) { +__device__ inline DType arctanh(const DType val) { return atanhf(val); } // sqrt template -inline 
DType sqrt(const DType val) { +__device__ inline DType sqrt(const DType val) { return sqrtf(val); } template -inline DType rsqrt(const DType val) { +__device__ inline DType rsqrt(const DType val) { return rsqrtf(val); } template -inline DType cbrt(const DType val) { +__device__ inline DType cbrt(const DType val) { return cbrtf(val); } template -inline DType rcbrt(const DType val) { +__device__ inline DType rcbrt(const DType val) { return rcbrtf(val); } template -inline DType square(const DType val) { +__device__ inline DType square(const DType val) { return val * val; } template -inline typename LoadType::Type zero(const DType val) { +__device__ inline typename LoadType::Type zero(const DType val) { return 0; } template -inline typename LoadType::Type zero() { +__device__ inline typename LoadType::Type zero() { return 0; } template -inline typename LoadType::Type one(const DType val) { +__device__ inline typename LoadType::Type one(const DType val) { return 1; } template -inline typename LoadType::Type one() { +__device__ inline typename LoadType::Type one() { return 1; } template -inline DType round(const DType val) { +__device__ inline DType round(const DType val) { return roundf(val); } template -inline DType rint(const DType val) { +__device__ inline DType rint(const DType val) { return rintf(val); } template -inline DType fix(const DType val) { +__device__ inline DType fix(const DType val) { const auto floor = floorf(val); const auto ceil = ceilf(val); return (floor > 0 ? floor : -floor) < (ceil > 0 ? ceil : -ceil) ? floor : ceil; } template -inline DType floor(const DType val) { +__device__ inline DType floor(const DType val) { return floorf(val); } template -inline DType ceil(const DType val) { +__device__ inline DType ceil(const DType val) { return ceilf(val); } template -inline DType trunc(const DType val) { +__device__ inline DType trunc(const DType val) { return truncf(val); } template -inline DType clip(const DType val, const float a_min, const float a_max) { +__device__ inline DType clip(const DType val, const float a_min, const float a_max) { return max(min(val, a_max), a_min); } template -inline DType sign(const DType val) { +__device__ inline DType sign(const DType val) { if (val < 0) return -1; return val > 0 ? 1 : 0; } template -inline DType reciprocal(const DType val) { +__device__ inline DType reciprocal(const DType val) { return 1.0f / val; } template -inline DType abs(const DType val) { +__device__ inline DType abs(const DType val) { return fabsf(val); } template -inline DType gamma(const DType val) { +__device__ inline DType gamma(const DType val) { return tgammaf(val); } template -inline DType gammaln(const DType val) { +__device__ inline DType gammaln(const DType val) { return lgammaf(val); } template -inline DType erf(const DType val) { +__device__ inline DType erf(const DType val) { return erff(val); } template -inline DType erfinv(const DType val) { +__device__ inline DType erfinv(const DType val) { return erfinvf(val); } template -inline DType1 smooth_l1(const DType1 val, const DType2 scalar) { +__device__ inline DType1 smooth_l1(const DType1 val, const DType2 scalar) { const auto bsq = scalar * scalar; const auto ibsq = 1.0f / bsq; if (val > ibsq) { @@ -798,147 +806,148 @@ const char backward_function_definitions[] = R"code( namespace op { template -inline DTypeGrad backward_relu(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_relu(const DType val, const DTypeGrad grad) { return val > 0 ? 
grad : 0; } template -inline DTypeGrad backward_sigmoid(const DType out, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_sigmoid(const DType out, const DTypeGrad grad) { return grad * out * (1 - out); } template -inline DTypeGrad backward_softrelu(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_softrelu(const DType val, const DTypeGrad grad) { return grad * sigmoid(val); } template -inline DTypeGrad backward_softsign(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_softsign(const DType val, const DTypeGrad grad) { const DType ap1 = 1 + fabsf(val); return grad / (ap1 * ap1); } template -inline DTypeGrad backward_exp(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_exp(const DType val, const DTypeGrad grad) { return grad * expf(val); } template -inline DTypeGrad backward_expm1(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_expm1(const DType val, const DTypeGrad grad) { return grad * expf(val); } template -inline DTypeGrad backward_log(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_log(const DType val, const DTypeGrad grad) { return grad / val; } template -inline DTypeGrad backward_log10(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_log10(const DType val, const DTypeGrad grad) { return grad / (val * logf(10)); } template -inline DTypeGrad backward_log2(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_log2(const DType val, const DTypeGrad grad) { return grad / (val * logf(2)); } template -inline DTypeGrad backward_log1p(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_log1p(const DType val, const DTypeGrad grad) { return grad / (1 + val); } template -inline DTypeGrad backward_sin(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_sin(const DType val, const DTypeGrad grad) { return grad * cosf(val); } template -inline DTypeGrad backward_cos(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_cos(const DType val, const DTypeGrad grad) { return -grad * sinf(val); } // Uses output from tan template -inline DTypeGrad backward_tan(const DType out, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_tan(const DType out, const DTypeGrad grad) { return grad * (out * out + 1); } template -inline DTypeGrad backward_arcsin(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_arcsin(const DType val, const DTypeGrad grad) { return grad / sqrtf(1 - val*val); } template -inline DTypeGrad backward_arccos(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_arccos(const DType val, const DTypeGrad grad) { return -grad / sqrtf(1 - val*val); } template -inline DTypeGrad backward_arctan(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_arctan(const DType val, const DTypeGrad grad) { return grad / (1 + val*val); } template -inline DTypeGrad backward_sinh(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_sinh(const DType val, const DTypeGrad grad) { return grad * coshf(val); } template -inline DTypeGrad backward_cosh(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_cosh(const DType val, const DTypeGrad grad) { return grad * sinhf(val); } // Uses tanh output template -inline DTypeGrad backward_tanh(const DType out, const DTypeGrad grad) { +__device__ inline 
DTypeGrad backward_tanh(const DType out, const DTypeGrad grad) { return grad * (1 - out * out); } template -inline DTypeGrad backward_arcsinh(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_arcsinh(const DType val, const DTypeGrad grad) { return grad / sqrtf(val * val + 1); } template -inline DTypeGrad backward_arccosh(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_arccosh(const DType val, const DTypeGrad grad) { return grad / sqrtf(val * val - 1); } template -inline DTypeGrad backward_arctanh(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_arctanh(const DType val, const DTypeGrad grad) { return grad / (1 - val * val); } template -inline DTypeGrad backward_sqrt(const DType out, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_sqrt(const DType out, const DTypeGrad grad) { return 0.5 * grad / out; } template -inline DTypeGrad backward_rsqrt(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_rsqrt(const DType val, const DTypeGrad grad) { const DType inv = 1 / val; return -0.5 * grad * sqrtf(inv) * inv; } template -inline DTypeGrad backward_cbrt(const DType out, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_cbrt(const DType out, const DTypeGrad grad) { return grad / (3.0f * out * out); } template -inline DTypeGrad backward_rcbrt(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_rcbrt(const DType val, const DTypeGrad grad) { const DType inv = 1 / val; return -1.f/3.f * grad * cbrtf(inv) * inv; } template -inline DTypeGrad backward_square(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_square(const DType val, const DTypeGrad grad) { return 2 * val * grad; } template -inline DTypeGrad backward_clip(const DType val, const DTypeGrad grad, const float a_min, const float a_max) { +__device__ inline DTypeGrad backward_clip(const DType val, const DTypeGrad grad, + const float a_min, const float a_max) { if (val > a_max || val < a_min) { return 0; } else { @@ -947,22 +956,23 @@ inline DTypeGrad backward_clip(const DType val, const DTypeGrad grad, const floa } template -inline DTypeGrad backward_reciprocal(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_reciprocal(const DType val, const DTypeGrad grad) { return -grad / (val * val); } template -inline DTypeGrad backward_erf(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_erf(const DType val, const DTypeGrad grad) { return 2.0f / sqrt(pi) * exp(-(val*val)) * grad; } template -inline DTypeGrad backward_erfinv(const DType val, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_erfinv(const DType val, const DTypeGrad grad) { return 0.5f * sqrt(pi) * exp(val * val) * grad; } template -inline DTypeGrad backward_smooth_l1(const DType val, const DType2 scalar, const DTypeGrad grad) { +__device__ inline DTypeGrad backward_smooth_l1(const DType val, const DType2 scalar, + const DTypeGrad grad) { auto bsq = scalar * scalar; auto ibsq = 1.0f / bsq; if (val > ibsq) { diff --git a/src/operator/fusion/fused_op.cu b/src/operator/fusion/fused_op.cu index 34360544cd11..c8a888301abd 100644 --- a/src/operator/fusion/fused_op.cu +++ b/src/operator/fusion/fused_op.cu @@ -594,13 +594,12 @@ CUfunction FusedOp::CompileCode(const std::string &code, std::string gpu_arch_arg = "--gpu-architecture=compute_" + std::to_string(sm_arch); const char *opts[] = {gpu_arch_arg.c_str(), - "--std=c++11", - 
"-default-device"}; + "--std=c++11"}; const std::string kernel_name_demangled = "FusedKernel_" + kernel_name; NVRTC_CALL(nvrtcAddNameExpression(program, (kernel_name_demangled).c_str())); nvrtcResult compileResult = nvrtcCompileProgram(program, // prog - 3, // num options + 2, // num options opts); // options CHECK_EQ(compileResult, NVRTC_SUCCESS) << "NVRTC Compilation failed. Please set environment variable MXNET_USE_FUSION to 0.\n" From e18e4ce553ae7818780078e6f1ecccc91fe3c0df Mon Sep 17 00:00:00 2001 From: liuzh91 Date: Tue, 10 Dec 2019 14:12:15 +0800 Subject: [PATCH 23/62] Include eval_net the validation model in the gluon estimator api (#16957) * Include eval_net the validation model in the estimator api * fix small issue --- .../gluon/contrib/estimator/estimator.py | 32 +++++++- tests/python/unittest/test_gluon_estimator.py | 75 ++++++++++++++++++- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index ab7018f58e1f..ac7c3d3825ab 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -59,9 +59,29 @@ class Estimator(object): Trainer to apply optimizer on network parameters. context : Context or list of Context Device(s) to run the training on. - evaluation_loss: gluon.loss.loss - Loss (objective) function to calculate during evaluation. If set evaluation_loss + evaluation_loss : gluon.loss.loss + Loss (objective) function to calculate during validation. If set evaluation_loss None, it will use the same loss function as self.loss + eval_net : gluon.Block + The model used for validation. The validation model does not necessarily belong to + the same model class as the training model. But the two models typically share the + same architecture. Therefore the validation model can reuse parameters of the + training model. + + The code example of consruction of eval_net sharing the same network parameters as + the training net is given below: + + >>> net = _get_train_network() + >>> eval_net = _get_test_network(params=net.collect_params()) + >>> net.initialize(ctx=ctx) + >>> est = Estimator(net, loss, eval_net=eval_net) + + Proper namespace match is required for weight sharing between two networks. Most networks + inheriting :py:class:`Block` can share their parameters correctly. An exception is + Sequential networks that Block scope must be specified for correct weight sharing. For + the naming in mxnet Gluon API, please refer to the site + (https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/naming.html) + for future information. """ @@ -89,7 +109,8 @@ def __init__(self, net, initializer=None, trainer=None, context=None, - evaluation_loss=None): + evaluation_loss=None, + eval_net=None): self.net = net self.loss = self._check_loss(loss) self._train_metrics = _check_metrics(metrics) @@ -98,6 +119,9 @@ def __init__(self, net, self.evaluation_loss = self.loss if evaluation_loss is not None: self.evaluation_loss = self._check_loss(evaluation_loss) + self.eval_net = self.net + if eval_net is not None: + self.eval_net = eval_net self.logger = logging.Logger(name='Estimator', level=logging.INFO) self.logger.addHandler(logging.StreamHandler(sys.stdout)) @@ -234,7 +258,7 @@ def evaluate_batch(self, Batch axis to split the validation data into devices. 
""" data, label = self._get_data_and_label(val_batch, self.context, batch_axis) - pred = [self.net(x) for x in data] + pred = [self.eval_net(x) for x in data] loss = [self.evaluation_loss(y_hat, y) for y_hat, y in zip(pred, label)] # update metrics for metric in val_metrics: diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index 21f949a0bba6..dba3f122a9b6 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -29,11 +29,16 @@ from nose.tools import assert_raises -def _get_test_network(): - net = nn.Sequential() +def _get_test_network(params=None): + net = nn.Sequential(params=params) net.add(nn.Dense(4, activation='relu', flatten=False)) return net +def _get_test_network_with_namescope(params=None): + net = nn.Sequential(params=params) + with net.name_scope(): + net.add(nn.Dense(4, activation='relu', flatten=False)) + return net def _get_test_data(): batch_size = 4 @@ -371,3 +376,69 @@ def test_default_handlers(): assert isinstance(handlers[0], GradientUpdateHandler) assert isinstance(handlers[1], MetricHandler) assert isinstance(handlers[4], LoggingHandler) + +def test_eval_net(): + ''' test estimator with a different evaluation net ''' + ''' test weight sharing of sequential networks without namescope ''' + net = _get_test_network() + eval_net = _get_test_network(params=net.collect_params()) + dataloader, dataiter = _get_test_data() + num_epochs = 1 + ctx = mx.cpu() + loss = gluon.loss.L2Loss() + evaluation_loss = gluon.loss.L2Loss() + acc = mx.metric.Accuracy() + net.initialize(ctx=ctx) + trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) + est = Estimator(net=net, + loss=loss, + metrics=acc, + trainer=trainer, + context=ctx, + evaluation_loss=evaluation_loss, + eval_net=eval_net) + + with assert_raises(RuntimeError): + est.fit(train_data=dataloader, + val_data=dataloader, + epochs=num_epochs) + + ''' test weight sharing of sequential networks with namescope ''' + net = _get_test_network_with_namescope() + eval_net = _get_test_network_with_namescope(params=net.collect_params()) + net.initialize(ctx=ctx) + trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) + est = Estimator(net=net, + loss=loss, + metrics=acc, + trainer=trainer, + context=ctx, + evaluation_loss=evaluation_loss, + eval_net=eval_net) + + est.fit(train_data=dataloader, + val_data=dataloader, + epochs=num_epochs) + + ''' test weight sharing of two resnets ''' + net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx) + net.output = gluon.nn.Dense(10) + eval_net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx) + eval_net.output = gluon.nn.Dense(10, params=net.collect_params()) + dataset = gluon.data.ArrayDataset(mx.nd.zeros((10, 3, 224, 224)), mx.nd.zeros((10, 10))) + dataloader = gluon.data.DataLoader(dataset=dataset, batch_size=5) + net.initialize(ctx=ctx) + eval_net.initialize(ctx=ctx) + trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) + est = Estimator(net=net, + loss=loss, + metrics=acc, + trainer=trainer, + context=ctx, + evaluation_loss=evaluation_loss, + eval_net=eval_net) + + est.fit(train_data=dataloader, + val_data=dataloader, + epochs=num_epochs) + From 60f77f5ab56e3a47e483bb270f1c1a4dc275f183 Mon Sep 17 00:00:00 2001 From: perdasilva Date: Tue, 10 Dec 2019 16:44:58 +0100 Subject: [PATCH 24/62] [CD] dynamic libmxet pipeline fix + small fixes (#16966) * Adds mx_mkldnn_deps back to dynamic 
libmxnet pipeline * Fixes cudnn version for cuda 10.1 Dockerfile * Turns off unnecessary DEBUG flag * Adds USE_NVTX=1 to cu90 make configuration --- cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy | 2 ++ cd/mxnet_lib/static/Jenkins_pipeline.groovy | 2 ++ ci/docker/Dockerfile.build.ubuntu_gpu_cu101 | 2 +- ci/docker/runtime_functions.sh | 2 +- make/staticbuild/linux_cu90.mk | 2 ++ make/staticbuild/linux_cu90mkl.mk | 2 ++ 6 files changed, 10 insertions(+), 2 deletions(-) diff --git a/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy b/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy index 6a51b69cfe50..e3d4b5908d57 100644 --- a/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy +++ b/cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy @@ -22,6 +22,7 @@ // NOTE: ci_utils is loaded by the originating Jenkins job, e.g. jenkins/Jenkinsfile_release_job +// NOTE: the following variables are referenced in the mxnet_lib_pipeline jenkins file imported below // libmxnet location libmxnet = 'lib/libmxnet.so' @@ -30,6 +31,7 @@ licenses = 'licenses/*' // libmxnet dependencies mx_deps = '' +mx_mkldnn_deps = '' // library type // either static or dynamic - depending on how it links to its dependencies diff --git a/cd/mxnet_lib/static/Jenkins_pipeline.groovy b/cd/mxnet_lib/static/Jenkins_pipeline.groovy index 84466dd551e7..c6b6eaeaef90 100644 --- a/cd/mxnet_lib/static/Jenkins_pipeline.groovy +++ b/cd/mxnet_lib/static/Jenkins_pipeline.groovy @@ -23,6 +23,8 @@ // To avoid confusion, please note: // ci_utils is loaded by the originating Jenkins job, e.g. jenkins/Jenkinsfile_release_job +// NOTE: the following variables are referenced in the mxnet_lib_pipeline jenkins file imported below + // libmxnet location libmxnet = 'lib/libmxnet.so' diff --git a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 index 7e0f8d93ed37..ff6b89bf8e06 100644 --- a/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 +++ b/ci/docker/Dockerfile.build.ubuntu_gpu_cu101 @@ -65,7 +65,7 @@ RUN /work/ubuntu_docs.sh COPY install/ubuntu_tutorials.sh /work/ RUN /work/ubuntu_tutorials.sh -ENV CUDNN_VERSION=7.5.1.10 +ENV CUDNN_VERSION=7.6.0.64 COPY install/ubuntu_cudnn.sh /work/ RUN /work/ubuntu_cudnn.sh diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 9f56a6708ea4..63fe32879d75 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1002,7 +1002,7 @@ sanity_check() { cd_unittest_ubuntu() { set -ex export PYTHONPATH=./python/ - export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=0 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_ENABLE_CYTHON=0 diff --git a/make/staticbuild/linux_cu90.mk b/make/staticbuild/linux_cu90.mk index 1bca27b2b80c..310b1fac421b 100644 --- a/make/staticbuild/linux_cu90.mk +++ b/make/staticbuild/linux_cu90.mk @@ -81,6 +81,8 @@ USE_NCCL = 1 # whether use cuda runtime compiling for writing kernels in native language (i.e. Python) ENABLE_CUDA_RTC = 1 +USE_NVTX=1 + # use openmp for parallelization USE_OPENMP = 1 USE_OPERATOR_TUNING = 1 diff --git a/make/staticbuild/linux_cu90mkl.mk b/make/staticbuild/linux_cu90mkl.mk index 8a8c273f3a09..c46c10f6358b 100644 --- a/make/staticbuild/linux_cu90mkl.mk +++ b/make/staticbuild/linux_cu90mkl.mk @@ -81,6 +81,8 @@ USE_NCCL = 1 # whether use cuda runtime compiling for writing kernels in native language (i.e.
Python) ENABLE_CUDA_RTC = 1 +USE_NVTX=1 + # use openmp for parallelization USE_OPENMP = 1 USE_OPERATOR_TUNING = 1 From 0c17ddd61988d05eaca14312a22738fb84bfa2ac Mon Sep 17 00:00:00 2001 From: liuzh91 Date: Wed, 11 Dec 2019 01:50:22 +0800 Subject: [PATCH 25/62] Extend estimator.evaluate() to support event handlers (#16971) * fix unittest failures for the new api interface * Add comments in the code for readability * Remove unused argument val_metrics * merge changes with the master branch * fix some regression errors * fix bugs introduced in the merging phase --- .../gluon/contrib/estimator/estimator.py | 105 ++++++++++++------ .../gluon/contrib/estimator/event_handler.py | 59 +++++----- tests/python/unittest/test_gluon_estimator.py | 73 ++++++++---- .../unittest/test_gluon_event_handler.py | 23 ++-- 4 files changed, 160 insertions(+), 100 deletions(-) diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index ac7c3d3825ab..0d9ab9ec0a02 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -51,8 +51,10 @@ class Estimator(object): The model used for training. loss : gluon.loss.Loss Loss (objective) function to calculate during training. - metrics : EvalMetric or list of EvalMetric - Metrics for evaluating models. + train_metrics : EvalMetric or list of EvalMetric + Training metrics for evaluating models on training dataset. + val_metrics : EvalMetric or list of EvalMetric + Validation metrics for evaluating models on validation dataset. initializer : Initializer Initializer to initialize the network. trainer : Trainer @@ -105,7 +107,8 @@ class Estimator(object): def __init__(self, net, loss, - metrics=None, + train_metrics=None, + val_metrics=None, initializer=None, trainer=None, context=None, @@ -113,7 +116,8 @@ def __init__(self, net, eval_net=None): self.net = net self.loss = self._check_loss(loss) - self._train_metrics = _check_metrics(metrics) + self._train_metrics = _check_metrics(train_metrics) + self._val_metrics = _check_metrics(val_metrics) self._add_default_training_metrics() self._add_validation_metrics() self.evaluation_loss = self.loss @@ -226,13 +230,21 @@ def _add_default_training_metrics(self): self._train_metrics.append(metric_loss(loss_name)) for metric in self._train_metrics: - metric.name = "training " + metric.name + # add training prefix to the metric name + # it is useful for event handlers to distinguish them from validation metrics + metric.name = 'training ' + metric.name def _add_validation_metrics(self): - self._val_metrics = [copy.deepcopy(metric) for metric in self._train_metrics] + if not self._val_metrics: + self._val_metrics = [copy.deepcopy(metric) for metric in self._train_metrics] for metric in self._val_metrics: - metric.name = "validation " + metric.name + # add validation prefix to the metric name + # it is useful for event handlers to distinguish them from training metrics + if 'training' in metric.name: + metric.name = metric.name.replace('training', 'validation') + else: + metric.name = 'validation ' + metric.name @property def train_metrics(self): @@ -244,7 +256,6 @@ def val_metrics(self): def evaluate_batch(self, val_batch, - val_metrics, batch_axis=0): """Evaluate model on a batch of validation data. @@ -252,25 +263,19 @@ def evaluate_batch(self, ---------- val_batch : tuple Data and label of a batch from the validation data loader. - val_metrics : EvalMetric or list of EvalMetrics - Metrics to update validation result. 
batch_axis : int, default 0 Batch axis to split the validation data into devices. """ data, label = self._get_data_and_label(val_batch, self.context, batch_axis) pred = [self.eval_net(x) for x in data] loss = [self.evaluation_loss(y_hat, y) for y_hat, y in zip(pred, label)] - # update metrics - for metric in val_metrics: - if isinstance(metric, metric_loss): - metric.update(0, loss) - else: - metric.update(label, pred) + + return data, label, pred, loss def evaluate(self, val_data, - val_metrics, - batch_axis=0): + batch_axis=0, + event_handlers=None): """Evaluate model on validation data. This function calls :py:func:`evaluate_batch` on each of the batches from the @@ -281,21 +286,42 @@ def evaluate(self, ---------- val_data : DataLoader Validation data loader with data and labels. - val_metrics : EvalMetric or list of EvalMetrics - Metrics to update validation result. batch_axis : int, default 0 Batch axis to split the validation data into devices. + event_handlers : EventHandler or list of EventHandler + List of :py:class:`EventHandlers` to apply during validation. Besides + event handlers specified here, a default MetricHandler and a LoggingHandler + will be added if not specified explicitly. """ if not isinstance(val_data, DataLoader): raise ValueError("Estimator only support input as Gluon DataLoader. Alternatively, you " "can transform your DataIter or any NDArray into Gluon DataLoader. " "Refer to gluon.data.DataLoader") - for metric in val_metrics: + for metric in self.val_metrics: metric.reset() + event_handlers = self._prepare_default_validation_handlers(event_handlers) + + _, epoch_begin, batch_begin, batch_end, \ + epoch_end, _ = self._categorize_handlers(event_handlers) + + estimator_ref = self + + for handler in epoch_begin: + handler.epoch_begin(estimator_ref) + for _, batch in enumerate(val_data): - self.evaluate_batch(batch, val_metrics, batch_axis) + for handler in batch_begin: + handler.batch_begin(estimator_ref, batch=batch) + + _, label, pred, loss = self.evaluate_batch(batch, batch_axis) + + for handler in batch_end: + handler.batch_end(estimator_ref, batch=batch, pred=pred, label=label, loss=loss) + + for handler in epoch_end: + handler.epoch_end(estimator_ref) def fit_batch(self, train_batch, batch_axis=0): """Trains the model on a batch of training data. 
@@ -441,23 +467,17 @@ def _prepare_default_handlers(self, val_data, event_handlers): added_default_handlers.append(GradientUpdateHandler()) if not any(isinstance(handler, MetricHandler) for handler in event_handlers): - added_default_handlers.append(MetricHandler(train_metrics=self.train_metrics)) + added_default_handlers.append(MetricHandler(metrics=self.train_metrics)) if not any(isinstance(handler, ValidationHandler) for handler in event_handlers): # no validation handler if val_data: - val_metrics = self.val_metrics # add default validation handler if validation data found added_default_handlers.append(ValidationHandler(val_data=val_data, - eval_fn=self.evaluate, - val_metrics=val_metrics)) - else: - # set validation metrics to None if no validation data and no validation handler - val_metrics = [] + eval_fn=self.evaluate)) if not any(isinstance(handler, LoggingHandler) for handler in event_handlers): - added_default_handlers.append(LoggingHandler(train_metrics=self.train_metrics, - val_metrics=val_metrics)) + added_default_handlers.append(LoggingHandler(metrics=self.train_metrics)) # if there is a mix of user defined event handlers and default event handlers # they should have the same set of metrics @@ -474,6 +494,29 @@ def _prepare_default_handlers(self, val_data, event_handlers): event_handlers.sort(key=lambda handler: getattr(handler, 'priority', 0)) return event_handlers + def _prepare_default_validation_handlers(self, event_handlers): + event_handlers = _check_event_handlers(event_handlers) + added_default_handlers = [] + + # add default logging handler and metric handler for validation + if not any(isinstance(handler, MetricHandler) for handler in event_handlers): + added_default_handlers.append(MetricHandler(metrics=self.val_metrics)) + + if not any(isinstance(handler, LoggingHandler) for handler in event_handlers): + added_default_handlers.append(LoggingHandler(metrics=self.val_metrics)) + + mixing_handlers = event_handlers and added_default_handlers + event_handlers.extend(added_default_handlers) + + # check if all handlers refer to well-defined validation metrics + if mixing_handlers: + known_metrics = set(self.val_metrics) + for handler in event_handlers: + _check_handler_metric_ref(handler, known_metrics) + + event_handlers.sort(key=lambda handler: getattr(handler, 'priority', 0)) + return event_handlers + def _categorize_handlers(self, event_handlers): """ categorize handlers into 6 event lists to avoid calling empty methods diff --git a/python/mxnet/gluon/contrib/estimator/event_handler.py b/python/mxnet/gluon/contrib/estimator/event_handler.py index 64777608bef0..c7551362fa5b 100644 --- a/python/mxnet/gluon/contrib/estimator/event_handler.py +++ b/python/mxnet/gluon/contrib/estimator/event_handler.py @@ -128,28 +128,28 @@ class MetricHandler(EpochBegin, BatchEnd): Parameters ---------- - train_metrics : List of EvalMetrics - Training metrics to be updated at batch end. + metrics : List of EvalMetrics + Metrics to be updated at batch end. priority : scalar Priority level of the MetricHandler. Priority level is sorted in ascending order. The lower the number is, the higher priority level the handler is. 
""" - def __init__(self, train_metrics, priority=-1000): - self.train_metrics = _check_metrics(train_metrics) + def __init__(self, metrics, priority=-1000): + self.metrics = _check_metrics(metrics) # order to be called among all callbacks # metrics need to be calculated before other callbacks can access them self.priority = priority def epoch_begin(self, estimator, *args, **kwargs): - for metric in self.train_metrics: + for metric in self.metrics: metric.reset() def batch_end(self, estimator, *args, **kwargs): pred = kwargs['pred'] label = kwargs['label'] loss = kwargs['loss'] - for metric in self.train_metrics: + for metric in self.metrics: if isinstance(metric, metric_loss): # metric wrapper for loss values metric.update(0, loss) @@ -171,8 +171,6 @@ class ValidationHandler(TrainBegin, BatchEnd, EpochEnd): eval_fn : function A function defines how to run evaluation and calculate loss and metrics. - val_metrics : List of EvalMetrics - Validation metrics to be updated. epoch_period : int, default 1 How often to run validation at epoch end, by default :py:class:`ValidationHandler` validate every epoch. @@ -188,7 +186,6 @@ class ValidationHandler(TrainBegin, BatchEnd, EpochEnd): def __init__(self, val_data, eval_fn, - val_metrics=None, epoch_period=1, batch_period=None, priority=-1000): @@ -196,7 +193,6 @@ def __init__(self, self.eval_fn = eval_fn self.epoch_period = epoch_period self.batch_period = batch_period - self.val_metrics = _check_metrics(val_metrics) self.current_batch = 0 self.current_epoch = 0 # order to be called among all callbacks @@ -211,20 +207,12 @@ def train_begin(self, estimator, *args, **kwargs): def batch_end(self, estimator, *args, **kwargs): self.current_batch += 1 if self.batch_period and self.current_batch % self.batch_period == 0: - self.eval_fn(val_data=self.val_data, - val_metrics=self.val_metrics) - msg = '[Epoch %d] ValidationHandler: %d batches reached, ' \ - % (self.current_epoch, self.current_batch) - for monitor in self.val_metrics: - name, value = monitor.get() - msg += '%s: %.4f, ' % (name, value) - estimator.logger.info(msg.rstrip(',')) + self.eval_fn(val_data=self.val_data) def epoch_end(self, estimator, *args, **kwargs): self.current_epoch += 1 if self.epoch_period and self.current_epoch % self.epoch_period == 0: - self.eval_fn(val_data=self.val_data, - val_metrics=self.val_metrics) + self.eval_fn(val_data=self.val_data) class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, BatchEnd): @@ -239,10 +227,8 @@ class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, Bat Logging interval during training. log_interval='epoch': display metrics every epoch log_interval=integer k: display metrics every interval of k batches - train_metrics : list of EvalMetrics - Training metrics to be logged, logged at batch end, epoch end, train end. - val_metrics : list of EvalMetrics - Validation metrics to be logged, logged at epoch end, train end. + metrics : list of EvalMetrics + Metrics to be logged, logged at batch end, epoch end, train end. priority : scalar, default np.Inf Priority level of the LoggingHandler. Priority level is sorted in ascending order. 
The lower the number is, the higher priority level the @@ -250,14 +236,12 @@ class LoggingHandler(TrainBegin, TrainEnd, EpochBegin, EpochEnd, BatchBegin, Bat """ def __init__(self, log_interval='epoch', - train_metrics=None, - val_metrics=None, + metrics=None, priority=np.Inf): super(LoggingHandler, self).__init__() if not isinstance(log_interval, int) and log_interval != 'epoch': raise ValueError("log_interval must be either an integer or string 'epoch'") - self.train_metrics = _check_metrics(train_metrics) - self.val_metrics = _check_metrics(val_metrics) + self.metrics = _check_metrics(metrics) self.batch_index = 0 self.current_epoch = 0 self.processed_samples = 0 @@ -265,6 +249,7 @@ def __init__(self, log_interval='epoch', # it will also shut down logging at train end self.priority = priority self.log_interval = log_interval + self.log_interval_time = 0 def train_begin(self, estimator, *args, **kwargs): self.train_start = time.time() @@ -288,7 +273,7 @@ def train_end(self, estimator, *args, **kwargs): train_time = time.time() - self.train_start msg = 'Train finished using total %ds with %d epochs. ' % (train_time, self.current_epoch) # log every result in train stats including train/validation loss & metrics - for metric in self.train_metrics + self.val_metrics: + for metric in self.metrics: name, value = metric.get() msg += '%s: %.4f, ' % (name, value) estimator.logger.info(msg.rstrip(', ')) @@ -307,7 +292,7 @@ def batch_end(self, estimator, *args, **kwargs): if self.batch_index % self.log_interval == 0: msg += 'time/interval: %.3fs ' % self.log_interval_time self.log_interval_time = 0 - for metric in self.train_metrics: + for metric in self.metrics: # only log current training loss & metric after each interval name, value = metric.get() msg += '%s: %.4f, ' % (name, value) @@ -316,15 +301,23 @@ def batch_end(self, estimator, *args, **kwargs): def epoch_begin(self, estimator, *args, **kwargs): if isinstance(self.log_interval, int) or self.log_interval == 'epoch': + is_training = False + # use the name hack defined in __init__() of estimator class + for metric in self.metrics: + if 'training' in metric.name: + is_training = True self.epoch_start = time.time() - estimator.logger.info("[Epoch %d] Begin, current learning rate: %.4f", - self.current_epoch, estimator.trainer.learning_rate) + if is_training: + estimator.logger.info("[Epoch %d] Begin, current learning rate: %.4f", + self.current_epoch, estimator.trainer.learning_rate) + else: + estimator.logger.info("Validation Begin") def epoch_end(self, estimator, *args, **kwargs): if isinstance(self.log_interval, int) or self.log_interval == 'epoch': epoch_time = time.time() - self.epoch_start msg = '[Epoch %d] Finished in %.3fs, ' % (self.current_epoch, epoch_time) - for monitor in self.train_metrics + self.val_metrics: + for monitor in self.metrics: name, value = monitor.get() msg += '%s: %.4f, ' % (name, value) estimator.logger.info(msg.rstrip(', ')) diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index dba3f122a9b6..924dd083bef4 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -63,7 +63,7 @@ def test_fit(): trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx) @@ -93,7 +93,7 @@ def test_validation(): trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = 
Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx, evaluation_loss=evaluation_loss) @@ -105,8 +105,7 @@ def test_validation(): # using validation handler train_metrics = est.train_metrics val_metrics = est.val_metrics - validation_handler = ValidationHandler(val_data=dataloader, eval_fn=est.evaluate, - val_metrics=val_metrics) + validation_handler = ValidationHandler(val_data=dataloader, eval_fn=est.evaluate) with assert_raises(ValueError): est.fit(train_data=dataiter, @@ -132,7 +131,7 @@ def test_initializer(): # no initializer est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, context=ctx) est.fit(train_data=train_data, epochs=num_epochs) @@ -145,7 +144,7 @@ def test_initializer(): with warnings.catch_warnings(record=True) as w: est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, initializer=mx.init.MSRAPrelu(), trainer=trainer, context=ctx) @@ -153,7 +152,7 @@ def test_initializer(): # net partially initialized, fine tuning use case net = gluon.model_zoo.vision.resnet18_v1(pretrained=True, ctx=ctx) net.output = gluon.nn.Dense(10) #last layer not initialized - est = Estimator(net, loss=loss, metrics=acc, context=ctx) + est = Estimator(net, loss=loss, train_metrics=acc, context=ctx) dataset = gluon.data.ArrayDataset(mx.nd.zeros((10, 3, 224, 224)), mx.nd.zeros((10, 10))) train_data = gluon.data.DataLoader(dataset=dataset, batch_size=5) est.fit(train_data=train_data, @@ -175,7 +174,7 @@ def test_trainer(): with warnings.catch_warnings(record=True) as w: est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, context=ctx) assert 'No trainer specified' in str(w[-1].message) est.fit(train_data=train_data, @@ -186,7 +185,7 @@ def test_trainer(): with assert_raises(ValueError): est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx) @@ -212,7 +211,7 @@ def test_metric(): metrics = [mx.metric.Accuracy(), mx.metric.Accuracy()] est = Estimator(net=net, loss=loss, - metrics=metrics, + train_metrics=metrics, trainer=trainer, context=ctx) est.fit(train_data=train_data, @@ -221,7 +220,7 @@ def test_metric(): with assert_raises(ValueError): est = Estimator(net=net, loss=loss, - metrics='acc', + train_metrics='acc', trainer=trainer, context=ctx) # test default metric @@ -244,7 +243,7 @@ def test_loss(): with assert_raises(ValueError): est = Estimator(net=net, loss='mse', - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx) @@ -257,26 +256,26 @@ def test_context(): # input no context est = Estimator(net=net, loss=loss, - metrics=metrics) + train_metrics=metrics) # input list of context gpus = mx.context.num_gpus() ctx = [mx.gpu(i) for i in range(gpus)] if gpus > 0 else [mx.cpu()] net = _get_test_network() est = Estimator(net=net, loss=loss, - metrics=metrics, + train_metrics=metrics, context=ctx) # input invalid context with assert_raises(ValueError): est = Estimator(net=net, loss=loss, - metrics=metrics, + train_metrics=metrics, context='cpu') with assert_raises(AssertionError): est = Estimator(net=net, loss=loss, - metrics=metrics, + train_metrics=metrics, context=[mx.gpu(0), mx.gpu(100)]) @@ -341,7 +340,7 @@ def test_default_handlers(): est = Estimator(net=net, loss=loss, - metrics=train_acc, + train_metrics=train_acc, trainer=trainer, context=ctx) # no handler(all default handlers), no warning @@ -352,18 +351,18 @@ def test_default_handlers(): # use mix of default and user defined handlers train_metrics = est.train_metrics val_metrics = 
est.val_metrics - logging = LoggingHandler(train_metrics=train_metrics, val_metrics=val_metrics) + logging = LoggingHandler(metrics=train_metrics) est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[logging]) # handler with all user defined metrics # use mix of default and user defined handlers - metric = MetricHandler(train_metrics=[train_acc]) - logging = LoggingHandler(train_metrics=[train_acc]) + metric = MetricHandler(metrics=[train_acc]) + logging = LoggingHandler(metrics=[train_acc]) est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[metric, logging]) # handler with mixed metrics, some handler use metrics prepared by estimator # some handler use metrics user prepared - logging = LoggingHandler(train_metrics=train_metrics, val_metrics=[mx.metric.RMSE("val acc")]) + logging = LoggingHandler(metrics=[mx.metric.RMSE("val acc")]) with assert_raises(ValueError): est.fit(train_data=train_data, epochs=num_epochs, event_handlers=[logging]) @@ -392,7 +391,7 @@ def test_eval_net(): trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx, evaluation_loss=evaluation_loss, @@ -410,7 +409,7 @@ def test_eval_net(): trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx, evaluation_loss=evaluation_loss, @@ -432,7 +431,7 @@ def test_eval_net(): trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=ctx, evaluation_loss=evaluation_loss, @@ -442,3 +441,29 @@ def test_eval_net(): val_data=dataloader, epochs=num_epochs) +def test_val_handlers(): + net = _get_test_network() + train_data, _ = _get_test_data() + val_data, _ = _get_test_data() + + num_epochs = 1 + ctx = mx.cpu() + net.initialize(ctx=ctx) + trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) + + train_acc = mx.metric.RMSE() + loss = gluon.loss.L2Loss() + + est = Estimator(net=net, + loss=loss, + train_metrics=train_acc, + trainer=trainer, + context=ctx) + + with warnings.catch_warnings(record=True) as w: + est.fit(train_data=train_data, epochs=num_epochs) + est.evaluate(val_data=val_data) + + logging = LoggingHandler(log_interval=1, metrics=est.val_metrics) + est.evaluate(val_data=val_data, event_handlers=[logging]) + diff --git a/tests/python/unittest/test_gluon_event_handler.py b/tests/python/unittest/test_gluon_event_handler.py index 658fb88f47e5..41b790102f62 100644 --- a/tests/python/unittest/test_gluon_event_handler.py +++ b/tests/python/unittest/test_gluon_event_handler.py @@ -54,7 +54,7 @@ def test_checkpoint_handler(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() - est = estimator.Estimator(net, loss=ce_loss, metrics=acc) + est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, monitor=acc, @@ -72,7 +72,7 @@ def test_checkpoint_handler(): file_path = os.path.join(tmpdir, model_prefix) net = _get_test_network(nn.HybridSequential()) net.hybridize() - est = estimator.Estimator(net, loss=ce_loss, metrics=acc) + est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, 
model_prefix=model_prefix, epoch_period=None, @@ -100,7 +100,7 @@ def test_resume_checkpoint(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() - est = estimator.Estimator(net, loss=ce_loss, metrics=acc) + est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) checkpoint_handler = event_handler.CheckpointHandler(model_dir=tmpdir, model_prefix=model_prefix, monitor=acc, @@ -125,7 +125,7 @@ def test_early_stopping(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() - est = estimator.Estimator(net, loss=ce_loss, metrics=acc) + est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) early_stopping = event_handler.EarlyStoppingHandler(monitor=acc, patience=0, mode='min') @@ -149,14 +149,13 @@ def test_logging(): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() - est = estimator.Estimator(net, loss=ce_loss, metrics=acc) + est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) est.logger.addHandler(logging.FileHandler(output_dir)) train_metrics = est.train_metrics val_metrics = est.val_metrics - logging_handler = event_handler.LoggingHandler(train_metrics=train_metrics, - val_metrics=val_metrics) + logging_handler = event_handler.LoggingHandler(metrics=train_metrics) est.fit(test_data, event_handlers=[logging_handler], epochs=3) assert logging_handler.batch_index == 0 assert logging_handler.current_epoch == 3 @@ -197,7 +196,7 @@ def epoch_end(self, estimator, *args, **kwargs): net = _get_test_network() ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() - est = estimator.Estimator(net, loss=ce_loss, metrics=acc) + est = estimator.Estimator(net, loss=ce_loss, train_metrics=acc) custom_handler = CustomStopHandler(3, 2) est.fit(test_data, event_handlers=[custom_handler], epochs=3) assert custom_handler.num_batch == 3 @@ -220,10 +219,10 @@ def test_logging_interval(): num_epochs = 1 ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() - logging = LoggingHandler(train_metrics=[acc], log_interval=log_interval) + logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, loss=ce_loss, - metrics=acc) + train_metrics=acc) est.fit(train_data=dataloader, epochs=num_epochs, @@ -245,10 +244,10 @@ def test_logging_interval(): sys.stdout = mystdout = StringIO() acc = mx.metric.Accuracy() log_interval = 5 - logging = LoggingHandler(train_metrics=[acc], log_interval=log_interval) + logging = LoggingHandler(metrics=[acc], log_interval=log_interval) est = estimator.Estimator(net=net, loss=ce_loss, - metrics=acc) + train_metrics=acc) est.fit(train_data=dataloader, epochs=num_epochs, event_handlers=[logging]) From 7895f93e67dc3e9da360f7a9c667e3c0f1e76c0f Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Wed, 11 Dec 2019 02:52:20 +0900 Subject: [PATCH 26/62] Replace mxnet_option macro with standard CMAKE_DEPENDENT_OPTION (#17018) --- CMakeLists.txt | 73 +++++++++++++++++++++---------------- cmake/Modules/FindMKL.cmake | 10 ++--- cmake/Utils.cmake | 41 --------------------- 3 files changed, 46 insertions(+), 78 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a8a3d5c512e..6a06ac548190 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,39 +18,48 @@ endif() include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Utils.cmake) +include(CMakeDependentOption) #Some things have order. 
This must be put in front alone -mxnet_option(USE_CUDA "Build with CUDA support" ON) -mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF) -mxnet_option(USE_NCCL "Use NVidia NCCL with CUDA" OFF) -mxnet_option(USE_OPENCV "Build with OpenCV support" ON) -mxnet_option(USE_OPENMP "Build with Openmp support" ON) -mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path -mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM) -mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON -mxnet_option(USE_LAPACK "Build with lapack support" ON) -mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON) -mxnet_option(USE_MKLDNN "Build with MKL-DNN support" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING)) -mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON IF NOT MSVC) -mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support" OFF) -mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON) -mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF) -mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF) -mxnet_option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF) -mxnet_option(USE_CPP_PACKAGE "Build C++ Package" OFF) -mxnet_option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON) -mxnet_option(USE_GPROF "Compile with gprof (profiling) flag" OFF) -mxnet_option(USE_CXX14_IF_AVAILABLE "Build with C++14 if the compiler supports it" OFF) -mxnet_option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path -mxnet_option(USE_TVM_OP "Enable use of TVM operator build system." OFF) -mxnet_option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON) -mxnet_option(BUILD_CPP_EXAMPLES "Build cpp examples" ON) -mxnet_option(INSTALL_EXAMPLES "Install the example source files." OFF) -mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON) -mxnet_option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF) -mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF) -mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF) -mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF) -mxnet_option(BUILD_CYTHON_MODULES "Build cython modules." 
OFF) +option(USE_CUDA "Build with CUDA support" ON) +option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF) +option(USE_NCCL "Use NVidia NCCL with CUDA" OFF) +option(USE_OPENCV "Build with OpenCV support" ON) +option(USE_OPENMP "Build with Openmp support" ON) +cmake_dependent_option(USE_CUDNN "Build with cudnn support" ON "USE_CUDA" OFF) # one could set CUDNN_ROOT for search path +cmake_dependent_option(USE_SSE "Build with x86 SSE instruction support" ON "NOT ARM" OFF) +option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON +option(USE_LAPACK "Build with lapack support" ON) +option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON) +if(USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING)) + option(USE_MKLDNN "Build with MKL-DNN support" ON) +else() + option(USE_MKLDNN "Build with MKL-DNN support" OFF) +endif() +if(NOT MSVC) + option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON) +else() + option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" OFF) +endif() +option(USE_GPERFTOOLS "Build with GPerfTools support" OFF) +option(USE_JEMALLOC "Build with Jemalloc support" ON) +option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF) +option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF) +option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF) +option(USE_CPP_PACKAGE "Build C++ Package" OFF) +option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON) +option(USE_GPROF "Compile with gprof (profiling) flag" OFF) +option(USE_CXX14_IF_AVAILABLE "Build with C++14 if the compiler supports it" OFF) +option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path +option(USE_TVM_OP "Enable use of TVM operator build system." OFF) +option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON) +option(BUILD_CPP_EXAMPLES "Build cpp examples" ON) +option(INSTALL_EXAMPLES "Install the example source files." OFF) +option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON) +option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF) +option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF) +option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF) +option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF) +option(BUILD_CYTHON_MODULES "Build cython modules." 
OFF) message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}") message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}") diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake index 51fca23c1161..51eff8fe09aa 100644 --- a/cmake/Modules/FindMKL.cmake +++ b/cmake/Modules/FindMKL.cmake @@ -45,11 +45,11 @@ set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") # ---[ Options - mxnet_option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON) - mxnet_option(MKL_USE_STATIC_LIBS "Use static libraries" OFF IF NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY) - mxnet_option(MKL_MULTI_THREADED "Use multi-threading" ON IF NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY) - mxnet_option(MKL_USE_ILP64 "Use ilp64 data model" OFF) - mxnet_option(MKL_USE_CLUSTER "Use cluster functions" OFF IF CMAKE_SIZEOF_VOID_P EQUAL 4) + option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON) + cmake_dependent_option(MKL_USE_STATIC_LIBS "Use static libraries" OFF "NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY" OFF) + cmake_dependent_option(MKL_MULTI_THREADED "Use multi-threading" ON "NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY" OFF) + option(MKL_USE_ILP64 "Use ilp64 data model" OFF) + cmake_dependent_option(MKL_USE_CLUSTER "Use cluster functions" OFF "CMAKE_SIZEOF_VOID_P EQUAL 4" OFF) find_path(MKL_ROOT include/mkl.h PATHS $ENV{MKL_ROOT} ${INTEL_ROOT}/mkl DOC "Folder contains MKL") diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index 6b427db85ec9..294e7cf4cc5c 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -179,47 +179,6 @@ function(mxnet_parse_header_single_define LIBNAME HDR_PATH VARNAME) endif() endfunction() -######################################################################################################## -# An option that the user can select. Can accept condition to control when option is available for user. -# Usage: -# mxnet_option( "doc string" [IF ]) -function(mxnet_option variable description value) - set(__value ${value}) - set(__condition "") - set(__varname "__value") - foreach(arg ${ARGN}) - if(arg STREQUAL "IF" OR arg STREQUAL "if") - set(__varname "__condition") - else() - list(APPEND ${__varname} ${arg}) - endif() - endforeach() - unset(__varname) - if("${__condition}" STREQUAL "") - set(__condition 2 GREATER 1) - endif() - - if(${__condition}) - if("${__value}" MATCHES ";") - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - elseif(DEFINED ${__value}) - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - else() - option(${variable} "${description}" ${__value}) - endif() - else() - option(${variable} "${description}" OFF) - endif() -endfunction() - ################################################################################################ # Utility macro for comparing two lists. 
Used for CMake debugging purposes # Usage: From c31ee9954f4add59c3c3c38e44d304d52d6ee78d Mon Sep 17 00:00:00 2001 From: Chris Olivier Date: Tue, 10 Dec 2019 20:55:31 -0800 Subject: [PATCH 27/62] Fix omp assert issue (#17039) --- CMakeLists.txt | 17 +++++++++++------ src/engine/openmp.cc | 7 +++++++ src/engine/openmp.h | 7 +++++++ src/initialize.cc | 2 ++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a06ac548190..1be19c577ae3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,7 +249,7 @@ if(USE_TENSORRT) endif() # please note that when you enable this, you might run into an linker not being able to work properly due to large code injection. -# you can find more information here https://github.com/apache/incubator-mxnet/issues/15971 +# you can find more information here https://github.com/apache/incubator-mxnet/issues/15971 if(ENABLE_TESTCOVERAGE) message(STATUS "Compiling with test coverage support enabled. This will result in additional files being written to your source directory!") find_program( GCOV_PATH gcov ) @@ -445,6 +445,15 @@ endif() # ---[ OpenMP if(USE_OPENMP) + + function(load_omp) + # Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp + set(OPENMP_STANDALONE_BUILD TRUE) + set(LIBOMP_ENABLE_SHARED TRUE) + set(CMAKE_BUILD_TYPE Release) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp) + endfunction() + find_package(OpenMP REQUIRED) # This should build on Windows, but there's some problem and I don't have a Windows box, so # could a Windows user please fix? @@ -452,11 +461,7 @@ if(USE_OPENMP) AND SYSTEM_ARCHITECTURE STREQUAL "x86_64" AND NOT MSVC AND NOT CMAKE_CROSSCOMPILING) - - # Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp - set(OPENMP_STANDALONE_BUILD TRUE) - set(LIBOMP_ENABLE_SHARED TRUE) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp) + load_omp() list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5) list(APPEND mxnet_LINKER_LIBS omp) if(UNIX) diff --git a/src/engine/openmp.cc b/src/engine/openmp.cc index 8fe3939892d2..98fbc407fce8 100644 --- a/src/engine/openmp.cc +++ b/src/engine/openmp.cc @@ -41,6 +41,7 @@ OpenMP *OpenMP::Get() { OpenMP::OpenMP() : omp_num_threads_set_in_environment_(is_env_set("OMP_NUM_THREADS")) { #ifdef _OPENMP + initialize_process(); const int max = dmlc::GetEnv("MXNET_OMP_MAX_THREADS", INT_MIN); if (max != INT_MIN) { omp_thread_max_ = max; @@ -61,6 +62,12 @@ OpenMP::OpenMP() #endif } +void OpenMP:: initialize_process() { +#ifdef _OPENMP + omp_get_num_procs(); // will force OpenMP to be initialized +#endif +} + void OpenMP::on_start_worker_thread(bool use_omp) { #ifdef _OPENMP if (!omp_num_threads_set_in_environment_) { diff --git a/src/engine/openmp.h b/src/engine/openmp.h index 800ea2f91b62..94b83e3aa25b 100644 --- a/src/engine/openmp.h +++ b/src/engine/openmp.h @@ -74,6 +74,13 @@ class OpenMP { */ void on_start_worker_thread(bool use_omp); + /*! + * \brief Initialize a new process to use omp (after a fork, + * in case you're starting threads in the atfork() that may interfere + * with the initialization. Can serialize the init with this first. + */ + void initialize_process(); + /*! 
* \brief Get the OpenMP object's singleton pointer * \return Singleton OpenMP object pointer diff --git a/src/initialize.cc b/src/initialize.cc index 071e8d32e548..a3dbce22a384 100644 --- a/src/initialize.cc +++ b/src/initialize.cc @@ -209,6 +209,7 @@ void LibraryInitializer::atfork_child() { #if MXNET_USE_OPENCV && !__APPLE__ cv::setNumThreads(mp_cv_num_threads_); #endif // MXNET_USE_OPENCV + engine::OpenMP::Get()->initialize_process(); engine::OpenMP::Get()->set_thread_max(1); engine::OpenMP::Get()->set_enabled(false); Engine::Get()->Start(); @@ -218,6 +219,7 @@ void LibraryInitializer::atfork_child() { void LibraryInitializer::install_pthread_atfork_handlers() { #ifndef _WIN32 + engine::OpenMP::Get()->initialize_process(); // force omp to set its atfork handler first pthread_atfork(pthread_atfork_prepare, pthread_atfork_parent, pthread_atfork_child); #endif } From 9634786f96388004f68c223d72e120ad425c2f12 Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Wed, 11 Dec 2019 13:06:59 +0800 Subject: [PATCH 28/62] mshadow: fix vector access (#17021) --- 3rdparty/mshadow/mshadow/dot_engine-inl.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/3rdparty/mshadow/mshadow/dot_engine-inl.h b/3rdparty/mshadow/mshadow/dot_engine-inl.h index 21816f209e40..1a02eb9a39d8 100644 --- a/3rdparty/mshadow/mshadow/dot_engine-inl.h +++ b/3rdparty/mshadow/mshadow/dot_engine-inl.h @@ -312,12 +312,9 @@ struct BLASEngine { CBLAS_TRANSPOSE p_transa[GROUP_SIZE] = {cblas_a_trans}; CBLAS_TRANSPOSE p_transb[GROUP_SIZE] = {cblas_b_trans}; - std::vector pp_A; - std::vector pp_B; - std::vector pp_C; - pp_A.reserve(batch_count); - pp_B.reserve(batch_count); - pp_C.reserve(batch_count); + std::vector pp_A(batch_count, nullptr); + std::vector pp_B(batch_count, nullptr); + std::vector pp_C(batch_count, nullptr); auto m_k = m * k; auto k_n = k * n; From 27389b1046770e34aac86bd3b18564e4e6283414 Mon Sep 17 00:00:00 2001 From: liuzh91 Date: Wed, 11 Dec 2019 13:30:10 +0800 Subject: [PATCH 29/62] Fix Gluon Estimator nightly test (#17042) --- tests/nightly/estimator/test_estimator_cnn.py | 4 ++-- tests/nightly/estimator/test_sentiment_rnn.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/nightly/estimator/test_estimator_cnn.py b/tests/nightly/estimator/test_estimator_cnn.py index 4a3bb2076524..af519536dbed 100644 --- a/tests/nightly/estimator/test_estimator_cnn.py +++ b/tests/nightly/estimator/test_estimator_cnn.py @@ -116,7 +116,7 @@ def test_estimator_cpu(): # Define estimator est = estimator.Estimator(net=net, loss=loss, - metrics=mx.metric.Accuracy(), + train_metrics=mx.metric.Accuracy(), trainer=trainer, context=context) # Call fit() @@ -145,7 +145,7 @@ def test_estimator_gpu(): # Define estimator est = estimator.Estimator(net=net, loss=loss, - metrics=acc, + train_metrics=acc, trainer=trainer, context=context) # Call fit() diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py index 233355b7ebfd..ab124ba95db3 100644 --- a/tests/nightly/estimator/test_sentiment_rnn.py +++ b/tests/nightly/estimator/test_sentiment_rnn.py @@ -197,7 +197,7 @@ def run(net, train_dataloader, test_dataloader, num_epochs, ctx, lr): nested_metrics.add([metrics, mx.metric.Accuracy()]) # Define estimator - est = estimator.Estimator(net=net, loss=loss, metrics=nested_metrics, + est = estimator.Estimator(net=net, loss=loss, train_metrics=nested_metrics, trainer=trainer, context=ctx) # Begin training est.fit(train_data=train_dataloader, val_data=test_dataloader, 
From c82af38211dbf8356a4f3b35f023632c5bf880ae Mon Sep 17 00:00:00 2001 From: liuzh91 Date: Wed, 11 Dec 2019 19:14:59 +0800 Subject: [PATCH 30/62] Add support of plug and play fit_batch and evaluate_batch (#16982) * Add support of plug and play fit_batch and evaluate_batch * Add check for the validity of the estimator model * Rename estimator model as batch processor * Remove unused import * Add documentation of the batch processor class * refine the documentation of the batch processor * Fix merge bugs * fix bugs introduced during merge * fix sanity check failures * fix CI bugs --- .../mxnet/gluon/contrib/estimator/__init__.py | 2 + .../contrib/estimator/batch_processor.py | 105 ++++++++++++++++ .../gluon/contrib/estimator/estimator.py | 84 ++++--------- .../unittest/test_gluon_batch_processor.py | 117 ++++++++++++++++++ 4 files changed, 248 insertions(+), 60 deletions(-) create mode 100644 python/mxnet/gluon/contrib/estimator/batch_processor.py create mode 100644 tests/python/unittest/test_gluon_batch_processor.py diff --git a/python/mxnet/gluon/contrib/estimator/__init__.py b/python/mxnet/gluon/contrib/estimator/__init__.py index bb0a0917c363..5ffd603b426c 100644 --- a/python/mxnet/gluon/contrib/estimator/__init__.py +++ b/python/mxnet/gluon/contrib/estimator/__init__.py @@ -19,5 +19,7 @@ """Gluon Estimator Module""" from . import estimator from . import event_handler +from . import batch_processor from .estimator import * from .event_handler import * +from .batch_processor import * diff --git a/python/mxnet/gluon/contrib/estimator/batch_processor.py b/python/mxnet/gluon/contrib/estimator/batch_processor.py new file mode 100644 index 000000000000..4985f8c81bf3 --- /dev/null +++ b/python/mxnet/gluon/contrib/estimator/batch_processor.py @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +# pylint: disable=wildcard-import, unused-argument, too-many-ancestors +"""Gluon Batch Processor for Estimators""" + +from ...utils import split_and_load +from .... import autograd + +__all__ = ['BatchProcessor'] + +class BatchProcessor(object): + """BatchProcessor Class for plug and play fit_batch & evaluate_batch + + During training or validation, data are divided into minibatches for processing. This + class aims at providing hooks of training or validating on a minibatch of data. Users + may provide customized fit_batch() and evaluate_batch() methods by inheriting from + this class and overriding class methods. 
+ + :py:class:`BatchProcessor` can be used to replace fit_batch() and evaluate_batch() + in the base estimator class + """ + + def __init__(self): + pass + + def _get_data_and_label(self, batch, ctx, batch_axis=0): + data = batch[0] + label = batch[1] + data = split_and_load(data, ctx_list=ctx, batch_axis=batch_axis) + label = split_and_load(label, ctx_list=ctx, batch_axis=batch_axis) + return data, label + + def evaluate_batch(self, estimator, + val_batch, + batch_axis=0): + """Evaluate the estimator model on a batch of validation data. + + Parameters + ---------- + estimator : Estimator + Reference to the estimator + val_batch : tuple + Data and label of a batch from the validation data loader. + batch_axis : int, default 0 + Batch axis to split the validation data into devices. + """ + data, label = self._get_data_and_label(val_batch, estimator.context, batch_axis) + pred = [estimator.eval_net(x) for x in data] + loss = [estimator.evaluation_loss(y_hat, y) for y_hat, y in zip(pred, label)] + + return data, label, pred, loss + + def fit_batch(self, estimator, + train_batch, + batch_axis=0): + """Trains the estimator model on a batch of training data. + + Parameters + ---------- + estimator : Estimator + Reference to the estimator + train_batch : tuple + Data and label of a batch from the training data loader. + batch_axis : int, default 0 + Batch axis to split the training data into devices. + + Returns + ------- + data: List of NDArray + Sharded data from the batch. Data is sharded with + `gluon.split_and_load`. + label: List of NDArray + Sharded label from the batch. Labels are sharded with + `gluon.split_and_load`. + pred: List of NDArray + Prediction on each of the sharded inputs. + loss: List of NDArray + Loss on each of the sharded inputs. + """ + data, label = self._get_data_and_label(train_batch, estimator.context, batch_axis) + + with autograd.record(): + pred = [estimator.net(x) for x in data] + loss = [estimator.loss(y_hat, y) for y_hat, y in zip(pred, label)] + + for l in loss: + l.backward() + + return data, label, pred, loss diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index 0d9ab9ec0a02..09f43151e235 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -32,9 +32,9 @@ from ...loss import Loss as gluon_loss from ...trainer import Trainer from ...utils import split_and_load -from .... import autograd from ....context import Context, cpu, gpu, num_gpus from ....metric import Loss as metric_loss +from .batch_processor import BatchProcessor __all__ = ['Estimator'] @@ -84,7 +84,8 @@ class Estimator(object): the naming in mxnet Gluon API, please refer to the site (https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/naming.html) for future information. 
- + batch_processor: BatchProcessor + BatchProcessor provides customized fit_batch() and evaluate_batch() methods """ logger = None @@ -113,7 +114,8 @@ def __init__(self, net, trainer=None, context=None, evaluation_loss=None, - eval_net=None): + eval_net=None, + batch_processor=None): self.net = net self.loss = self._check_loss(loss) self._train_metrics = _check_metrics(train_metrics) @@ -133,6 +135,7 @@ def __init__(self, net, self.context = self._check_context(context) self._initialize(initializer) self.trainer = self._check_trainer(trainer) + self.batch_processor = self._check_batch_processor(batch_processor) def _check_loss(self, loss): if not isinstance(loss, gluon_loss): @@ -173,6 +176,18 @@ def _check_context(self, context): context = [cpu()] return context + def _check_batch_processor(self, batch_processor): + # check whether the batch processor contains fit_batch() and evaluate_batch() methods + if batch_processor is not None: + model_fit = getattr(batch_processor, 'fit_batch', None) + model_evaluate = getattr(batch_processor, 'evaluate_batch', None) + if not callable(model_fit) or not callable(model_evaluate): + raise ValueError('Customized Batch Processor must contain fit_batch()' + ' and evaluate_batch() methods') + else: + batch_processor = BatchProcessor() + return batch_processor + def _initialize(self, initializer): # initialize the network if not self._is_initialized(): @@ -254,24 +269,6 @@ def train_metrics(self): def val_metrics(self): return self._val_metrics - def evaluate_batch(self, - val_batch, - batch_axis=0): - """Evaluate model on a batch of validation data. - - Parameters - ---------- - val_batch : tuple - Data and label of a batch from the validation data loader. - batch_axis : int, default 0 - Batch axis to split the validation data into devices. - """ - data, label = self._get_data_and_label(val_batch, self.context, batch_axis) - pred = [self.eval_net(x) for x in data] - loss = [self.evaluation_loss(y_hat, y) for y_hat, y in zip(pred, label)] - - return data, label, pred, loss - def evaluate(self, val_data, batch_axis=0, @@ -300,6 +297,7 @@ def evaluate(self, for metric in self.val_metrics: metric.reset() + estimator_ref = self event_handlers = self._prepare_default_validation_handlers(event_handlers) @@ -315,7 +313,9 @@ def evaluate(self, for handler in batch_begin: handler.batch_begin(estimator_ref, batch=batch) - _, label, pred, loss = self.evaluate_batch(batch, batch_axis) + _, label, pred, loss = \ + self.batch_processor.evaluate_batch(estimator_ref, batch, + batch_axis) for handler in batch_end: handler.batch_end(estimator_ref, batch=batch, pred=pred, label=label, loss=loss) @@ -323,42 +323,6 @@ def evaluate(self, for handler in epoch_end: handler.epoch_end(estimator_ref) - def fit_batch(self, train_batch, batch_axis=0): - """Trains the model on a batch of training data. - - Parameters - ---------- - train_batch : tuple - Data and label of a batch from the training data loader. - batch_axis : int, default 0 - Batch axis to split the training data into devices. - - Returns - ------- - data: List of NDArray - Sharded data from the batch. Data is sharded with - `gluon.split_and_load`. - label: List of NDArray - Sharded label from the batch. Labels are sharded with - `gluon.split_and_load`. - pred: List of NDArray - Prediction on each of the sharded inputs. - loss: List of NDArray - Loss on each of the sharded inputs. 
- """ - data, label = self._get_data_and_label(train_batch, self.context, batch_axis) - - batch_size = train_batch[0].shape[batch_axis] - - with autograd.record(): - pred = [self.net(x) for x in data] - loss = [self.loss(y_hat, y) for y_hat, y in zip(pred, label)] - - for l in loss: - l.backward() - - return data, label, pred, loss - def fit(self, train_data, val_data=None, epochs=None, @@ -432,8 +396,8 @@ def fit(self, train_data, for handler in batch_begin: handler.batch_begin(estimator_ref, batch=batch) - _, label, pred, loss = self.fit_batch(batch, batch_axis) - + _, label, pred, loss = self.batch_processor.fit_batch(estimator_ref, + batch, batch_axis) # batch end batch_end_result = [] diff --git a/tests/python/unittest/test_gluon_batch_processor.py b/tests/python/unittest/test_gluon_batch_processor.py new file mode 100644 index 000000000000..4bd6f769aa44 --- /dev/null +++ b/tests/python/unittest/test_gluon_batch_processor.py @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +''' Unit tests for Gluon Batch Processor ''' + +import sys +import unittest +import warnings + +import mxnet as mx +from mxnet import gluon +from mxnet.gluon import nn +from mxnet.gluon.contrib.estimator import * +from mxnet.gluon.contrib.estimator.event_handler import * +from mxnet.gluon.contrib.estimator.batch_processor import BatchProcessor +from nose.tools import assert_raises + +def _get_test_network(): + net = nn.Sequential() + net.add(nn.Dense(4, activation='relu', flatten=False)) + return net + + +def _get_test_data(): + batch_size = 4 + in_data = mx.nd.random.uniform(shape=(10, 3)) + out_data = mx.nd.random.uniform(shape=(10, 4)) + # Input dataloader + dataset = gluon.data.dataset.ArrayDataset(in_data, out_data) + dataloader = gluon.data.DataLoader(dataset, batch_size=batch_size) + dataiter = mx.io.NDArrayIter(data=in_data, label=out_data, batch_size=batch_size) + return dataloader, dataiter + +def test_batch_processor_fit(): + ''' test estimator with different train data types ''' + net = _get_test_network() + dataloader, dataiter = _get_test_data() + num_epochs = 1 + ctx = mx.cpu() + loss = gluon.loss.L2Loss() + acc = mx.metric.Accuracy() + net.initialize(ctx=ctx) + processor = BatchProcessor() + trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) + est = Estimator(net=net, + loss=loss, + train_metrics=acc, + trainer=trainer, + context=ctx, + batch_processor=processor) + + est.fit(train_data=dataloader, + epochs=num_epochs) + + with assert_raises(ValueError): + est.fit(train_data=dataiter, + epochs=num_epochs) + + # Input NDArray + with assert_raises(ValueError): + est.fit(train_data=[mx.nd.ones(shape=(10, 3))], + epochs=num_epochs) + + +def test_batch_processor_validation(): + ''' test different validation data types''' + net 
= _get_test_network() + dataloader, dataiter = _get_test_data() + num_epochs = 1 + ctx = mx.cpu() + loss = gluon.loss.L2Loss() + acc = mx.metric.Accuracy() + evaluation_loss = gluon.loss.L1Loss() + net.initialize(ctx=ctx) + processor = BatchProcessor() + trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) + est = Estimator(net=net, + loss=loss, + train_metrics=acc, + trainer=trainer, + context=ctx, + evaluation_loss=evaluation_loss, + batch_processor=processor) + # Input dataloader + est.fit(train_data=dataloader, + val_data=dataloader, + epochs=num_epochs) + + # using validation handler + train_metrics = est.train_metrics + val_metrics = est.val_metrics + validation_handler = ValidationHandler(val_data=dataloader, eval_fn=est.evaluate) + + with assert_raises(ValueError): + est.fit(train_data=dataiter, + val_data=dataiter, + epochs=num_epochs) + # Input NDArray + with assert_raises(ValueError): + est.fit(train_data=[mx.nd.ones(shape=(10, 3))], + val_data=[mx.nd.ones(shape=(10, 3))], + epochs=num_epochs) + From 04ebe45aed44433c8527fc9c1fcaf5d2662c5063 Mon Sep 17 00:00:00 2001 From: Pedro Larroy Date: Wed, 11 Dec 2019 12:30:59 -0800 Subject: [PATCH 31/62] Prevent after-fork number of OMP threads being bigger than 1. (#16999) * Prevent after-fork number of OMP threads being bigger than 1. This could happen if it was set in the environment. As we are setting engine::OpenMP::Get()->set_enabled(false) in initialize.cc in the child after forking, the behaviour goes back to what it was before #15762 was introduced. Regions using omp get the threads count from GetRecommendedOMPThreadCount, so if omp is disabled they will get 1 thread and run serially * add C++ unit test * Add comment --- src/engine/openmp.cc | 10 +++--- tests/cpp/engine/omp_test.cc | 50 ++++++++++++++++++++++++++++ tests/python/unittest/test_engine.py | 41 +++++++++++++++++++++++ 3 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 tests/cpp/engine/omp_test.cc diff --git a/src/engine/openmp.cc b/src/engine/openmp.cc index 98fbc407fce8..0d31f71aa9a3 100644 --- a/src/engine/openmp.cc +++ b/src/engine/openmp.cc @@ -90,10 +90,11 @@ void OpenMP::set_reserve_cores(int cores) { int OpenMP::GetRecommendedOMPThreadCount(bool exclude_reserved) const { #ifdef _OPENMP - if (omp_num_threads_set_in_environment_) { - return omp_get_max_threads(); - } if (enabled_) { + // OMP_NUM_THREADS was set in the environment at the time of static initialization + if (omp_num_threads_set_in_environment_) { + return omp_get_max_threads(); + } int thread_count = omp_get_max_threads(); if (exclude_reserved) { if (reserve_cores_ >= thread_count) { @@ -107,8 +108,9 @@ int OpenMP::GetRecommendedOMPThreadCount(bool exclude_reserved) const { return thread_count; } return omp_thread_max_; + } else { + return 1; } - return 1; #else return 1; #endif diff --git a/tests/cpp/engine/omp_test.cc b/tests/cpp/engine/omp_test.cc new file mode 100644 index 000000000000..2be7d9d0307c --- /dev/null +++ b/tests/cpp/engine/omp_test.cc @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include "../include/test_util.h" +#include "../../src/engine/openmp.h" + +#if defined(unix) || defined(__unix__) || defined(__unix) +#include +#include +#include + + +TEST(OMPBehaviour, after_fork) { + /* + * Check that after fork, OMP is disabled, and the recommended thread count is 1 to prevent + * process fanout. + */ + using namespace mxnet::engine; + auto openmp = OpenMP::Get(); + pid_t pid = fork(); + if (pid == 0) { + EXPECT_FALSE(openmp->enabled()); + EXPECT_EQ(openmp->GetRecommendedOMPThreadCount(), 1); + } else if (pid > 0) { + int status; + int ret = waitpid(pid, &status, 0); + CHECK_EQ(ret, pid) << "waitpid failed"; + } else { + CHECK(false) << "fork failed"; + } +} +#endif diff --git a/tests/python/unittest/test_engine.py b/tests/python/unittest/test_engine.py index 29b7b822b3ef..61d94ddbf4ec 100644 --- a/tests/python/unittest/test_engine.py +++ b/tests/python/unittest/test_engine.py @@ -17,6 +17,9 @@ import nose import mxnet as mx +import os +import unittest +from mxnet.test_utils import EnvManager def test_bulk(): with mx.engine.bulk(10): @@ -30,6 +33,44 @@ def test_bulk(): x += 1 assert (x.asnumpy() == 104).all() +@unittest.skip("OMP platform dependent") +def test_engine_openmp_after_fork(): + """ + Test that the number of max threads in the child is 1. After forking we should not use a bigger + OMP thread pool. + + With GOMP the child always has the same number when calling omp_get_max_threads, with LLVM OMP + the child respects the number of max threads set in the parent. 
+ """ + with EnvManager('OMP_NUM_THREADS', '42'): + r, w = os.pipe() + pid = os.fork() + if pid: + os.close(r) + wfd = os.fdopen(w, 'w') + wfd.write('a') + omp_max_threads = mx.base._LIB.omp_get_max_threads() + print("Parent omp max threads: {}".format(omp_max_threads)) + try: + wfd.close() + except: + pass + try: + (cpid, status) = os.waitpid(pid, 0) + assert cpid == pid + exit_status = status >> 8 + assert exit_status == 0 + except: + pass + else: + os.close(w) + rfd = os.fdopen(r, 'r') + rfd.read(1) + omp_max_threads = mx.base._LIB.omp_get_max_threads() + print("Child omp max threads: {}".format(omp_max_threads)) + assert omp_max_threads == 1 + + if __name__ == '__main__': import nose From 05af5c40bfb82fb534a5be2d6a9fe33ad4836008 Mon Sep 17 00:00:00 2001 From: Haibin Lin Date: Wed, 11 Dec 2019 15:18:09 -0800 Subject: [PATCH 32/62] [BUGFIX] Fix race condition in kvstore.pushpull (#17007) * add back gluon test * fix typo * change back gpu ctx * also handle the case there some are pull and some are pushpull * fix typo --- src/kvstore/kvstore_dist_server.h | 35 ++++++++++++++++------- tests/nightly/dist_device_sync_kvstore.py | 35 ++++++++++++----------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/kvstore/kvstore_dist_server.h b/src/kvstore/kvstore_dist_server.h index 65ded79743e4..1dc222c0d7da 100644 --- a/src/kvstore/kvstore_dist_server.h +++ b/src/kvstore/kvstore_dist_server.h @@ -364,21 +364,34 @@ class KVStoreDistServer { if (log_verbose_) { LOG(INFO) << "sent response to " << update_buf->request.size() << " workers"; } + /** + * Request can be for either push, pull or pushpull + * If pull flag is set, respond immediately with the updated values + * Otherwise, only send the notification + */ + bool has_pull = false; for (const auto& req : update_buf->request) { - /** - * Request can be for either push, pull or pushpull - * If pull flag is set, respond immediately with the updated values - * Otherwise, only send the notification - */ - if (req.pull) { - DefaultStorageResponse(type, key, req, req_data, server); - } else { + has_pull = has_pull || req.pull; + } + if (has_pull) { + // if there is a pull request, perform WaitToRead() once before DefaultStorageResponse + if (has_multi_precision_copy(type)) CopyFromTo(stored, store_[key]); + stored.WaitToRead(); + for (const auto& req : update_buf->request) { + if (req.pull) { + DefaultStorageResponse(type, key, req, req_data, server); + } + } + update_buf->request.clear(); + } else { + // otherwise, send response directly + for (const auto& req : update_buf->request) { server->Response(req); } + update_buf->request.clear(); + if (has_multi_precision_copy(type)) CopyFromTo(stored, store_[key]); + stored.WaitToRead(); } - update_buf->request.clear(); - if (has_multi_precision_copy(type)) CopyFromTo(stored, store_[key]); - stored.WaitToRead(); } else { update_buf->merged.WaitToRead(); } diff --git a/tests/nightly/dist_device_sync_kvstore.py b/tests/nightly/dist_device_sync_kvstore.py index dc2c7bc35747..f3fe737f5653 100644 --- a/tests/nightly/dist_device_sync_kvstore.py +++ b/tests/nightly/dist_device_sync_kvstore.py @@ -44,7 +44,10 @@ def check_diff_to_scalar(A, x, rank=None): def init_kv(): # init kv dns keys kv.init(keys, [mx.nd.ones(shape)] * len(keys)) + kv.init('9', mx.nd.ones(shape)) + kv.init('10', mx.nd.ones(shape)) kv.init('99', mx.nd.ones(big_shape)) + kv.init('100', mx.nd.ones(big_shape)) # worker info my_rank = kv.rank nworker = kv.num_workers @@ -55,33 +58,30 @@ def init_kv(): def test_sync_push_pull(): kv, 
my_rank, nworker = init_kv() num_gpus = 2 - def check_default_keys(kv, my_rank, nworker, nrepeat=3, offset=0, use_pushpull=False): + def check_default_keys(kv, my_rank, nworker, nrepeat=3): # checks pull after push in loop, because behavior during # consecutive pushes doesn't offer any guarantees - for i in range(offset, nrepeat): + for i in range(nrepeat): scale = my_rank + 1 num = (nworker + 1) * nworker * rate * num_gpus / 2 * (i + 1) + 1 arr = [mx.nd.ones(shape, ctx=mx.gpu(j)) * scale for j in range(num_gpus)] val = mx.nd.zeros(shape) - if use_pushpull: - kv.pushpull('3', arr, out=val) - else: - kv.push('3', arr) - kv.pull('3', out=val) + kv.push('9', arr) + kv.pull('9', out=val) + check_diff_to_scalar(val, num) + kv.pushpull('10', arr, out=val) check_diff_to_scalar(val, num) big_arr = [mx.nd.ones(big_shape, ctx=mx.gpu(j)) * scale for j in range(num_gpus)] big_val = mx.nd.zeros(big_shape) - if use_pushpull: - kv.pushpull('99', big_arr, out=big_val) - else: - kv.push('99', big_arr) - kv.pull('99', out=big_val) + kv.push('99', big_arr) + kv.pull('99', out=big_val) + check_diff_to_scalar(big_val, num) + kv.pushpull('100', big_arr, out=big_val) check_diff_to_scalar(big_val, num) - check_default_keys(kv, my_rank, nworker, nrepeat=3, offset=0, use_pushpull=False) - check_default_keys(kv, my_rank, nworker, nrepeat=3, offset=3, use_pushpull=True) + check_default_keys(kv, my_rank, nworker, nrepeat=3) print('worker ' + str(my_rank) + ' is done') def test_sync_init(): @@ -106,10 +106,12 @@ def check_trainer_kv_update(update_on_kv): x = params.get('x', shape=(10,1), lr_mult=1.0) params.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros') try: - trainer = mx.gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, kvstore=kv, update_on_kvstore=update_on_kv) + trainer = mx.gluon.Trainer(params, 'sgd', {'learning_rate': 0.1}, + kvstore=kv, update_on_kvstore=update_on_kv) trainer._init_kvstore() assert trainer._kv_initialized - assert trainer._update_on_kvstore is True + if update_on_kv is not None: + assert trainer._update_on_kvstore is update_on_kv except ValueError: assert update_on_kv is False @@ -122,3 +124,4 @@ def check_trainer_kv_update(update_on_kv): if __name__ == "__main__": test_sync_init() test_sync_push_pull() + test_gluon_trainer_type() From 634f95e2431ec107f0e5182a60db27e3a7dd9545 Mon Sep 17 00:00:00 2001 From: Lanking Date: Wed, 11 Dec 2019 16:12:04 -0800 Subject: [PATCH 33/62] swap wget to curl in Scala scripts (#17041) * swap wget to curl * fix cifar10 --- scala-package/core/scripts/get_cifar_data.sh | 2 +- scala-package/core/scripts/get_mnist_data.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scala-package/core/scripts/get_cifar_data.sh b/scala-package/core/scripts/get_cifar_data.sh index b061c1895e4a..a5cdcda419b7 100755 --- a/scala-package/core/scripts/get_cifar_data.sh +++ b/scala-package/core/scripts/get_cifar_data.sh @@ -32,7 +32,7 @@ fi cifar_data_path="$data_path/cifar10.zip" if [ ! -f "$cifar_data_path" ]; then - wget http://data.mxnet.io/mxnet/data/cifar10.zip -P $data_path + curl -L -o $cifar_data_path http://data.mxnet.io/mxnet/data/cifar10.zip cd $data_path unzip -u cifar10.zip fi diff --git a/scala-package/core/scripts/get_mnist_data.sh b/scala-package/core/scripts/get_mnist_data.sh index ded206fbb134..a7be96a9f401 100755 --- a/scala-package/core/scripts/get_mnist_data.sh +++ b/scala-package/core/scripts/get_mnist_data.sh @@ -32,7 +32,7 @@ fi mnist_data_path="$data_path/mnist.zip" if [ ! 
-f "$mnist_data_path" ]; then - wget http://data.mxnet.io/mxnet/data/mnist.zip -P $data_path + curl -L -o $mnist_data_path http://data.mxnet.io/mxnet/data/mnist.zip cd $data_path unzip -u mnist.zip fi From 9092f17c909ae9f2af049d7787cc19015ee624d0 Mon Sep 17 00:00:00 2001 From: Minghao Liu <40382964+Tommliu@users.noreply.github.com> Date: Fri, 13 Dec 2019 05:52:38 +0800 Subject: [PATCH 34/62] array_split pr (#17032) --- python/mxnet/ndarray/numpy/_op.py | 71 ++++++++++++++++++- python/mxnet/numpy/multiarray.py | 56 ++++++++++++++- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/_symbol.py | 54 +++++++++++++- src/operator/tensor/matrix_op-inl.h | 10 ++- .../unittest/test_numpy_interoperability.py | 13 ++++ tests/python/unittest/test_numpy_op.py | 57 +++++++++++++++ 7 files changed, 252 insertions(+), 10 deletions(-) diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 538d5202942d..c111a95a707a 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -32,9 +32,9 @@ 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', - 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', - 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'argsort', 'tensordot', 'histogram', 'eye', - 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', 'split', 'vsplit', 'concatenate', 'append', + 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'histogram', + 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'argsort', 'tensordot', 'eye', 'linspace', + 'logspace', 'expand_dims', 'tile', 'arange', 'array_split', 'split', 'vsplit', 'concatenate', 'append', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', @@ -3029,6 +3029,71 @@ def split(ary, indices_or_sections, axis=0): # pylint: enable=redefined-outer-name +# pylint: disable=redefined-outer-name +@set_module('mxnet.ndarray.numpy') +def array_split(ary, indices_or_sections, axis=0): + """Split an array into multiple sub-arrays. + + If `indices_or_sections` is an integer, N, the array will be divided + into N equal arrays along `axis`. If such a split is not possible, + an array of length l that should be split into n sections, it returns + l % n sub-arrays of size l//n + 1 and the rest of size l//n. + + If `indices_or_sections` is a 1-D array of sorted integers, the entries + indicate where along `axis` the array is split. For example, + ``[2, 3]`` would, for ``axis=0``, result in + - ary[:2] + - ary[2:3] + - ary[3:] + If an index exceeds the dimension of the array along `axis`, + an empty sub-array is returned correspondingly. + + Parameters + ---------- + ary : ndarray + Array to be divided into sub-arrays. + indices_or_sections : int or 1-D Python tuple, list or set. + Param used to determine the number and size of the subarray. + axis : int, optional + The axis along which to split, default is 0. + + Returns + ------- + sub-arrays : list of ndarrays + A list of sub-arrays. 
+ + Examples + -------- + >>> x = np.arange(9.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] + + >>> np.array_split(x, [3, 5, 6, 8]) + [array([0., 1., 2.]), array([3., 4.]), array([5.]), array([6., 7.]), array([])] + + >>> x = np.arange(8.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])] + + >>> x = np.arange(7.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4.]), array([5., 6.])] + """ + indices = [] + sections = 0 + if isinstance(indices_or_sections, integer_types): + sections = indices_or_sections + elif isinstance(indices_or_sections, (list, set, tuple)): + indices = [0] + list(indices_or_sections) + else: + raise ValueError('indices_or_sections must be either int, or tuple / list / set of ints') + ret = _npi.split(ary, indices, axis, False, sections) + if not isinstance(ret, list): + return [ret] + return ret +# pylint: enable=redefined-outer-name + + # pylint: disable=redefined-outer-name @set_module('mxnet.ndarray.numpy') def hsplit(ary, indices_or_sections): diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index aa0762bf0e3f..5795c62942df 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -50,9 +50,9 @@ 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', - 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', + 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'histogram', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'append', 'argsort', - 'tensordot', 'histogram', 'eye', 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', + 'tensordot', 'eye', 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', 'array_split', 'split', 'vsplit', 'concatenate', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'arctan2', 'hypot', @@ -4841,6 +4841,58 @@ def split(ary, indices_or_sections, axis=0): return _mx_nd_np.split(ary, indices_or_sections, axis=axis) +@set_module('mxnet.numpy') +def array_split(ary, indices_or_sections, axis=0): + """Split an array into multiple sub-arrays. + + If `indices_or_sections` is an integer, N, the array will be divided + into N equal arrays along `axis`. If such a split is not possible, + an array of length l that should be split into n sections, it returns + l % n sub-arrays of size l//n + 1 and the rest of size l//n. + + If `indices_or_sections` is a 1-D array of sorted integers, the entries + indicate where along `axis` the array is split. For example, + ``[2, 3]`` would, for ``axis=0``, result in + - ary[:2] + - ary[2:3] + - ary[3:] + If an index exceeds the dimension of the array along `axis`, + an empty sub-array is returned correspondingly. + + Parameters + ---------- + ary : ndarray + Array to be divided into sub-arrays. + indices_or_sections : int or 1-D Python tuple, list or set. + Param used to determine the number and size of the subarray. + axis : int, optional + The axis along which to split, default is 0. + + Returns + ------- + sub-arrays : list of ndarrays + A list of sub-arrays. 
+ + Examples + -------- + >>> x = np.arange(9.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] + + >>> np.array_split(x, [3, 5, 6, 8]) + [array([0., 1., 2.]), array([3., 4.]), array([5.]), array([6., 7.]), array([])] + + >>> x = np.arange(8.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7.])] + + >>> x = np.arange(7.0) + >>> np.array_split(x, 3) + [array([0., 1., 2.]), array([3., 4.]), array([5., 6.])] + """ + return _mx_nd_np.array_split(ary, indices_or_sections, axis=axis) + + @set_module('mxnet.numpy') def vsplit(ary, indices_or_sections): r""" diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 1a238ec2c7c7..23593a47e6ba 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -113,6 +113,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'reshape', 'roll', 'split', + 'array_split', 'squeeze', 'stack', 'std', diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 4b06bbec7cae..c61d5b2d393d 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -40,9 +40,9 @@ 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', - 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', - 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'argsort', 'tensordot', 'histogram', 'eye', - 'linspace', 'logspace', 'expand_dims', 'tile', 'arange', 'split', 'vsplit', 'concatenate', 'append', + 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'histogram', + 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'argsort', 'tensordot', 'eye', 'linspace', + 'logspace', 'expand_dims', 'tile', 'arange', 'array_split', 'split', 'vsplit', 'concatenate', 'append', 'stack', 'vstack', 'column_stack', 'dstack', 'average', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'argmin', 'std', 'var', 'indices', 'copysign', 'ravel', 'unravel_index', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', @@ -3116,6 +3116,54 @@ def split(ary, indices_or_sections, axis=0): # pylint: enable=redefined-outer-name +# pylint: disable=redefined-outer-name +@set_module('mxnet.symbol.numpy') +def array_split(ary, indices_or_sections, axis=0): + """Split an array into multiple sub-arrays. + + If `indices_or_sections` is an integer, N, the array will be divided + into N equal arrays along `axis`. If such a split is not possible, + an array of length l that should be split into n sections, it returns + l % n sub-arrays of size l//n + 1 and the rest of size l//n. + + If `indices_or_sections` is a 1-D array of sorted integers, the entries + indicate where along `axis` the array is split. For example, + ``[2, 3]`` would, for ``axis=0``, result in + - ary[:2] + - ary[2:3] + - ary[3:] + If an index exceeds the dimension of the array along `axis`, + an empty sub-array is returned correspondingly. + + Parameters + ---------- + ary : _Symbol + Array to be divided into sub-arrays. + indices_or_sections : int or 1-D Python tuple, list or set. + Param used to determine the number and size of the subarray. 
+ axis : int, optional + The axis along which to split, default is 0. + + Returns + ------- + sub-arrays : list of ndarrays + A list of sub-arrays. + """ + indices = [] + sections = 0 + if isinstance(indices_or_sections, int): + sections = indices_or_sections + elif isinstance(indices_or_sections, (list, set, tuple)): + indices = [0] + list(indices_or_sections) + else: + raise ValueError('indices_or_sections must either int or tuple / list / set of ints') + ret = _npi.split(ary, indices, axis, False, sections) + if not isinstance(ret, list): + return [ret] + return ret +# pylint: enable=redefined-outer-name + + # pylint: disable=redefined-outer-name @set_module('mxnet.symbol.numpy') def hsplit(ary, indices_or_sections): diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 4bd059ae81df..0c501808a6c0 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -2727,9 +2727,15 @@ struct SplitParam : public dmlc::Parameter { inline mxnet::TShape GetSplitIndices(const mxnet::TShape& ishape, int axis, int sections) { mxnet::TShape indices(sections+1, -1); indices[0] = 0; - int64_t section_size = ishape[axis] / sections; + int64_t section_size_b = (int64_t) (ishape[axis] / sections); + int64_t section_size_a = section_size_b + 1; + int section_a = ishape[axis] % sections; for (int i = 0; i < sections; ++i) { - indices[i+1] = section_size * (i + 1); + if ( i < section_a ) { + indices[i+1] = section_size_a * (i + 1); + } else { + indices[i+1] = section_size_b + indices[i]; + } } return indices; } diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 4c4e8b90eca9..0e875825a699 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -181,6 +181,18 @@ def _add_workload_split(): assertRaises(ValueError, np.split, np.arange(10), 3) +def _add_workload_array_split(): + a = np.arange(10) + b = np.array([np.arange(10), np.arange(10)]) + + for i in range(1, 12): + OpArgMngr.add_workload('array_split', a, i) + OpArgMngr.add_workload('array_split', b, 3, axis=0) + OpArgMngr.add_workload('array_split', b, [0, 1, 2], axis=0) + OpArgMngr.add_workload('array_split', b, 3, axis=-1) + OpArgMngr.add_workload('array_split', b, 3) + + def _add_workload_squeeze(): OpArgMngr.add_workload('squeeze', np.random.uniform(size=(4, 1))) OpArgMngr.add_workload('squeeze', np.random.uniform(size=(20, 10, 10, 1, 1))) @@ -1398,6 +1410,7 @@ def _prepare_workloads(): _add_workload_rint(array_pool) _add_workload_roll() _add_workload_split() + _add_workload_array_split() _add_workload_squeeze() _add_workload_stack(array_pool) _add_workload_std() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 078e37fc4146..54bce0aaae84 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -2237,6 +2237,63 @@ def get_indices(axis_size): assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) +@with_seed() +@use_np +def test_np_array_split(): + class TestArray_split(HybridBlock): + def __init__(self, indices_or_sections, axis=None): + super(TestArray_split, self).__init__() + self._axis = axis + self._indices_or_sections = indices_or_sections + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.array_split(a, indices_or_sections=self._indices_or_sections, + axis=self._axis) + + def get_indices(axis_size): + if axis_size 
is 0: + axis_size = random.randint(3, 6) + samples = random.randint(1, axis_size - 1) + indices = sorted(random.sample([i for i in range(0, axis_size + 1)], samples)) + indices = tuple(indices) + return indices + + shapes = [(), (5, ), (10, ), + (2, 5), (5, 5), (10, 10), + (4, 4, 4), (4, 6, 9), (6, 6, 6), + (7, 8, 9, 10)] + dtypes = [np.int8, np.uint8, np.int32, np.int64, np.float16, np.float32, np.float64] + + combinations = itertools.product([False, True], shapes, dtypes) + for hybridize, shape, dtype in combinations: + rtol = 1e-2 if dtype == np.float16 else 1e-3 + atol = 1e-4 if dtype == np.float16 else 1e-5 + for axis in range(len(shape)): + x = np.random.uniform(-5.0, 5.0, size=shape).astype(dtype) + indices = get_indices(shape[axis]) + sections = 7 if x.shape[axis] is 0 else random.randint(1,x.shape[axis]) + for indices_or_sections in [indices, sections]: + # test gluon + test_array_split = TestArray_split(axis=axis, indices_or_sections=indices_or_sections) + if hybridize: + test_array_split.hybridize() + x.attach_grad() + expected_ret = _np.array_split(x.asnumpy(), indices_or_sections=indices_or_sections, axis=axis) + with mx.autograd.record(): + y = test_array_split(x) + assert len(y) == len(expected_ret) + for mx_out, np_out in zip(y, expected_ret): + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + mx.autograd.backward(y) + assert_almost_equal(x.grad.asnumpy(), _np.ones(x.shape), rtol=rtol, atol=atol) + + # test imperative + mx_outs = np.array_split(x, indices_or_sections=indices_or_sections, axis=axis) + np_outs = _np.array_split(x.asnumpy(), indices_or_sections=indices_or_sections, axis=axis) + for mx_out, np_out in zip(mx_outs, np_outs): + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + + @with_seed() @use_np def test_np_vsplit(): From f701f3f76a2a2f741c08354c622924344d01f13c Mon Sep 17 00:00:00 2001 From: jonatanmil <55560698+jonatanmil@users.noreply.github.com> Date: Thu, 12 Dec 2019 15:07:18 -0800 Subject: [PATCH 35/62] [MXNET-1431] Multiple channel support in Gluon PReLU (#16262) * Multiple channel support in Gluon PReLU * Update activations.py --- python/mxnet/gluon/nn/activations.py | 11 ++++++++--- tests/python/unittest/test_gluon.py | 5 +++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/mxnet/gluon/nn/activations.py b/python/mxnet/gluon/nn/activations.py index a3baae004311..1b9ce91dd2aa 100644 --- a/python/mxnet/gluon/nn/activations.py +++ b/python/mxnet/gluon/nn/activations.py @@ -120,7 +120,10 @@ class PReLU(HybridBlock): ---------- alpha_initializer : Initializer Initializer for the `embeddings` matrix. - + in_channels : int, default 1 + Number of channels (alpha parameters) to learn. Can either be 1 + or `n` where `n` is the size of the second dimension of the input + tensor. Inputs: - **data**: input tensor with arbitrary shape. @@ -128,10 +131,12 @@ class PReLU(HybridBlock): Outputs: - **out**: output tensor with the same shape as `data`. 
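# Illustrative usage sketch for the new `in_channels` argument (shapes and values here
# are arbitrary): with in_channels=3 one alpha is learned per channel, while the
# default of 1 keeps the previous behaviour of a single shared alpha.
import mxnet as mx
from mxnet.gluon import nn

prelu = nn.PReLU(in_channels=3)                      # one learnable alpha per channel
prelu.initialize()
x = mx.nd.random.uniform(-1, 1, shape=(1, 3, 2))     # channels on the second axis
y = prelu(x)                                         # output has the same shape as x: (1, 3, 2)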
""" - def __init__(self, alpha_initializer=initializer.Constant(0.25), **kwargs): + def __init__(self, alpha_initializer=initializer.Constant(0.25), + in_channels=1, **kwargs): super(PReLU, self).__init__(**kwargs) with self.name_scope(): - self.alpha = self.params.get('alpha', shape=(1,), init=alpha_initializer) + self.alpha = self.params.get('alpha', shape=(in_channels,), + init=alpha_initializer) def hybrid_forward(self, F, x, alpha): return F.LeakyReLU(x, gamma=alpha, act_type='prelu', name='fwd') diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index 0163341dc9bb..d27c241f9c99 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -1389,6 +1389,11 @@ def selu(x): x = point_to_validate.reshape((1, 3, 2)) assert_almost_equal(prelu(x).asnumpy(), mx.nd.where(x >= 0, x, 0.25 * x).asnumpy()) + multichannel_init = mx.initializer.Constant(mx.nd.array([0.1, 0.25, 0.5])) + prelu_multichannel = mx.gluon.nn.PReLU(alpha_initializer=multichannel_init, in_channels=3) + prelu_multichannel.initialize() + assert_almost_equal(prelu_multichannel(x).asnumpy(), np.array([[-0.01, 0.1], [-0.025, 0.1], [-0.05, 0.1]])) + gelu = mx.gluon.nn.GELU() def gelu_test(x): CUBE_CONSTANT = 0.044715 From 61013a8bf9ef8a7b79d684504df1b321b1efb8d8 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Thu, 12 Dec 2019 16:25:34 -0800 Subject: [PATCH 36/62] use env var to control stack trace logging (#17038) * use env var to control stack trace logging * update issue template --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- 3rdparty/dmlc-core | 2 +- CMakeLists.txt | 3 +- Makefile | 2 + ci/docker/runtime_functions.sh | 39 +++++++++++++++++++ docs/static_site/src/pages/api/faq/env_var.md | 6 +++ 6 files changed, 51 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 4540f8f5f7af..7a0115d36433 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -10,7 +10,7 @@ assignees: '' (A clear and concise description of what the bug is.) ### Error Message -(Paste the complete error message, including stack trace.) +(Paste the complete error message. Please also include stack trace by setting environment variable `DMLC_LOG_STACK_TRACE_DEPTH=10` before running your script.) ## To Reproduce (If you developed your own code, please provide a short script that reproduces the error. For existing examples, please provide link.) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index ca9f9329654d..b3a4c715bfc3 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit ca9f9329654dbd42f06988276c6f74eb8bdd5038 +Subproject commit b3a4c715bfc37a08f245844a800933f10e47c1ea diff --git a/CMakeLists.txt b/CMakeLists.txt index 1be19c577ae3..fcbe707aa9ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,7 +111,8 @@ endif() #Switch off modern thread local for dmlc-core, please see: https://github.com/dmlc/dmlc-core/issues/571#issuecomment-543467484 add_definitions(-DDMLC_MODERN_THREAD_LOCAL=0) - +# disable stack trace in exception by default. 
+add_definitions(-DDMLC_LOG_STACK_TRACE_SIZE=0) if(MSVC) add_definitions(-DWIN32_LEAN_AND_MEAN) diff --git a/Makefile b/Makefile index 639f259487ab..1b858cc48671 100644 --- a/Makefile +++ b/Makefile @@ -94,6 +94,8 @@ WARNFLAGS= -Wall -Wsign-compare CFLAGS = -DMSHADOW_FORCE_STREAM $(WARNFLAGS) # use old thread local implementation in DMLC-CORE CFLAGS += -DDMLC_MODERN_THREAD_LOCAL=0 +# disable stack trace in exception by default. +CFLAGS += -DDMLC_LOG_STACK_TRACE_SIZE=0 ifeq ($(DEV), 1) CFLAGS += -g -Werror diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 63fe32879d75..9198eee32896 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1007,6 +1007,7 @@ cd_unittest_ubuntu() { export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_ENABLE_CYTHON=0 export CD_JOB=1 # signal this is a CD run so any unecessary tests can be skipped + export DMLC_LOG_STACK_TRACE_DEPTH=10 local mxnet_variant=${1:?"This function requires a mxnet variant as the first argument"} local python_cmd=${2:?"This function requires a python command as the first argument"} @@ -1050,6 +1051,7 @@ unittest_ubuntu_python2_cpu_cython() { export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_ENABLE_CYTHON=1 export MXNET_ENFORCE_CYTHON=1 + export DMLC_LOG_STACK_TRACE_DEPTH=10 check_cython 2 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train @@ -1063,6 +1065,7 @@ unittest_ubuntu_python2_cpu() { export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization.xml --verbose tests/python/quantization @@ -1075,6 +1078,7 @@ unittest_ubuntu_python3_cpu() { export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization.xml --verbose tests/python/quantization } @@ -1086,6 +1090,7 @@ unittest_ubuntu_python3_cpu_mkldnn() { export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_mkl.xml --verbose tests/python/mkl } @@ -1097,6 +1102,7 @@ unittest_ubuntu_python2_gpu() { export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } @@ -1108,6 +1114,7 
@@ unittest_ubuntu_python3_gpu() { export MXNET_SUBGRAPH_VERBOSE=0 export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } @@ -1120,6 +1127,7 @@ unittest_ubuntu_python3_gpu_cython() { export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export MXNET_ENABLE_CYTHON=1 export MXNET_ENFORCE_CYTHON=1 + export DMLC_LOG_STACK_TRACE_DEPTH=10 check_cython 3 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } @@ -1131,6 +1139,7 @@ unittest_ubuntu_python3_gpu_nocudnn() { export MXNET_SUBGRAPH_VERBOSE=0 export CUDNN_OFF_TEST_ONLY=true export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } @@ -1142,6 +1151,7 @@ unittest_ubuntu_tensorrt_gpu() { export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 python tests/python/tensorrt/lenet5_train.py nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_trt_gpu.xml --verbose --nocapture tests/python/tensorrt/ } @@ -1156,6 +1166,7 @@ unittest_ubuntu_python2_quantization_gpu() { export MXNET_SUBGRAPH_VERBOSE=0 export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu } @@ -1169,6 +1180,7 @@ unittest_ubuntu_python3_quantization_gpu() { export MXNET_SUBGRAPH_VERBOSE=0 export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} export MXNET_ENABLE_CYTHON=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu } @@ -1313,12 +1325,14 @@ unittest_centos7_gpu() { set -ex cd /work/mxnet export CUDNN_VERSION=${CUDNN_VERSION:-7.0.3} + export DMLC_LOG_STACK_TRACE_DEPTH=10 python3.6 -m "nose" $NOSE_COVERAGE_ARGUMENTS $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } integrationtest_ubuntu_cpu_onnx() { set -ex export PYTHONPATH=./python/ + export DMLC_LOG_STACK_TRACE_DEPTH=10 python tests/python-pytest/onnx/backend_test.py pytest tests/python-pytest/onnx/mxnet_export_test.py pytest tests/python-pytest/onnx/test_models.py @@ -1330,18 +1344,21 @@ integrationtest_ubuntu_gpu_python() { export PYTHONPATH=./python/ export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 python example/image-classification/test_score.py } integrationtest_ubuntu_gpu_caffe() { set -ex export PYTHONPATH=/work/deps/caffe/python:./python + export DMLC_LOG_STACK_TRACE_DEPTH=10 python tools/caffe_converter/test_converter.py } integrationtest_ubuntu_cpu_asan() { set -ex export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.5 + export DMLC_LOG_STACK_TRACE_DEPTH=10 cd /work/mxnet/build/cpp-package/example/ /work/mxnet/cpp-package/example/get_data.sh @@ -1350,6 +1367,7 @@ integrationtest_ubuntu_cpu_asan() { integrationtest_ubuntu_gpu_cpp_package() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 
cpp-package/tests/ci_test.sh } @@ -1360,6 +1378,7 @@ integrationtest_ubuntu_cpu_dist_kvstore() { export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 export MXNET_USE_OPERATOR_TUNING=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 cd tests/nightly/ ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py --type=gluon_step_cpu ../../tools/launch.py -n 7 --launcher local python dist_sync_kvstore.py --type=gluon_sparse_step_cpu @@ -1375,6 +1394,7 @@ integrationtest_ubuntu_cpu_dist_kvstore() { integrationtest_ubuntu_cpu_scala() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 scala_prepare cd scala-package mvn -B verify -DskipTests=false @@ -1382,6 +1402,7 @@ integrationtest_ubuntu_cpu_scala() { integrationtest_ubuntu_gpu_scala() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 scala_prepare cd scala-package export SCALA_TEST_ON_GPU=1 @@ -1394,6 +1415,7 @@ integrationtest_ubuntu_gpu_dist_kvstore() { export PYTHONPATH=./python/ export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 cd tests/nightly/ ../../tools/launch.py -n 4 --launcher local python dist_device_sync_kvstore.py ../../tools/launch.py -n 4 --launcher local python dist_sync_kvstore.py --type=init_gpu @@ -1404,6 +1426,7 @@ test_ubuntu_cpu_python2() { set -ex pushd . export MXNET_LIBRARY_PATH=/work/build/libmxnet.so + export DMLC_LOG_STACK_TRACE_DEPTH=10 VENV=mxnet_py2_venv virtualenv -p `which python2` $VENV @@ -1421,6 +1444,7 @@ test_ubuntu_cpu_python3() { set -ex pushd . export MXNET_LIBRARY_PATH=/work/build/libmxnet.so + export DMLC_LOG_STACK_TRACE_DEPTH=10 VENV=mxnet_py3_venv virtualenv -p `which python3` $VENV source $VENV/bin/activate @@ -1478,6 +1502,7 @@ nightly_test_installation() { # Runs Imagenet inference nightly_test_imagenet_inference() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 echo $PWD cp /work/mxnet/build/cpp-package/example/imagenet_inference /work/mxnet/cpp-package/example/inference/ cd /work/mxnet/cpp-package/example/inference/ @@ -1487,6 +1512,7 @@ nightly_test_imagenet_inference() { #Runs a simple MNIST training example nightly_test_image_classification() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 ./tests/nightly/test_image_classification.sh } @@ -1494,6 +1520,7 @@ nightly_test_image_classification() { nightly_test_KVStore_singleNode() { set -ex export PYTHONPATH=./python/ + export DMLC_LOG_STACK_TRACE_DEPTH=10 python tests/nightly/test_kvstore.py } @@ -1501,12 +1528,14 @@ nightly_test_KVStore_singleNode() { nightly_test_large_tensor() { set -ex export PYTHONPATH=./python/ + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 tests/nightly/test_large_array.py } #Tests Amalgamation Build with 5 different sets of flags nightly_test_amalgamation() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 # Amalgamation can not be run with -j nproc make -C amalgamation/ clean make -C amalgamation/ ${1} ${2} @@ -1516,6 +1545,7 @@ nightly_test_amalgamation() { nightly_test_javascript() { set -ex export LLVM=/work/deps/emscripten-fastcomp/build/bin + export DMLC_LOG_STACK_TRACE_DEPTH=10 # This part is needed to run emcc correctly cd /work/deps/emscripten ./emcc @@ -1527,6 +1557,7 @@ nightly_test_javascript() { nightly_model_backwards_compat_test() { set -ex export PYTHONPATH=/work/mxnet/python/ + export DMLC_LOG_STACK_TRACE_DEPTH=10 ./tests/nightly/model_backwards_compatibility_check/model_backward_compat_checker.sh } @@ -1534,6 +1565,7 @@ nightly_model_backwards_compat_test() { nightly_model_backwards_compat_train() { set 
-ex export PYTHONPATH=./python/ + export DMLC_LOG_STACK_TRACE_DEPTH=10 ./tests/nightly/model_backwards_compatibility_check/train_mxnet_legacy_models.sh } @@ -1543,6 +1575,7 @@ nightly_straight_dope_python2_single_gpu_tests() { cd /work/mxnet/tests/nightly/straight_dope export PYTHONPATH=/work/mxnet/python/ export MXNET_TEST_KERNEL=python2 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-2.7 $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_straight_dope_python2_single_gpu.xml \ test_notebooks_single_gpu.py --nologcapture } @@ -1552,6 +1585,7 @@ nightly_straight_dope_python3_single_gpu_tests() { cd /work/mxnet/tests/nightly/straight_dope export PYTHONPATH=/work/mxnet/python/ export MXNET_TEST_KERNEL=python3 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_straight_dope_python3_single_gpu.xml \ test_notebooks_single_gpu.py --nologcapture } @@ -1562,6 +1596,7 @@ nightly_straight_dope_python2_multi_gpu_tests() { cd /work/mxnet/tests/nightly/straight_dope export PYTHONPATH=/work/mxnet/python/ export MXNET_TEST_KERNEL=python2 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-2.7 $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_straight_dope_python2_multi_gpu.xml \ test_notebooks_multi_gpu.py --nologcapture } @@ -1571,6 +1606,7 @@ nightly_straight_dope_python3_multi_gpu_tests() { cd /work/mxnet/tests/nightly/straight_dope export PYTHONPATH=/work/mxnet/python/ export MXNET_TEST_KERNEL=python3 + export DMLC_LOG_STACK_TRACE_DEPTH=10 nosetests-3.4 $NOSE_TIMER_ARGUMENTS --with-xunit --xunit-file nosetests_straight_dope_python3_multi_gpu.xml \ test_notebooks_multi_gpu.py --nologcapture } @@ -1580,6 +1616,7 @@ nightly_tutorial_test_ubuntu_python3_gpu() { cd /work/mxnet/docs export BUILD_VER=tutorial export MXNET_DOCS_BUILD_MXNET=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 make html export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 @@ -1594,6 +1631,7 @@ nightly_tutorial_test_ubuntu_python2_gpu() { cd /work/mxnet/docs export BUILD_VER=tutorial export MXNET_DOCS_BUILD_MXNET=0 + export DMLC_LOG_STACK_TRACE_DEPTH=10 make html export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 export MXNET_SUBGRAPH_VERBOSE=0 @@ -1621,6 +1659,7 @@ nightly_scala_demo_test_cpu() { nightly_estimator() { set -ex + export DMLC_LOG_STACK_TRACE_DEPTH=10 cd /work/mxnet/tests/nightly/estimator export PYTHONPATH=/work/mxnet/python/ nosetests test_estimator_cnn.py diff --git a/docs/static_site/src/pages/api/faq/env_var.md b/docs/static_site/src/pages/api/faq/env_var.md index e4fe58a116c5..bc98c39d9570 100644 --- a/docs/static_site/src/pages/api/faq/env_var.md +++ b/docs/static_site/src/pages/api/faq/env_var.md @@ -223,6 +223,12 @@ The following environments can be used to profile the application without changi If cython modules are used, `mx.nd._internal.NDArrayBase` must be `mxnet._cy3.ndarray.NDArrayBase` for python 3 or `mxnet._cy2.ndarray.NDArrayBase` for python 2. If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`. +## Logging + +* DMLC_LOG_STACK_TRACE_DEPTH + - Values: Int ```(default=0)``` + - The depth of stack trace information to log when exception happens. 
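  - For example, running `DMLC_LOG_STACK_TRACE_DEPTH=10 python your_script.py` logs up to 10 stack frames when an exception is raised (`your_script.py` is only a placeholder for whatever program you run).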
+ ## Other Environment Variables * MXNET_GPU_WORKER_NSTREAMS From c99a1108c0ed9d6fc32044ea79d3ae528c9d7a1d Mon Sep 17 00:00:00 2001 From: Przemyslaw Tredak Date: Fri, 13 Dec 2019 13:50:51 -0800 Subject: [PATCH 37/62] Add my key to KEYS (#17060) --- KEYS | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/KEYS b/KEYS index 51675754d62c..9f2db10a92e1 100644 --- a/KEYS +++ b/KEYS @@ -806,3 +806,62 @@ O4432VBxwfqYDQ4z1Qx4VvM6gUAFPvTpr/9ZqsVUFcK4RW/Tsst6bFWwlqksmxl4 AccomJAgVJoFPw== =hhsZ -----END PGP PUBLIC KEY BLOCK----- +pub rsa4096 2019-12-13 [SC] + ABD3F3423F41245A6E5E065D7273634B104F135F +uid [ultimate] Przemysław Krzysztof Trędak (CODE SIGNING KEY) +sig 3 7273634B104F135F 2019-12-13 Przemysław Krzysztof Trędak (CODE SIGNING KEY) +sub rsa4096 2019-12-13 [E] +sig 7273634B104F135F 2019-12-13 Przemysław Krzysztof Trędak (CODE SIGNING KEY) + +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBF3zz6sBEADF6pNnZUQ7QpbmitRyFYrbjTCuqj3+wNH1S+Oo+RyC7+PW2Ofe +pv+laqNNqWQcU0OEIbmsvVAiKdU1sYGwfC1QSpWJLYDd59Dtq3pQTVBo8/5dYfYa +6Qou/PNQnI9C1Km1APD+XZvF8wR9torTQHQV0BrN/C94EVuHLH9IK1pYp7VkiQfb +8p0rrlLsQHciWRZPxoZJuG1KM/IN0jftdfTUjJYfh2H8icymjWRPnJ/FcDKPvcfN +dmmvoBjwj/XO93Nx3oiyJ8ORIARltstcSqrGFwkBsUUGzs8/aICUZ5B1nVCIxT9M +T4UygsvQKtZGve0aZYxgWD/bzT/H2No1z2S66rshmO/V+5U6ukwf1hAJH2/KTF+N +q9wYSMSD8nXg5tcagpCPJKf4TG6EnsRP5rgg6lqKFqS8OxBk6czfEv3kuWfN4KOa +XeBh04eogxIAgH5KN4d86QIqbRd9hH2+dKwGSxrdqxks9kq+Jup9C1P1tqE+nk/G +IrOoR+C/DeUqvXFeeIyEgOveDjq4+Uu7I7xyhwpWcS0qArU6vgaBcVU676r+NCrA +/f64nOvihysfptFcrUydw79412IPMO4xxgsanW6o85ukMFjNKcWLDONAkSiNFj2p +y1pTkollNuvR7xEd/p2TJzPh7Fwz4cshD0hD1uJvLywMYl1G6WPFuAb2VQARAQAB +tEVQcnplbXlzxYJhdyBLcnp5c3p0b2YgVHLEmWRhayAoQ09ERSBTSUdOSU5HIEtF +WSkgPHB0cmVuZHhAYXBhY2hlLm9yZz6JAk4EEwEKADgWIQSr0/NCP0EkWm5eBl1y +c2NLEE8TXwUCXfPPqwIbAwULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRByc2NL +EE8TX5u9EACRFkkzuttRP4NzgsB3ClF4yn2e8V1iS6ILDKC9FTj09QE5yDppWu9w +hLyaEkHTgoWMVyKgfmD8BkqlkOemHFI8bzs4pKEcCJFVAb5iwMG0H6AKg7nlo2dX +f7GSge+hXTX9VfU6OIs5bTFW0t32FWgGw1WDuEU4jdffGDLrPv8WqLa5bhxhzFKG +/PGHisxtRp5c9PZiU+sY5PiUjYId8lDc+djiekvsgHSG+6cJ41t5QKJOdwQu1fUu +A08zifUFyaR4PEhScYamw7zSJWx2DB0UP/8fqdDHlYRPw/O3wBO+fyQQOIGTzwCr +CWZy5h3UyJhj9aa7b+gZIji2Evx8FWtgcOC7hGIyot7wrepBt9s53byyC/r9y+8O +ii7Fk/mDYXSwpL2E5vpSdDF/m5d7kz37vP/P6cgUR17/44eibVKgHENQ7l4qX+C6 +gx7HtWUjrVzKCOCrWbBzThgC/7BVu1i4cxVJ6PPioOBjiMKbjpxbmDhvAa6hOPHn +/L2jiH3AQodaVExKadJDU/xGsyIWfS8LdwrEYxY+Y4TGJqqrs+x2Oc/s9C3AuYHz +XvoGAfv9zz+YDC4MbfcSXUm0PuY/NXkIKG20CiWWOyGXVcGwhYAw35nFdDn5bliF +0o2kIXRc9TVNA4qnPGRlCdvkyKomgR5XfiJzKps6yib797SGGttU4bkCDQRd88+r +ARAAqZlIhc2lRiFYgkBDt13rjZYoUQH6TPnCwCHDV4YR6le4Lc/oiF3ugzxEqtmj +FZu3QSrU6xLAMtCGACUqHW1QVONmQUAni3Fhzni7PplqiUet/VlUaR2EhOSCq+hZ +8f/I3O6GWKZnn7YcgKGZ+JE6byWzbKeztkMTLeXeczvLA2L9LOX9mHQpikKNXXYn +fDpSMetFAscN8agqd2HeQYCzZ0YVgvPSEqmETv9VhEhT7CYxp0AiuPtATxa8mflF +48ckkcsNaHF2bvn5qRrZsXKF3XrXtiRnNeBoXbA1Slsnca7Z6ozuC3/pc4yJVwN9 +XjdpxIzXwmsOX/Naz6hI6FpCiS8HYJoFC7ejWD2MMxn5gRmiYDZmLZjGETy59iOI +fBmpoHTalheHdfCzYBXCuw/pDv+Z21Qn1u07b7lwigcB7iv2MjgTPD0X24tVG6ug +f7oCFAvZHPG3Hu4tusXATCoW8Ycxa5O1p1qIYZyEOXAYlI5J6GcmJE72wNXjF2vX +cGio2lv8UYVdRXS56X9KUxVgN1NyOX+w++jZO5b2S9nBHf+vski2wFMkAGJZfkMm +atoX/fQrbjCW0P5cKNu56+/IBQRVtPDbrJhjyVqxTdp6Ipm9ScbXw05z6Hf4ZmaT +O4HKicoJ444fHsV7T1Emapn0GkdZHIsKF+RAGORgfWCsqB0AEQEAAYkCNgQYAQoA +IBYhBKvT80I/QSRabl4GXXJzY0sQTxNfBQJd88+rAhsMAAoJEHJzY0sQTxNfHmYQ +AKiofRI+MVZEotjbjLg0ACpey8fta/GynHasqUStlzmyBBNxLYOykRbES1rRs6JL +/vU8d795OYbt9V7BlAG7epWA6KsCNt29Wd/DTok+JJYcQKpcB9AFNmArioU8XD+y 
+YzOt+XtfWCTtaDErYP6H0b6fS1FXCnrX0vwPH++BYv1ufjlztzvv7Sa1FEqBgSqm +gaPM2KvmrYQgNo/FfN0hMSeAP2VFv2fuAZqpmRnneyxi2Okig7t7EPsfsFewuQGL +HbEiLLgW4FWYahFMRiF5hMWbuXGvwo5kCB1dIjXKvF9FyX5/mOXIwpPtGBg9p/WN +k9Kq4nyEDCW4voRX17RBgKJeieaQxTCpwMaqL16Wpnuz1u/hmoPxF6oPdJftaXRb +PvfsRQ3S4mFkTpEtXVPSuXjDQgz6KpVgmvPKs8PK7EQQ5xIiiB6GPI8T29X4f43J +pAPZVek/FLl1UQkUgaRXln8CQyn1RnSqHac8ujoN2xWqdWZEJyonXvxw75syjfA2 +RC170UEAUfOS7AvNBUIjaA2yVHecufA/a5pJO8kap7BIDravf0FWC6b+fAOe4U7V +8KnqffKjUcZzsBa9FUrKOUuGnvwC1EwjnSuOm3BEfvAPS6ct3R9qXphftVFlr1V3 +G5Kq0VNdNBAZJNvKNAqRb+kDQuEm/D+HbiVxI9dWdIvL +=m8Mt +-----END PGP PUBLIC KEY BLOCK----- From 3b911cf5edaabc384b360f9768df512cf00f0ce1 Mon Sep 17 00:00:00 2001 From: Talia <31782251+TEChopra1000@users.noreply.github.com> Date: Fri, 13 Dec 2019 14:38:57 -0800 Subject: [PATCH 38/62] Website edits (#17050) * adding gluon data tutorials and autogluon images * moved autogluon images to general python docs image (_static) folder * making modifications as per @sojiadeshina recommendations * fixed autogluon images * fixed link to data tutorials on package index page * Update docs/python_docs/python/tutorials/packages/autograd/index.md fixed link path --- .../autograd_control_flow.png | Bin .../autograd_control_flow_grad.png | Bin .../autograd_forward_backward.png | Bin .../autograd_images}/autograd_grad_req.mp4 | Bin .../autograd_images}/autograd_gradient.png | Bin .../autograd_images}/autograd_graph.mp4 | Bin .../autograd_images}/autograd_head_grad.mp4 | Bin .../python/tutorials/deploy/index.rst | 3 +- .../tutorials/packages/autograd/index.md | 14 +- .../packages/gluon/data/data_augmentation.md | 235 ++++++++++++ .../tutorials/packages/gluon/data/datasets.md | 333 ++++++++++++++++++ .../tutorials/packages/gluon/data/index.rst | 40 +++ .../python/tutorials/packages/gluon/index.rst | 8 +- .../python/tutorials/packages/index.rst | 2 +- .../python/tutorials/performance/index.rst | 2 +- .../_includes/get_started/macos/perl/perl.md | 2 +- .../macos/python/cpu/build-from-source.md | 2 +- 17 files changed, 622 insertions(+), 19 deletions(-) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_control_flow.png (100%) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_control_flow_grad.png (100%) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_forward_backward.png (100%) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_grad_req.mp4 (100%) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_gradient.png (100%) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_graph.mp4 (100%) rename docs/python_docs/{python/tutorials/packages/autograd/_static => _static/autograd_images}/autograd_head_grad.mp4 (100%) create mode 100644 docs/python_docs/python/tutorials/packages/gluon/data/data_augmentation.md create mode 100644 docs/python_docs/python/tutorials/packages/gluon/data/datasets.md create mode 100644 docs/python_docs/python/tutorials/packages/gluon/data/index.rst diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow.png b/docs/python_docs/_static/autograd_images/autograd_control_flow.png similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow.png rename to 
docs/python_docs/_static/autograd_images/autograd_control_flow.png diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow_grad.png b/docs/python_docs/_static/autograd_images/autograd_control_flow_grad.png similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_control_flow_grad.png rename to docs/python_docs/_static/autograd_images/autograd_control_flow_grad.png diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_forward_backward.png b/docs/python_docs/_static/autograd_images/autograd_forward_backward.png similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_forward_backward.png rename to docs/python_docs/_static/autograd_images/autograd_forward_backward.png diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_grad_req.mp4 b/docs/python_docs/_static/autograd_images/autograd_grad_req.mp4 similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_grad_req.mp4 rename to docs/python_docs/_static/autograd_images/autograd_grad_req.mp4 diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_gradient.png b/docs/python_docs/_static/autograd_images/autograd_gradient.png similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_gradient.png rename to docs/python_docs/_static/autograd_images/autograd_gradient.png diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_graph.mp4 b/docs/python_docs/_static/autograd_images/autograd_graph.mp4 similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_graph.mp4 rename to docs/python_docs/_static/autograd_images/autograd_graph.mp4 diff --git a/docs/python_docs/python/tutorials/packages/autograd/_static/autograd_head_grad.mp4 b/docs/python_docs/_static/autograd_images/autograd_head_grad.mp4 similarity index 100% rename from docs/python_docs/python/tutorials/packages/autograd/_static/autograd_head_grad.mp4 rename to docs/python_docs/_static/autograd_images/autograd_head_grad.mp4 diff --git a/docs/python_docs/python/tutorials/deploy/index.rst b/docs/python_docs/python/tutorials/deploy/index.rst index e0ff0db75f17..9c4d60578226 100644 --- a/docs/python_docs/python/tutorials/deploy/index.rst +++ b/docs/python_docs/python/tutorials/deploy/index.rst @@ -29,8 +29,9 @@ The following tutorials will help you learn export MXNet models. .. card:: :title: Export ONNX Models + :link: export/onnx.html - COMING SOON + Export your MXNet model to the Open Neural Exchange Format .. card:: :title: Export with GluonCV diff --git a/docs/python_docs/python/tutorials/packages/autograd/index.md b/docs/python_docs/python/tutorials/packages/autograd/index.md index 6623e15c1e2b..b1fe274ec41f 100644 --- a/docs/python_docs/python/tutorials/packages/autograd/index.md +++ b/docs/python_docs/python/tutorials/packages/autograd/index.md @@ -23,7 +23,7 @@ Gradients are fundamental to the process of training neural networks, and tell us how to change the parameters of the network to improve its performance. -![autograd-gradient](_static/autograd_gradient.png) +![auto-gradient](//_static/autograd_images/autograd_gradient.png) ### Long Answer: @@ -39,7 +39,7 @@ Assuming we've calculated the gradient of each parameter with respect to the los We differentiate. 
[MXNet Gluon](/api/python/docs/tutorials/packages/gluon/index.html) uses Reverse Mode Automatic Differentiation (`autograd`) to backpropagate gradients from the loss metric to the network parameters. -![forward-backward](_static/autograd_forward_backward.png) +![forward-backward](//_static/autograd_images/autograd_forward_backward.png) ### Long Answer: @@ -56,7 +56,7 @@ Stage 2. Work backwards through this record and evaluate the partial derivatives

@@ -173,7 +173,7 @@ net[0].weight.grad_req = 'null'

@@ -217,7 +217,7 @@ def f(x): We can plot the resultant function for $x$ between 0 and 1, and we should recognise certain functions in segments of $x$. Starting with a quadratic curve from 0 to 1/2, we have a cubic curve from 1/2 to 2/3, a quartic from 2/3 to 3/4 and finally a flatline. -![control-flow](_static/autograd_control_flow.png) +![control-flow](//_static/autograd_images/autograd_control_flow.png) Using `autograd`, let's now find the gradient of this arbitrary function. We don't have a vectorized function in this case, because of the control flow, so let's also create a function to calculate the gradient using `autograd`. @@ -234,7 +234,7 @@ grads = [get_grad(f, x).asscalar() for x in xs] print(grads) ``` -![flow-grad](_static/autograd_control_flow_grad.png) +![flow-grad](//_static/autograd_images/autograd_control_flow_grad.png) We can calculate the gradients by hand in this situation (since it's a toy example), and for the four segments discussed before we'd expect $2x$, $3x^2$, $4x^3$ and 0. As a spot check, for $x=0.6$ the hand-calculated gradient would be $3x^2=1.08$, which equals `1.08` as computed by `autograd`. @@ -247,7 +247,7 @@ Most of the time `autograd` will be aware of the complete computational graph, a

diff --git a/docs/python_docs/python/tutorials/packages/gluon/data/data_augmentation.md b/docs/python_docs/python/tutorials/packages/gluon/data/data_augmentation.md new file mode 100644 index 000000000000..0e320fc2890e --- /dev/null +++ b/docs/python_docs/python/tutorials/packages/gluon/data/data_augmentation.md @@ -0,0 +1,235 @@ + + + + + + + + + + + + + + + + + +# Image Augmentation + +Augmentation is the process of randomly adjusting the dataset samples used for training. As a result, a greater diversity of samples will be seen by the network and it is therefore less likely to overfit the training dataset. Some of the spurious characteristics of the dataset can be reduced using this technique. One example would be a dataset of images from the same camera having the same color tint: it's unhelpful when you want to apply this model to images from other cameras. You can avoid this by randomly shifting the colours of each image slightly and training your network on these augmented images. + +Although this technique can be applied in a variety of domains, it's very common in Computer Vision, and we will focus on image augmentations in this tutorial. Some example image augmentations include random crops and flips, and adjustments to the brightness and contrast. + +#### What are the prerequisites? + +You should be familiar with the concept of a transform and how to apply it to a dataset before reading this tutorial. Check out the [Data Transforms tutorial]() if this is new to you or you need a quick refresher. + +#### Where can I find the augmentation transforms? + +You can find them in the `mxnet.gluon.data.vision.transforms` module, alongside the deterministic transforms we've seen previously, such as `ToTensor`, `Normalize`, `CenterCrop` and `Resize`. Augmentations involve an element of randomness and all the augmentation transforms are prefixed with `Random`, such as `RandomResizedCrop` and `RandomBrightness`. We'll start by importing MXNet and the `transforms`. + + +```python +import matplotlib.pyplot as plt +import mxnet as mx +from mxnet.gluon.data.vision import transforms +``` + +#### Sample Image + +So that we can see the effects of all the vision augmentations, we'll take a sample image of a giraffe and apply various augmentations to it. We can see what it looks like to begin with. + + +```python +image_url = 'https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/data_aug/inputs/0.jpg' +mx.test_utils.download(image_url, "giraffe.jpg") +example_image = mx.image.imread("giraffe.jpg") +plt.imshow(example_image.asnumpy()) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_5_1.png) + + +Since these augmentations are random, we'll apply the same augmentation a few times and plot all of the outputs. We define a few utility functions to help with this. 
+ + +```python +def show_images(imgs, num_rows, num_cols, scale=2): + # show augmented images in a grid layout + aspect_ratio = imgs[0].shape[0]/imgs[0].shape[1] + figsize = (num_cols * scale, num_rows * scale * aspect_ratio) + _, axes = plt.subplots(num_rows, num_cols, figsize=figsize) + for i in range(num_rows): + for j in range(num_cols): + axes[i][j].imshow(imgs[i * num_cols + j].asnumpy()) + axes[i][j].axes.get_xaxis().set_visible(False) + axes[i][j].axes.get_yaxis().set_visible(False) + plt.subplots_adjust(hspace=0.1, wspace=0) + return axes + +def apply(img, aug, num_rows=2, num_cols=4, scale=3): + # apply augmentation multiple times to obtain different samples + Y = [aug(img) for _ in range(num_rows * num_cols)] + show_images(Y, num_rows, num_cols, scale) +``` + +# Spatial Augmentation + +One form of augmentation affects the spatial position of pixel values. Using combinations of slicing, scaling, translating, rotating and flipping, the values of the original image can be shifted to create new images. Some operations (like scaling and rotation) require interpolation as pixels in the new image are combinations of pixels in the original image. + +### `RandomResizedCrop` + +Many Computer Vision tasks, such as image classification and object detection, should be robust to changes in the scale and position of objects in the image. You can incorporate this into the network using pooling layers, but an alternative method is to crop random regions of the original image. + +As an example, we randomly (using a uniform distribution) crop a region of the image with: + +* an area of 10% to 100% of the original area +* a ratio of width to height between 0.5 and 2 + +And then we resize this cropped region to 200 by 200 pixels. + + +```python +shape_aug = transforms.RandomResizedCrop(size=(200, 200), + scale=(0.1, 1), + ratio=(0.5, 2)) +apply(example_image, shape_aug) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_12_0.png) + + +### `RandomFlipLeftRight` + +A simple augmentation technique is flipping. Usually flipping horizontally doesn't change the category of the object and results in an image that's still plausible in the real world. Using `RandomFlipLeftRight`, we randomly flip the image horizontally 50% of the time. + + +```python +apply(example_image, transforms.RandomFlipLeftRight()) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_15_0.png) + + +### `RandomFlipTopBottom` + +Although it's not as common as flipping left and right, you can flip the image vertically 50% of the time with `RandomFlipTopBottom`. With our giraffe example, we end up with less plausible samples than horizontal flipping, with the ground above the sky in some cases. + + +```python +apply(example_image, transforms.RandomFlipTopBottom()) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_18_0.png) + + +# Color Augmentation + +Usually, exact coloring doesn't play a significant role in the classification or detection of objects, so augmenting the colors of images is a good technique to make the network invariant to color shifts. Color properties that can be changed include brightness, contrast, saturation and hue. + +### `RandomBrightness` + +Use `RandomBrightness` to add a random brightness jitter to images.
Use the `brightness` parameter to control the amount of jitter in brightness, with value from 0 (no change) to 1 (potentially large change). `brightness` doesn't specify whether the brightness of the augmented image will be lighter or darker, just the potential strength of the effect. Specifically the augmentation is given by: + +``` +alpha = 1.0 + random.uniform(-brightness, brightness) +image *= alpha +``` + +So by setting this to 0.5 we randomly change the brightness of the image to a value between 50% ($1-0.5$) and 150% ($1+0.5$) of the original image. + + +```python +apply(example_image, transforms.RandomBrightness(0.5)) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_23_0.png) + + +### `RandomContrast` + +Use `RandomContrast` to add a random contrast jitter to an image. Contrast can be thought of as the degree to which light and dark colors in the image differ. Use the `contrast` parameter to control the amount of jitter in contrast, with value from 0 (no change) to 1 (potentially large change). `contrast` doesn't specify whether the contrast of the augmented image will be higher or lower, just the potential strength of the effect. Specifically, the augmentation is given by: + +``` +coef = nd.array([[[0.299, 0.587, 0.114]]]) +alpha = 1.0 + random.uniform(-contrast, contrast) +gray = image * coef +gray = (3.0 * (1.0 - alpha) / gray.size) * nd.sum(gray) +image *= alpha +image += gray +``` + + +```python +apply(example_image, transforms.RandomContrast(0.5)) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_26_0.png) + + +### `RandomSaturation` + +Use `RandomSaturation` to add a random saturation jitter to an image. Saturation can be thought of as the 'amount' of color in an image. Use the `saturation` parameter to control the amount of jitter in saturation, with value from 0 (no change) to 1 (potentially large change). `saturation` doesn't specify whether the saturation of the augmented image will be higher or lower, just the potential strength of the effect. Specifically the augmentation is using the method detailed [here](https://beesbuzz.biz/code/16-hsv-color-transforms). + + +```python +apply(example_image, transforms.RandomSaturation(0.5)) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_29_0.png) + + +### `RandomHue` + +Use `RandomHue` to add a random hue jitter to images. Hue can be thought of as the 'shade' of the colors in an image. Use the `hue` parameter to control the amount of jitter in hue, with value from 0 (no change) to 1 (potentially large change). `hue` doesn't specify whether the hue of the augmented image will be shifted one way or the other, just the potential strength of the effect. Specifically the augmentation is using the method detailed [here](https://beesbuzz.biz/code/16-hsv-color-transforms). + + +```python +apply(example_image, transforms.RandomHue(0.5)) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_32_0.png) + + +### `RandomColorJitter` + +`RandomColorJitter` is a convenience transform that can be used to perform multiple color augmentations at once. You can set the `brightness`, `contrast`, `saturation` and `hue` jitters, that function the same as above for their individual transforms. 
+ + +```python +color_aug = transforms.RandomColorJitter(brightness=0.5, + contrast=0.5, + saturation=0.5, + hue=0.5) +apply(example_image, color_aug) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_35_0.png) + + +### `RandomLighting` + +Use `RandomLighting` for an AlexNet-style PCA-based noise augmentation. + + +```python +apply(example_image, transforms.RandomLighting(alpha=1)) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_38_0.png) + +# Composed Augmentations + +In practice, we apply multiple augmentation techniques to an image to increase the variety of images in the dataset. Using the `Compose` transform that was introduced in the [Data Transforms tutorial](), we can apply 3 of the transforms we previously used above. + + +```python +augs = transforms.Compose([ + transforms.RandomFlipLeftRight(), color_aug, shape_aug]) +apply(example_image, augs) +``` + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/transforms/output_41_0.png) + + \ No newline at end of file diff --git a/docs/python_docs/python/tutorials/packages/gluon/data/datasets.md b/docs/python_docs/python/tutorials/packages/gluon/data/datasets.md new file mode 100644 index 000000000000..1c952b86a3da --- /dev/null +++ b/docs/python_docs/python/tutorials/packages/gluon/data/datasets.md @@ -0,0 +1,333 @@ + + + + + + + + + + + + + + + + + + +# Gluon `Dataset`s and `DataLoader` + +One of the most critical steps for model training and inference is loading the data: without data you can't do Machine Learning! In this tutorial we use the Gluon API to define a [`Dataset`](/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) and use a [`DataLoader`](/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) to iterate through the dataset in mini-batches. + +## Introduction to `Dataset`s + +[`Dataset`](/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) objects are used to represent collections of data, and include methods to load and parse the data (that is often stored on disk). Gluon has a number of different [`Dataset`](/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) classes for working with image data straight out-of-the-box, but we'll use the [`ArrayDataset`](/api/python/gluon/data.html) to introduce the idea of a [`Dataset`](/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset). + +We first start by generating random data `X` (with 3 variables) and corresponding random labels `y` to simulate a typical supervised learning task. We generate 10 samples and we pass them all to the [`ArrayDataset`](/api/python/gluon/data/data.html). + + +```python +import mxnet as mx +import os +import tarfile + +mx.random.seed(42) # Fix the seed for reproducibility +X = mx.random.uniform(shape=(10, 3)) +y = mx.random.uniform(shape=(10, 1)) +dataset = mx.gluon.data.dataset.ArrayDataset(X, y) +``` + +A key feature of a [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) is the __*ability to retrieve a single sample given an index*__. 
Our random data and labels were generated in memory, so this [`ArrayDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=arraydataset#mxnet.gluon.data.ArrayDataset) doesn't have to load anything from disk, but the interface is the same for all [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset)s. + + +```python +sample_idx = 4 +sample = dataset[sample_idx] + +assert len(sample) == 2 +assert sample[0].shape == (3, ) +assert sample[1].shape == (1, ) +print(sample) +``` + +( +[ 0.4375872 0.29753461 0.89177299] +, +[ 0.83261985] +) + + +We get a tuple of a data sample and its corresponding label, which makes sense because we passed the data `X` and the labels `y` in that order when we instantiated the [`ArrayDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=arraydataset#mxnet.gluon.data.ArrayDataset). We don't usually retrieve individual samples from [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) objects though (unless we're quality checking the output samples). Instead we use a [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader). + +## Introduction to `DataLoader` + +A [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) is used to create mini-batches of samples from a [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset), and provides a convenient iterator interface for looping over these batches. It's typically much more efficient to pass a mini-batch of data through a neural network than a single sample at a time, because the computation can be performed in parallel. A required parameter of [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) is the size of the mini-batches you want to create, called `batch_size`. + +Another benefit of using [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) is the ability to easily load data in parallel using [`multiprocessing`](https://docs.python.org/3.6/library/multiprocessing.html). You can set the `num_workers` parameter to the number of CPUs available on your machine for maximum performance, or limit it to a lower number to spare resources. + + +```python +from multiprocessing import cpu_count +CPU_COUNT = cpu_count() + +data_loader = mx.gluon.data.DataLoader(dataset, batch_size=5, num_workers=CPU_COUNT) + +for X_batch, y_batch in data_loader: + print("X_batch has shape {}, and y_batch has shape {}".format(X_batch.shape, y_batch.shape)) +``` + +`X_batch has shape (5, 3), and y_batch has shape (5, 1)` + +`X_batch has shape (5, 3), and y_batch has shape (5, 1)` + + +We can see 2 mini-batches of data (and labels), each with 5 samples, which makes sense given we started with a dataset of 10 samples. When comparing the shape of the batches to the samples returned by the [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset), we've gained an extra dimension at the start which is sometimes called the batch axis. + +Our `data_loader` loop will stop when every sample of `dataset` has been returned as part of a batch.
Sometimes the dataset length isn't divisible by the mini-batch size, leaving a final batch with a smaller number of samples. [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader)'s default behavior is to return this smaller mini-batch, but this can be changed by setting the `last_batch` parameter to `discard` (which ignores the last batch) or `rollover` (which starts the next epoch with the remaining samples). + +## Machine learning with `Dataset`s and `DataLoader`s + +You will often use a few different [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) objects in your Machine Learning project. It's essential to separate your training dataset from your testing dataset, and it's also good practice to have a validation dataset (a.k.a. development dataset) that can be used for optimising hyperparameters. + +Using Gluon [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) objects, we define the data to be included in each of these separate datasets. Common use cases for loading data are covered already (e.g. [`mxnet.gluon.data.vision.datasets.ImageFolderDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=imagefolderdataset#mxnet.gluon.data.vision.datasets.ImageFolderDataset)), but it's simple to create your own custom [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) classes for other types of data. You can even use included [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) objects for common datasets if you want to experiment quickly; they download and parse the data for you! In this example we use the [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset from Zalando Research. + +Many of the image [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset)s accept a function (via the optional `transform` parameter) which is applied to each sample returned by the [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset). It's useful for performing data augmentation, but can also be used for simpler data type conversion and pixel value scaling, as seen below.
+ + +```python +def transform(data, label): + data = data.astype('float32')/255 + return data, label + +train_dataset = mx.gluon.data.vision.datasets.FashionMNIST(train=True, transform=transform) +valid_dataset = mx.gluon.data.vision.datasets.FashionMNIST(train=False, transform=transform) +``` + + +```python +%matplotlib inline +from matplotlib.pylab import imshow + +sample_idx = 234 +sample = train_dataset[sample_idx] +data = sample[0] +label = sample[1] +label_desc = {0:'T-shirt/top', 1:'Trouser', 2:'Pullover', 3:'Dress', 4:'Coat', 5:'Sandal', 6:'Shirt', 7:'Sneaker', 8:'Bag', 9:'Ankle boot'} + +imshow(data[:,:,0].asnumpy(), cmap='gray') +print("Data type: {}".format(data.dtype)) +print("Label: {}".format(label)) +print("Label description: {}".format(label_desc[label])) +``` + +`Data type: ` + +`Label: 8` + +`Label description: Bag` + + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/datasets/fashion_mnist_bag.png) + + +When training machine learning models it is important to shuffle the training samples every time you pass through the dataset (i.e. each epoch). Sometimes the order of your samples will have a spurious relationship with the target variable, and shuffling the samples helps remove this. With [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) it's as simple as adding `shuffle=True`. You don't need to shuffle the validation and testing data though. + +If you have more complex shuffling requirements (e.g. when handling sequential data), take a look at [`mxnet.gluon.data.BatchSampler`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=batchsampler#mxnet.gluon.data.BatchSampler) and pass this to your [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) instead. + + +```python +batch_size = 32 +train_data_loader = mx.gluon.data.DataLoader(train_dataset, batch_size, shuffle=True, num_workers=CPU_COUNT) +valid_data_loader = mx.gluon.data.DataLoader(valid_dataset, batch_size, num_workers=CPU_COUNT) +``` + +With both `DataLoader`s defined, we can now train a model to classify each image and evaluate the validation loss at each epoch. Our Fashion MNIST dataset has 10 classes including shirt, dress, sneakers, etc. We define a simple fully connected network with a softmax output and use cross entropy as our loss. + + +```python +from mxnet import gluon, autograd, ndarray + +def construct_net(): + net = gluon.nn.HybridSequential() + with net.name_scope(): + net.add(gluon.nn.Dense(128, activation="relu")) + net.add(gluon.nn.Dense(64, activation="relu")) + net.add(gluon.nn.Dense(10)) + return net + +# construct and initialize network. +ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu() + +net = construct_net() +net.hybridize() +net.initialize(mx.init.Xavier(), ctx=ctx) +# define loss and trainer. +criterion = gluon.loss.SoftmaxCrossEntropyLoss() +trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1}) +``` + +```python + + +epochs = 5 +for epoch in range(epochs): + # training loop (with autograd and trainer steps, etc.) 
+ cumulative_train_loss = mx.nd.zeros(1, ctx=ctx) + training_samples = 0 + for batch_idx, (data, label) in enumerate(train_data_loader): + data = data.as_in_context(ctx).reshape((-1, 784)) # 28*28=784 + label = label.as_in_context(ctx) + with autograd.record(): + output = net(data) + loss = criterion(output, label) + loss.backward() + trainer.step(data.shape[0]) + cumulative_train_loss += loss.sum() + training_samples += data.shape[0] + train_loss = cumulative_train_loss.asscalar()/training_samples + + # validation loop + cumulative_valid_loss = mx.nd.zeros(1, ctx) + valid_samples = 0 + for batch_idx, (data, label) in enumerate(valid_data_loader): + data = data.as_in_context(ctx).reshape((-1, 784)) # 28*28=784 + label = label.as_in_context(ctx) + output = net(data) + loss = criterion(output, label) + cumulative_valid_loss += loss.sum() + valid_samples += data.shape[0] + valid_loss = cumulative_valid_loss.asscalar()/valid_samples + + print("Epoch {}, training loss: {:.2f}, validation loss: {:.2f}".format(epoch, train_loss, valid_loss)) +``` + +`Epoch 0, training loss: 0.54, validation loss: 0.45` + +`...` + +`Epoch 4, training loss: 0.32, validation loss: 0.33` + + +# Using own data with included `Dataset`s + +Gluon has a number of different [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset) classes for working with your own image data straight out-of-the-box. You can get started quickly using the [`mxnet.gluon.data.vision.datasets.ImageFolderDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=imagefolderdataset#mxnet.gluon.data.vision.datasets.ImageFolderDataset) which loads images directly from a user-defined folder, and infers the label (i.e. class) from the folders. + +We will run through an example for image classification, but a similar process applies for other vision tasks. If you already have your own collection of images to work with, you should partition your data into training and test sets, and place all objects of the same class into separate folders. Similar to: +``` + ./images/train/car/abc.jpg + ./images/train/car/efg.jpg + ./images/train/bus/hij.jpg + ./images/train/bus/klm.jpg + ./images/test/car/xyz.jpg + ./images/test/bus/uvw.jpg +``` + +You can download the Caltech 101 dataset if you don't already have images to work with for this example, but please note the download is 126MB. + +```python + +data_folder = "data" +dataset_name = "101_ObjectCategories" +archive_file = "{}.tar.gz".format(dataset_name) +archive_path = os.path.join(data_folder, archive_file) +data_url = "https://s3.us-east-2.amazonaws.com/mxnet-public/" + +if not os.path.isfile(archive_path): + mx.test_utils.download("{}{}".format(data_url, archive_file), dirname = data_folder) + print('Extracting {} in {}...'.format(archive_file, data_folder)) + tar = tarfile.open(archive_path, "r:gz") + tar.extractall(data_folder) + tar.close() + print('Data extracted.') +``` + +After downloading and extracting the data archive, we have two folders: `data/101_ObjectCategories` and `data/101_ObjectCategories_test`. We load the data into separate training and testing [`ImageFolderDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=imagefolderdataset#mxnet.gluon.data.vision.datasets.ImageFolderDataset)s.
+ +```python +training_path = os.path.join(data_folder, dataset_name) +testing_path = os.path.join(data_folder, "{}_test".format(dataset_name)) +``` + +We instantiate the [`ImageFolderDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=imagefolderdataset#mxnet.gluon.data.vision.datasets.ImageFolderDataset)s by providing the path to the data, and the folder structure will be traversed to determine which image classes are available and which images correspond to each class. You must take care to ensure the same classes are in both the training and testing datasets, otherwise the label encodings can get muddled. + +Optionally, you can pass a `transform` parameter to these [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset)s as we've seen before. + + +```python +train_dataset = mx.gluon.data.vision.datasets.ImageFolderDataset(training_path) +test_dataset = mx.gluon.data.vision.datasets.ImageFolderDataset(testing_path) +``` + +Samples from these datasets are tuples of data and label. Images are loaded from disk, decoded and optionally transformed when the `__getitem__(i)` method is called (equivalent to `train_dataset[i]`). + +As with the Fashion MNIST dataset, the labels will be integer encoded. You can use the `synsets` property of the [`ImageFolderDataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=imagefolderdataset#mxnet.gluon.data.vision.datasets.ImageFolderDataset)s to retrieve the original descriptions (e.g. `train_dataset.synsets[i]`). + + +```python +sample_idx = 539 +sample = train_dataset[sample_idx] +data = sample[0] +label = sample[1] + +imshow(data.asnumpy(), cmap='gray') +print("Data type: {}".format(data.dtype)) +print("Label: {}".format(label)) +print("Label description: {}".format(train_dataset.synsets[label])) +assert label == 1 +``` + +`Data type: ` + +`Label: 1` + +`Label description: Faces_easy` + + +![png](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/gluon/datasets/caltech101_face.png) + + +# Using own data with custom `Dataset`s + +Sometimes you have data that doesn't quite fit the format expected by the included [`Dataset`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataset#mxnet.gluon.data.Dataset)s. You might be able to preprocess your data to fit the expected format, but it is easy to create your own dataset to do this. + +All you need to do is create a class that implements a `__getitem__` method, which returns a sample (i.e. a tuple of [`mx.nd.NDArray`](https://mxnet.incubator.apache.org/api/python/ndarray/ndarray.html#mxnet.ndarray.NDArray)s). + +See the [Data Augmentation with Masks](http://mxnet.incubator.apache.org/tutorials/python/data_augmentation_with_masks.html) tutorial for an example of this. + +# Appendix: Upgrading from Module `DataIter` to Gluon `DataLoader` + +Before Gluon's [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader), MXNet used [`DataIter`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=dataiter#mxnet.io.DataIter) objects for loading data for training and testing.
[`DataIter`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=dataiter#mxnet.io.DataIter) has a similar interface for iterating through data, but it isn't directly compatible with typical Gluon [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) loops. Unlike Gluon [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader) which often returns a tuple of `(data, label)`, a [`DataIter`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=dataiter#mxnet.io.DataIter) returns a [`DataBatch`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=databatch#mxnet.io.DataBatch) object that has `data` and `label` properties. Switching to [`DataLoader`](https://mxnet.incubator.apache.org/api/python/gluon/data.html?highlight=dataloader#mxnet.gluon.data.DataLoader)s is highly recommended when using Gluon, but you'll need to take care of pre-processing steps such as augmentations in a `transform` function. + +So that you can get up and running with Gluon quicker if you have already implemented complex pre-processing steps using [`DataIter`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=dataiter#mxnet.io.DataIter), we have provided a simple class to wrap existing [`DataIter`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=dataiter#mxnet.io.DataIter) objects so they can be used in a typical Gluon training loop. You can use this class for `DataIter`s such as [`mxnet.image.ImageIter`](https://mxnet.incubator.apache.org/api/python/image/image.html?highlight=imageiter#mxnet.image.ImageIter) and [`mxnet.io.ImageRecordIter`](https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imagere#mxnet.io.ImageRecordIter) that have single data and label arrays. + + +```python +class DataIterLoader(): + def __init__(self, data_iter): + self.data_iter = data_iter + + def __iter__(self): + self.data_iter.reset() + return self + + def __next__(self): + batch = self.data_iter.__next__() + assert len(batch.data) == len(batch.label) == 1 + data = batch.data[0] + label = batch.label[0] + return data, label + + def next(self): + return self.__next__() # for Python 2 +``` + + +```python +data_iter = mx.io.NDArrayIter(data=X, label=y, batch_size=5) +data_iter_loader = DataIterLoader(data_iter) +for X_batch, y_batch in data_iter_loader: + assert X_batch.shape == (5, 3) + assert y_batch.shape == (5, 1) +``` + diff --git a/docs/python_docs/python/tutorials/packages/gluon/data/index.rst b/docs/python_docs/python/tutorials/packages/gluon/data/index.rst new file mode 100644 index 000000000000..9a1a0c263699 --- /dev/null +++ b/docs/python_docs/python/tutorials/packages/gluon/data/index.rst @@ -0,0 +1,40 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied.
See the License for the + specific language governing permissions and limitations + under the License. + +Data Tutorials +=============== + +These tutorials will help you learn how to load and use datasets with the Gluon API. + + .. card:: + :title: Data Augmentation + :link: data_augmentation.html + + A guide to data augmentation. + + .. card:: + :title: Gluon Datasets and DataLoader + :link: datasets.html + + A guide to loading data using the Gluon API. + +.. toctree:: + :hidden: + :maxdepth: 1 + :glob: + + * diff --git a/docs/python_docs/python/tutorials/packages/gluon/index.rst b/docs/python_docs/python/tutorials/packages/gluon/index.rst index d97279465287..4435637ee381 100644 --- a/docs/python_docs/python/tutorials/packages/gluon/index.rst +++ b/docs/python_docs/python/tutorials/packages/gluon/index.rst @@ -70,15 +70,9 @@ Data .. container:: cards - .. card:: - :title: Data Loading - :link: data/data.html - - How to load data for training. - .. card:: :title: Data Augmentation - :link: data/data_augmentation.md + :link: data/data_augmentation.html A guide to data augmentation. diff --git a/docs/python_docs/python/tutorials/packages/index.rst b/docs/python_docs/python/tutorials/packages/index.rst index 9fdd75c837f7..e43d658a0dbe 100644 --- a/docs/python_docs/python/tutorials/packages/index.rst +++ b/docs/python_docs/python/tutorials/packages/index.rst @@ -69,7 +69,7 @@ Shared APIs .. card:: :title: Data APIs - :link: data/index.html + :link: gluon/data/index.html How to use MXNet's data APIs. diff --git a/docs/python_docs/python/tutorials/performance/index.rst b/docs/python_docs/python/tutorials/performance/index.rst index e547ecd297ed..b1f5c66c2001 100644 --- a/docs/python_docs/python/tutorials/performance/index.rst +++ b/docs/python_docs/python/tutorials/performance/index.rst @@ -111,7 +111,7 @@ Distributed Training .. card:: :title: Data Parallelism in MXNet - :link: /api/faq/multi_devices.html + :link: /api/faq/multi_device An overview of distributed training strategies. diff --git a/docs/static_site/src/_includes/get_started/macos/perl/perl.md b/docs/static_site/src/_includes/get_started/macos/perl/perl.md index ab08489fdc56..45d59ddf78a6 100644 --- a/docs/static_site/src/_includes/get_started/macos/perl/perl.md +++ b/docs/static_site/src/_includes/get_started/macos/perl/perl.md @@ -1 +1 @@ -Refer to the [Perl section of installation guide](get_started/osx_setup.html#install-the-mxnet-package-for-perl). \ No newline at end of file +Refer to the [Perl section of installation guide](/get_started/osx_setup.html#install-the-mxnet-package-for-perl). \ No newline at end of file diff --git a/docs/static_site/src/_includes/get_started/macos/python/cpu/build-from-source.md b/docs/static_site/src/_includes/get_started/macos/python/cpu/build-from-source.md index 01dd35b9f332..ee8e378ec20e 100644 --- a/docs/static_site/src/_includes/get_started/macos/python/cpu/build-from-source.md +++ b/docs/static_site/src/_includes/get_started/macos/python/cpu/build-from-source.md @@ -1,2 +1,2 @@ -To build from source, refer to the [MXNet macOS installation guide](get_started/osx_setup.html). +To build from source, refer to the [MXNet macOS installation guide](/get_started/osx_setup.html). MXNet developers should refer to the MXNet wiki's [Developer Setup on Mac](https://cwiki.apache.org/confluence/display/MXNET/MXNet+Developer+Setup+on+Mac). 
From f045018f2c22f33c560eb58a23406c457855bb72 Mon Sep 17 00:00:00 2001 From: kshitij12345 Date: Sat, 14 Dec 2019 10:18:53 +0530 Subject: [PATCH 39/62] [MXNET-978] Higher Order Gradient Support `logp1`, `expm1`, `square`. (#15416) * support logp1, expm1, square for higher order grad * add relevant tests * update comments * update comments and tests * use NodeOpGen for readability. --- .../tensor/elemwise_unary_op_logexp.cc | 55 ++++++++++++++++++- src/operator/tensor/elemwise_unary_op_pow.cc | 30 +++++++++- .../python/unittest/test_higher_order_grad.py | 33 +++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_logexp.cc b/src/operator/tensor/elemwise_unary_op_logexp.cc index 7ca12e0b248b..7bc742104724 100644 --- a/src/operator/tensor/elemwise_unary_op_logexp.cc +++ b/src/operator/tensor/elemwise_unary_op_logexp.cc @@ -200,7 +200,32 @@ The storage type of ``log1p`` output depends upon the input storage type: .set_attr("FGradient", ElemwiseGradUseIn{"_backward_log1p"}); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_log1p, - unary_bwd); + unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: head_grad_grads (dL/dxgrad) + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseGradUseIn) + // f(x) = y = log(1+x) + // f'(x) = 1/(1+x) + // f''(x) = -1/(1+x)^2 + auto dldy = n->inputs[0]; + auto x = n->inputs[1]; + auto dydx_mul_dldy = nnvm::NodeEntry{n}; // f'(x) * head_grads + auto op = mxnet::util::NodeOpGen{n}; + + auto dydx = op.div(dydx_mul_dldy, dldy); + + auto d2ydx2_mid = op.mul(dydx_mul_dldy, dydx_mul_dldy); + auto d2ydx2_neg_mid = op.negative(d2ydx2_mid); + auto d2ydx2 = op.div(d2ydx2_neg_mid, dldy); + + std::vector ret; + + ret.emplace_back(op.mul(ograds[0], dydx)); + ret.emplace_back(op.mul(ograds[0], d2ydx2)); + return ret; + }); // expm1 MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(expm1, cpu, mshadow_op::expm1) @@ -217,7 +242,33 @@ The storage type of ``expm1`` output depends upon the input storage type: )code" ADD_FILELINE) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_expm1"}); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_expm1, unary_bwd); +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_expm1, unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: head_grad_grads (dL/dxgrad) + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseGradUseIn) + // f(x) = y = exp(x) - 1 + // f'(x) = exp(x) + // f''(x) = exp(x) + auto dldy = n->inputs[0]; + auto x = n->inputs[1]; + auto dydx_mul_dldy = nnvm::NodeEntry{n}; // f'(x) * head_grads + auto op = mxnet::util::NodeOpGen{n}; + + auto dydx = op.div(dydx_mul_dldy, dldy); + + auto exp_x = MakeNode("exp", n->attrs.name + "_backward_exp_grad", + {n->inputs[1]}, nullptr, &n); + auto d2ydx2_mul_dldy = op.mul(nnvm::NodeEntry{exp_x}, dldy); + + std::vector ret; + + + ret.emplace_back(op.mul(ograds[0], dydx)); + ret.emplace_back(op.mul(ograds[0], d2ydx2_mul_dldy)); + return ret; + }); } // namespace op } // namespace mxnet diff --git a/src/operator/tensor/elemwise_unary_op_pow.cc b/src/operator/tensor/elemwise_unary_op_pow.cc index 486fe268b0cf..084772980ed1 100644 --- a/src/operator/tensor/elemwise_unary_op_pow.cc +++ b/src/operator/tensor/elemwise_unary_op_pow.cc @@ -25,6 +25,7 @@ #include "elemwise_unary_op.h" #include "./elemwise_binary_op-inl.h" #include "../nn/mkldnn/mkldnn_ops-inl.h" +#include "../../nnvm/node_op_util.h" namespace mxnet { namespace 
op { @@ -120,7 +121,34 @@ The storage type of ``square`` output depends upon the input storage type: #endif MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_square, - unary_bwd); + unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: head_grad_grads (dL/dxgrad) + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseGradUseIn) + // f(x) = y = x^2 + // f'(x) = 2*x + // f''(x) = 2 + auto dldy = n->inputs[0]; + auto x = n->inputs[1]; + auto dydx_mul_dldy = nnvm::NodeEntry{n}; // f'(x) * head_grads + auto op = mxnet::util::NodeOpGen{n}; + + auto dydx = op.div(dydx_mul_dldy, dldy); + + std::unordered_map args = {{"scalar", "2.0"}}; + auto ones_like = MakeNode("ones_like", n->attrs.name + "_backward_ones_like", + {n->inputs[1]}, nullptr, &n); + auto d2ydx2 = op.mul(2.0, nnvm::NodeEntry{ones_like}); + auto d2ydx2_mul_dldy = op.mul(d2ydx2, dldy); + + std::vector ret; + + ret.emplace_back(op.mul(ograds[0], dydx)); + ret.emplace_back(op.mul(ograds[0], d2ydx2_mul_dldy)); + return ret; + }); // sqrt MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(sqrt, cpu, mshadow_op::square_root) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 0b0b00fffac7..527c35d5dd94 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -273,6 +273,39 @@ def grad_grad_op(x): check_second_order_unary(array, log10, grad_grad_op) +@with_seed() +def test_square(): + def grad_grad_op(x): + return nd.ones_like(x) * 2 + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + check_second_order_unary(array, nd.square, grad_grad_op) + + +@with_seed() +def test_expm1(): + def grad_grad_op(x): + return nd.exp(x) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + check_second_order_unary(array, nd.expm1, grad_grad_op) + + +@with_seed() +def test_log1p(): + def grad_grad_op(x): + return -1/((1+x)**2) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + check_second_order_unary(array, nd.log1p, grad_grad_op) + + @with_seed() def test_reciprocal(): def reciprocal(x): From 042682edca9b40e7d80e71f9713162227bd65ac7 Mon Sep 17 00:00:00 2001 From: Haibin Lin Date: Sat, 14 Dec 2019 08:28:25 -0800 Subject: [PATCH 40/62] [DOC] Fix tutorial link, and better error msg (#17057) --- python/mxnet/gluon/parameter.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py index b0f8fef2c0dd..695565e9cf46 100644 --- a/python/mxnet/gluon/parameter.py +++ b/python/mxnet/gluon/parameter.py @@ -972,7 +972,7 @@ def save(self, filename, strip_prefix=''): "this may be due to your Block shares parameters from other " "Blocks or you forgot to use 'with name_scope()' when creating " "child blocks. 
For more info on naming, please see " - "https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/naming.html"%( + "https://mxnet.io/api/python/docs/tutorials/packages/gluon/blocks/naming.html"%( strip_prefix, param.name, strip_prefix)) arg_dict[param.name[len(strip_prefix):]] = weight ndarray.save(filename, arg_dict) @@ -1006,7 +1006,9 @@ def load(self, filename, ctx=None, allow_missing=False, for name in self.keys(): assert name.startswith(restore_prefix), \ "restore_prefix is '%s' but Parameters name '%s' does not start " \ - "with '%s'"%(restore_prefix, name, restore_prefix) + "with '%s'. For more info on naming, please see " \ + "https://mxnet.io/api/python/docs/tutorials/packages/gluon/blocks/naming.html"%( + restore_prefix, name, restore_prefix) ndarray_load = ndarray.load(filename) self.load_dict(ndarray_load, ctx, allow_missing, ignore_extra, restore_prefix, filename, cast_dtype, dtype_source) @@ -1043,14 +1045,18 @@ def load_dict(self, param_dict, ctx=None, allow_missing=False, for name in self.keys(): assert name in arg_dict, \ "Parameter '%s' is missing in %s, which contains parameters: %s. " \ - "Please make sure source and target networks have the same prefix."%( + "Please make sure source and target networks have the same prefix." \ + "For more info on naming, please see " \ + "https://mxnet.io/api/python/docs/tutorials/packages/gluon/blocks/naming.html"%( name[lprefix:], error_str, _brief_print_list(arg_dict.keys())) for name in arg_dict: if name not in self._params: assert ignore_extra, \ "Parameter '%s' loaded from %s is not present in ParameterDict, " \ "choices are: %s. Set ignore_extra to True to ignore. " \ - "Please make sure source and target networks have the same prefix."%( + "Please make sure source and target networks have the same prefix." 
\ + "For more info on naming, please see " \ + "https://mxnet.io/api/python/docs/tutorials/packages/gluon/blocks/naming.html"%( name[lprefix:], error_str, _brief_print_list(self._params.keys())) continue self[name]._load_init(arg_dict[name], ctx, cast_dtype=cast_dtype, From 696c54738f2ceca89c9eb7eac18060b942e6dfab Mon Sep 17 00:00:00 2001 From: Haibin Lin Date: Sat, 14 Dec 2019 08:28:57 -0800 Subject: [PATCH 41/62] [BUGFIX] Fix trainer param order (#17068) * fix trainer param order * Update trainer.py * Update trainer.py * Update trainer.py --- python/mxnet/gluon/trainer.py | 5 ++++- tests/python/unittest/test_gluon_trainer.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py index 01f76d637a97..1ab86af2b93f 100644 --- a/python/mxnet/gluon/trainer.py +++ b/python/mxnet/gluon/trainer.py @@ -71,8 +71,11 @@ class Trainer(object): """ def __init__(self, params, optimizer, optimizer_params=None, kvstore='device', compression_params=None, update_on_kvstore=None): + param_list = [] if isinstance(params, (dict, ParameterDict)): - params = list(params.values()) + for key in sorted(list(params.keys())): + param_list.append(params[key]) + params = param_list if not isinstance(params, (list, tuple)): raise ValueError( "First argument must be a list or dict of Parameters, " \ diff --git a/tests/python/unittest/test_gluon_trainer.py b/tests/python/unittest/test_gluon_trainer.py index 2d5874a8b97b..9f02733d0a25 100644 --- a/tests/python/unittest/test_gluon_trainer.py +++ b/tests/python/unittest/test_gluon_trainer.py @@ -291,3 +291,19 @@ def test_trainer_lr_sched(): assert trainer.learning_rate == lr, (lr, trainer.learning_rate, i) lr *= factor mx.nd.waitall() + +@with_seed() +def test_gluon_trainer_param_order(): + net = mx.gluon.nn.Sequential() + # layers may be added in a random order for all workers + layers = {'ones_': 1, 'zeros_': 0} + for name, init in layers.items(): + net.add(mx.gluon.nn.Dense(10, in_units=10, weight_initializer=mx.init.Constant(init), + use_bias=False, prefix=name)) + params = net.collect_params() + net.initialize() + trainer = gluon.Trainer(params, 'sgd') + for name, init in layers.items(): + expected_idx = 0 if name == 'ones_' else 1 + expected_name = name + 'weight' + assert trainer._params[expected_idx].name == expected_name From bbdc1c3627ad5254c049c2bb871ecb4527d7dc14 Mon Sep 17 00:00:00 2001 From: MoisesHer <50716238+MoisesHer@users.noreply.github.com> Date: Sat, 14 Dec 2019 08:32:50 -0800 Subject: [PATCH 42/62] [reproducibility] multi_sum_sq review, AtomicAdd removal (#17002) * Update multi_sum_sq to avoid AtomicAdd * Add specific test for multi_sum_sq * Add a determism test and lint issues * better test for cheching op is deterministic * Follow MXNet letters case format * Reduce dimensions of tensors in the test --- src/operator/contrib/multi_sum_sq-inl.h | 10 ++- src/operator/contrib/multi_sum_sq.cc | 20 +++-- src/operator/contrib/multi_sum_sq.cu | 110 +++++++++++++++--------- tests/python/gpu/test_operator_gpu.py | 30 +++++++ 4 files changed, 118 insertions(+), 52 deletions(-) diff --git a/src/operator/contrib/multi_sum_sq-inl.h b/src/operator/contrib/multi_sum_sq-inl.h index 876155215d1c..b8609c0f217f 100644 --- a/src/operator/contrib/multi_sum_sq-inl.h +++ b/src/operator/contrib/multi_sum_sq-inl.h @@ -21,7 +21,7 @@ * Copyright (c) 2019 by Contributors * \file multi_l2_norm-inl.h * \brief vectorized L2 norm over multiple arrays operators - * \author Clement Fuji Tsang, 
Andrei Ivanov + * \author Clement Fuji Tsang, Andrei Ivanov, Moises Hernandez */ @@ -32,6 +32,10 @@ #include #include "../operator_common.h" +namespace multi_sum_sq { +enum MultiSumSqUpdateResource {kTempSpace}; +} // namespace multi_sum_sq + namespace mxnet { namespace op { @@ -80,7 +84,7 @@ inline bool MultiSumSqType(const NodeAttrs& attrs, template void MultiSumSqRun(const std::vector &inputs, int nInputs, - float *out_ptr, mshadow::Stream *s); + float *out_ptr, const OpContext &ctx); template void MultiSumSq(const nnvm::NodeAttrs& attrs, @@ -91,7 +95,7 @@ void MultiSumSq(const nnvm::NodeAttrs& attrs, auto s = ctx.get_stream(); const auto& p = dmlc::get(attrs.parsed); float* out_ptr = outputs[0].FlatTo2D(s).dptr_; - MultiSumSqRun(inputs, p.num_arrays, out_ptr, s); + MultiSumSqRun(inputs, p.num_arrays, out_ptr, ctx); } } // namespace op diff --git a/src/operator/contrib/multi_sum_sq.cc b/src/operator/contrib/multi_sum_sq.cc index cdb5423db23f..16c99d1c9699 100644 --- a/src/operator/contrib/multi_sum_sq.cc +++ b/src/operator/contrib/multi_sum_sq.cc @@ -21,7 +21,7 @@ * Copyright (c) 2019 by Contributors * \file multi_sum_sq.cc * \brief vectorized sum or squared over multiple arrays operators - * \author Clement Fuji Tsang, Andrei Ivanov + * \author Clement Fuji Tsang, Andrei Ivanov, Moises Hernandez */ #include "./multi_sum_sq-inl.h" @@ -52,20 +52,24 @@ NNVM_REGISTER_OP(multi_sum_sq) return ret; }) .set_attr("FCompute", MultiSumSq) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) .add_argument("data", "NDArray-or-Symbol[]", "Arrays") .add_arguments(MultiSumSqParam::__FIELDS__()); template -inline void CalcSumSq(const std::vector &inputs, int nInputs, +inline void CalcSumSq(const std::vector &inputs, int n_inputs, float *out_ptr, mshadow::Stream *s) { int i; size_t j; #pragma omp parallel for private(i, j) - for (i = 0; i < nInputs; ++i) { // array index in inputs + for (i = 0; i < n_inputs; ++i) { // array index in inputs float sum = 0; const auto address = inputs[i].FlatTo2D(s).dptr_; - const auto jMax = inputs[i].shape_.Size(); - for (j = 0; j < jMax; ++j) + const auto j_max = inputs[i].shape_.Size(); + for (j = 0; j < j_max; ++j) sum += address[j] * address[j]; out_ptr[i] = sum; @@ -73,10 +77,10 @@ inline void CalcSumSq(const std::vector &inputs, int nInputs, } template<> -void MultiSumSqRun(const std::vector &inputs, int nInputs, - float *out_ptr, mshadow::Stream *s) { +void MultiSumSqRun(const std::vector &inputs, int n_inputs, + float *out_ptr, const OpContext &ctx) { MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, - CalcSumSq(inputs, nInputs, out_ptr, s); + CalcSumSq(inputs, n_inputs, out_ptr, ctx.get_stream()); ) } diff --git a/src/operator/contrib/multi_sum_sq.cu b/src/operator/contrib/multi_sum_sq.cu index 6f6fe56bfd81..620c9ca8a073 100644 --- a/src/operator/contrib/multi_sum_sq.cu +++ b/src/operator/contrib/multi_sum_sq.cu @@ -21,7 +21,7 @@ * Copyright (c) 2019 by Contributors * \file multi_sum_sq.cu * \brief vectorized sums of squares norm over multiple arrays operators - * \author Clement Fuji Tsang, Andrei Ivanov + * \author Clement Fuji Tsang, Andrei Ivanov, Moises Hernandez */ #include "./multi_sum_sq-inl.h" #include @@ -43,96 +43,121 @@ struct MultiSumSqKernelParam { int sizes[ARRAY_LIMIT]; unsigned char block_to_tensor[BLOCK_LIMIT]; int block_to_chunk[BLOCK_LIMIT]; + int max_chunks_per_tensor = -1; }; template -__device__ __forceinline__ DType reduce_block_into_lanes(DType* x, - DType val, - int 
lanes = 1, - bool share_result = false) { - int tid = threadIdx.x + threadIdx.y * blockDim.x; - int blockSize = blockDim.x * blockDim.y; // blockSize is intended to be a multiple of 32. - - if (blockSize >= 64) { +__device__ __forceinline__ DType ReduceBlockIntoLanes(DType* x, + DType val) { + int tid = threadIdx.x; + int block_size = blockDim.x; + + if (block_size >= 64) { x[tid] = val; __syncthreads(); } #pragma unroll - for (int i = (blockSize >> 1); i >= 64; i >>= 1) { + for (int i = (block_size >> 1); i >= 64; i >>= 1) { if (tid < i) x[tid] = x[tid] + x[tid+i]; __syncthreads(); } DType final; - if (tid < 32) { - if (blockSize >= 64) + if (block_size >= 64) final = x[tid] + x[tid+32]; else final = val; - // __SYNCWARP(); #pragma unroll - for (int i = 16; i >= lanes; i >>= 1) + for (int i = 16; i >= 1; i >>= 1) final = final + __shfl_down_sync(0xffffffff, final, i); } - - if (share_result) { - if (tid < lanes) - x[tid] = final; // EpilogueOp - // Make sure the smem result is visible to all warps. - __syncthreads(); - } - return final; } template __global__ void MultiSumSqKernel(int chunk_size, MultiSumSqKernelParam param, - float* output) { + float* block_reductions, + int start_tensor_id) { const int tensor_loc = param.block_to_tensor[blockIdx.x]; const int chunk_len = param.block_to_chunk[blockIdx.x] * chunk_size; const int n = param.sizes[tensor_loc] - chunk_len; const DType* x = param.addresses[tensor_loc] + chunk_len; - const auto iMax = n <= chunk_size? n : chunk_size; + const auto i_max = n <= chunk_size ? n : chunk_size; __shared__ float vals[512]; // Non-divergent exit condition for __syncthreads, not necessary here float val = 0; for (int i_start = 0; - i_start < iMax; + i_start < i_max; i_start += blockDim.x * ILP) { int i = i_start + threadIdx.x; - // #pragma unroll - for (int ii = 0; ii < ILP && i < iMax; ++ii, i += blockDim.x) { +#pragma unroll + for (int ii = 0; ii < ILP && i < i_max; ++ii, i += blockDim.x) { const auto incoming_val = static_cast(x[i]); val += incoming_val * incoming_val; } } + const float final = ReduceBlockIntoLanes(vals, val); + + if (threadIdx.x == 0) { + block_reductions[(start_tensor_id + tensor_loc) * param.max_chunks_per_tensor + + param.block_to_chunk[blockIdx.x]] = final; + } +} + +template +__global__ void GlobalReductionKernel(MultiSumSqKernelParam param, + float* block_reductions, + float* output) { + __shared__ float vals[512]; + float* reductions_this_tensor = block_reductions + blockIdx.x * param.max_chunks_per_tensor; + float val = 0; + for (int i = threadIdx.x; i < param.max_chunks_per_tensor; i += blockDim.x) + val += reductions_this_tensor[i]; + + float final = ReduceBlockIntoLanes(vals, val); - const float final = reduce_block_into_lanes(vals, val); if (threadIdx.x == 0) - atomicAdd(output + tensor_loc, final); + output[blockIdx.x] = final; } template<> -void MultiSumSqRun(const std::vector &inputs, int nInputs, - float *out_ptr, mshadow::Stream *s) { +void MultiSumSqRun(const std::vector &inputs, int n_inputs, + float *out_ptr, const OpContext &ctx) { const int chunk_size = 32768; const int block_size = 512; using namespace mxnet_op; + auto s = ctx.get_stream(); auto stream = mshadow::Stream::GetStream(s); - CUDA_CALL(cudaMemsetAsync(out_ptr, 0, nInputs * sizeof(float), stream)); MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, { MultiSumSqKernelParam param; + // find max num of chunks in tensors + for (int t = 0; t < n_inputs; t++) { + int chunks_this_tensor = (inputs[t].shape_.Size() + chunk_size - 1) / chunk_size; + if 
(chunks_this_tensor > param.max_chunks_per_tensor) + param.max_chunks_per_tensor = chunks_this_tensor; + } + // temporary storage for the reduction of each block + size_t workspace_size = n_inputs * param.max_chunks_per_tensor * sizeof(float); + Tensor workspace = + ctx.requested[multi_sum_sq::kTempSpace].get_space_typed( + Shape1(workspace_size), s); + Tensor block_reductions(reinterpret_cast(&workspace[0]), + Shape1(n_inputs * param.max_chunks_per_tensor), s); + CUDA_CALL(cudaMemsetAsync(block_reductions.dptr_, 0, + n_inputs * param.max_chunks_per_tensor* sizeof(float), + stream)); + int loc_block_info = 0; // position in param.block_to_tensor and param.block_to_chunck int loc_tensor_info = 0; // position in param.sizes and param.addresses - int output_offset = 0; // array index of the first block pointed on by param.addresses - for (int t = 0; t < nInputs; t++, loc_tensor_info++) { // array index in inputs + int start_tensor_id = 0; + for (int t = 0; t < n_inputs; t++, loc_tensor_info++) { // array index in inputs param.sizes[loc_tensor_info] = inputs[t].shape_.Size(); param.addresses[loc_tensor_info] = inputs[t].FlatTo2D(s).dptr_; const int chunks_this_tensor = (inputs[t].shape_.Size() - 1) / chunk_size; @@ -142,27 +167,30 @@ void MultiSumSqRun(const std::vector &inputs, int nInputs, loc_block_info++; const bool last_curr_chunk = chunk == chunks_this_tensor; - const bool tensors_full = last_curr_chunk && loc_tensor_info == 109; - const bool blocks_full = (loc_block_info == 320); - const bool last_chunk = last_curr_chunk && t == nInputs - 1; + const bool tensors_full = last_curr_chunk && loc_tensor_info == (ARRAY_LIMIT-1); + const bool blocks_full = (loc_block_info == BLOCK_LIMIT); + const bool last_chunk = last_curr_chunk && t == n_inputs - 1; if (!(tensors_full || blocks_full || last_chunk)) continue; - MultiSumSqKernel<<>> - (chunk_size, param, out_ptr + output_offset); + (chunk_size, param, block_reductions.dptr_, start_tensor_id); MSHADOW_CUDA_POST_KERNEL_CHECK(MultiSumSqKernel); + loc_block_info = 0; if (last_curr_chunk) { // if you start from a new tensor loc_tensor_info = -1; - output_offset = t + 1; + start_tensor_id = t + 1; } else { // if you start from the same tensor param.sizes[0] = param.sizes[loc_tensor_info]; param.addresses[0] = param.addresses[loc_tensor_info]; loc_tensor_info = 0; - output_offset = t; + start_tensor_id = t; } } } + // Global reduction + GlobalReductionKernel<<>> + (param, block_reductions.dptr_, out_ptr); }); } diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 114fab770efd..7d23c2ca0aaf 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -271,6 +271,36 @@ def test_fft(): def _make_ndarrays(input_list, ctx=mx.gpu(0)): return [mx.nd.array(arr, dtype=arr.dtype, ctx=ctx) for arr in input_list] +def check_multi_sum_sq(dtype, shapes, ctx, tol1, tol2): + values_arr = [np.random.rand(*shape).astype(dtype) * 10. 
for shape in shapes] + mx_vals = _make_ndarrays(values_arr, ctx=ctx) + sum_sq = mx.nd.multi_sum_sq(*mx_vals, num_arrays=len(shapes)) + sum_sq2 = mx.nd.multi_sum_sq(*mx_vals, num_arrays=len(shapes)) + # checks that operator is deterministic + assert np.array_equal(sum_sq.asnumpy(), sum_sq2.asnumpy()) + + ref_sum_sq = mx.nd.array([(v.astype('float32') ** 2).sum() for v in values_arr], + dtype='float32', ctx=ctx) + assert_almost_equal(ref_sum_sq.asnumpy(), sum_sq.asnumpy(), atol=tol1, rtol=tol1) + +@with_seed() +def test_multi_sum_sq(): + min_nparam = 100 + max_nparam = 120 + min_dim = 50000 + max_dim = 100000 + max_ndim = 1 + + dtypes = ['float16','float32', 'float64'] + for ctx in [mx.gpu(0)]: + for dtype in dtypes: + nparam = np.random.randint(min_nparam + 1, max_nparam + 1) + shapes = [np.random.randint(min_dim, max_dim + 1, size=max_ndim) for i in range(nparam)] + low_tol = ctx == mx.cpu(0) and ('float16'in [dtype]) + tol1 = 1e-3 if low_tol else 1e-5 + tol2 = 1e-6 if low_tol else 1e-7 + check_multi_sum_sq(dtype, shapes, ctx, tol1, tol2) + def check_fast_lars(w_dtype, g_dtype, shapes, ctx, tol1, tol2): weights_arr = [np.random.rand(*shape).astype(w_dtype) * 10. for shape in shapes] grads_arr = [np.random.rand(*shape).astype(g_dtype) for shape in shapes] From 831b548279be5d598660515158469f4e685f1078 Mon Sep 17 00:00:00 2001 From: Sam Skalicky Date: Sat, 14 Dec 2019 13:36:14 -0800 Subject: [PATCH 43/62] fix for number of inputs/outputs for backward custom ops (#17069) * fix for number of inputs/outputs for backward custom ops * retrigger CI --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 24374cf19cdc..f8db501d46f0 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -265,7 +265,7 @@ int MXLoadLib(const char *path) { &num_in, &num_out)) << "Error calling ParseAttrs::num_outputs for custom operator '" << name_str << "'"; - return num_in + num_out; + return num_in + 2*num_out; }; // lambda function to call infer shape From 3d38dbde744954854015919d4faf56ac1aea16de Mon Sep 17 00:00:00 2001 From: perdasilva Date: Sat, 14 Dec 2019 22:47:42 +0100 Subject: [PATCH 44/62] Adds min cuda version assertion decorator for unit tests (#17054) --- tests/python/gpu/test_operator_gpu.py | 4 +- tests/python/unittest/common.py | 54 ++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 7d23c2ca0aaf..e548217b9369 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -30,7 +30,7 @@ curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) sys.path.insert(0, os.path.join(curr_path, '../unittest')) -from common import setup_module, with_seed, teardown, assert_raises_cudnn_not_satisfied +from common import setup_module, with_seed, teardown, assert_raises_cudnn_not_satisfied, assert_raises_cuda_not_satisfied from common import run_in_spawned_process from test_operator import * from test_numpy_ndarray import * @@ -2696,6 +2696,7 @@ def convert_bias(F, q_bias, k_bias, v_bias, num_heads): assert(grads_orig[k].shape == grads_opti[k].shape) assert_allclose(grads_orig[k], grads_opti[k], rtol=1e-2, atol=1e-3) +@assert_raises_cuda_not_satisfied(min_version='9.1') def test_multihead_attention_selfatt(): for dtype in ['float16', 'float32']: check_multihead_attention_selfatt(dtype=dtype) @@ -2859,6 +2860,7 @@ def convert_bias(F, k_bias, v_bias, num_heads): 
assert(grads_orig[k].shape == grads_opti[k].shape) assert_allclose(grads_orig[k], grads_opti[k], rtol=1e-2, atol=1e-3) +@assert_raises_cuda_not_satisfied(min_version='9.1') def test_multihead_attention_encdec(): for dtype in ['float16', 'float32']: check_multihead_attention_encdec(dtype=dtype) diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py index 816508fbc3c8..5a4604603a61 100644 --- a/tests/python/unittest/common.py +++ b/tests/python/unittest/common.py @@ -99,14 +99,42 @@ def random_seed(seed=None): random.seed(next_seed) -def assert_raises_cudnn_not_satisfied(min_version): +def _assert_raise_cuxx_version_not_satisfied(min_version, cfg): + + def less_than(version_left, version_right): + """Compares two version strings in the format num(.[num])*""" + if not version_left or not version_right: + return False + + left = version_left.split(".") + right = version_right.split(".") + + # 0 pad shortest version - e.g. + # less_than("9.1", "9.1.9") == less_than("9.1.0", "9.1.9") + longest = max(len(left), len(right)) + left.extend([0] * (longest - len(left))) + right.extend([0] * (longest - len(right))) + + # compare each of the version components + for l, r in zip(left, right): + if int(r) < int(l): + return False + + # keep track of how many are the same + if int(r) == int(l): + longest = longest - 1 + + # longest = 0 mean version_left == version_right -> False + # longest > 0 version_left < version_right -> True + return longest > 0 + def test_helper(orig_test): @make_decorator(orig_test) def test_new(*args, **kwargs): - cudnn_off = os.getenv('CUDNN_OFF_TEST_ONLY') == 'true' - cudnn_env_version = os.getenv('CUDNN_VERSION', None if cudnn_off else '7.3.1') - cudnn_test_disabled = cudnn_off or cudnn_env_version < min_version - if not cudnn_test_disabled or mx.context.current_context().device_type == 'cpu': + cuxx_off = os.getenv(cfg['TEST_OFF_ENV_VAR']) == 'true' + cuxx_env_version = os.getenv(cfg['VERSION_ENV_VAR'], None if cuxx_off else cfg['DEFAULT_VERSION']) + cuxx_test_disabled = cuxx_off or less_than(cuxx_env_version, min_version) + if not cuxx_test_disabled or mx.context.current_context().device_type == 'cpu': orig_test(*args, **kwargs) else: assert_raises((MXNetError, RuntimeError), orig_test, *args, **kwargs) @@ -114,6 +142,22 @@ def test_new(*args, **kwargs): return test_helper +def assert_raises_cudnn_not_satisfied(min_version): + return _assert_raise_cuxx_version_not_satisfied(min_version, { + 'TEST_OFF_ENV_VAR': 'CUDNN_OFF_TEST_ONLY', + 'VERSION_ENV_VAR': 'CUDNN_VERSION', + 'DEFAULT_VERSION': '7.3.1' + }) + + +def assert_raises_cuda_not_satisfied(min_version): + return _assert_raise_cuxx_version_not_satisfied(min_version, { + 'TEST_OFF_ENV_VAR': 'CUDA_OFF_TEST_ONLY', + 'VERSION_ENV_VAR': 'CUDA_VERSION', + 'DEFAULT_VERSION': '10.1' + }) + + def with_seed(seed=None): """ A decorator for nosetests test functions that manages rng seeds. 
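The decorator added in this patch only expects a test to fail when the environment's CUDA/cuDNN version is below `min_version` on a GPU context; per the helper's own comments, the shorter dotted version string is zero-padded before comparison, so "9.1" is treated like "9.1.0". A minimal sketch of that intended dotted-version comparison is below — the name `version_lt` and the sample assertions are illustrative only and are not part of the patch:

    def version_lt(left, right):
        """True iff dotted version string `left` < `right`; missing components count as zero."""
        l = [int(x) for x in left.split('.')]
        r = [int(x) for x in right.split('.')]
        width = max(len(l), len(r))
        l += [0] * (width - len(l))   # zero-pad the shorter version, e.g. "9.1" -> [9, 1, 0]
        r += [0] * (width - len(r))
        return l < r                  # element-wise comparison of equal-length int lists

    assert not version_lt('9.1', '9.1.0')   # equal after padding, so not "less than"
    assert version_lt('9.1', '9.1.9')       # 9.1.0 < 9.1.9
    assert not version_lt('10.1', '9.2')    # components compared numerically, not as strings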
From 7dbb4b710ce84e0cbfab70e90fa0f7e47eb2e602 Mon Sep 17 00:00:00 2001 From: dw_sjtu <46704444+sjtuWangDing@users.noreply.github.com> Date: Mon, 16 Dec 2019 14:38:58 +0800 Subject: [PATCH 45/62] fix precision problem in linalg_solve, linalg_tensorinv, linalg_cholesky op test (#16981) fix format set atol=rtol=1e-1 in test_np_linalg_cholesky fix test_np_linalg_tensorinv fix bug in test_np_linalg_tensorinv commit tensorinv src --- python/mxnet/ndarray/numpy/linalg.py | 57 +++++- python/mxnet/numpy/linalg.py | 57 +++++- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/linalg.py | 57 +++++- src/operator/numpy/linalg/np_tensorinv-inl.h | 166 ++++++++++++++++++ src/operator/numpy/linalg/np_tensorinv.cc | 134 ++++++++++++++ src/operator/numpy/linalg/np_tensorinv.cu | 43 +++++ .../unittest/test_numpy_interoperability.py | 26 +++ tests/python/unittest/test_numpy_op.py | 166 +++++++++++++++--- 9 files changed, 682 insertions(+), 25 deletions(-) create mode 100644 src/operator/numpy/linalg/np_tensorinv-inl.h create mode 100644 src/operator/numpy/linalg/np_tensorinv.cc create mode 100644 src/operator/numpy/linalg/np_tensorinv.cu diff --git a/python/mxnet/ndarray/numpy/linalg.py b/python/mxnet/ndarray/numpy/linalg.py index a85c6324f685..4c49c35b4a44 100644 --- a/python/mxnet/ndarray/numpy/linalg.py +++ b/python/mxnet/ndarray/numpy/linalg.py @@ -21,7 +21,7 @@ from . import _op as _mx_nd_np from . import _internal as _npi -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv'] def norm(x, ord=None, axis=None, keepdims=False): @@ -406,3 +406,58 @@ def solve(a, b): True """ return _npi.solve(a, b) + + +def tensorinv(a, ind=2): + r""" + Compute the 'inverse' of an N-dimensional array. + + The result is an inverse for `a` relative to the tensordot operation + ``tensordot(a, b, ind)``, i. e., up to floating-point accuracy, + ``tensordot(tensorinv(a), a, ind)`` is the "identity" tensor for the + tensordot operation. + + Parameters + ---------- + a : array_like + Tensor to 'invert'. Its shape must be 'square', i. e., + ``prod(a.shape[:ind]) == prod(a.shape[ind:])``. + ind : int, optional + Number of first indices that are involved in the inverse sum. + Must be a positive integer, default is 2. + + Returns + ------- + b : ndarray + `a`'s tensordot inverse, shape ``a.shape[ind:] + a.shape[:ind]``. + + Raises + ------ + MXNetError + If `a` is singular or not 'square' (in the above sense). 
+ + See Also + -------- + tensordot, tensorsolve + + Examples + -------- + >>> a = np.eye(4*6) + >>> a.shape = (4, 6, 8, 3) + >>> ainv = np.linalg.tensorinv(a, ind=2) + >>> ainv.shape + (8, 3, 4, 6) + >>> b = np.random.randn(4, 6) + >>> np.allclose(np.tensordot(ainv, b), np.linalg.tensorsolve(a, b)) + True + + >>> a = np.eye(4*6) + >>> a.shape = (24, 8, 3) + >>> ainv = np.linalg.tensorinv(a, ind=1) + >>> ainv.shape + (8, 3, 24) + >>> b = np.random.randn(24) + >>> np.allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b)) + True + """ + return _npi.tensorinv(a, ind) diff --git a/python/mxnet/numpy/linalg.py b/python/mxnet/numpy/linalg.py index 33d636b7044c..2ee2d2670693 100644 --- a/python/mxnet/numpy/linalg.py +++ b/python/mxnet/numpy/linalg.py @@ -20,7 +20,7 @@ from __future__ import absolute_import from ..ndarray import numpy as _mx_nd_np -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv'] def norm(x, ord=None, axis=None, keepdims=False): @@ -424,3 +424,58 @@ def solve(a, b): True """ return _mx_nd_np.linalg.solve(a, b) + + +def tensorinv(a, ind=2): + r""" + Compute the 'inverse' of an N-dimensional array. + + The result is an inverse for `a` relative to the tensordot operation + ``tensordot(a, b, ind)``, i. e., up to floating-point accuracy, + ``tensordot(tensorinv(a), a, ind)`` is the "identity" tensor for the + tensordot operation. + + Parameters + ---------- + a : array_like + Tensor to 'invert'. Its shape must be 'square', i. e., + ``prod(a.shape[:ind]) == prod(a.shape[ind:])``. + ind : int, optional + Number of first indices that are involved in the inverse sum. + Must be a positive integer, default is 2. + + Returns + ------- + b : ndarray + `a`'s tensordot inverse, shape ``a.shape[ind:] + a.shape[:ind]``. + + Raises + ------ + MXNetError + If `a` is singular or not 'square' (in the above sense). + + See Also + -------- + tensordot, tensorsolve + + Examples + -------- + >>> a = np.eye(4*6) + >>> a.shape = (4, 6, 8, 3) + >>> ainv = np.linalg.tensorinv(a, ind=2) + >>> ainv.shape + (8, 3, 4, 6) + >>> b = np.random.randn(4, 6) + >>> np.allclose(np.tensordot(ainv, b), np.linalg.tensorsolve(a, b)) + True + + >>> a = np.eye(4*6) + >>> a.shape = (24, 8, 3) + >>> ainv = np.linalg.tensorinv(a, ind=1) + >>> ainv.shape + (8, 3, 24) + >>> b = np.random.randn(24) + >>> np.allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b)) + True + """ + return _mx_nd_np.linalg.tensorinv(a, ind) diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index 23593a47e6ba..e93720564774 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -134,6 +134,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs): 'linalg.cholesky', 'linalg.inv', 'linalg.solve', + 'linalg.tensorinv', 'shape', 'trace', 'tril', diff --git a/python/mxnet/symbol/numpy/linalg.py b/python/mxnet/symbol/numpy/linalg.py index 6df175ebfc4b..a445c79001ec 100644 --- a/python/mxnet/symbol/numpy/linalg.py +++ b/python/mxnet/symbol/numpy/linalg.py @@ -22,7 +22,7 @@ from . import _op as _mx_sym_np from . 
import _internal as _npi -__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve'] +__all__ = ['norm', 'svd', 'cholesky', 'inv', 'det', 'slogdet', 'solve', 'tensorinv'] def norm(x, ord=None, axis=None, keepdims=False): @@ -393,3 +393,58 @@ def solve(a, b): True """ return _npi.solve(a, b) + + +def tensorinv(a, ind=2): + r""" + Compute the 'inverse' of an N-dimensional array. + + The result is an inverse for `a` relative to the tensordot operation + ``tensordot(a, b, ind)``, i. e., up to floating-point accuracy, + ``tensordot(tensorinv(a), a, ind)`` is the "identity" tensor for the + tensordot operation. + + Parameters + ---------- + a : array_like + Tensor to 'invert'. Its shape must be 'square', i. e., + ``prod(a.shape[:ind]) == prod(a.shape[ind:])``. + ind : int, optional + Number of first indices that are involved in the inverse sum. + Must be a positive integer, default is 2. + + Returns + ------- + b : ndarray + `a`'s tensordot inverse, shape ``a.shape[ind:] + a.shape[:ind]``. + + Raises + ------ + MXNetError + If `a` is singular or not 'square' (in the above sense). + + See Also + -------- + tensordot, tensorsolve + + Examples + -------- + >>> a = np.eye(4*6) + >>> a.shape = (4, 6, 8, 3) + >>> ainv = np.linalg.tensorinv(a, ind=2) + >>> ainv.shape + (8, 3, 4, 6) + >>> b = np.random.randn(4, 6) + >>> np.allclose(np.tensordot(ainv, b), np.linalg.tensorsolve(a, b)) + True + + >>> a = np.eye(4*6) + >>> a.shape = (24, 8, 3) + >>> ainv = np.linalg.tensorinv(a, ind=1) + >>> ainv.shape + (8, 3, 24) + >>> b = np.random.randn(24) + >>> np.allclose(np.tensordot(ainv, b, 1), np.linalg.tensorsolve(a, b)) + True + """ + return _npi.tensorinv(a, ind) diff --git a/src/operator/numpy/linalg/np_tensorinv-inl.h b/src/operator/numpy/linalg/np_tensorinv-inl.h new file mode 100644 index 000000000000..4f92ccf9d125 --- /dev/null +++ b/src/operator/numpy/linalg/np_tensorinv-inl.h @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_tensorinv-inl.h + * \brief Placeholder for tensor inverse operator + */ +#ifndef MXNET_OPERATOR_NUMPY_LINALG_NP_TENSORINV_INL_H_ +#define MXNET_OPERATOR_NUMPY_LINALG_NP_TENSORINV_INL_H_ + +#include +#include +#include "../../operator_common.h" +#include "../../mshadow_op.h" +#include "../../tensor/la_op.h" +#include "../../tensor/la_op-inl.h" + +namespace mxnet { +namespace op { + +using namespace mshadow; + +struct TensorinvParam : public dmlc::Parameter { + int ind; + DMLC_DECLARE_PARAMETER(TensorinvParam) { + DMLC_DECLARE_FIELD(ind) + .set_default(2) + .describe("Number of first indices that are involved in the inverse sum."); + } +}; + +template +void TensorinvOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + + mshadow::Stream *s = ctx.get_stream(); + const mxnet::TBlob& a_tblob = inputs[0]; + const mxnet::TBlob& inv_a_tblob = outputs[0]; + const mxnet::TShape& a_shape = a_tblob.shape_; + CHECK_EQ(inv_a_tblob.type_flag_, a_tblob.type_flag_) + << "Binary function only support input/output with the same type."; + MSHADOW_SGL_DBL_TYPE_SWITCH(outputs[0].type_flag_, OType, { + const int ind = nnvm::get(attrs.parsed).ind; + dim_t prod_front = 1, prod_back = 1; + if (ind < a_shape.ndim()) { + for (int i = 0; i < ind; ++i) { + prod_front *= a_shape[i]; + } + for (int i = ind; i < a_shape.ndim(); ++i) { + prod_back *= a_shape[i]; + } + } else { + for (int i = 0; i < a_shape.ndim(); ++i) { + prod_front *= a_shape[i]; + } + } + Tensor A = + a_tblob.get_with_shape(Shape3(1, prod_back, prod_front), s); + Tensor inv_A = + inv_a_tblob.get_with_shape(Shape3(1, prod_back, prod_front), s); + inverse::op(A, inv_A, ctx, attrs); + }); +} + +template +void TensorinvOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + + mshadow::Stream *s = ctx.get_stream(); + const TBlob& out_grad = inputs[0]; + const TBlob& inv_a = inputs[1]; + const TBlob& grad_a = outputs[0]; + const TShape& inv_a_shape = inv_a.shape_; + MSHADOW_SGL_DBL_TYPE_SWITCH(outputs[0].type_flag_, OType, { + const int axes = nnvm::get(attrs.parsed).ind; + CHECK_LE(inv_a_shape.ndim(), 6U) + << "tensorinv backward only support tensor's dimension <= 6"; + if (axes < inv_a_shape.ndim()) { + const int axes1 = inv_a_shape.ndim() - axes, axes2 = axes; + TShape inv_a_transpose_shape(inv_a_shape.ndim(), -1); + for (int i = 0; i < axes; ++i) { + inv_a_transpose_shape[i] = inv_a_shape[i + inv_a_shape.ndim() - axes]; + } + for (int i = axes; i < inv_a_shape.ndim(); ++i) { + inv_a_transpose_shape[i] = inv_a_shape[i - axes]; + } + TShape temp_shape(2 * axes, -1); + for (int i = 0; i < axes; ++i) { + temp_shape[i] = inv_a_transpose_shape[i]; + temp_shape[i + axes] = inv_a_transpose_shape[i]; + } + Tensor workspace = + ctx.requested[0].get_space_typed(Shape1(temp_shape.Size() * sizeof(OType)), + ctx.get_stream()); + TBlob temp_tblob = + TBlob(reinterpret_cast(workspace.dptr_), temp_shape, xpu::kDevMask); + dim_t a1 = 1, a2 = 1; + for (int i = 0; i < axes2; ++i) { + a1 *= inv_a_transpose_shape[i]; + } + for (int i = 0; i < axes1; ++i) { + a2 *= inv_a_shape[i]; + } + Tensor inv_a_tensor = + inv_a.get_with_shape(Shape3(1, a2, a1), s); + 
Tensor out_grad_tensor = + out_grad.get_with_shape(Shape3(1, a2, a1), s); + Tensor temp_tensor = + temp_tblob.get_with_shape(Shape3(1, a1, a1), s); + Tensor grad_a_tensor = + grad_a.get_with_shape(Shape3(1, a1, a2), s); + gemm2::op(inv_a_tensor, out_grad_tensor, temp_tensor, OType(1), true, false, s); + gemm2::op(temp_tensor, inv_a_tensor, grad_a_tensor, OType(-1), false, true, s); + } else { // axes >= inv_a_shape.ndim() + dim_t a = 1; + for (int i = 0; i < inv_a_shape.ndim(); ++i) { + a *= inv_a_shape[i]; + } + // check again + CHECK_EQ(a, 1U) + << "a shape must be square, i. e., prod(a.shape[:ind]) == prod(a.shape[ind:])."; + Tensor inv_a_tensor = + inv_a.get_with_shape(Shape1(1), s); + Tensor out_grad_tensor = + out_grad.get_with_shape(Shape1(1), s); + Tensor grad_a_tensor = + grad_a.get_with_shape(Shape1(1), s); + ASSIGN_DISPATCH(grad_a_tensor, kWriteTo, + OType(-1) * inv_a_tensor * out_grad_tensor * inv_a_tensor); + } + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_LINALG_NP_TENSORINV_INL_H_ diff --git a/src/operator/numpy/linalg/np_tensorinv.cc b/src/operator/numpy/linalg/np_tensorinv.cc new file mode 100644 index 000000000000..2fee11c846b7 --- /dev/null +++ b/src/operator/numpy/linalg/np_tensorinv.cc @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_tensorinv.cc + * \brief CPU implementation placeholder of Tensor Inverse Operator + */ +#include "./np_tensorinv-inl.h" + +namespace mxnet { +namespace op { + +inline bool TensorinvOpShape(const nnvm::NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + + const mxnet::TShape& a_shape = (*in_attrs)[0]; + const int a_ndim = a_shape.ndim(); + mxnet::TShape inv_a_shape(a_shape); + if (!ndim_is_known(a_shape)) { + return false; + } + // ind > 0, defalut = 2 + int ind = 2; + ind = nnvm::get(attrs.parsed).ind; + CHECK_GT(ind, 0) << "Invalid ind argument."; + + if (a_ndim > 0 && ind < a_ndim) { + for (int i = 0; i < ind; ++i) { + inv_a_shape[a_ndim - ind + i] = a_shape[i]; + } + for (int i = ind; i < a_ndim; ++i) { + inv_a_shape[i - ind] = a_shape[i]; + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, inv_a_shape); + } else { // ind >= a_ndim + SHAPE_ASSIGN_CHECK(*out_attrs, 0, inv_a_shape); + } + CHECK_NE(inv_a_shape.ndim(), 0) + << "can not reshape array"; + + dim_t prod_front = 1, prod_back = 1; + if (ind < a_ndim) { + for (int i = 0; i < ind; ++i) { + prod_front *= a_shape[i]; + } + for (int i = ind; i < a_ndim; ++i) { + prod_back *= a_shape[i]; + } + CHECK_GT(prod_back, 0) + << "can not reshape array of size 0 into shape"; + } else { + for (int i = 0; i < a_ndim; ++i) { + prod_front *= a_shape[i]; + } + } + // prod_back >= 1 and prod_front == prod_back + CHECK_EQ(prod_front, prod_back) + << "a shape must be square, i. e., prod(a.shape[:ind]) == prod(a.shape[ind:])."; + return !mxnet::op::shape_is_none(out_attrs->at(0)); +} + +inline bool TensorinvOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + int a_type = in_attrs->at(0); + // unsupport float16 + CHECK_NE(a_type, mshadow::kFloat16) + << "array type float16 is unsupported in linalg"; + if (mshadow::kFloat32 == a_type) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat64); + } + return out_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(TensorinvParam); + +NNVM_REGISTER_OP(_npi_tensorinv) +.describe(R"code()code" ADD_FILELINE) +.set_attr_parser(mxnet::op::ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; +}) +.set_attr("FInferShape", TensorinvOpShape) +.set_attr("FInferType", TensorinvOpType) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector(1, ResourceRequest::kTempSpace); +}) +.set_attr("THasDeterministicOutput", true) +.set_attr("FCompute", TensorinvOpForward) +.set_attr("FGradient", mxnet::op::ElemwiseGradUseOut{"_backward_npi_tensorinv"}) +.add_argument("a", "NDArray-or-Symbol", "First input") +.add_arguments(TensorinvParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_npi_tensorinv) +.set_attr_parser(mxnet::op::ParamParser) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("FResourceRequest", + [](const NodeAttrs& ){ + return std::vector{1, ResourceRequest::kTempSpace}; +}) +.set_attr("TIsBackward", true) +.set_attr("FCompute", TensorinvOpBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/linalg/np_tensorinv.cu b/src/operator/numpy/linalg/np_tensorinv.cu new file mode 100644 index 000000000000..8cad95f40b3a --- /dev/null +++ b/src/operator/numpy/linalg/np_tensorinv.cu @@ 
-0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_tensorinv.cu + * \brief GPU implementation of the Tensor Inverse Operator + */ + +#include +#include +#include "./np_tensorinv-inl.h" + +namespace mxnet { +namespace op { + +#if MXNET_USE_CUSOLVER == 1 + +NNVM_REGISTER_OP(_npi_tensorinv) +.set_attr("FCompute", TensorinvOpForward); + +NNVM_REGISTER_OP(_backward_npi_tensorinv) +.set_attr("FCompute", TensorinvOpBackward); + +#endif + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 0e875825a699..a670f794860f 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -374,6 +374,31 @@ def _add_workload_linalg_det(): OpArgMngr.add_workload('linalg.det', np.array(_np.ones((0, 1, 1)), dtype=np.float64)) +def _add_workload_linalg_tensorinv(): + shapes = [ + (1, 20, 4, 5), + (2, 2, 10, 4, 5), + (2, 12, 5, 3, 4, 5), + (3, 2, 3, 4, 24) + ] + dtypes = (np.float32, np.float64) + for dtype, shape in itertools.product(dtypes, shapes): + ind = shape[0] + prod_front = 1 + prod_back = 1 + for k in shape[1:ind + 1]: + prod_front *= k + for k in shape[1 + ind:]: + prod_back *= k + a_shape = (prod_back, prod_front) + a = _np.random.randn(*a_shape) + if prod_back == prod_front: + if _np.allclose(_np.dot(a, _np.linalg.inv(a)), _np.eye(prod_front)): + a_shape = shape[1:] + a = a.reshape(a_shape) + OpArgMngr.add_workload('linalg.tensorinv', np.array(a, dtype=dtype), ind) + + def _add_workload_linalg_slogdet(): OpArgMngr.add_workload('linalg.slogdet', np.array(_np.ones((2, 2)), dtype=np.float32)) OpArgMngr.add_workload('linalg.slogdet', np.array(_np.ones((0, 1, 1)), dtype=np.float64)) @@ -1428,6 +1453,7 @@ def _prepare_workloads(): _add_workload_linalg_inv() _add_workload_linalg_solve() _add_workload_linalg_det() + _add_workload_linalg_tensorinv() _add_workload_linalg_slogdet() _add_workload_trace() _add_workload_tril() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 54bce0aaae84..545466bf0814 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -3537,11 +3537,28 @@ def check_cholesky(L, data_np): assert L.shape == L_expected.shape assert_almost_equal(L.asnumpy(), L_expected, rtol=rtol, atol=atol) + def newSymmetricPositiveDefineMatrix_2D(shape, ran=(0., 10.), max_cond=4): + while 1: + D = _np.diag(_np.random.uniform(ran[0], ran[1], shape[-1])) + I = _np.eye(shape[-1]).reshape(shape) + v = _np.random.uniform(-1., 1., shape[-1]).reshape(shape[:-1] + (1,)) + v = v / _np.linalg.norm(v, axis=-2, keepdims=True) + v_T = _np.swapaxes(v, -1, -2) + U = I - 
2 * _np.matmul(v, v_T) + a = _np.matmul(_np.matmul(U, D), _np.swapaxes(U, -1, -2)) + if (_np.linalg.cond(a, 2) < max_cond): + return a + + def newSymmetricPositiveDefineMatrix_nD(shape, ran=(0., 10.), max_cond=4): + n = int(_np.prod(shape[:-2])) if len(shape) > 2 else 1 + return _np.array([newSymmetricPositiveDefineMatrix_2D(shape[-2:], ran, max_cond) for i in range(n)]).reshape(shape) + shapes = [ (0, 0), (1, 1), (5, 5), (6, 6), + (10, 10), (6, 6, 6), (1, 0, 0), (0, 1, 1), @@ -3549,7 +3566,11 @@ def check_cholesky(L, data_np): ] dtypes = ['float32', 'float64'] for hybridize, dtype, shape in itertools.product([True, False], dtypes, shapes): - atol = rtol = 1e-2 + rtol = 1e-3 + atol = 1e-5 + if dtype == 'float32': + rtol = 1e-2 + atol = 1e-4 test_cholesky = TestCholesky() if hybridize: @@ -3571,19 +3592,7 @@ def check_cholesky(L, data_np): if 0 in shape: data_np = np.ones(shape) else: - data_np_l = _np.random.uniform(-10., 10., shape) - if dtype == 'float32': - data_np_l_flat = data_np_l.reshape((-1, shape[-2], shape[-1])) - else: - data_np_l_flat = _np.tril(data_np_l.reshape((-1, shape[-2], shape[-1]))) - for i in range(data_np_l_flat.shape[0]): - for j in range(data_np_l_flat.shape[-1]): - if data_np_l_flat[i, j, j] < 0: - data_np_l_flat[i, j, j] = -data_np_l_flat[i, j, j] - elif data_np_l_flat[i, j, j] == 0: - data_np_l_flat[i, j, j] = 2 - data_np = _np.matmul(data_np_l_flat, data_np_l_flat.swapaxes(-1, -2)) - data_np = data_np.reshape(shape) + data_np = newSymmetricPositiveDefineMatrix_nD(shape) # When dtype is np.FP32, truncation from FP64 to FP32 could also be a source of # instability since the ground-truth gradient is computed using FP64 data. @@ -3696,7 +3705,7 @@ def test_np_linalg_solve(): class TestSolve(HybridBlock): def __init__(self): super(TestSolve, self).__init__() - + def hybrid_forward(self, F, a, b): return F.np.linalg.solve(a, b) @@ -3713,6 +3722,23 @@ def check_solve(x, a_np, b_np): assert x.shape == x_expected.shape assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + def newInvertibleMatrix_2D(shape, max_cond=4): + while 1: + # generate well-conditioned matrices with small eigenvalues + D = _np.diag(_np.random.uniform(-1.0, 1.0, shape[-1])) + I = _np.eye(shape[-1]).reshape(shape) + v = _np.random.uniform(-10., 10., shape[-1]).reshape(shape[:-1] + (1,)) + v = v / _np.linalg.norm(v, axis=-2, keepdims=True) + v_T = _np.swapaxes(v, -1, -2) + U = I - 2 * _np.matmul(v, v_T) + a = _np.matmul(U, D) + if (_np.linalg.cond(a, 2) < max_cond): + return a + + def newInvertibleMatrix_nD(shape, max_cond=4): + n = int(np.prod(np.array(shape[:-2]))) if len(shape) > 2 else 1 + return _np.array([newInvertibleMatrix_2D(shape[-2:]) for i in range(n)]).reshape(shape) + def get_grad_b(A, X): dX = _np.ones_like(X) A_inv = _np.linalg.inv(A) @@ -3748,18 +3774,14 @@ def get_grad_b(A, X): b = _np.ones(shape) else: shape_a = shape - a = _np.random.rand(*shape_a) shape_b = list(shape_a) if nrh == -1: shape_b[-1] = 1 - x = _np.random.rand(*shape_b) - b = _np.matmul(a, x) - shape_b.pop() - b = b.reshape(shape_b) else : shape_b[-1] = nrh - x = _np.random.rand(*shape_b) - b = _np.matmul(a, x) + a = newInvertibleMatrix_nD(shape_a) + x = _np.random.randn(*shape_b) + b = _np.matmul(a, x) a = np.array(a, dtype=dtype) b = np.array(b, dtype=dtype) a.attach_grad() @@ -3784,6 +3806,106 @@ def get_grad_b(A, X): check_solve(mx_out, a, b) +def test_np_linalg_tensorinv(): + class TestTensorinv(HybridBlock): + def __init__(self, ind=2): + super(TestTensorinv, self).__init__() + self._ind = ind + + 
def hybrid_forward(self, F, a): + return F.np.linalg.tensorinv(a, ind=self._ind) + + def check_tensorinv(inv_a, a_np, ind): + try: + inv_a_expected = _np.linalg.tensorinv(a_np, ind=ind) + except Exception as e: + print(a_np) + print(a_np.shape) + print(e) + else: + assert inv_a.shape == inv_a_expected.shape + assert_almost_equal(inv_a.asnumpy(), inv_a_expected, rtol=rtol, atol=atol) + + def newInvertibleMatrix_2D(shape, max_cond=4): + while 1: + # generate well-conditioned matrices with small eigenvalues + D = _np.diag(_np.random.uniform(-1.0, 1.0, shape[-1])) + I = _np.eye(shape[-1]).reshape(shape) + v = _np.random.uniform(-10., 10., shape[-1]).reshape(shape[:-1] + (1,)) + v = v / _np.linalg.norm(v, axis=-2, keepdims=True) + v_T = _np.swapaxes(v, -1, -2) + U = I - 2 * _np.matmul(v, v_T) + a = _np.matmul(U, D) + if (_np.linalg.cond(a, 2) < max_cond): + return a + + def get_grad_A(A, ind): + inv_A = _np.linalg.tensorinv(A, ind) + d_inv_A = _np.ones_like(inv_A) + axes1 = len(A.shape) - ind + axes2 = ind + inv_A_trans_axes = tuple(_np.arange(len(A.shape)))[axes1:] + tuple(_np.arange(len(A.shape)))[:axes1] + inv_A_trans = _np.transpose(inv_A, inv_A_trans_axes) + temp_tensor = -_np.tensordot(inv_A_trans, d_inv_A, axes = axes1) + return _np.tensordot(temp_tensor, inv_A_trans, axes = axes2) + + shapes = [ + (1, 1, 1), + (1, 2, 2), + (1, 6, 2, 3), + (1, 10, 2, 5), + (1, 12, 3, 4), + (2, 1, 1), + (2, 1, 1, 1), + (2, 2, 5, 5, 2), + (2, 1, 6, 3, 2), + (2, 1, 8, 4, 2), + (2, 12, 1, 3, 4, 1), + (3, 1, 1, 1), + (3, 2, 3, 1, 6), + (3, 3, 2, 1, 2, 3, 1) + ] + dtypes = ['float32', 'float64'] + for hybridize, shape, dtype, in itertools.product([False, True], shapes, dtypes): + rtol = 1e-3 + atol = 1e-5 + if dtype == 'float32': + rtol = 1e-2 + atol = 1e-4 + ind = shape[0] + test_tensorinv = TestTensorinv(ind=ind) + if hybridize: + test_tensorinv.hybridize() + + prod_front = 1 + prod_back = 1 + for k in shape[1:ind + 1]: + prod_front *= k + for k in shape[1 + ind:]: + prod_back *= k + a_shape = (prod_back, prod_front) + a = newInvertibleMatrix_2D(a_shape) + a_shape = shape[1:] + inv_a_shape = shape[(1 + ind):] + shape[1:(ind + 1)] + a = np.array(a.reshape(a_shape), dtype=dtype) + a.attach_grad() + with mx.autograd.record(): + mx_out = test_tensorinv(a) + # check tensorinv validity + assert mx_out.shape == inv_a_shape + check_tensorinv(mx_out, a, ind) + + # check tensorinv backward + if 0 not in mx_out.shape: + mx.autograd.backward(mx_out) + grad_A_expected = get_grad_A(a.asnumpy(), ind) + assert_almost_equal(a.grad.asnumpy(), grad_A_expected, rtol=rtol, atol=atol) + + # check imperative once again + mx_out = np.linalg.tensorinv(a, ind) + check_tensorinv(mx_out, a, ind) + + @with_seed() @use_np def test_np_linalg_det(): From 843daf589b318e1fdd0dfba47939d2b2a41319d4 Mon Sep 17 00:00:00 2001 From: Rohit Kumar Srivastava Date: Sun, 15 Dec 2019 23:00:49 -0800 Subject: [PATCH 46/62] adding stacktrace in Jenkinsfile_utils.groovy to inspect Python2 failure cause in CI (#17065) --- ci/Jenkinsfile_utils.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/Jenkinsfile_utils.groovy b/ci/Jenkinsfile_utils.groovy index bf6c2d740b14..cdc02d99d7b0 100644 --- a/ci/Jenkinsfile_utils.groovy +++ b/ci/Jenkinsfile_utils.groovy @@ -278,6 +278,7 @@ def main_wrapper(args) { currentBuild.result = "SUCCESS" update_github_commit_status('SUCCESS', 'Job succeeded') } catch (caughtError) { + println(caughtError.getStackTrace()); node(NODE_UTILITY) { echo "caught ${caughtError}" err = caughtError From 
897f4fae075c5261958de1a07cded0b5b3058a7a Mon Sep 17 00:00:00 2001 From: Zixuan Wei Date: Mon, 16 Dec 2019 16:03:12 +0800 Subject: [PATCH 47/62] [MKLDNN] mkldnn RNN operator enhancement (#17075) * mkldnn rnn operator enhancement `add` operation support Rename AddTo Add MXNET_USE_MKLDNN_RNN env Add Env var for switching to naive RNN impl and naive add/copy impl * Re-run CI, op:test_reduce failed on Unix-CPU * Rerun CI, Python2 CPU on Unix-CPU timeout --- docs/static_site/src/pages/api/faq/env_var.md | 12 +- src/common/utils.h | 20 +- src/operator/nn/mkldnn/mkldnn_rnn-inl.h | 35 +- src/operator/nn/mkldnn/mkldnn_rnn.cc | 311 ++++++++++++------ src/operator/rnn.cc | 11 +- tests/python/unittest/test_operator.py | 21 +- 6 files changed, 264 insertions(+), 146 deletions(-) diff --git a/docs/static_site/src/pages/api/faq/env_var.md b/docs/static_site/src/pages/api/faq/env_var.md index bc98c39d9570..57ab27630a8f 100644 --- a/docs/static_site/src/pages/api/faq/env_var.md +++ b/docs/static_site/src/pages/api/faq/env_var.md @@ -289,11 +289,11 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`. If no such algorithm exists given other constraints, MXNet will error out. This variable affects the choice of CUDNN convolution algorithms. Please see [CUDNN developer guide](https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html) for more details. -* MXNET_CPU_PARALLEL_COPY_SIZE +* MXNET_CPU_PARALLEL_SIZE - Values: Int ```(default=200000)``` - - The minimum size to call parallel copy by OpenMP in CPU2CPU mode. - - When the array size is bigger than or equal to this threshold, NDArray::Copy(from, to) is implemented by OpenMP with the Recommended OMP Thread Count. - - When the array size is less than this threshold, NDArray::Copy(from , to)) is implemented by memcpy in single thread. + - The minimum size to call parallel operations by OpenMP for CPU context. + - When the array size is bigger than or equal to this threshold, the operation implemented by OpenMP is executed with the Recommended OMP Thread Count. + - When the array size is less than this threshold, the operation is implemented naively in single thread. * MXNET_OPTIMIZER_AGGREGATION_SIZE - Values: Int ```(default=4)``` @@ -349,6 +349,10 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`. - Values: 0(false) or 1(true) ```(default=1)``` - If this variable is set, MXNet will simplify the computation graph, eliminating duplicated operations on the same inputs. +* MXNET_USE_MKLDNN_RNN + - Values: 0(false) or 1(true) ```(default=1)``` + - This variable controls whether to use the MKL-DNN backend in fused RNN operator for CPU context. There are two fusion implementations of RNN operator in MXNet. The MKL-DNN implementation has a better performance than the naive one, but the latter is more stable in the backward operation currently. 
+ Settings for Minimum Memory Usage --------------------------------- - Make sure ```min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1``` diff --git a/src/common/utils.h b/src/common/utils.h index 9a9c686e73c9..2187ad053b66 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -769,7 +769,7 @@ inline void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape */ template inline void ParallelCopy(DType* dst, const DType* src, index_t size) { - static index_t copy_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_COPY_SIZE", 200000); + static index_t copy_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000); if (size >= copy_block_size) { #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) for (index_t i = 0; i < size; ++i) { @@ -780,6 +780,24 @@ inline void ParallelCopy(DType* dst, const DType* src, index_t size) { } } +/*! + * \breif parallelize add by OpenMP + */ +template +inline void ParallelAdd(DType* dst, const DType* src, index_t size) { + static index_t add_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000); + if (size >= add_block_size) { + #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) + for (index_t i = 0; i < size; ++i) { + dst[i] += src[i]; + } + } else { + for (index_t i = 0; i < size; ++i) { + dst[i] += src[i]; + } + } +} + /*! * \brief If numpy compatibility is turned off (default), the shapes passed in * by users follow the legacy shape definition: diff --git a/src/operator/nn/mkldnn/mkldnn_rnn-inl.h b/src/operator/nn/mkldnn/mkldnn_rnn-inl.h index ad3f7332a8f4..314106b98eb9 100644 --- a/src/operator/nn/mkldnn/mkldnn_rnn-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_rnn-inl.h @@ -120,25 +120,24 @@ class RnnPrimitive { template static RnnPrimitive Create(Args&&... 
args) { RnnPrimitive rnn_fwd_prim; - rnn_fwd_prim.pd_.reset( - new typename rnn_fwd::desc(std::forward(args)...), - [](typename rnn_fwd::desc* pd) { - delete reinterpret_cast(pd); + auto fwd_desc = typename rnn_fwd::desc(std::forward(args)...); + rnn_fwd_prim.fwd_pd_.reset( + new typename rnn_fwd::primitive_desc(fwd_desc, CpuEngine::Get()->get_engine()), + [](typename rnn_fwd::primitive_desc* pd) { + delete reinterpret_cast(pd); }); - const typename rnn_fwd::desc& fwd_desc = - *(reinterpret_cast(rnn_fwd_prim.pd_.get())); - typename rnn_fwd::primitive_desc fwd_pd(fwd_desc, CpuEngine::Get()->get_engine()); - rnn_fwd_prim.weights_layer_desc_ = fwd_pd.weights_layer_desc(); - rnn_fwd_prim.weights_iter_desc_ = fwd_pd.weights_iter_desc(); - rnn_fwd_prim.workspace_desc_ = fwd_pd.workspace_desc(); + auto fwd_pd = reinterpret_cast(rnn_fwd_prim.fwd_pd_.get()); + rnn_fwd_prim.weights_layer_desc_ = fwd_pd->weights_layer_desc(); + rnn_fwd_prim.weights_iter_desc_ = fwd_pd->weights_iter_desc(); + rnn_fwd_prim.workspace_desc_ = fwd_pd->workspace_desc(); - rnn_fwd_prim.primitive_ = std::shared_ptr(new rnn_fwd(fwd_pd)); + rnn_fwd_prim.primitive_ = std::shared_ptr(new rnn_fwd(*fwd_pd)); return rnn_fwd_prim; } RnnPrimitive() { - this->pd_ = nullptr; + this->fwd_pd_ = nullptr; this->primitive_ = nullptr; this->weights_layer_desc_ = mkldnn::memory::desc(); this->weights_iter_desc_ = mkldnn::memory::desc(); @@ -146,7 +145,7 @@ class RnnPrimitive { } RnnPrimitive(const RnnPrimitive& rnn_fwd_prim) { - this->pd_ = rnn_fwd_prim.pd_; + this->fwd_pd_ = rnn_fwd_prim.fwd_pd_; this->primitive_ = rnn_fwd_prim.primitive_; this->weights_layer_desc_ = rnn_fwd_prim.weights_layer_desc_; this->weights_iter_desc_ = rnn_fwd_prim.weights_iter_desc_; @@ -155,7 +154,7 @@ class RnnPrimitive { RnnPrimitive& operator=(const RnnPrimitive& rnn_fwd_prim) { if (this != &rnn_fwd_prim) { - this->pd_ = rnn_fwd_prim.pd_; + this->fwd_pd_ = rnn_fwd_prim.fwd_pd_; this->primitive_ = rnn_fwd_prim.primitive_; this->weights_layer_desc_ = rnn_fwd_prim.weights_layer_desc_; this->weights_iter_desc_ = rnn_fwd_prim.weights_iter_desc_; @@ -165,7 +164,7 @@ class RnnPrimitive { return *this; } - const void* GetPrimDesc() const { return pd_.get(); } + const void* GetPrimDesc() const { return fwd_pd_.get(); } const mkldnn::primitive& GetPrim() const { return *primitive_; } const mkldnn::memory::desc& GetLayerDesc() const { @@ -181,7 +180,7 @@ class RnnPrimitive { } private: - std::shared_ptr pd_; + std::shared_ptr fwd_pd_; std::shared_ptr primitive_; mkldnn::memory::desc weights_layer_desc_; mkldnn::memory::desc weights_iter_desc_; @@ -370,7 +369,9 @@ class MKLDNNRnnBackward { void SetDataGradsMem(void* diff_src, void* diff_state, void* diff_statecell, void* diff_out, void* diff_state_out, void* diff_statecell_out, const int dtype = mshadow::kFloat32); - void CommitWeightsDiff(void* diff_weights, void* diff_bias, const int dtype = mshadow::kFloat32); + void CommitWeightsDiff(void* diff_weights, void* diff_bias, + const OpReqType req, + const int dtype = mshadow::kFloat32); const mkldnn::primitive& GetBwd() const { return *bwd_.primitive_; } const mkldnn_args_map_t& GetArgsMap() const { return net_args_; } diff --git a/src/operator/nn/mkldnn/mkldnn_rnn.cc b/src/operator/nn/mkldnn/mkldnn_rnn.cc index e797b649d295..6da8f3b8a58a 100644 --- a/src/operator/nn/mkldnn/mkldnn_rnn.cc +++ b/src/operator/nn/mkldnn/mkldnn_rnn.cc @@ -213,13 +213,13 @@ RnnBwdPrimitive GetRnnBwdPrim(const MKLDNNRnnForwardTraining &fwd, auto dst_state_desc = layer_param.state_outputs ? 
memory::desc( layer_param.state_dims, data_type, tag::ldnc) : memory::desc(); - const void* fwd_desc = fwd.GetPrimDesc(); + const void* fwd_pd = fwd.GetPrimDesc(); auto bwd = RnnBwdPrimitive(); switch (mode) { case rnn_enum::kLstm: { - const lstm_forward::primitive_desc* desc = - reinterpret_cast(fwd_desc); - bwd = RnnBwdPrimitive::Create(*desc, + const lstm_forward::primitive_desc* pd = + reinterpret_cast(fwd_pd); + bwd = RnnBwdPrimitive::Create(*pd, prop, mkldnn_rnn_direction, // data desc src_layer_desc, src_state_desc, src_state_desc, weight_layer_desc, @@ -231,9 +231,9 @@ RnnBwdPrimitive GetRnnBwdPrim(const MKLDNNRnnForwardTraining &fwd, dst_state_desc); } break; case rnn_enum::kGru: { - const lbr_gru_forward::primitive_desc* desc = - reinterpret_cast(fwd_desc); - bwd = RnnBwdPrimitive::Create(*desc, + const lbr_gru_forward::primitive_desc* pd = + reinterpret_cast(fwd_pd); + bwd = RnnBwdPrimitive::Create(*pd, prop, mkldnn_rnn_direction, // data desc src_layer_desc, src_state_desc, weight_layer_desc, @@ -244,10 +244,10 @@ RnnBwdPrimitive GetRnnBwdPrim(const MKLDNNRnnForwardTraining &fwd, } break; case rnn_enum::kRnnRelu: case rnn_enum::kRnnTanh: { - const vanilla_rnn_forward::primitive_desc* desc = - reinterpret_cast(fwd_desc); + const vanilla_rnn_forward::primitive_desc* pd = + reinterpret_cast(fwd_pd); bwd = RnnBwdPrimitive::Create( - *desc, prop, + *pd, prop, mode == rnn_enum::kRnnTanh ? algorithm::eltwise_tanh : algorithm::eltwise_relu, mkldnn_rnn_direction, // data desc @@ -776,16 +776,8 @@ void MKLDNNRnnBackward::SetDataGradsMem( } } -template -void HalveWeightsDiff(DType* w, const size_t size) { - const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); - #pragma omp parallel for num_threads(omp_threads) - for (int i = 0; i < static_cast(size); ++i) { - w[i] *= 0.5; - } -} - -void MKLDNNRnnBackward::CommitWeightsDiff(void* diff_weights, void* diff_bias, int dtype) { +void MKLDNNRnnBackward::CommitWeightsDiff(void* diff_weights, void* diff_bias, + const OpReqType req, const int dtype) { using tag = mkldnn::memory::format_tag; auto& cpu_engine = CpuEngine::Get()->get_engine(); auto s = mkldnn::stream(cpu_engine); @@ -795,11 +787,12 @@ void MKLDNNRnnBackward::CommitWeightsDiff(void* diff_weights, void* diff_bias, i const int direction = param.bidirectional ? 
2 : 1; const int ngates = GetRnnGatesNum(param.mode); const size_t dtype_bytes = mshadow::mshadow_sizeof(dtype); + const size_t wxh_size = param.single_w_size; + const size_t wx_size = param.input_size * param.state_size * ngates; + const size_t wh_size = param.state_size * param.state_size * ngates; const size_t wxh_bytes = param.single_w_size * dtype_bytes; const size_t wx_bytes = param.input_size * param.state_size * ngates * dtype_bytes; const size_t wh_bytes = param.state_size * param.state_size * ngates * dtype_bytes; - char* diff_wx_ptr = static_cast(diff_weights_layer_->get_data_handle()); - char* diff_wh_ptr = static_cast(diff_weights_iter_->get_data_handle()); /* naive weights layout is: 1st-layer: | wx_lr | wh_lr | wx_rl | wh_rl | @@ -807,68 +800,109 @@ void MKLDNNRnnBackward::CommitWeightsDiff(void* diff_weights, void* diff_bias, i size: | wxh_bytes | |wx_bytes|wh_bytes| */ - char* naive_weights = static_cast(diff_weights); - if (param.mode != rnn_enum::kGru) { - for (int shift = 0; shift < num_layer * direction; ++shift) { - std::memcpy(naive_weights + shift * wxh_bytes, - diff_wx_ptr + shift * wx_bytes, wx_bytes); - } - // align naive_weights to weights_iter memory - naive_weights += wx_bytes; - for (int shift = 0; shift < num_layer * direction; ++shift) { - std::memcpy(naive_weights + shift * wxh_bytes, - diff_wh_ptr + shift * wh_bytes, wh_bytes); - } - } else { - const size_t wx_bytes_per_gate = param.input_size * param.state_size * dtype_bytes; - const size_t wh_bytes_per_gate = param.state_size * param.state_size * dtype_bytes; - for (int shift = 0; shift < num_layer * direction; ++shift) { - std::memcpy(naive_weights + shift * wxh_bytes + wx_bytes_per_gate, - diff_wx_ptr + shift * wx_bytes, wx_bytes_per_gate); - std::memcpy(naive_weights + shift * wxh_bytes, - diff_wx_ptr + shift * wx_bytes + wx_bytes_per_gate, wx_bytes_per_gate); - std::memcpy(naive_weights + shift * wxh_bytes + 2 * wx_bytes_per_gate, - diff_wx_ptr + shift * wx_bytes + 2 * wx_bytes_per_gate, wx_bytes_per_gate); + if (kWriteTo == req) { + char* naive_weights = static_cast(diff_weights); + char* diff_wx_ptr = static_cast(diff_weights_layer_->get_data_handle()); + char* diff_wh_ptr = static_cast(diff_weights_iter_->get_data_handle()); + if (param.mode != rnn_enum::kGru) { + for (int shift = 0; shift < num_layer * direction; ++shift) { + std::memcpy(naive_weights + shift * wxh_bytes, + diff_wx_ptr + shift * wx_bytes, wx_bytes); + } + // align naive_weights to weights_iter memory + naive_weights += wx_bytes; + for (int shift = 0; shift < num_layer * direction; ++shift) { + std::memcpy(naive_weights + shift * wxh_bytes, + diff_wh_ptr + shift * wh_bytes, wh_bytes); + } + } else { + const size_t wx_bytes_per_gate = param.input_size * param.state_size * dtype_bytes; + const size_t wh_bytes_per_gate = param.state_size * param.state_size * dtype_bytes; + for (int shift = 0; shift < num_layer * direction; ++shift) { + std::memcpy(naive_weights + shift * wxh_bytes + wx_bytes_per_gate, + diff_wx_ptr + shift * wx_bytes, wx_bytes_per_gate); + std::memcpy(naive_weights + shift * wxh_bytes, + diff_wx_ptr + shift * wx_bytes + wx_bytes_per_gate, wx_bytes_per_gate); + std::memcpy(naive_weights + shift * wxh_bytes + 2 * wx_bytes_per_gate, + diff_wx_ptr + shift * wx_bytes + 2 * wx_bytes_per_gate, wx_bytes_per_gate); + } + // align naive_weights to weights_iter memory + naive_weights += wx_bytes; + for (int shift = 0; shift < num_layer * direction; ++shift) { + std::memcpy(naive_weights + shift * wxh_bytes + wh_bytes_per_gate, 
+ diff_wh_ptr + shift * wh_bytes, wh_bytes_per_gate); + std::memcpy(naive_weights + shift * wxh_bytes, + diff_wh_ptr + shift * wh_bytes + wh_bytes_per_gate, wh_bytes_per_gate); + std::memcpy(naive_weights + shift * wxh_bytes + 2 * wh_bytes_per_gate, + diff_wh_ptr + shift * wh_bytes + 2 * wh_bytes_per_gate, wh_bytes_per_gate); + } } - // align naive_weights to weights_iter memory - naive_weights += wx_bytes; - for (int shift = 0; shift < num_layer * direction; ++shift) { - std::memcpy(naive_weights + shift * wxh_bytes + wh_bytes_per_gate, - diff_wh_ptr + shift * wh_bytes, wh_bytes_per_gate); - std::memcpy(naive_weights + shift * wxh_bytes, - diff_wh_ptr + shift * wh_bytes + wh_bytes_per_gate, wh_bytes_per_gate); - std::memcpy(naive_weights + shift * wxh_bytes + 2 * wh_bytes_per_gate, - diff_wh_ptr + shift * wh_bytes + 2 * wh_bytes_per_gate, wh_bytes_per_gate); + } else if (kAddTo == req) { + if (param.mode != rnn_enum::kGru) { + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + DType* naive_weights = static_cast(diff_weights); + DType* diff_wx_ptr = static_cast(diff_weights_layer_->get_data_handle()); + DType* diff_wh_ptr = static_cast(diff_weights_iter_->get_data_handle()); + for (int shift = 0; shift < num_layer * direction; ++shift) { + common::ParallelAdd(naive_weights + shift * wxh_size, + diff_wx_ptr + shift * wx_size, wx_size); + } + // align naive_weights to weights_iter memory + naive_weights += wx_size; + for (int shift = 0; shift < num_layer * direction; ++shift) { + common::ParallelAdd(naive_weights + shift * wxh_size, + diff_wh_ptr + shift * wh_size, wh_size); + } + }); + } else { + const size_t wx_size_per_gate = param.input_size * param.state_size; + const size_t wh_size_per_gate = param.state_size * param.state_size; + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + DType* naive_weights = static_cast(diff_weights); + DType* diff_wx_ptr = static_cast(diff_weights_layer_->get_data_handle()); + DType* diff_wh_ptr = static_cast(diff_weights_iter_->get_data_handle()); + for (int shift = 0; shift < num_layer * direction; ++shift) { + common::ParallelAdd(naive_weights + shift * wxh_size + wx_size_per_gate, + diff_wx_ptr + shift * wx_size, wx_size_per_gate); + common::ParallelAdd(naive_weights + shift * wxh_size, + diff_wx_ptr + shift * wx_size + wx_size_per_gate, wx_size_per_gate); + common::ParallelAdd(naive_weights + shift * wxh_size + 2 * wx_size_per_gate, + diff_wx_ptr + shift * wx_size + 2 * wx_size_per_gate, wx_size_per_gate); + } + // align naive_weights to weights_iter memory + naive_weights += wx_size; + for (int shift = 0; shift < num_layer * direction; ++shift) { + common::ParallelAdd(naive_weights + shift * wxh_size + wh_size_per_gate, + diff_wh_ptr + shift * wh_size, wh_size_per_gate); + common::ParallelAdd(naive_weights + shift * wxh_size, + diff_wh_ptr + shift * wh_size + wh_size_per_gate, wh_size_per_gate); + common::ParallelAdd(naive_weights + shift * wxh_size + 2 * wh_size_per_gate, + diff_wh_ptr + shift * wh_size + 2 * wh_size_per_gate, wh_size_per_gate); + } + }); } } - char* naive_bias = static_cast(diff_bias); - char* diff_bias_ptr = static_cast(this->diff_bias_->get_data_handle()); - const size_t bias_bytes = param.single_b_size * dtype_bytes; - const size_t naive_bias_bytes = param.naive_single_b_size * dtype_bytes; - if (param.mode != rnn_enum::kGru) { - MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { - DType* typed_bias = reinterpret_cast(diff_bias_ptr); - HalveWeightsDiff(typed_bias, num_layer * direction * param.single_b_size); - }); - for (int shift = 0; shift < num_layer 
* direction; ++shift) { - std::memcpy(naive_bias + shift * naive_bias_bytes, - diff_bias_ptr + shift * bias_bytes, bias_bytes); - std::memcpy(naive_bias + shift * naive_bias_bytes + bias_bytes, - diff_bias_ptr + shift * bias_bytes, bias_bytes); - } - } else { - const size_t bias_bytes_per_gate = param.state_size * dtype_bytes; - MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + if (kWriteTo == req) { + const size_t bias_bytes = param.single_b_size * dtype_bytes; + const size_t naive_bias_bytes = param.naive_single_b_size * dtype_bytes; + char* naive_bias = static_cast(diff_bias); + char* diff_bias_ptr = static_cast(this->diff_bias_->get_data_handle()); + if (param.mode != rnn_enum::kGru) { + for (int shift = 0; shift < num_layer * direction; ++shift) { + std::memcpy(naive_bias + shift * naive_bias_bytes, + diff_bias_ptr + shift * bias_bytes, bias_bytes); + std::memcpy(naive_bias + shift * naive_bias_bytes + bias_bytes, + diff_bias_ptr + shift * bias_bytes, bias_bytes); + } + } else { + const size_t bias_bytes_per_gate = param.state_size * dtype_bytes; for (int shift = 0; shift < num_layer * direction; ++shift) { char* naive_reset = naive_bias + shift * naive_bias_bytes; char* naive_update = naive_reset + bias_bytes_per_gate; char* update = diff_bias_ptr + shift * bias_bytes; char* reset = update + bias_bytes_per_gate; - DType* typed_update = reinterpret_cast(update); - HalveWeightsDiff(typed_update, param.state_size * 2); - std::memcpy(naive_update, update, bias_bytes_per_gate); std::memcpy(naive_reset, reset, bias_bytes_per_gate); std::memcpy(naive_update + naive_bias_bytes / 2, update, bias_bytes_per_gate); @@ -881,7 +915,46 @@ void MKLDNNRnnBackward::CommitWeightsDiff(void* diff_weights, void* diff_bias, i std::memcpy(naive_new_bx, new_bx, bias_bytes_per_gate); std::memcpy(naive_new_bh, new_bh, bias_bytes_per_gate); } - }); + } + } else if (kAddTo == req) { + const size_t bias_size = param.single_b_size; + const size_t naive_bias_size = param.naive_single_b_size; + if (param.mode != rnn_enum::kGru) { + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + DType* naive_bias = static_cast(diff_bias); + DType* diff_bias_ptr = static_cast(this->diff_bias_->get_data_handle()); + for (int shift = 0; shift < num_layer * direction; ++shift) { + common::ParallelAdd(naive_bias + shift * naive_bias_size, + diff_bias_ptr + shift * bias_size, bias_size); + common::ParallelAdd(naive_bias + shift * naive_bias_size + bias_size, + diff_bias_ptr + shift * bias_size, bias_size); + } + }); + } else { + const size_t bias_size_per_gate = param.state_size; + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + DType* naive_bias = static_cast(diff_bias); + DType* diff_bias_ptr = static_cast(this->diff_bias_->get_data_handle()); + for (int shift = 0; shift < num_layer * direction; ++shift) { + DType* naive_reset = naive_bias + shift * naive_bias_size; + DType* naive_update = naive_reset + bias_size_per_gate; + DType* update = diff_bias_ptr + shift * bias_size; + DType* reset = update + bias_size_per_gate; + + common::ParallelAdd(naive_update, update, bias_size_per_gate); + common::ParallelAdd(naive_reset, reset, bias_size_per_gate); + common::ParallelAdd(naive_update + naive_bias_size / 2, update, bias_size_per_gate); + common::ParallelAdd(naive_reset + naive_bias_size / 2, reset, bias_size_per_gate); + + DType* naive_new_bx = naive_update + bias_size_per_gate; + DType* naive_new_bh = naive_new_bx + naive_bias_size / 2; + DType* new_bx = reset + bias_size_per_gate; + DType* new_bh = new_bx + bias_size_per_gate; + 
common::ParallelAdd(naive_new_bx, new_bx, bias_size_per_gate); + common::ParallelAdd(naive_new_bh, new_bh, bias_size_per_gate); + } + }); + } } } @@ -899,19 +972,11 @@ void MKLDNNRnnOp::Forward(const OpContext &ctx, const std::vector &inputs, const std::vector &req, const std::vector &outputs) { + TmpMemMgr::Get()->Init(ctx.requested[0]); // In the `autograd.record()` context, RNNOp is required to run into // forward_training mode. const bool is_training = (ctx.is_train || ctx.need_grad); - // check output requests - if (kAddTo == req[rnn_enum::kOut]) - LOG(FATAL) << "Currently, `add` operation is not supported by RNNs."; const RNNParam& default_param = full_param_.default_param; - if (default_param.state_outputs) { - if (kAddTo == req[rnn_enum::kStateOut]) - LOG(FATAL) << "Currently, `add` operation is not supported by RNNs."; - if (default_param.mode == rnn_enum::kLstm && kAddTo == req[rnn_enum::kStateCellOut]) - LOG(FATAL) << "Currently, `add` operation against lstm-cell output is not supported."; - } // Initialize weights version if (!initialized_ && weights_version_ == 0) { @@ -932,24 +997,40 @@ void MKLDNNRnnOp::Forward(const OpContext &ctx, // Get data type int data_dtype = inputs[rnn_enum::kData].dtype(); + // Get temporary memory for output, state_out, statecell_out + const int num_layers = default_param.num_layers; + const int seq_length = default_param.seq_length_; + const int batch_size = default_param.batch_size_; + const int state_size = default_param.state_size; + const int directions = default_param.bidirectional ? 2 : 1; + mkldnn::memory::desc dst_desc({seq_length, batch_size, directions * state_size}, + get_mkldnn_type(data_dtype), mkldnn::memory::format_tag::tnc); + mkldnn::memory::desc state_desc({num_layers, directions, batch_size, state_size}, + get_mkldnn_type(data_dtype), mkldnn::memory::format_tag::ldnc); + auto out_mem = CreateMKLDNNMem(outputs[rnn_enum::kOut], dst_desc, req[rnn_enum::kOut]); + mkldnn_output_t stateout_mem; + mkldnn_output_t statecellout_mem; // Get input & output NDArray char *src = static_cast(inputs[rnn_enum::kData].data().dptr_); char *src_state = static_cast(inputs[rnn_enum::kState].data().dptr_); - char *dst = req[rnn_enum::kOut] == kNullOp ? 
nullptr - : static_cast(outputs[rnn_enum::kOut].data().dptr_); + char *dst = static_cast(out_mem.second->get_data_handle()); char *dst_state = nullptr; // Output state char *src_state_cell = nullptr; // Used in LSTM for cell state char *dst_state_cell = nullptr; // Used in LSTM for cell state if (default_param.state_outputs && req[rnn_enum::kStateOut] != kNullOp) { - dst_state = static_cast(outputs[rnn_enum::kStateOut].data().dptr_); + stateout_mem = CreateMKLDNNMem( + outputs[rnn_enum::kStateOut], state_desc, req[rnn_enum::kStateOut]); + dst_state = static_cast(stateout_mem.second->get_data_handle()); } if (default_param.mode == rnn_enum::kLstm) { src_state_cell = static_cast(inputs[rnn_enum::kStateCell].data().dptr_); if (default_param.state_outputs && req[rnn_enum::kStateCellOut] != kNullOp) { - dst_state_cell = static_cast(outputs[rnn_enum::kStateCellOut].data().dptr_); + statecellout_mem = CreateMKLDNNMem( + outputs[rnn_enum::kStateCellOut], state_desc, req[rnn_enum::kStateCellOut]); + dst_state_cell = static_cast(statecellout_mem.second->get_data_handle()); } } @@ -1000,6 +1081,12 @@ void MKLDNNRnnOp::Forward(const OpContext &ctx, } else { for (auto& inf_lyr : fwd_inf_vec_) RegisterMKLDNNRnn(inf_lyr); } + CommitOutput(outputs[rnn_enum::kOut], out_mem); + if (default_param.state_outputs) { + CommitOutput(outputs[rnn_enum::kStateOut], stateout_mem); + if (default_param.mode == rnn_enum::kLstm) + CommitOutput(outputs[rnn_enum::kStateCellOut], statecellout_mem); + } MKLDNNStream::Get()->Submit(); } @@ -1008,18 +1095,9 @@ void MKLDNNRnnOp::Backward(const OpContext& ctx, const std::vector& req, const std::vector& outputs) { using tag = mkldnn::memory::format_tag; + TmpMemMgr::Get()->Init(ctx.requested[0]); const RNNParam& default_param = full_param_.default_param; - if (kAddTo == req[rnn_enum::kData] || kAddTo == req[rnn_enum::kParams]) - LOG(FATAL) << "Currently, `add` operations against gradients of input and weights" - << " are not supported by RNNs."; - if (default_param.state_outputs) { - if (kAddTo == req[rnn_enum::kStateOut]) - LOG(FATAL) << "Currently, `add` operation against gradients of begining state" - << " is not supported by RNNs."; - if (default_param.mode == rnn_enum::kLstm && req[rnn_enum::kStateCell]) - LOG(FATAL) << "Currently, `add` operation against gradients of begining cell-state" - << " is not supported by LSTM."; - } + // Initialize the bwd_vec_ if (bwd_vec_.size() != fwd_inf_vec_.size()) { bwd_vec_.clear(); @@ -1038,21 +1116,38 @@ void MKLDNNRnnOp::Backward(const OpContext& ctx, const int data_dtype = inputs[rnn_enum::kData].dtype(); const int w_dtype = inputs[rnn_enum::kParams].dtype(); const size_t w_bytes = mshadow::mshadow_sizeof(w_dtype); + // Get temporary memory for diff_src, diff_state, diff_statecell + const int num_layers = default_param.num_layers; + const int seq_length = default_param.seq_length_; + const int batch_size = default_param.batch_size_; + const int input_size = default_param.input_size_; + const int state_size = default_param.state_size; + const int directions = default_param.bidirectional ? 
2 : 1; + mkldnn::memory::desc src_desc({seq_length, batch_size, input_size}, + get_mkldnn_type(data_dtype), tag::tnc); + mkldnn::memory::desc state_desc({num_layers, directions, batch_size, state_size}, + get_mkldnn_type(data_dtype), tag::ldnc); + auto diff_input_mem = CreateMKLDNNMem(outputs[rnn_enum::kData], src_desc, req[rnn_enum::kData]); + mkldnn_output_t diff_state_mem; + mkldnn_output_t diff_statecell_mem; // index description of outputs NDArray // 0 1 2 3 // | dx | dw | dhx | dcx| - char* dx = req[rnn_enum::kData] == kNullOp ? nullptr - : static_cast(outputs[rnn_enum::kData].data().dptr_); + char* dx = static_cast(diff_input_mem.second->get_data_handle()); char* dw = static_cast(outputs[rnn_enum::kParams].data().dptr_); char* db = dw + (inputs[rnn_enum::kParams].data().Size() - GetRnnBiasSize(default_param.num_layers, default_param.state_size, default_param.bidirectional + 1, default_param.mode)) * w_bytes; - char* dhx = req[rnn_enum::kState] == kNullOp ? nullptr - : static_cast(outputs[rnn_enum::kState].data().dptr_); + diff_state_mem = CreateMKLDNNMem( + outputs[rnn_enum::kState], state_desc, req[rnn_enum::kState]); + char* dhx = static_cast(diff_state_mem.second->get_data_handle()); char* dcx = nullptr; if (full_param_.default_param.mode == rnn_enum::kLstm - && req[rnn_enum::kStateCell] != kNullOp) - dcx = static_cast(outputs[rnn_enum::kStateCell].data().dptr_); + && req[rnn_enum::kStateCell] != kNullOp) { + diff_statecell_mem = CreateMKLDNNMem( + outputs[rnn_enum::kStateCell], state_desc, req[rnn_enum::kStateCell]); + dcx = static_cast(diff_statecell_mem.second->get_data_handle()); + } // index description of inputs NDArray // 0 1 2 3 4 5 6 7 8 9 @@ -1100,12 +1195,16 @@ void MKLDNNRnnOp::Backward(const OpContext& ctx, RegisterMKLDNNRnn(*bwd); } } + CommitOutput(outputs[rnn_enum::kData], diff_input_mem); + CommitOutput(outputs[rnn_enum::kState], diff_state_mem); + if (full_param_.default_param.mode == rnn_enum::kLstm) + CommitOutput(outputs[rnn_enum::kStateCell], diff_statecell_mem); MKLDNNStream::Get()->Submit(); // Commit weights diff if (req[rnn_enum::kParams] != kNullOp) { for (size_t lyr = 0; lyr < bwd_vec_.size(); ++lyr) { - bwd_vec_.at(lyr).CommitWeightsDiff(dw, db, w_dtype); + bwd_vec_.at(lyr).CommitWeightsDiff(dw, db, req[rnn_enum::kParams], w_dtype); dw += full_param_.layer_params.at(lyr).single_w_size * w_bytes; db += full_param_.layer_params.at(lyr).single_b_size * w_bytes; } diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 6d568c81bc1c..542968ef0a2c 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -181,6 +181,10 @@ static std::vector RNNResourceEx(const NodeAttrs& attrs, const if (param.p != 0 && 1.0f - param.p > 0) { request.emplace_back(ResourceRequest::kCuDNNDropoutDesc); } +#endif + } else { +#if MXNET_USE_MKLDNN == 1 + request.emplace_back(ResourceRequest::kTempSpace); #endif } return request; @@ -243,7 +247,8 @@ static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs, #if MXNET_USE_MKLDNN == 1 if ((in_types[0] == mshadow::kFloat32 || in_types[0] == mshadow::kFloat16) - && in_shapes[0].ndim() == 3 && ctx.dev_type == kCPU) { + && in_shapes[0].ndim() == 3 && ctx.dev_type == kCPU + && dmlc::GetEnv("MXNET_USE_MKLDNN_RNN", 1)) { const mxnet::TShape& data_shape = in_shapes[rnn_enum::kData]; state = OpStatePtr::Create(param, data_shape[0], data_shape[1], data_shape[2]); @@ -270,7 +275,7 @@ static void RNNStatefulComputeExCPU(const OpStatePtr& state_ptr, const std::vector& req, const std::vector& outputs) { if ((inputs[0].dtype() == 
mshadow::kFloat32 || inputs[0].dtype() == mshadow::kFloat16) && - inputs[0].shape().ndim() == 3) { + inputs[0].shape().ndim() == 3 && dmlc::GetEnv("MXNET_USE_MKLDNN_RNN", 1)) { MKLDNNRnnOp& op = state_ptr.get_state(); op.Forward(ctx, inputs, req, outputs); } else { @@ -284,7 +289,7 @@ static void RNNStatefulGradComputeExCPU(const OpStatePtr& state_ptr, const std::vector& req, const std::vector& outputs) { if ((inputs[0].dtype() == mshadow::kFloat32 || inputs[0].dtype() == mshadow::kFloat16) && - inputs[0].shape().ndim() == 3) { + inputs[0].shape().ndim() == 3 && dmlc::GetEnv("MXNET_USE_MKLDNN_RNN", 1)) { MKLDNNRnnOp& op = state_ptr.get_state(); op.Backward(ctx, inputs, req, outputs); } else { diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 7b0404d8abb7..66031d20d65b 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -36,15 +36,6 @@ import os def check_rnn_consistency(cell1, cell2, T, N, I, H, grad_req, rtol=1e-2, atol=1e-4): - if default_context().device_type == 'cpu': - # NOTE(zixuanweeei): Currently, we don't add `add` requests support on fused mkl-dnn rnn operator. - # We tracked this issue by https://github.com/apache/incubator-mxnet/issues/16578 - if isinstance(grad_req, dict) and 'add' in grad_req.values(): - print("Skip the test when requiring `add` operation against gradients on CPU context.") - return - if isinstance(grad_req, str) and grad_req == 'add': - print("Skip the test when requiring `add` operation against gradients on CPU context.") - return dshape = (N, T, I) data = mx.sym.Variable('data') @@ -182,9 +173,9 @@ def test_gru_sym(): stack.add(mx.rnn.GRUCell(H, prefix='l1_')) stack.add(mx.rnn.GRUCell(H, prefix='l2_')) - check_rnn_consistency(fused, stack, T, N, I, H, 'write', atol=2e-4) - check_rnn_consistency(fused, stack, T, N, I, H, 'add', atol=2e-4) - check_rnn_consistency(fused, stack, T, N, I, H, 'null', atol=2e-4) + check_rnn_consistency(fused, stack, T, N, I, H, 'write') + check_rnn_consistency(fused, stack, T, N, I, H, 'add') + check_rnn_consistency(fused, stack, T, N, I, H, 'null') @with_seed() @assert_raises_cudnn_not_satisfied(min_version='5.1.10') @@ -208,9 +199,9 @@ def test_gru_bidirectional(): mx.rnn.GRUCell(H, prefix='r1_'), output_prefix='bi_gru_1_')) - check_rnn_consistency(fused, stack, T, N, I, H, 'write', atol=2e-4) - check_rnn_consistency(fused, stack, T, N, I, H, 'add', atol=2e-4) - check_rnn_consistency(fused, stack, T, N, I, H, 'null', atol=2e-4) + check_rnn_consistency(fused, stack, T, N, I, H, 'write') + check_rnn_consistency(fused, stack, T, N, I, H, 'add') + check_rnn_consistency(fused, stack, T, N, I, H, 'null') @with_seed() @assert_raises_cudnn_not_satisfied(min_version='5.1.10') From 52c9a45abc8d1f8228ec9ed1bad7f137137fd96b Mon Sep 17 00:00:00 2001 From: Zhennan Qin Date: Mon, 16 Dec 2019 16:05:32 +0800 Subject: [PATCH 48/62] [MKLDNN] enable MaxPooling with full pooling convention (#16860) * [MKLDNN] enable MaxPooling for full pooling convention * Run CI * Fix UT * Add comment * Run CI --- src/operator/nn/mkldnn/mkldnn_pooling-inl.h | 20 +++++++++++++++++++- src/operator/nn/mkldnn/mkldnn_pooling.cc | 7 ------- src/operator/nn/pooling.cc | 5 ++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h index 22e9abd156a3..08d91af6fbb3 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h @@ -89,6 
+89,14 @@ class MKLDNNPoolingBwd { const mkldnn::pooling_backward::primitive_desc &GetPd(); }; +inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) { + if ((x + padl + padr - k) % s != 0) { + return (padr + s - ((x + padl + padr - k) % s)); + } else { + return padr; + } +} + inline bool SupportMKLDNNPooling(const PoolingParam ¶m) { return param.kernel.ndim() == 2 && (param.pool_type == pool_enum::kMaxPooling || @@ -105,7 +113,17 @@ inline bool SupportMKLDNNPooling(const PoolingParam ¶m, if (param.pooling_convention == pool_enum::kValid) { return true; } else { - // currently, only max-pooling is supported for full convention + if (param.pool_type == pool_enum::kAvgPooling) { + CHECK_EQ(dshape.ndim(), 4); + // mkldnn works differently when padding is asymmetric, so let's skip this case. + if (param.pad[0] == GetPaddingSizeFull(dshape[2], param.pad[0], param.pad[0], param.kernel[0], + param.stride[0]) && + param.pad[1] == GetPaddingSizeFull(dshape[3], param.pad[1], param.pad[1], param.kernel[1], + param.stride[1])) { + return true; + } + return false; + } return param.pool_type == pool_enum::kMaxPooling; } } diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc index 6eda2aa33b34..d2f79700051a 100644 --- a/src/operator/nn/mkldnn/mkldnn_pooling.cc +++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc @@ -127,13 +127,6 @@ mkldnn::algorithm GetMKLDNNPoolAlgo(const PoolingParam ¶m) { } } -static inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) { - if ((x + padl + padr - k) % s != 0) { - return (padr + s - ((x + padl + padr - k) % s)); - } else { - return padr; - } -} mkldnn::pooling_forward::primitive_desc GetPoolingFwdPdesc( const PoolingParam ¶m, const bool is_train, const mkldnn::memory::desc &data_md, diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 485fc1345dfd..f998c33d16a8 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -278,9 +278,8 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx, return; } - - if (SupportMKLDNN(inputs[0]) && - SupportMKLDNNPooling(param, inputs[0].shape())) { + if (SupportMKLDNN(inputs[0]) + && SupportMKLDNNPooling(param, inputs[0].shape())) { if (MKLDNNRequireWorkspace(param)) { CHECK_GT(outputs.size(), 1U); workspace = &outputs[1]; From db943e95e86f7eabfc45e1dd827d7b680ed43803 Mon Sep 17 00:00:00 2001 From: "Joshua Z. 
Zhang" Date: Mon, 16 Dec 2019 11:15:39 -0800 Subject: [PATCH 49/62] [BugFix] fix filter channel calculation in ModulatedDeformableConvV2 (#17070) * fix filter channel * fix slice during forward --- python/mxnet/gluon/contrib/cnn/conv_layers.py | 7 ++++--- tests/python/unittest/test_gluon_contrib.py | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/mxnet/gluon/contrib/cnn/conv_layers.py b/python/mxnet/gluon/contrib/cnn/conv_layers.py index 098463eca968..c4924c130a28 100644 --- a/python/mxnet/gluon/contrib/cnn/conv_layers.py +++ b/python/mxnet/gluon/contrib/cnn/conv_layers.py @@ -313,7 +313,8 @@ def __init__(self, channels, kernel_size=(1, 1), strides=(1, 1), padding=(0, 0), dilation = (dilation,) * len(kernel_size) self._op_name = op_name - offset_channels = 27 + offset_channels = num_deformable_group * 3 * kernel_size[0] * kernel_size[1] + self.offset_split_index = num_deformable_group * 2 * kernel_size[0] * kernel_size[1] self._kwargs_offset = { 'kernel': kernel_size, 'stride': strides, 'dilate': dilation, 'pad': padding, 'num_filter': offset_channels, 'num_group': groups, @@ -377,8 +378,8 @@ def hybrid_forward(self, F, x, offset_weight, deformable_conv_weight, offset_bia else: offset = F.Convolution(x, offset_weight, offset_bias, cudnn_off=True, **self._kwargs_offset) - offset_t = F.slice_axis(offset, axis=1, begin=0, end=18) - mask = F.slice_axis(offset, axis=1, begin=18, end=None) + offset_t = F.slice_axis(offset, axis=1, begin=0, end=self.offset_split_index) + mask = F.slice_axis(offset, axis=1, begin=self.offset_split_index, end=None) mask = F.sigmoid(mask) * 2 if deformable_conv_bias is None: diff --git a/tests/python/unittest/test_gluon_contrib.py b/tests/python/unittest/test_gluon_contrib.py index fdba553c8560..0ed0d4e8a545 100644 --- a/tests/python/unittest/test_gluon_contrib.py +++ b/tests/python/unittest/test_gluon_contrib.py @@ -411,6 +411,10 @@ def test_ModulatedDeformableConvolution(): net = nn.HybridSequential() net.add( DeformableConvolution(10, kernel_size=(3, 3), strides=1, padding=0), + DeformableConvolution(10, kernel_size=(1, 1), strides=1, padding=0), + DeformableConvolution(10, kernel_size=(5, 5), strides=1, padding=0), + DeformableConvolution(10, kernel_size=(3, 5), strides=1, padding=0), + DeformableConvolution(10, kernel_size=(5, 1), strides=1, padding=0, num_deformable_group=2), DeformableConvolution(10, kernel_size=(3, 2), strides=1, padding=0, activation='relu', offset_use_bias=False, use_bias=False), DeformableConvolution(10, kernel_size=(3, 2), strides=1, padding=0, activation='relu', From 491246567737d09cbc9e1dfc9b24bfb36201a0af Mon Sep 17 00:00:00 2001 From: Leonard Lausen Date: Tue, 17 Dec 2019 07:32:33 +0900 Subject: [PATCH 50/62] Update pypi_publish.py to disable nighlty build upload to Pypi (#17082) --- cd/python/pypi/pypi_publish.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cd/python/pypi/pypi_publish.py b/cd/python/pypi/pypi_publish.py index 94d6e8703a52..7e09f644c734 100755 --- a/cd/python/pypi/pypi_publish.py +++ b/cd/python/pypi/pypi_publish.py @@ -48,6 +48,10 @@ def post_wheel(path): print('Would have run: {}'.format(cmd)) return 0 else: + print('Skipping publishing nightly builds to Pypi.') + print('See https://github.com/pypa/pypi-support/issues/50 for details') + return 0 + # DO NOT PRINT CMD IN THIS BLOCK, includes password p = subprocess.run(cmd.split(' '), stdout=subprocess.PIPE) From 18c9a69e2f75c3f157149ec3344faef8ba12ff7f Mon Sep 17 00:00:00 2001 From: Lukas Adamowicz Date: Mon, 16 Dec 2019 
19:09:58 -0500 Subject: [PATCH 51/62] Install current OpenCV (#17090) Recommend installing the latest version of OpenCV instead of OpenCV 3 in OS X install guide. OpenCV 3 does not work. We already commonly build with OpenCV 4 on other platforms. --- docs/static_site/src/pages/get_started/osx_setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/static_site/src/pages/get_started/osx_setup.md b/docs/static_site/src/pages/get_started/osx_setup.md index b5234126e334..369edb36c148 100644 --- a/docs/static_site/src/pages/get_started/osx_setup.md +++ b/docs/static_site/src/pages/get_started/osx_setup.md @@ -88,7 +88,7 @@ Install the dependencies, required for MXNet, with the following commands: brew install graphviz brew install openblas brew tap homebrew/core - brew install opencv@3 + brew install opencv # If building with MKLDNN brew install llvm From 814be5953420846d6b0835acd5591ac09fdff2e5 Mon Sep 17 00:00:00 2001 From: "yuri@FreeBSD" Date: Mon, 16 Dec 2019 17:18:08 -0800 Subject: [PATCH 52/62] Add #include needed for waitpid (#17078) --- tests/cpp/engine/omp_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cpp/engine/omp_test.cc b/tests/cpp/engine/omp_test.cc index 2be7d9d0307c..f4ef421a8595 100644 --- a/tests/cpp/engine/omp_test.cc +++ b/tests/cpp/engine/omp_test.cc @@ -25,6 +25,7 @@ #if defined(unix) || defined(__unix__) || defined(__unix) #include #include +#include #include From f86a8d10074f983b990ef420b96d664e911ee1bf Mon Sep 17 00:00:00 2001 From: Haibin Lin Date: Mon, 16 Dec 2019 20:48:31 -0800 Subject: [PATCH 53/62] [API] unified API for custom kvstores (#17010) * abstract kvstore api * add test * reorg folder * add split kvstore.py to kvstore and base * fix import * add horovod class * add registration * add unit test for kvstore base * add nightly test * fix pushpull * fix test * simply API * Trainer API * fix a bug * Fix typo * update horovod tutorial * better error message * fix incorrect usage of name * + query capacity * unit test for test kvstore * add trainer test * refactor * renmae * remove horovod example * revert horovod example * rename test_kvstore_custom.py * more tests * more tests for teststore * fix type name * fix lint * fix lint * fix lint * Update dist_device_sync_kvstore_custom.py * address CR * add optimizer test * add optimizer test --- ci/docker/runtime_functions.sh | 1 + python/mxnet/__init__.py | 24 +- python/mxnet/gluon/trainer.py | 50 +- python/mxnet/kvstore/__init__.py | 24 + python/mxnet/kvstore/base.py | 455 ++++++++++++++++++ python/mxnet/{ => kvstore}/kvstore.py | 183 +++---- python/mxnet/{ => kvstore}/kvstore_server.py | 6 +- python/mxnet/model.py | 35 +- src/kvstore/kvstore_local.h | 4 +- .../dist_device_sync_kvstore_custom.py | 96 ++++ tests/python/unittest/test_gluon_trainer.py | 33 +- tests/python/unittest/test_kvstore_custom.py | 195 ++++++++ 12 files changed, 955 insertions(+), 151 deletions(-) create mode 100644 python/mxnet/kvstore/__init__.py create mode 100644 python/mxnet/kvstore/base.py rename python/mxnet/{ => kvstore}/kvstore.py (85%) rename python/mxnet/{ => kvstore}/kvstore_server.py (97%) create mode 100644 tests/nightly/dist_device_sync_kvstore_custom.py create mode 100644 tests/python/unittest/test_kvstore_custom.py diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 9198eee32896..b658f953a78a 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -1418,6 +1418,7 @@ integrationtest_ubuntu_gpu_dist_kvstore() { export 
DMLC_LOG_STACK_TRACE_DEPTH=10 cd tests/nightly/ ../../tools/launch.py -n 4 --launcher local python dist_device_sync_kvstore.py + ../../tools/launch.py -n 4 --launcher local python dist_device_sync_kvstore_custom.py ../../tools/launch.py -n 4 --launcher local python dist_sync_kvstore.py --type=init_gpu popd } diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index fb9680a6db18..ed6e81daaca8 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -27,6 +27,10 @@ from .util import is_np_shape, set_np_shape, np_shape, use_np_shape from .util import is_np_array, np_array, use_np_array, use_np from . import base + +# version info +__version__ = base.__version__ + from . import contrib from . import ndarray from . import ndarray as nd @@ -59,8 +63,6 @@ from . import callback # from . import misc from . import lr_scheduler -# use mx.kv as short for kvstore -from . import kvstore as kv # Runtime compile module from . import rtc # Attribute scope to add attributes to symbolic graphs @@ -84,22 +86,20 @@ from . import test_utils from . import rnn - from . import gluon -# Dynamic library module should be done after ndarray and symbol are initialized -from . import library -from . import tvmop - -__version__ = base.__version__ - -# Dist kvstore module which launches a separate process when role is set to "server". -# This should be done after other modules are initialized. +# With the native kvstore module (such as 'dist_sync_device'), the module launches a separate +# process when role is set to "server". This should be done after other modules are initialized. # Otherwise this may result in errors when unpickling custom LR scheduler/optimizers. # For example, the LRScheduler in gluoncv depends on a specific version of MXNet, and # checks the __version__ attr of MXNet, which is not set on kvstore server due to the # fact that kvstore-server module is imported before the __version__ attr is set. -from . import kvstore_server +# use mx.kv as short for kvstore +from . import kvstore as kv + +# Dynamic library module should be done after ndarray and symbol are initialized +from . import library +from . import tvmop from . import numpy_op_signature from . import numpy_dispatch_protocol diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py index 1ab86af2b93f..966ed2cc9964 100644 --- a/python/mxnet/gluon/trainer.py +++ b/python/mxnet/gluon/trainer.py @@ -23,6 +23,7 @@ from .. import optimizer as opt from ..model import _create_kvstore, _create_sparse_kvstore from .parameter import ParameterDict, Parameter +from ..kvstore import KVStore class Trainer(object): """Applies an `Optimizer` on a set of Parameters. 
Trainer should @@ -153,9 +154,10 @@ def _init_params(self): else: param_arrays = param._check_and_get(param._data, list) idx = self._param2idx[param.name] - self._kvstore.init(idx, param_arrays[0]) - if param._stype == 'default': - self._kvstore.pull(idx, param_arrays, priority=-idx) + if param._stype != 'default': + self._kvstore.init(idx, param_arrays[0]) + else: + self._kvstore.broadcast(idx, param_arrays[0], param_arrays) self._params_to_init = params_to_init @@ -218,6 +220,10 @@ def _init_kvstore(self): raise ValueError("Cannot set update_on_kvstore=False on dist kvstore " "when sparse gradients are present.") update_on_kvstore = config['update_on_kvstore'] + # raise err if a custom kvstore is used for sparse training + if not isinstance(kvstore, KVStore): + raise ValueError("Cannot use {} for multi-device training with sparse gradients" + .format(type(kvstore))) else: # Training with dense weight and dense gradients. @@ -234,6 +240,12 @@ def _init_kvstore(self): "when training in async mode.") if config['update_on_kvstore'] is not None: update_on_kvstore = config['update_on_kvstore'] + # raise err if update_on_kvstore is set to True with kvstores that do not support optimizers + if update_on_kvstore and not type(kvstore).is_capable('optimizer'): + if config['update_on_kvstore']: + raise ValueError("Please set update_on_kvstore=False " + "when training with {}".format(type(kvstore))) + update_on_kvstore = False # set grad compression and optimizers if kvstore: @@ -357,14 +369,30 @@ def allreduce_grads(self): self._allreduce_grads() def _allreduce_grads(self): - if self._kvstore: - for i, param in enumerate(self._params): - if param.grad_req != 'null': + # nothing to reduce + if not self._kvstore: + return + for i, param in enumerate(self._params): + if param.grad_req != 'null': - self._kvstore.push(i, param.list_grad(), priority=-i) - if not self._update_on_kvstore: - self._kvstore.pull(i, param.list_grad(), priority=-i, + grad_list = param.list_grad() + # sparse gradients, call push and pull separately + if grad_list[0].stype != 'default': + self._kvstore.push(i, grad_list, priority=-i) + if param._stype == 'default': + if self._update_on_kvstore: + pull_list = param.list_data() + else: + pull_list = param.list_grad() + self._kvstore.pull(i, pull_list, priority=-i, ignore_sparse=self._distributed) + else: + # allreduce dense gradients if not update_on_kvstore, + # otherwise push dense gradients, pull dense weights + if self._update_on_kvstore: + self._kvstore.pushpull(i, grad_list, out=param.list_data(), priority=-i) + else: + self._kvstore.pushpull(i, grad_list, priority=-i) def update(self, batch_size, ignore_stale_grad=False): """Makes one step of parameter update. @@ -419,10 +447,6 @@ def _update(self, ignore_stale_grad=False): %(param.name, str(data.context))) if self._kvstore and self._update_on_kvstore: - if param._stype == 'default': - # 'row_sparse' parameters are not pulled immediately - they're pulled - # in `Block.forward` - self._kvstore.pull(i, param.list_data(), priority=-i) continue for upd, arr, grad in zip(updates, param.list_data(), param.list_grad()): diff --git a/python/mxnet/kvstore/__init__.py b/python/mxnet/kvstore/__init__.py new file mode 100644 index 000000000000..ccb58a1c6229 --- /dev/null +++ b/python/mxnet/kvstore/__init__.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +"""Key-value store for distributed communication""" +from .kvstore import * +from .base import * +from .kvstore_server import * diff --git a/python/mxnet/kvstore/base.py b/python/mxnet/kvstore/base.py new file mode 100644 index 000000000000..50310b6a6899 --- /dev/null +++ b/python/mxnet/kvstore/base.py @@ -0,0 +1,455 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# coding: utf-8 +""" Key value store interface of MXNet for parameter synchronization.""" +from __future__ import absolute_import + +from array import array +import ctypes +import warnings +from ..ndarray import NDArray +from ..base import _LIB, c_str_array, c_handle_array, c_array, c_array_buf, c_str +from ..base import check_call, string_types +from ..base import KVStoreHandle +from ..profiler import set_kvstore_handle + +__all__ = ['create', 'KVStoreBase'] + +def _ctype_key_value(keys, vals): + """Returns ctype arrays for the key-value args, and the whether string keys are used. + For internal use only. + """ + if isinstance(keys, (tuple, list)): + assert(len(keys) == len(vals)) + c_keys = [] + c_vals = [] + use_str_keys = None + for key, val in zip(keys, vals): + c_key_i, c_val_i, str_keys_i = _ctype_key_value(key, val) + c_keys += c_key_i + c_vals += c_val_i + use_str_keys = str_keys_i if use_str_keys is None else use_str_keys + assert(use_str_keys == str_keys_i), "inconsistent types of keys detected." 
+ c_keys_arr = c_array(ctypes.c_char_p, c_keys) if use_str_keys \ + else c_array(ctypes.c_int, c_keys) + c_vals_arr = c_array(ctypes.c_void_p, c_vals) + return (c_keys_arr, c_vals_arr, use_str_keys) + + assert(isinstance(keys, (int,) + string_types)), \ + "unexpected type for keys: " + str(type(keys)) + use_str_keys = isinstance(keys, string_types) + if isinstance(vals, NDArray): + c_keys = c_str_array([keys]) if use_str_keys \ + else c_array_buf(ctypes.c_int, array('i', [keys])) + return (c_keys, c_handle_array([vals]), use_str_keys) + else: + for value in vals: + assert(isinstance(value, NDArray)) + c_keys = c_str_array([keys] * len(vals)) if use_str_keys \ + else c_array_buf(ctypes.c_int, array('i', [keys] * len(vals))) + return (c_keys, c_handle_array(vals), use_str_keys) + +def _ctype_dict(param_dict): + """Returns ctype arrays for keys and values(converted to strings) in a dictionary""" + assert(isinstance(param_dict, dict)), \ + "unexpected type for param_dict: " + str(type(param_dict)) + c_keys = c_array(ctypes.c_char_p, [c_str(k) for k in param_dict.keys()]) + c_vals = c_array(ctypes.c_char_p, [c_str(str(v)) for v in param_dict.values()]) + return (c_keys, c_vals) + +class KVStoreBase(object): + """An abstract key-value store interface for data parallel training.""" + + def broadcast(self, key, value, out, priority=0): + """ Broadcast the `value` NDArray at rank 0 to all ranks, + and store the result in `out` + + Parameters + ---------- + key : str or int + The key. + + value : NDArray + The value corresponding to the key to broadcast + + out : NDArray, or list of NDArray + Values corresponding to the key to store the result + + priority : int, optional + The priority of the operation. + Higher priority operations are likely to be executed before other actions. + """ + raise NotImplementedError() + + def pushpull(self, key, value, out=None, priority=0): + """ Performs push and pull a single value or a sequence of values from the store. + + This function is coalesced form of push and pull operations. + + `value` is pushed to the kvstore server for summation with the specified keys, + and the results are pulled from the server to `out`. If `out` is not specified + the pulled values are written to `value`. + + Note that for allreduce based approaches such as horovod, there is no notion of + server or store. This function performs allreduce. + + Parameters + ---------- + key : str or int + The key. + + value : NDArray, or list of NDArray + Values corresponding to the keys. + + out: NDArray, or list of NDArray + Values corresponding to the key. + + priority : int, optional + The priority of the operation. + Higher priority operations are likely to be executed before other actions. + """ + raise NotImplementedError() + + def set_optimizer(self, optimizer): + """ Registers an optimizer with the kvstore. + + When using a single machine, this function updates the local optimizer. + If using multiple machines and this operation is invoked from a worker node, + it will serialized the optimizer with pickle and send it to all servers. + The function returns after all servers have been updated. + + Parameters + ---------- + optimizer : KVStoreBase + The new optimizer for the store + """ + raise NotImplementedError() + + OPTIMIZER = 'optimizer' + + @staticmethod + def is_capable(capability): + """Queries if the KVStore type supports certain capability, such as optimizer algorithm, + gradient compression, sparsity, etc. 
+ + Parameters + ---------- + capability: str + The capability to query + + Returns + ------- + result : bool + Whether the capability is supported or not. + """ + raise NotImplementedError() + + def save_optimizer_states(self, fname, dump_optimizer=False): + """Saves the optimizer (updater) state to a file. This is often used when checkpointing + the model during training. + + Parameters + ---------- + fname : str + Path to the output states file. + dump_optimizer : bool, default False + Whether to also save the optimizer itself. This would also save optimizer + information such as learning rate and weight decay schedules. + """ + raise NotImplementedError() + + def load_optimizer_states(self, fname): + """Loads the optimizer (updater) state from the file. + + Parameters + ---------- + fname : str + Path to input states file. + """ + raise NotImplementedError() + + @property + def type(self): + """ Returns the type of this kvstore backend. + + Returns + ------- + type : str + the string type + """ + raise NotImplementedError() + + @property + def rank(self): + """ Returns the rank of this worker node. + + Returns + ------- + rank : int + The rank of this node, which is in range [0, num_workers()) + """ + raise NotImplementedError() + + @property + def num_workers(self): + """Returns the number of worker nodes. + + Returns + ------- + size :int + The number of worker nodes. + """ + raise NotImplementedError() + + kv_registry = {} + + @staticmethod + def register(klass): + """Registers a new KVStore. + Once a kvstore is registered, we can create an instance of this + kvstore with `create` later. + + Examples + -------- + >>> @mx.kvstore.KVStoreBase.register + ... class MyKVStore(mx.kvstore.KVStoreBase): + ... pass + >>> kv = mx.kv.create('MyKVStore') + >>> print(type(kv)) + + """ + assert(isinstance(klass, type)) + name = klass.__name__.lower() + if name in KVStoreBase.kv_registry: + warnings.warn('WARNING: New kvstore %s.%s is overriding ' + 'existing kvstore %s.%s' % + (klass.__module__, klass.__name__, + KVStoreBase.kv_registry[name].__module__, + KVStoreBase.kv_registry[name].__name__)) + KVStoreBase.kv_registry[name] = klass + return klass + +@KVStoreBase.register +class TestStore(KVStoreBase): + """A key-value store for testing.""" + + def broadcast(self, key, value, out, priority=0): + """ Broadcast the `value` NDArray at rank 0 to all ranks, + and store the result in `out` + + Parameters + ---------- + key : str or int + The key. + + value : NDArray + The value corresponding to the key to broadcast + + out : NDArray, or list of NDArray + Values corresponding to the key to store the result + + priority : int, optional + The priority of the operation. + Higher priority operations are likely to be executed before other actions. + """ + out = out if isinstance(out, list) else [out] + for o in out: + o[:] = value + + def pushpull(self, key, value, out=None, priority=0): + """ Performs push and pull a single value or a sequence of values from the store. + + This function is coalesced form of push and pull operations. + + `value` is pushed to the kvstore server for summation with the specified keys, + and the results are pulled from the server to `out`. If `out` is not specified + the pulled values are written to `value`. + + Parameters + ---------- + key : str or int + The key. + + value : NDArray, or list of NDArray + Values corresponding to the keys. + + out: NDArray, or list of NDArray + Values corresponding to the key. + + priority : int, optional + The priority of the operation. 
+ Higher priority operations are likely to be executed before other actions. + """ + ctx = value[0].context + if isinstance(value, NDArray): + if out is not None: + out = out if isinstance(out, list) else [out] + for o in out: + o[:] = value + else: + reduced_value = sum([val.as_in_context(ctx) for val in value]) + if out is None: + for v in value: + v[:] = reduced_value + else: + out = out if isinstance(out, list) else [out] + for o in out: + o[:] = reduced_value + + @staticmethod + def is_capable(capability): + """Queries if the KVStore type supports certain capability, such as optimizer algorithm, + gradient compression, sparsity, etc. + + Parameters + ---------- + capability: str + The capability to query + + Returns + ------- + result : bool + Whether the capability is supported or not. + """ + if capability.lower() == KVStoreBase.OPTIMIZER: + return False + else: + raise ValueError('Unknown capability: {}'.format(capability)) + + @property + def type(self): + """ Returns the type of this kvstore. + + Returns + ------- + type : str + the string type + """ + return 'teststore' + + @property + def rank(self): + """ Returns the rank of this worker node. + + Returns + ------- + rank : int + The rank of this node, which is in range [0, num_workers()) + """ + return 0 + + @property + def num_workers(self): + """Returns the number of worker nodes. + + Returns + ------- + size :int + The number of worker nodes. + """ + return 1 + + def set_optimizer(self, optimizer): + """ Registers an optimizer with the kvstore. + + When using a single machine, this function updates the local optimizer. + If using multiple machines and this operation is invoked from a worker node, + it will serialized the optimizer with pickle and send it to all servers. + The function returns after all servers have been updated. + + Parameters + ---------- + optimizer : KVStoreBase + The new optimizer for the store + """ + raise NotImplementedError() + + def save_optimizer_states(self, fname, dump_optimizer=False): + """Saves the optimizer (updater) state to a file. This is often used when checkpointing + the model during training. + + Parameters + ---------- + fname : str + Path to the output states file. + dump_optimizer : bool, default False + Whether to also save the optimizer itself. This would also save optimizer + information such as learning rate and weight decay schedules. + """ + raise NotImplementedError() + + def load_optimizer_states(self, fname): + """Loads the optimizer (updater) state from the file. + + Parameters + ---------- + fname : str + Path to input states file. + """ + raise NotImplementedError() + +def create(name='local'): + """Creates a new KVStore. + + For single machine training, there are two commonly used types: + + ``local``: Copies all gradients to CPU memory and updates weights there. + + ``device``: Aggregates gradients and updates weights on GPUs. With this setting, + the KVStore also attempts to use GPU peer-to-peer communication, + potentially accelerating the communication. + + For distributed training, KVStore also supports a number of types: + + ``dist_sync``: Behaves similarly to ``local`` but with one major difference. + With ``dist_sync``, batch-size now means the batch size used on each machine. + So if there are ``n`` machines and we use batch size ``b``, + then ``dist_sync`` behaves like ``local`` with batch size ``n * b``. + + ``dist_device_sync``: Identical to ``dist_sync`` with the difference similar + to ``device`` vs ``local``. 
+ + ``dist_async``: Performs asynchronous updates. + The weights are updated whenever gradients are received from any machine. + No two updates happen on the same weight at the same time. However, the order is not + guaranteed. + + Parameters + ---------- + name : {'local', 'device', 'nccl', 'dist_sync', 'dist_device_sync', 'dist_async', 'horovod'} + The type of KVStore. + Returns + ------- + kv : KVStoreBase + The created KVStore. + """ + if not isinstance(name, string_types): + raise TypeError('name must be a string') + name = name.lower() + # first lookup the registry + if name in KVStoreBase.kv_registry: + return KVStoreBase.kv_registry[name]() + else: + # fall back to the native kvstore implementation + handle = KVStoreHandle() + check_call(_LIB.MXKVStoreCreate(c_str(name), + ctypes.byref(handle))) + from .kvstore import KVStore + kv = KVStore(handle) + set_kvstore_handle(kv.handle) + return kv diff --git a/python/mxnet/kvstore.py b/python/mxnet/kvstore/kvstore.py similarity index 85% rename from python/mxnet/kvstore.py rename to python/mxnet/kvstore/kvstore.py index 61c64ec0984f..236ce773e547 100644 --- a/python/mxnet/kvstore.py +++ b/python/mxnet/kvstore/kvstore.py @@ -19,58 +19,17 @@ """ Key value store interface of MXNet for parameter synchronization.""" from __future__ import absolute_import -from array import array -import ctypes import pickle -from .ndarray import NDArray -from .ndarray import _ndarray_cls -from .base import _LIB, c_str_array, c_handle_array, c_array, c_array_buf, c_str -from .base import check_call, string_types, mx_uint, py_str -from .base import NDArrayHandle, KVStoreHandle -from . import optimizer as opt -from .profiler import set_kvstore_handle - -def _ctype_key_value(keys, vals): - """Returns ctype arrays for the key-value args, and the whether string keys are used. - For internal use only. - """ - if isinstance(keys, (tuple, list)): - assert(len(keys) == len(vals)) - c_keys = [] - c_vals = [] - use_str_keys = None - for key, val in zip(keys, vals): - c_key_i, c_val_i, str_keys_i = _ctype_key_value(key, val) - c_keys += c_key_i - c_vals += c_val_i - use_str_keys = str_keys_i if use_str_keys is None else use_str_keys - assert(use_str_keys == str_keys_i), "inconsistent types of keys detected." 
- c_keys_arr = c_array(ctypes.c_char_p, c_keys) if use_str_keys \ - else c_array(ctypes.c_int, c_keys) - c_vals_arr = c_array(ctypes.c_void_p, c_vals) - return (c_keys_arr, c_vals_arr, use_str_keys) - - assert(isinstance(keys, (int,) + string_types)), \ - "unexpected type for keys: " + str(type(keys)) - use_str_keys = isinstance(keys, string_types) - if isinstance(vals, NDArray): - c_keys = c_str_array([keys]) if use_str_keys \ - else c_array_buf(ctypes.c_int, array('i', [keys])) - return (c_keys, c_handle_array([vals]), use_str_keys) - else: - for value in vals: - assert(isinstance(value, NDArray)) - c_keys = c_str_array([keys] * len(vals)) if use_str_keys \ - else c_array_buf(ctypes.c_int, array('i', [keys] * len(vals))) - return (c_keys, c_handle_array(vals), use_str_keys) - -def _ctype_dict(param_dict): - """Returns ctype arrays for keys and values(converted to strings) in a dictionary""" - assert(isinstance(param_dict, dict)), \ - "unexpected type for param_dict: " + str(type(param_dict)) - c_keys = c_array(ctypes.c_char_p, [c_str(k) for k in param_dict.keys()]) - c_vals = c_array(ctypes.c_char_p, [c_str(str(v)) for v in param_dict.values()]) - return (c_keys, c_vals) +import ctypes +from ..ndarray import NDArray +from ..ndarray import _ndarray_cls +from ..base import _LIB, c_str +from ..base import check_call, mx_uint, py_str +from ..base import NDArrayHandle, KVStoreHandle +from .. import optimizer as opt +from .base import _ctype_key_value, _ctype_dict, KVStoreBase + +__all__ = ['KVStore'] def _updater_wrapper(updater): """A wrapper for the user-defined handle.""" @@ -91,8 +50,10 @@ def _get_kvstore_server_command_type(command): assert (command in command_types), "Unknown command type to send to server" return command_types[command] -class KVStore(object): + +class KVStore(KVStoreBase): """A key-value store for synchronization of values, over multiple devices.""" + def __init__(self, handle): """Initializes a new KVStore. @@ -110,6 +71,62 @@ def __init__(self, handle): def __del__(self): check_call(_LIB.MXKVStoreFree(self.handle)) + def broadcast(self, key, value, out, priority=0): + """ Broadcast the `value` NDArray at rank 0 to all ranks, + and store the result in `out`. + + Note that the native KVStore does not support broadcasting the same key more than once. + + Parameters + ---------- + key : str, or int + The key. + + value : NDArray + The value corresponding to the key to broadcast + + out : NDArray, list of NDArray + Values corresponding to the key to store the result + + priority : int, optional + The priority of the operation. + Higher priority operations are likely to be executed before other actions. + + Examples + -------- + >>> # broadcast a single key-value pair + >>> shape = (2,3) + >>> kv = mx.kv.create('local') + >>> a = mx.nd.zeros(shape) + >>> kv.broadcast('3', mx.nd.ones(shape)*2, out=a) + >>> print a.asnumpy() + [[ 2. 2. 2.] + [ 2. 2. 2.]] + + """ + self.init(key, value) + self.pull(key, out=out, priority=priority) + + @staticmethod + def is_capable(capability): + """Queries if the KVStore type supports certain capability, such as optimizer algorithm, + gradient compression, sparsity, etc. + + Parameters + ---------- + capability: str + The capability to query + + Returns + ------- + result : bool + Whether the capability is supported or not. 
+ """ + if capability.lower() == KVStoreBase.OPTIMIZER: + return True + else: + raise ValueError('Unknown capability: {}'.format(capability)) + def init(self, key, value): """ Initializes a single or a sequence of key-value pairs into the store. @@ -327,27 +344,25 @@ def pushpull(self, key, value, out=None, priority=0): key : str, int, or sequence of str or int Keys. - value : NDArray, RowSparseNDArray, list of NDArray or RowSparseNDArray, - or list of list of NDArray or RowSparseNDArray + value : NDArray, list of NDArray, or list of list of NDArray Values corresponding to the keys. out: NDArray or list of NDArray or list of list of NDArray Values corresponding to the keys. priority : int, optional - The priority of the pull operation. - Higher priority pull operations are likely to be executed before - other pull actions. + The priority of the operation. + Higher priority operations are likely to be executed before other actions. Examples -------- - >>> # push a single key-value pair + >>> # pushpull a single key-value pair >>> kv.pushpull('3', mx.nd.ones(shape)*8, out=a) >>> print a.asnumpy() [[ 8. 8. 8.] [ 8. 8. 8.]] - >>> # aggregate the value and the push + >>> # aggregate the value and then pushpull >>> gpus = [mx.gpu(i) for i in range(4)] >>> b = [mx.nd.ones(shape, gpu) for gpu in gpus] >>> kv.pushpull('3', b, out=a) @@ -355,11 +370,11 @@ def pushpull(self, key, value, out=None, priority=0): [[ 4. 4. 4.] [ 4. 4. 4.]] - >>> # push a list of keys. + >>> # pushpull a list of keys. >>> # single device >>> keys = ['4', '5', '6'] >>> b = [mx.nd.zeros(shape)]*len(keys) - >>> kv.push(keys, [mx.nd.ones(shape)]*len(keys), out=b) + >>> kv.pushpull(keys, [mx.nd.ones(shape)]*len(keys), out=b) >>> print b[1].asnumpy() [[ 1. 1. 1.] [ 1. 1. 1.]] @@ -371,8 +386,8 @@ def pushpull(self, key, value, out=None, priority=0): >>> print b[1][1].asnumpy() [[ 4. 4. 4.] [ 4. 4. 4.]] - """ + """ cvkeys, cvals, use_str_keys = _ctype_key_value(key, value) if out is not None: cokeys, couts, _ = _ctype_key_value(key, out) @@ -709,47 +724,3 @@ def _send_command_to_servers(self, head, body): """ check_call(_LIB.MXKVStoreSendCommmandToServers( self.handle, mx_uint(head), c_str(body))) - -def create(name='local'): - """Creates a new KVStore. - - For single machine training, there are two commonly used types: - - ``local``: Copies all gradients to CPU memory and updates weights there. - - ``device``: Aggregates gradients and updates weights on GPUs. With this setting, - the KVStore also attempts to use GPU peer-to-peer communication, - potentially accelerating the communication. - - For distributed training, KVStore also supports a number of types: - - ``dist_sync``: Behaves similarly to ``local`` but with one major difference. - With ``dist_sync``, batch-size now means the batch size used on each machine. - So if there are ``n`` machines and we use batch size ``b``, - then ``dist_sync`` behaves like ``local`` with batch size ``n * b``. - - ``dist_device_sync``: Identical to ``dist_sync`` with the difference similar - to ``device`` vs ``local``. - - ``dist_async``: Performs asynchronous updates. - The weights are updated whenever gradients are received from any machine. - No two updates happen on the same weight at the same time. However, the order is not - guaranteed. - - Parameters - ---------- - name : {'local', 'device', 'nccl', 'dist_sync', 'dist_device_sync', 'dist_async'} - The type of KVStore. - Returns - ------- - kv : KVStore - The created KVStore. 
- """ - if not isinstance(name, string_types): - raise TypeError('name must be a string') - handle = KVStoreHandle() - check_call(_LIB.MXKVStoreCreate(c_str(name), - ctypes.byref(handle))) - kv = KVStore(handle) - set_kvstore_handle(kv.handle) - return kv diff --git a/python/mxnet/kvstore_server.py b/python/mxnet/kvstore/kvstore_server.py similarity index 97% rename from python/mxnet/kvstore_server.py rename to python/mxnet/kvstore/kvstore_server.py index 2504b4674a83..592d2500755f 100644 --- a/python/mxnet/kvstore_server.py +++ b/python/mxnet/kvstore/kvstore_server.py @@ -22,8 +22,10 @@ import sys import pickle import logging -from .base import _LIB, check_call -from .kvstore import create +from ..base import _LIB, check_call +from .base import create + +__all__ = ['KVStoreServer'] class KVStoreServer(object): """The key-value store server.""" diff --git a/python/mxnet/model.py b/python/mxnet/model.py index f718db080a9c..51ab576a7356 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -98,6 +98,10 @@ def _create_kvstore(kvstore, num_device, arg_params): elif isinstance(kvstore, kvs.KVStore): kv = kvstore elif isinstance(kvstore, str): + if kvstore in kvs.KVStoreBase.kv_registry: + # we do not assume all custom kvstore supports + # updates on kvstore with optimizers + return (kvs.create(kvstore), False) # create kvstore using the string type if num_device == 1 and 'dist' not in kvstore: # no need to use kv for single device and single machine @@ -110,6 +114,10 @@ def _create_kvstore(kvstore, num_device, arg_params): arg_params.values()) if max_size > 1024 * 1024 * 16: update_on_kvstore = False + elif isinstance(kvstore, kvs.KVStoreBase): + # we do not assume all custom kvstore supports + # updates on kvstore with optimizers + return (kvstore, False) else: raise TypeError('kvstore must be KVStore, str or None') @@ -122,10 +130,10 @@ def _initialize_kvstore(kvstore, param_arrays, arg_params, param_names, update_o """Initialize kvstore""" for idx, param_on_devs in enumerate(param_arrays): name = param_names[idx] - kvstore.init(name, arg_params[name]) - - if update_on_kvstore: - kvstore.pull(name, param_on_devs, priority=-idx) + if not update_on_kvstore or arg_params[name].stype != 'default': + kvstore.init(name, arg_params[name]) + else: + kvstore.broadcast(name, arg_params[name], out=param_on_devs) def _update_params_on_kvstore_nccl(param_arrays, grad_arrays, kvstore, param_names): """Perform update of param_arrays from grad_arrays on NCCL kvstore.""" @@ -142,9 +150,9 @@ def _update_params_on_kvstore_nccl(param_arrays, grad_arrays, kvstore, param_nam while start < size: end = start + batch if start + batch < size else size # push gradient, priority is negative index - kvstore.push(valid_param_names[start:end], valid_grad_arrays[start:end], priority=-start) # pull back the weights - kvstore.pull(valid_param_names[start:end], valid_param_arrays[start:end], priority=-start) + kvstore.pushpull(valid_param_names[start:end], valid_grad_arrays[start:end], + out=valid_param_arrays[start:end], priority=-start) start = end def _update_params_on_kvstore(param_arrays, grad_arrays, kvstore, param_names): @@ -155,9 +163,12 @@ def _update_params_on_kvstore(param_arrays, grad_arrays, kvstore, param_names): continue name = param_names[index] # push gradient, priority is negative index - kvstore.push(name, grad_list, priority=-index) # pull back the weights - kvstore.pull(name, arg_list, priority=-index) + if grad_list[0].stype == 'default' and arg_list[0].stype == 'default': + 
kvstore.pushpull(name, grad_list, out=arg_list, priority=-index) + else: + kvstore.push(name, grad_list, priority=-index) + kvstore.pull(name, out=arg_list, priority=-index) def _update_params(param_arrays, grad_arrays, updater, num_device, kvstore=None, param_names=None): @@ -171,9 +182,11 @@ def _update_params(param_arrays, grad_arrays, updater, num_device, if kvstore: name = param_names[index] # push gradient, priority is negative index - kvstore.push(name, grad_list, priority=-index) - # pull back the sum gradients, to the same locations. - kvstore.pull(name, grad_list, priority=-index) + if grad_list[0].stype == 'default' and arg_list[0].stype == 'default': + kvstore.pushpull(name, grad_list, priority=-index) + else: + kvstore.push(name, grad_list, priority=-index) + kvstore.pull(name, out=grad_list, priority=-index) for k, p in enumerate(zip(arg_list, grad_list)): # faked an index here, to make optimizer create diff # state for the same index but on diff devs, TODO(mli) diff --git a/src/kvstore/kvstore_local.h b/src/kvstore/kvstore_local.h index ad70bc15ea0a..d6d37f0a142b 100644 --- a/src/kvstore/kvstore_local.h +++ b/src/kvstore/kvstore_local.h @@ -196,7 +196,9 @@ class KVStoreLocal : public KVStore { const std::vector& values) { for (size_t i = 0; i < keys.size(); ++i) { CHECK(local_.find(keys[i]) == local_.end()) - << "duplicate init of key " << keys[i]; + << "duplicate init of key " << keys[i] + << ". Please double check if you called kv.init or kv.broadcast with this key " + << "multiple times"; local_[keys[i]] = values[i].Copy(pinned_ctx_); comm_->Init(keys[i], values[i].storage_type(), values[i].shape(), values[i].dtype()); } diff --git a/tests/nightly/dist_device_sync_kvstore_custom.py b/tests/nightly/dist_device_sync_kvstore_custom.py new file mode 100644 index 000000000000..05ba2610ba75 --- /dev/null +++ b/tests/nightly/dist_device_sync_kvstore_custom.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
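+#
+# This script is meant to be launched on several workers at once (for example
+# through tools/launch.py with a distributed kvstore such as dist_device_sync);
+# the exact launch command depends on the cluster setup and is not prescribed here.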
+ +import sys +sys.path.insert(0, "../../python/") +import mxnet as mx +import numpy as np +import numpy.random as rnd +import time +import argparse + +# parser +parser = argparse.ArgumentParser(description='kvstore test') +parser.add_argument('--name', type=str, default='dist_device_sync') +args = parser.parse_args() + +def check_diff_to_scalar(A, x, rank=None): + """ assert A == x""" + assert(np.sum(np.abs((A - x).asnumpy())) == 0), (rank, A.asnumpy(), x) + +# setup +keys = ['3', '5', '7'] +init_test_keys = [str(i) for i in range(200,300)] +init_test_keys_big = [str(i) for i in range(300,400)] +init_test_keys_device = [str(i) for i in range(400,500)] +init_test_keys_device_big = [str(i) for i in range(500,600)] + +shape = (2, 3) +big_shape = (1200, 1200) # bigger than MXNET_KVSTORE_BIGARRAY_BOUND + +kv = mx.kv.create(args.name) +my_rank = kv.rank +my_num_workers = kv.num_workers + +def test_pushpull(): + num_gpus = 2 + def check_default_keys(nrepeat=3): + # init kv dns keys + kv.broadcast('3', mx.nd.ones(shape, ctx=mx.gpu()), mx.nd.ones(shape, ctx=mx.gpu())) + kv.broadcast('99', mx.nd.ones(big_shape, ctx=mx.gpu()), mx.nd.ones(big_shape, ctx=mx.gpu())) + for i in range(nrepeat): + scale = my_rank + 1 + num = (my_num_workers + 1) * my_num_workers * num_gpus / 2 + + arrs = [mx.nd.ones(shape, ctx=mx.gpu(j)) * scale for j in range(num_gpus)] + # inplace + kv.pushpull('3', arrs) + for arr in arrs: + check_diff_to_scalar(arr, num) + + big_arrs = [mx.nd.ones(big_shape, ctx=mx.gpu(j)) * scale for j in range(num_gpus)] + # inplace + kv.pushpull('99', big_arrs) + for big_arr in big_arrs: + check_diff_to_scalar(big_arr, num) + + check_default_keys(nrepeat=3) + print('worker ' + str(my_rank) + ' is done') + +def test_broadcast(): + def check_broadcast(kv, cur_keys, cur_shape, device=False): + ctx = mx.gpu(0) if device else mx.cpu(0) + val = [mx.nd.zeros(cur_shape, ctx) for i in cur_keys] + for i in range(len(cur_keys)): + expected = i + kv.broadcast(cur_keys[i], [mx.nd.ones(cur_shape, ctx) * i], out=val[i]) + check_diff_to_scalar(val[i], expected, my_rank) + check_broadcast(kv, init_test_keys, shape) + check_broadcast(kv, init_test_keys_big, big_shape) + check_broadcast(kv, init_test_keys_device, shape, device=True) + check_broadcast(kv, init_test_keys_device_big, big_shape, device=True) + print('worker ' + str(my_rank) + ' is initialized') + +def test_type(): + assert kv.type == args.name + +if __name__ == "__main__": + test_type() + test_broadcast() + test_push_pull() diff --git a/tests/python/unittest/test_gluon_trainer.py b/tests/python/unittest/test_gluon_trainer.py index 9f02733d0a25..fbd04ee1beec 100644 --- a/tests/python/unittest/test_gluon_trainer.py +++ b/tests/python/unittest/test_gluon_trainer.py @@ -26,6 +26,11 @@ from copy import deepcopy from nose.tools import raises, assert_raises +def dict_equ(a, b): + assert set(a) == set(b) + for k in a: + assert (a[k].asnumpy() == b[k].asnumpy()).all() + @with_seed() @raises(RuntimeError) def test_multi_trainer(): @@ -41,12 +46,27 @@ def test_multi_trainer(): # multiple trainers for a sparse Parameter is not allowed trainer1 = gluon.Trainer([x], 'sgd') +@with_seed() +def test_trainer_with_teststore(): + x = gluon.Parameter('x', shape=(10,)) + x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros') + kv = mx.kv.create('teststore') + trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5}, kvstore=kv) + with mx.autograd.record(): + for w in x.list_data(): + y = w + 1 + y.backward() + trainer.step(1) + + assert 
trainer._update_on_kvstore == False + assert (x.data(mx.cpu(1)).asnumpy() == -2).all() + # Expect exceptions if update_on_kvstore is set to True, + # because TestStore does not support that + invalid_trainer = gluon.Trainer([x], 'sgd', kvstore=kv, update_on_kvstore=True) + assert_raises(ValueError, invalid_trainer._init_kvstore) + @with_seed() def test_trainer(): - def dict_equ(a, b): - assert set(a) == set(b) - for k in a: - assert (a[k].asnumpy() == b[k].asnumpy()).all() x = gluon.Parameter('x', shape=(10,)) x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros') trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5}) @@ -120,7 +140,8 @@ def test_trainer_save_load(): @with_seed() def test_trainer_sparse_save_load(): - x = gluon.Parameter('x', shape=(10, 1), lr_mult=1.0, stype='row_sparse') + x = gluon.Parameter('x', shape=(10, 1), lr_mult=1.0, + stype='row_sparse', grad_stype='row_sparse') x.initialize(ctx=[mx.cpu(0)], init='zeros') trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1}) all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0)) @@ -236,7 +257,7 @@ def check_trainer_sparse_kv(kv, stype, grad_stype, update_on_kv, expected): # the updated parameter should be based on the loaded checkpoint mx.nd.waitall() updated_w = x.data(mx.cpu(0)) if stype == 'default' else x.row_sparse_data(all_rows) - assert (updated_w == -0.2).asnumpy().all() + assert (updated_w == -0.2).asnumpy().all(), updated_w except Exception as err: assert isinstance(err, expected) diff --git a/tests/python/unittest/test_kvstore_custom.py b/tests/python/unittest/test_kvstore_custom.py new file mode 100644 index 000000000000..4f1f309d24c1 --- /dev/null +++ b/tests/python/unittest/test_kvstore_custom.py @@ -0,0 +1,195 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# pylint: skip-file +import mxnet as mx +import numpy as np +import unittest +from mxnet.test_utils import rand_ndarray, assert_almost_equal +from common import setup_module, with_seed, assertRaises, teardown +from mxnet.base import py_str, MXNetError + +shape = (4, 4) +keys = [5, 7, 11] +str_keys = ['b', 'c', 'd'] + +def check_diff_to_scalar(A, x): + """ assert A == x""" + assert(np.sum(np.abs((A - x).asnumpy())) == 0), (A, x) + +def init_kv(name='device'): + return mx.kv.create(name) + +@with_seed() +def test_broadcast_single_kv_pair(): + """single key-value pair push & pull""" + def check_single_kv_pair(kv, key): + # single output + ones = mx.nd.ones(shape) + out = mx.nd.empty(shape) + kv.broadcast(key, ones, out) + check_diff_to_scalar(out, 1) + # list output + out_list = [mx.nd.empty(shape)] * 3 + key_list = key + key + kv.broadcast(key_list, ones, out_list) + for o in out_list: + check_diff_to_scalar(o, 1) + + for name in ['device', 'teststore']: + check_single_kv_pair(init_kv(name), 3) + check_single_kv_pair(init_kv(name), 'a') + +@with_seed() +def test_broadcast_list_kv_pair(): + """list key-value pair push & pull""" + def check_list_kv_pair(kv, key): + ones = [mx.nd.ones(shape)] * len(key) + out = [mx.nd.empty(shape)] * len(key) + kv.broadcast(key, ones, out) + for o in out: + check_diff_to_scalar(o, 1) + out_list = [[mx.nd.empty(shape)] * 2 for _ in range(len(key))] + key_list = [k + k for k in key] + kv.broadcast(key_list, ones, out_list) + for o in out_list: + for oo in o: + check_diff_to_scalar(oo, 1) + + check_list_kv_pair(init_kv(), keys) + check_list_kv_pair(init_kv(), str_keys) + +@with_seed() +def test_pushpull_single_kv_pair(): + """aggregate value on muliple devices""" + def check_aggregator(kv, key, key_list=None): + kv.broadcast(key, mx.nd.zeros(shape), out=mx.nd.empty(shape)) + # devices + num_devs = 4 + devs = [mx.Context('cpu', i) for i in range(num_devs)] + + # single + vals = [mx.nd.ones(shape, d) for d in devs] + outs = [mx.nd.empty(shape, d) for d in devs] + + kv.pushpull(key, vals, out=outs) + for out in outs: + check_diff_to_scalar(out, num_devs) + + # inplace + kv.pushpull(key, vals) + for val in vals: + check_diff_to_scalar(val, num_devs) + + # list + if key_list is None: + return + num_keys = len(key_list) + kv.broadcast(key_list, [mx.nd.zeros(shape)] * num_keys, + out=[mx.nd.empty(shape)] * num_keys) + vals = [[mx.nd.ones(shape, d)*2.0 for d in devs]] * num_keys + outs = [[mx.nd.empty(shape, d) for d in devs]] * num_keys + kv.pushpull(key_list, vals, out=outs) + for out in outs: + for o in out: + check_diff_to_scalar(o, num_devs * 2.0) + + # inplace + kv.pushpull(key_list, vals) + for val in vals: + for v in val: + check_diff_to_scalar(v, num_devs * 2.0) + + check_aggregator(init_kv('device'), 3, keys) + check_aggregator(init_kv('device'), 'a', str_keys) + check_aggregator(init_kv('teststore'), 3) + check_aggregator(init_kv('teststore'), 'a') + +@with_seed() +def test_pushpull_list_kv_pair(): + """aggregate value on muliple devices""" + def check_aggregator(kv, key, key_list=None): + kv.broadcast(key, mx.nd.zeros(shape), out=mx.nd.empty(shape)) + # devices + num_devs = 4 + devs = [mx.Context('cpu', i) for i in range(num_devs)] + + # single + vals = [mx.nd.ones(shape, d) for d in devs] + outs = [mx.nd.empty(shape, d) for d in devs] + + kv.pushpull(key, vals, out=outs) + for out in outs: + check_diff_to_scalar(out, num_devs) + + # list + if key_list is None: + return + num_keys = len(key_list) + kv.broadcast(key_list, [mx.nd.zeros(shape)] * num_keys, + 
out=[mx.nd.empty(shape)] * num_keys) + vals = [[mx.nd.ones(shape, d)*2.0 for d in devs]] * num_keys + outs = [[mx.nd.empty(shape, d) for d in devs]] * num_keys + kv.pushpull(key_list, vals, out=outs) + for out in outs: + for o in out: + check_diff_to_scalar(o, num_devs * 2.0) + + check_aggregator(init_kv('device'), 3, keys) + check_aggregator(init_kv('device'), 'a', str_keys) + check_aggregator(init_kv('teststore'), 3) + check_aggregator(init_kv('teststore'), 'a') + + +@with_seed() +def test_custom_store(): + kv = mx.kv.create('teststore') + out = mx.nd.empty((1,)) + kv.broadcast(1, mx.nd.ones((1,)), out=out) + check_diff_to_scalar(out, 1) + assert type(kv).is_capable('optimizer') == False + kv.broadcast(1, mx.nd.ones((1,)), out=out) + check_diff_to_scalar(out, 1) + arr_list = [mx.nd.empty((1,))] * 2 + kv.pushpull(1, [mx.nd.ones((1,))] * 2, out=arr_list) + for arr in arr_list: + check_diff_to_scalar(arr, 2) + kv.pushpull(1, arr_list) + for arr in arr_list: + check_diff_to_scalar(arr, 4) + +@with_seed() +def test_get_type_device(): + kvtype = 'teststore' + kv = mx.kv.create(kvtype) + assert kv.type == kvtype + +@with_seed() +def test_set_optimizer(): + def check_unsupported_methods(kv): + assert not kv.is_capable('optimizer') + optimizer = mx.optimizer.create('sgd') + assertRaises(NotImplementedError, kv.set_optimizer, optimizer) + assertRaises(NotImplementedError, kv.save_optimizer_states, 'test') + assertRaises(NotImplementedError, kv.load_optimizer_states, 'test') + + kv = mx.kv.create('teststore') + check_unsupported_methods(kv) + +if __name__ == '__main__': + import nose + nose.runmodule() From a01ded2a04d0736d88dd421b2e2c548bb603a917 Mon Sep 17 00:00:00 2001 From: "yuri@FreeBSD" Date: Tue, 17 Dec 2019 13:44:34 -0800 Subject: [PATCH 54/62] FreeBSD comparibility patch (#17077) --- 3rdparty/mshadow/mshadow/packet-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/mshadow/mshadow/packet-inl.h b/3rdparty/mshadow/mshadow/packet-inl.h index f5a89bfa8421..58cbc4005aaf 100644 --- a/3rdparty/mshadow/mshadow/packet-inl.h +++ b/3rdparty/mshadow/mshadow/packet-inl.h @@ -6,7 +6,7 @@ #ifndef MSHADOW_PACKET_INL_H_ #define MSHADOW_PACKET_INL_H_ -#ifdef __APPLE__ +#if defined(__APPLE__) || defined(__FreeBSD__) #include #else #include From faa283228d8e4aa391dd0877b7388996e9a0e223 Mon Sep 17 00:00:00 2001 From: Wang Jiajun Date: Wed, 18 Dec 2019 10:57:35 +0800 Subject: [PATCH 55/62] Add im2col and col2im operator (#16502) * add im2col * add col2im * fix typo * add docs * add unittest * more tests * fix lint * fix doc * fix request * trigger CI --- src/operator/nn/im2col-inl.h | 259 +++++++++++++++++++++++ src/operator/nn/im2col.cc | 272 +++++++++++++++++++++++++ src/operator/nn/im2col.cu | 45 ++++ tests/python/unittest/test_operator.py | 147 +++++++++++++ 4 files changed, 723 insertions(+) create mode 100644 src/operator/nn/im2col-inl.h create mode 100644 src/operator/nn/im2col.cc create mode 100644 src/operator/nn/im2col.cu diff --git a/src/operator/nn/im2col-inl.h b/src/operator/nn/im2col-inl.h new file mode 100644 index 000000000000..b5caa035f911 --- /dev/null +++ b/src/operator/nn/im2col-inl.h @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2015 by Contributors + * \file im2col-inl.h + * \brief + * \author Jiajun Wang +*/ + +#ifndef MXNET_OPERATOR_NN_IM2COL_INL_H_ +#define MXNET_OPERATOR_NN_IM2COL_INL_H_ +#include +#include "../mxnet_op.h" +#include "../mshadow_op.h" +#include "../elemwise_op_common.h" +#include "./im2col.h" + +namespace mxnet { +namespace op { + +struct Im2colParam : public dmlc::Parameter { + mxnet::TShape kernel; + mxnet::TShape stride; + mxnet::TShape dilate; + mxnet::TShape pad; + DMLC_DECLARE_PARAMETER(Im2colParam) { + DMLC_DECLARE_FIELD(kernel).describe("Sliding kernel size: (w,), (h, w) or (d, h, w)."); + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) + .describe("The stride between adjacent sliding blocks in spatial dimension: " + "(w,), (h, w) or (d, h, w). Defaults to 1 for each dimension."); + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0)) + .describe("The spacing between adjacent kernel points: (w,), (h, w) or (d, h, w). " + "Defaults to 1 for each dimension."); + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) + .describe("The zero-value padding size on both sides of spatial dimension: " + "(w,), (h, w) or (d, h, w). 
Defaults to no padding."); + } + + index_t DilatedKernelSize(int dim) const { + return 1 + (kernel[dim] - 1) * dilate[dim]; + } +}; // struct Im2colParam + + +template +void Im2colCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + const Im2colParam& param = nnvm::get(attrs.parsed); + Stream *s = ctx.get_stream(); + const mxnet::TShape im_shape = inputs[0].shape_; + const mxnet::TShape col_shape = outputs[0].shape_; + const index_t num = im_shape[0]; + + const int spatial_size = param.kernel.ndim(); + mxnet::TShape col_buffer_shape(1 + spatial_size, 1); + col_buffer_shape[0] = col_shape[1]; + for (int i = 0; i < spatial_size; ++i) { + const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i]; + const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1; + col_buffer_shape[i + 1] = output_size; + } + + MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, { + Tensor im = inputs[0].get_with_shape( + Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s); + Tensor col = outputs[0].get_with_shape( + Shape3(col_shape[0], col_shape[1], col_shape[2]), s); + + if (req[0] == kNullOp) return; + if (req[0] != kAddTo) { + for (index_t n = 0; n < num; ++n) { + im2col(s, im[n].dptr_, im_shape, col_buffer_shape, + param.kernel, param.pad, param.stride, param.dilate, col[n].dptr_); + } + } else { + Tensor tcol = ctx.requested[0] + .get_space_typed(Shape2(col_shape[1], col_shape[2]), s); + for (index_t n = 0; n < num; ++n) { + im2col(s, im[n].dptr_, im_shape, col_buffer_shape, + param.kernel, param.pad, param.stride, param.dilate, tcol.dptr_); + Tensor ocol = col[n]; + ocol += tcol; + } + } + }); +} + +template +void Im2colGradCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + const Im2colParam& param = nnvm::get(attrs.parsed); + Stream *s = ctx.get_stream(); + + const mxnet::TShape im_shape = outputs[0].shape_; + const mxnet::TShape col_shape = inputs[0].shape_; + const index_t num = im_shape[0]; + + const int spatial_size = param.kernel.ndim(); + mxnet::TShape col_buffer_shape(1 + spatial_size, 1); + col_buffer_shape[0] = col_shape[1]; + for (int i = 0; i < spatial_size; ++i) { + const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i]; + const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1; + col_buffer_shape[i + 1] = output_size; + } + + MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + Tensor im_grad = outputs[0].get_with_shape( + Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s); + Tensor col_grad = inputs[0].get_with_shape( + Shape3(col_shape[0], col_shape[1], col_shape[2]), s); + + for (index_t n = 0; n < num; ++n) { + col2im(s, col_grad[n].dptr_, im_shape, col_buffer_shape, + param.kernel, param.pad, param.stride, param.dilate, + im_grad[n].dptr_, req[0]); + } + }); +} + +struct Col2imParam : public dmlc::Parameter { + mxnet::TShape output_size; + mxnet::TShape kernel; + mxnet::TShape stride; + mxnet::TShape dilate; + mxnet::TShape pad; + DMLC_DECLARE_PARAMETER(Col2imParam) { + DMLC_DECLARE_FIELD(output_size) + .describe("The spatial dimension of image array: (w,), (h, w) or (d, h, w)."); + DMLC_DECLARE_FIELD(kernel).describe("Sliding kernel size: (w,), (h, w) or (d, h, w)."); + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) + 
.describe("The stride between adjacent sliding blocks in spatial dimension: " + "(w,), (h, w) or (d, h, w). Defaults to 1 for each dimension."); + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0)) + .describe("The spacing between adjacent kernel points: (w,), (h, w) or (d, h, w). " + "Defaults to 1 for each dimension."); + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) + .describe("The zero-value padding size on both sides of spatial dimension: " + "(w,), (h, w) or (d, h, w). Defaults to no padding."); + } + + index_t DilatedKernelSize(int dim) const { + return 1 + (kernel[dim] - 1) * dilate[dim]; + } +}; // struct Col2imParam + +template +void Col2imCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + const Col2imParam& param = nnvm::get(attrs.parsed); + Stream *s = ctx.get_stream(); + const mxnet::TShape im_shape = outputs[0].shape_; + const mxnet::TShape col_shape = inputs[0].shape_; + const index_t num = im_shape[0]; + + const int spatial_size = param.kernel.ndim(); + mxnet::TShape col_buffer_shape(1 + spatial_size, 1); + col_buffer_shape[0] = col_shape[1]; + for (int i = 0; i < spatial_size; ++i) { + const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i]; + const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1; + col_buffer_shape[i + 1] = output_size; + } + + MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + Tensor im = outputs[0].get_with_shape( + Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s); + Tensor col = inputs[0].get_with_shape( + Shape3(col_shape[0], col_shape[1], col_shape[2]), s); + + for (index_t n = 0; n < num; ++n) { + col2im(s, col[n].dptr_, im_shape, col_buffer_shape, + param.kernel, param.pad, param.stride, param.dilate, + im[n].dptr_, req[0]); + } + }); +} + +template +void Col2imGradCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + const Col2imParam& param = nnvm::get(attrs.parsed); + Stream *s = ctx.get_stream(); + + const mxnet::TShape im_shape = inputs[0].shape_; + const mxnet::TShape col_shape = outputs[0].shape_; + const index_t batch_size = im_shape[0]; + + const int spatial_size = param.kernel.ndim(); + mxnet::TShape col_buffer_shape(1 + spatial_size, 1); + col_buffer_shape[0] = im_shape[1]; + for (int i = 0; i < spatial_size; ++i) { + const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i]; + const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1; + col_buffer_shape[i + 1] = output_size; + } + + MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, { + Tensor im_grad = inputs[0].get_with_shape( + Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s); + Tensor col_grad = outputs[0].get_with_shape( + Shape3(col_shape[0], col_shape[1], col_shape[2]), s); + + if (req[0] == kNullOp) return; + if (req[0] != kAddTo) { + for (index_t n = 0; n < batch_size; ++n) { + im2col(s, im_grad[n].dptr_, im_shape, col_buffer_shape, + param.kernel, param.pad, param.stride, param.dilate, col_grad[n].dptr_); + } + } else { + Tensor tgrad = ctx.requested[0] + .get_space_typed(Shape2(col_shape[1], col_shape[2]), s); + for (index_t n = 0; n < batch_size; ++n) { + im2col(s, im_grad[n].dptr_, im_shape, col_buffer_shape, + param.kernel, param.pad, param.stride, param.dilate, tgrad.dptr_); + Tensor cgrad = 
col_grad[n]; + cgrad += tgrad; + } + } + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NN_IM2COL_INL_H_ diff --git a/src/operator/nn/im2col.cc b/src/operator/nn/im2col.cc new file mode 100644 index 000000000000..ae493f1bc594 --- /dev/null +++ b/src/operator/nn/im2col.cc @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2015 by Contributors + * \file im2col.cc + * \brief + * \author Jiajun Wang +*/ + +#include "./im2col-inl.h" +#include "../operator_common.h" +#include "mxnet/op_attr_types.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(Im2colParam); +DMLC_REGISTER_PARAMETER(Col2imParam); + +template +void SlidingParser(nnvm::NodeAttrs* attrs) { + using namespace mshadow; + PType param_; + try { + param_.Init(attrs->dict); + } catch (const dmlc::ParamError& e) { + std::ostringstream os; + os << e.what(); + os << ", in operator " << attrs->op->name << "(" + << "name=\"" << attrs->name << "\""; + for (const auto& k : attrs->dict) { + os << ", " << k.first << "=\"" << k.second << "\""; + } + os << ")"; + throw dmlc::ParamError(os.str()); + } + + if (param_.kernel.ndim() == 1) { + if (param_.stride.ndim() == 0) param_.stride = Shape1(1); + if (param_.dilate.ndim() == 0) param_.dilate = Shape1(1); + if (param_.pad.ndim() == 0) param_.pad = Shape1(0); + } else if (param_.kernel.ndim() == 2) { + if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); + if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1); + if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); + } else { + CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim() << "D convolution not supported"; + if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1); + if (param_.dilate.ndim() == 0) param_.dilate = Shape3(1, 1, 1); + if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0); + } + CHECK_EQ(param_.kernel.ndim(), param_.stride.ndim()) + << "Stride must have the same number of dimensions with kernel_size," + << "but kernel_size is set to " << param_.kernel << " while stride is " + << param_.stride; + CHECK_EQ(param_.kernel.ndim(), param_.dilate.ndim()) + << "Dilate must have the same number of dimensions with kernel_size," + << "but kernel_size is set to " << param_.kernel << " while dilate is " + << param_.dilate; + CHECK_EQ(param_.kernel.ndim(), param_.pad.ndim()) + << "Padding must have the same number of dimensions with kernel_size," + << "but kernel_size is set to " << param_.kernel << " while padding is " + << param_.pad; + attrs->parsed = std::move(param_); +} + +NNVM_REGISTER_OP(im2col) +.describe(R"(Extract sliding blocks from input array. 
+ +This operator is used in vanilla convolution implementation to transform the sliding +blocks on image to column matrix, then the convolution operation can be computed +by matrix multiplication between column and convolution weight. Due to the close +relation between im2col and convolution, the concept of **kernel**, **stride**, +**dilate** and **pad** in this operator are inherited from convolution operation. + +Given the input data of shape :math:`(N, C, *)`, where :math:`N` is the batch size, +:math:`C` is the channel size, and :math:`*` is the arbitrary spatial dimension, +the output column array is always with shape :math:`(N, C \times \prod(\text{kernel}), W)`, +where :math:`C \times \prod(\text{kernel})` is the block size, and :math:`W` is the +block number which is the spatial size of the convolution output with same input parameters. +Only 1-D, 2-D and 3-D of spatial dimension is supported in this operator. + +)" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(SlidingParser) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; +}) +.set_attr("FListOutputNames", + [](const NodeAttrs& attrs) { + return std::vector{"output"}; +}) +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape){ + using namespace mshadow; + CHECK_EQ(in_shape->size(), 1U); + const Im2colParam& param = nnvm::get(attrs.parsed); + if (mxnet::op::shape_is_none(in_shape->at(0))) { + return false; + } + + CHECK_GT(param.kernel.Size(), 0U) \ + << "incorrect kernel size: " << param.kernel; + CHECK_GT(param.stride.Size(), 0U) \ + << "incorrect stride size: " << param.stride; + CHECK_GT(param.dilate.Size(), 0U) \ + << "incorrect dilate size: " << param.dilate; + + index_t out_dim = 1; + mxnet::TShape dshape(in_shape->at(0)); + for (int i = 0; i < param.kernel.ndim(); ++i) { + const index_t pad_size = dshape[i + 2] + 2 * param.pad[i]; + const index_t dilated_kernel_size = param.DilatedKernelSize(i); + CHECK_LE(dilated_kernel_size, pad_size) + << "kernel size exceed input"; + const index_t output_size = (pad_size - dilated_kernel_size) / param.stride[i] + 1; + out_dim *= output_size; + } + SHAPE_ASSIGN_CHECK(*out_shape, 0, Shape3(dshape[0], dshape[1] * param.kernel.Size(), out_dim)); + return true; +}) +.set_attr("FInferType", [](const nnvm::NodeAttrs& attrs, + std::vector *in_type, std::vector *out_type) { + CHECK_EQ(in_type->size(), 1U); + if (mxnet::op::type_is_none(in_type->at(0))) { + return false; + } + + int dtype = in_type->at(0); + TYPE_ASSIGN_CHECK(*out_type, 0, dtype); + return true; +}) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FCompute", Im2colCompute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_im2col"}) +.add_argument("data", "NDArray-or-Symbol", "Input array to extract sliding blocks.") +.add_arguments(Im2colParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_im2col) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(SlidingParser) +.set_attr("TIsBackward", true) +.set_attr("FCompute", Im2colGradCompute); + +NNVM_REGISTER_OP(col2im) +.describe(R"(Combining the output column matrix of im2col back to image array. + +Like :class:`~mxnet.ndarray.im2col`, this operator is also used in the vanilla convolution +implementation. Despite the name, col2im is not the reverse operation of im2col. 
Since there +may be overlaps between neighbouring sliding blocks, the column elements cannot be directly +put back into image. Instead, they are accumulated (i.e., summed) in the input image +just like the gradient computation, so col2im is the gradient of im2col and vice versa. + +Using the notation in im2col, given an input column array of shape +:math:`(N, C \times \prod(\text{kernel}), W)`, this operator accumulates the column elements +into output array of shape :math:`(N, C, \text{output_size}[0], \text{output_size}[1], \dots)`. +Only 1-D, 2-D and 3-D of spatial dimension is supported in this operator. + +)" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(SlidingParser) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; +}) +.set_attr("FListOutputNames", + [](const NodeAttrs& attrs) { + return std::vector{"output"}; +}) +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape){ + using namespace mshadow; + CHECK_EQ(in_shape->size(), 1U); + const Col2imParam& param = nnvm::get(attrs.parsed); + if (mxnet::op::shape_is_none(in_shape->at(0))) { + return false; + } + + CHECK_EQ(param.kernel.ndim(), param.output_size.ndim()) + << "Output size must have the same number of dimensions with kernel_size," + << "but kernel_size is set to " << param.kernel << " while output size is " + << param.output_size; + + CHECK_GT(param.output_size.Size(), 0U) \ + << "incorrect output size: " << param.output_size; + CHECK_GT(param.kernel.Size(), 0U) \ + << "incorrect kernel size: " << param.kernel; + CHECK_GT(param.stride.Size(), 0U) \ + << "incorrect stride size: " << param.stride; + CHECK_GT(param.dilate.Size(), 0U) \ + << "incorrect dilate size: " << param.dilate; + + const int spatial_size = param.kernel.ndim(); + mxnet::TShape dshape(in_shape->at(0)); + + index_t out_dim = 1; + for (int i = 0; i < spatial_size; ++i) { + const index_t pad_size = param.output_size[i] + 2 * param.pad[i]; + const index_t dilated_kernel_size = param.DilatedKernelSize(i); + CHECK_LE(dilated_kernel_size, pad_size) + << "kernel size exceed output size"; + const index_t output_size = (pad_size - dilated_kernel_size) / param.stride[i] + 1; + out_dim *= output_size; + } + + CHECK_EQ(dshape[2], out_dim) + << "output size does not match convolution parameters"; + CHECK_EQ(dshape[1] % param.kernel.Size(), 0) + << "the second dim of input shape should be multiples of kernel size"; + + mxnet::TShape oshape(param.kernel.ndim() + 2, 1); + oshape[0] = dshape[0]; + oshape[1] = dshape[1] / param.kernel.Size(); + for (int i = 0; i < spatial_size; ++i) { + oshape[i + 2] = param.output_size[i]; + } + SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape); + return true; +}) +.set_attr("FInferType", [](const nnvm::NodeAttrs& attrs, + std::vector *in_type, std::vector *out_type) { + CHECK_EQ(in_type->size(), 1U); + if (mxnet::op::type_is_none(in_type->at(0))) { + return false; + } + + int dtype = in_type->at(0); + TYPE_ASSIGN_CHECK(*out_type, 0, dtype); + return true; +}) +.set_attr("FCompute", Col2imCompute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_col2im"}) +.add_argument("data", "NDArray-or-Symbol", "Input array to combine sliding blocks.") +.add_arguments(Col2imParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_col2im) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(SlidingParser) +.set_attr("TIsBackward", true) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return 
std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FCompute", Col2imGradCompute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/nn/im2col.cu b/src/operator/nn/im2col.cu new file mode 100644 index 000000000000..94d5b504611f --- /dev/null +++ b/src/operator/nn/im2col.cu @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2015 by Contributors + * \file im2col.cu + * \brief + * \author Jiajun Wang +*/ + +#include "./im2col-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(im2col) +.set_attr("FCompute", Im2colCompute); + +NNVM_REGISTER_OP(_backward_im2col) +.set_attr("FCompute", Im2colGradCompute); + +NNVM_REGISTER_OP(col2im) +.set_attr("FCompute", Col2imCompute); + +NNVM_REGISTER_OP(_backward_col2im) +.set_attr("FCompute", Col2imGradCompute); + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 66031d20d65b..d59c3063f95a 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9374,6 +9374,153 @@ def check_random_uniform(): assertRaises(MXNetError, mx.nd.random_uniform, alpha, beta, shape) +@with_seed() +def test_im2col_col2im(): + def compute_output_size(spatial, kernel, stride=1, dilate=1, pad=0): + pad_size = spatial + 2 * pad + dilated_kernel = dilate * (kernel - 1) + 1 + return (pad_size - dilated_kernel) // stride + 1 + + def build_kwargs(kernel, stride=1, dilate=1, pad=0): + return {'kernel': (kernel, kernel), + 'stride': (stride, stride), + 'dilate': (dilate, dilate), + 'pad': (pad, pad)} + + # use im2col to compute convolution + def test_conv_compute(input_shape, num_filter, kernel, stride=1, dilate=1, pad=0): + batch_size = input_shape[0] + channel = input_shape[1] + kwargs = build_kwargs(kernel, stride, dilate, pad) + data = mx.nd.uniform(shape=input_shape) + col = mx.nd.im2col(data, **kwargs) + w = mx.nd.uniform(shape=(num_filter, channel, kernel, kernel)) + c1 = mx.nd.dot(col.transpose((0, 2, 1)), w.reshape(num_filter, -1).T).transpose((0, 2, 1)) + hos = compute_output_size(input_shape[2], kernel, stride, dilate, pad) + wos = compute_output_size(input_shape[3], kernel, stride, dilate, pad) + c1 = c1.reshape((batch_size, num_filter, hos, wos)) + + c2 = mx.nd.Convolution(data, num_filter=num_filter, weight=w, no_bias=True, **kwargs) + assert_almost_equal(c1.asnumpy(), c2.asnumpy(), rtol=1e-5, atol=1e-5) + + test_conv_compute( + input_shape = (5, 3, 30, 20), + num_filter = 10, + kernel = 3 + ) + + test_conv_compute( + input_shape = (5, 3, 30, 20), + num_filter = 10, + kernel = 3, + stride = 2 + ) + + test_conv_compute( + input_shape = (5, 3, 30, 20), + num_filter = 10, + kernel = 3, + stride = 2, + dilate = 2 + ) + 
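+    # Shape bookkeeping exercised by the checks above (2-D spatial case):
+    #   data:                            (N, C, H, W)
+    #   col = im2col(data):              (N, C*k*k, H_out*W_out)
+    #   dot(col^T, w.reshape(F, -1)^T):  (N, H_out*W_out, F)
+    #   transpose + reshape:             (N, F, H_out, W_out)
+    # which matches mx.nd.Convolution with the same kernel/stride/dilate/pad
+    # and no bias.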
+ test_conv_compute( + input_shape = (5, 3, 30, 20), + num_filter = 10, + kernel = 3, + stride = 2, + dilate = 2, + pad = 1 + ) + + # use composite of im2col and col2im to reconstruct image + def test_reconstruct(input_shape, kernel, stride=1, dilate=1, pad=0): + batch_size = input_shape[0] + channel = input_shape[1] + kwargs = build_kwargs(kernel, stride, dilate, pad) + data = mx.nd.uniform(shape=input_shape) + col = mx.nd.im2col(data, **kwargs) + im1 = mx.nd.col2im(col, input_shape[2:], **kwargs) + + im2 = mx.nd.col2im(mx.nd.ones_like(col), input_shape[2:], **kwargs) * data + assert_almost_equal(im1.asnumpy(), im2.asnumpy(), rtol=1e-5, atol=1e-5) + + test_reconstruct( + input_shape = (5, 3, 30, 20), + kernel = 3 + ) + + test_reconstruct( + input_shape = (5, 3, 30, 20), + kernel = 3, + stride = 2 + ) + + test_reconstruct( + input_shape = (5, 3, 30, 20), + kernel = 3, + stride = 2, + dilate = 2 + ) + + test_reconstruct( + input_shape = (5, 3, 30, 20), + kernel = 3, + stride = 2, + dilate = 2, + pad = 1 + ) + + # test gradient + # the grad of im2col is col2im, and vice versa + def test_grad(input_shape, kernel, stride=1, dilate=1, pad=0): + # im2col + data = mx.sym.Variable('data') + kwargs = build_kwargs(kernel, stride, dilate, pad) + sym = mx.sym.im2col(data, **kwargs) + + im = mx.nd.uniform(shape=input_shape) + col = mx.nd.im2col(im, **kwargs) + col_shape = col.shape + expected = mx.nd.col2im(col, input_shape[2:], **kwargs) + check_symbolic_backward(sym, [im.asnumpy()], [col.asnumpy()], [expected.asnumpy()]) + + # col2im + data = mx.sym.Variable('data') + sym = mx.sym.col2im(data, input_shape[2:], **kwargs) + + col = mx.nd.uniform(shape=col_shape) + im = mx.nd.col2im(col, input_shape[2:], **kwargs) + expected = mx.nd.im2col(im, **kwargs) + check_symbolic_backward(sym, [col.asnumpy()], [im.asnumpy()], [expected.asnumpy()]) + + test_grad( + input_shape = (5, 3, 30, 20), + kernel = 3 + ) + + test_grad( + input_shape = (5, 3, 30, 20), + kernel = 3, + stride = 2 + ) + + test_grad( + input_shape = (5, 3, 30, 20), + kernel = 3, + stride = 2, + dilate = 2 + ) + + test_grad( + input_shape = (5, 3, 30, 20), + kernel = 3, + stride = 2, + dilate = 2, + pad = 1 + ) + + if __name__ == '__main__': import nose nose.runmodule() From ed0954717048d76e2dc63206c26cb7ab138a69d7 Mon Sep 17 00:00:00 2001 From: kshitij12345 Date: Thu, 19 Dec 2019 00:20:09 +0530 Subject: [PATCH 56/62] [MXNET-978] Higher Order Gradient Support `arcsin`, `arccos`. 
(#15515) * support arcsin, arccos for higher order grad * add relevant tests * add small note for computation * update comments * use NodeOpGen * retrigger CI * address comment * rename grad_x -> x_grad * retrigger CI * retrigger CI --- src/operator/tensor/elemwise_unary_op_trig.cc | 53 ++++++++++++++++++- .../python/unittest/test_higher_order_grad.py | 38 +++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index a436ebb284a3..e5d662a1b262 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -188,7 +188,31 @@ The storage type of ``arcsin`` output depends upon the input storage type: .set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arcsin" }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arcsin, - unary_bwd); + unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: head_grad_grads (dL/dxgrad) + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseGradUseIn) + // f(x) = arcsin(x) + // n: f'(x) = 1/(1-x^2)^1/2 + // f''(x) = f'(x) * x/(1-x^2) + // Note: x/(1-x^2) = x * f'(x)^2 + auto dydx = n->inputs[0]; + auto x = n->inputs[1]; + auto dydx_mul_grad_x = nnvm::NodeEntry{n}; + auto op = mxnet::util::NodeOpGen{n}; + + auto x_grad = op.div(dydx_mul_grad_x, dydx); + auto x_grad_square = op.square(x_grad); + auto x_grad_square_mul_x = op.mul(x_grad_square, x); + auto x_grad_grad = op.mul(dydx_mul_grad_x, x_grad_square_mul_x); + + std::vector ret; + ret.emplace_back(op.mul(ograds[0], x_grad)); + ret.emplace_back(op.mul(ograds[0], x_grad_grad)); + return ret; + }); // arccos MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(arccos, cpu, mshadow_op::arccos) @@ -207,7 +231,32 @@ The storage type of ``arccos`` output is always dense .set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arccos" }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arccos, - unary_bwd); + unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: head_grad_grads (dL/dxgrad) + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseGradUseIn) + // f(x) = arccos(x) + // n: f'(x) = -1/(1-x^2)^1/2 + // f''(x) = f'(x) * x/(1-x^2) + // Note: x/(1-x^2) = x * f'(x)^2 + auto dydx = n->inputs[0]; + auto x = n->inputs[1]; + auto dydx_mul_grad_x = nnvm::NodeEntry{n}; + auto op = mxnet::util::NodeOpGen{n}; + + auto x_grad = op.div(dydx_mul_grad_x, dydx); + auto x_grad_square = op.square(x_grad); + auto x_grad_square_mul_x = op.mul(x_grad_square, x); + auto x_grad_grad = op.mul(dydx_mul_grad_x, x_grad_square_mul_x); + + std::vector ret; + ret.emplace_back(op.mul(ograds[0], x_grad)); + ret.emplace_back(op.mul(ograds[0], x_grad_grad)); + return ret; + }); + // arctan MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(arctan, cpu, mshadow_op::arctan) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 527c35d5dd94..eeba4226dc36 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -133,6 +133,44 @@ def grad_grad_op(x): array, tanh, grad_grad_op, rtol=1e-6, atol=1e-6) +@with_seed() +def test_arcsin(): + def arcsin(x): + return nd.arcsin(x) + + def grad_grad_op(x): + return x / nd.sqrt((1-x**2)**3) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + # Hack: Decrease std_dev to make + # sure all 
elements + # are in range -1 to 1 + # i.e. Domain of arcsin + array *= 0.2 + check_second_order_unary(array, arcsin, grad_grad_op) + + +@with_seed() +def test_arccos(): + def arccos(x): + return nd.arccos(x) + + def grad_grad_op(x): + return -x / nd.sqrt((1-x**2)**3) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + # Hack: Decrease std_dev to make + # sure all elements + # are in range -1 to 1 + # i.e. Domain of arccos + array *= 0.2 + check_second_order_unary(array, arccos, grad_grad_op) + + @with_seed() def test_arctan(): def arctan(x): From a18250d57ecf34b1499e590b1eea9453d02ab05a Mon Sep 17 00:00:00 2001 From: Zhennan Qin Date: Thu, 19 Dec 2019 07:55:08 +0800 Subject: [PATCH 57/62] Add silent option to quantization script (#17094) * Add silent option to quantization script * Refactor code * Fix lint --- .../quantization/imagenet_gen_qsym_mkldnn.py | 70 +++++++++------ python/mxnet/contrib/quantization.py | 90 +++++++++++-------- .../quantization/quantize_graph_pass.cc | 26 ++++-- 3 files changed, 117 insertions(+), 69 deletions(-) diff --git a/example/quantization/imagenet_gen_qsym_mkldnn.py b/example/quantization/imagenet_gen_qsym_mkldnn.py index a4c1491039b9..130282714e30 100644 --- a/example/quantization/imagenet_gen_qsym_mkldnn.py +++ b/example/quantization/imagenet_gen_qsym_mkldnn.py @@ -140,17 +140,23 @@ def save_params(fname, arg_params, aux_params, logger=None): help='If enabled, the quantize op will ' 'be calibrated offline if calibration mode is ' 'enabled') + parser.add_argument('--quiet', action='store_true', default=False, + help='suppress most of log') args = parser.parse_args() ctx = mx.cpu(0) - logging.basicConfig() - logger = logging.getLogger('logger') - logger.setLevel(logging.INFO) + logger = None + if not args.quiet: + logging.basicConfig() + logger = logging.getLogger('logger') + logger.setLevel(logging.INFO) - logger.info(args) - logger.info('shuffle_dataset=%s' % args.shuffle_dataset) + if logger: + logger.info(args) + logger.info('shuffle_dataset=%s' % args.shuffle_dataset) calib_mode = args.calib_mode - logger.info('calibration mode set to %s' % calib_mode) + if logger: + logger.info('calibration mode set to %s' % calib_mode) # download calibration dataset if calib_mode != 'none': @@ -158,13 +164,16 @@ def save_params(fname, arg_params, aux_params, logger=None): # download model if not args.no_pretrained: - logger.info('Get pre-trained model from MXNet or Gluoncv modelzoo.') - logger.info('If you want to use custom model, please set --no-pretrained.') + if logger: + logger.info('Get pre-trained model from MXNet or Gluoncv modelzoo.') + logger.info('If you want to use custom model, please set --no-pretrained.') if args.model in ['imagenet1k-resnet-152', 'imagenet1k-inception-bn']: - logger.info('model %s is downloaded from MXNet modelzoo' % args.model) + if logger: + logger.info('model %s is downloaded from MXNet modelzoo' % args.model) prefix, epoch = download_model(model_name=args.model, logger=logger) else: - logger.info('model %s is converted from GluonCV' % args.model) + if logger: + logger.info('model %s is converted from GluonCV' % args.model) prefix = convert_from_gluon(model_name=args.model, image_shape=args.image_shape, classes=1000, logger=logger) rgb_mean = '123.68,116.779,103.939' rgb_std = '58.393, 57.12, 57.375' @@ -178,14 +187,16 @@ def save_params(fname, arg_params, aux_params, logger=None): # get batch size batch_size = args.batch_size - logger.info('batch size = %d for calibration' % batch_size) + if 
logger: + logger.info('batch size = %d for calibration' % batch_size) # get number of batches for calibration num_calib_batches = args.num_calib_batches - if calib_mode == 'none': - logger.info('skip calibration step as calib_mode is none') - else: - logger.info('number of batches = %d for calibration' % num_calib_batches) + if logger: + if calib_mode == 'none': + logger.info('skip calibration step as calib_mode is none') + else: + logger.info('number of batches = %d for calibration' % num_calib_batches) # get number of threads for decoding the dataset data_nthreads = args.data_nthreads @@ -195,7 +206,8 @@ def save_params(fname, arg_params, aux_params, logger=None): exclude_first_conv = args.exclude_first_conv if args.quantized_dtype == "uint8": - logger.info('quantized dtype is set to uint8, will exclude first conv.') + if logger: + logger.info('quantized dtype is set to uint8, will exclude first conv.') exclude_first_conv = True excluded_sym_names = [] if not args.no_pretrained: @@ -242,42 +254,48 @@ def save_params(fname, arg_params, aux_params, logger=None): else: raise ValueError('Currently, model %s is not supported in this script' % args.model) else: - logger.info('Please set proper RGB configs for model %s' % args.model) + if logger: + logger.info('Please set proper RGB configs for model %s' % args.model) # add rgb mean/std of your model. rgb_mean = '0,0,0' rgb_std = '0,0,0' # add layer names you donnot want to quantize. - logger.info('Please set proper excluded_sym_names for model %s' % args.model) + if logger: + logger.info('Please set proper excluded_sym_names for model %s' % args.model) excluded_sym_names += ['layers'] if exclude_first_conv: excluded_sym_names += ['layers'] - logger.info('These layers have been excluded %s' % excluded_sym_names) + if logger: + logger.info('These layers have been excluded %s' % excluded_sym_names) label_name = args.label_name - logger.info('label_name = %s' % label_name) + if logger: + logger.info('label_name = %s' % label_name) data_shape = tuple([int(i) for i in image_shape.split(',')]) - logger.info('Input data shape = %s' % str(data_shape)) - - logger.info('rgb_mean = %s' % rgb_mean) + if logger: + logger.info('Input data shape = %s' % str(data_shape)) + logger.info('rgb_mean = %s' % rgb_mean) + logger.info('rgb_std = %s' % rgb_std) rgb_mean = [float(i) for i in rgb_mean.split(',')] mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]} - logger.info('rgb_std = %s' % rgb_std) rgb_std = [float(i) for i in rgb_std.split(',')] std_args = {'std_r': rgb_std[0], 'std_g': rgb_std[1], 'std_b': rgb_std[2]} combine_mean_std = {} combine_mean_std.update(mean_args) combine_mean_std.update(std_args) if calib_mode == 'none': - logger.info('Quantizing FP32 model %s' % args.model) + if logger: + logger.info('Quantizing FP32 model %s' % args.model) qsym, qarg_params, aux_params = quantize_model_mkldnn(sym=sym, arg_params=arg_params, aux_params=aux_params, ctx=ctx, excluded_sym_names=excluded_sym_names, calib_mode=calib_mode, quantized_dtype=args.quantized_dtype, logger=logger) sym_name = '%s-symbol.json' % (prefix + '-quantized') else: - logger.info('Creating ImageRecordIter for reading calibration dataset') + if logger: + logger.info('Creating ImageRecordIter for reading calibration dataset') data = mx.io.ImageRecordIter(path_imgrec=args.calib_dataset, label_width=1, preprocess_threads=data_nthreads, diff --git a/python/mxnet/contrib/quantization.py b/python/mxnet/contrib/quantization.py index b0714037bb42..01051ab7c8e4 100644 --- 
a/python/mxnet/contrib/quantization.py +++ b/python/mxnet/contrib/quantization.py @@ -188,8 +188,8 @@ def collect(self, name, arr): return handle = ctypes.cast(arr, NDArrayHandle) arr = NDArray(handle, writable=False).copyto(cpu()).asnumpy() - if self.logger is not None: - self.logger.info("Collecting layer %s histogram of shape %s" % (name, arr.shape)) + if self.logger: + self.logger.debug("Collecting layer %s histogram of shape %s" % (name, arr.shape)) min_range = np.min(arr) max_range = np.max(arr) th = max(abs(min_range), abs(max_range)) @@ -224,9 +224,9 @@ def collect(self, name, arr): max(cur_min_max[1], max_range)) else: self.min_max_dict[name] = (min_range, max_range) - if self.logger is not None: - self.logger.info("Collecting layer %s min_range=%f, max_range=%f" - % (name, min_range, max_range)) + if self.logger: + self.logger.debug("Collecting layer %s min_range=%f, max_range=%f" + % (name, min_range, max_range)) def _calibrate_quantized_sym(qsym, th_dict): """Given a dictionary containing the thresholds for quantizing the layers, @@ -358,18 +358,19 @@ def _get_optimal_thresholds(hist_dict, quantized_dtype, num_quantized_bins=255, else: th_dict[name] = (-th, th) del hist_dict[name] # release the memory - if logger is not None: - logger.info('layer=%s, min_val=%f, max_val=%f, th=%f, divergence=%f' - % (name, min_val, max_val, th, divergence)) + if logger: + logger.debug('layer=%s, min_val=%f, max_val=%f, th=%f, divergence=%f' + % (name, min_val, max_val, th, divergence)) return th_dict -def _load_sym(sym, logger=logging): +def _load_sym(sym, logger=None): """Given a str as a path the symbol .json file or a symbol, returns a Symbol object.""" if isinstance(sym, str): # sym is a symbol file path cur_path = os.path.dirname(os.path.realpath(__file__)) symbol_file_path = os.path.join(cur_path, sym) - logger.info('Loading symbol from file %s' % symbol_file_path) + if logger: + logger.info('Loading symbol from file %s' % symbol_file_path) return sym_load(symbol_file_path) elif isinstance(sym, Symbol): return sym @@ -378,14 +379,15 @@ def _load_sym(sym, logger=logging): ' while received type %s' % str(type(sym))) -def _load_params(params, logger=logging): +def _load_params(params, logger=None): """Given a str as a path to the .params file or a pair of params, returns two dictionaries representing arg_params and aux_params. """ if isinstance(params, str): cur_path = os.path.dirname(os.path.realpath(__file__)) param_file_path = os.path.join(cur_path, params) - logger.info('Loading params from file %s' % param_file_path) + if logger: + logger.info('Loading params from file %s' % param_file_path) save_dict = nd_load(param_file_path) arg_params = {} aux_params = {} @@ -451,7 +453,7 @@ def quantize_model(sym, arg_params, aux_params, data_names=('data',), label_names=('softmax_label',), ctx=cpu(), excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy', calib_data=None, num_calib_examples=None, - quantized_dtype='int8', quantize_mode='smart', logger=logging): + quantized_dtype='int8', quantize_mode='smart', logger=None): """User-level API for generating a quantized model from a FP32 model w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems. Please do not run inference using the quantized models on Windows for now. 
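For reference, a minimal sketch of driving this API after the change (the checkpoint prefix and epoch below are hypothetical; with the new default logger=None the call runs silently, while passing a configured logger restores the progress messages):

    import logging
    import mxnet as mx
    from mxnet.contrib.quantization import quantize_model

    logging.basicConfig()
    logger = logging.getLogger('quantization')    # or None (the default) for a silent run
    logger.setLevel(logging.INFO)
    # hypothetical FP32 checkpoint saved earlier with mx.model.save_checkpoint
    sym, arg_params, aux_params = mx.model.load_checkpoint('model/fp32-resnet', 0)
    qsym, qarg_params, qaux_params = quantize_model(
        sym, arg_params, aux_params, ctx=mx.cpu(),
        calib_mode='none', quantized_dtype='int8', logger=logger)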
@@ -530,7 +532,9 @@ def quantize_model(sym, arg_params, aux_params, ' the names of the operators that will not be quantized,' ' while received type %s' % str(type(excluded_op_names))) - logger.info('Quantizing symbol') + if logger: + os.environ['MXNET_QUANTIZATION_VERBOSE'] = '1' + logger.info('Quantizing symbol') if quantized_dtype not in ('int8', 'uint8', 'auto'): raise ValueError('unknown quantized_dtype %s received,' ' expected `int8`, `uint8` or `auto`' % quantized_dtype) @@ -561,21 +565,24 @@ def quantize_model(sym, arg_params, aux_params, include_layer=calib_layer, max_num_examples=num_calib_examples, logger=logger) - logger.info('Collected layer outputs from FP32 model using %d examples' % num_examples) - logger.info('Calculating optimal thresholds for quantization') + if logger: + logger.info('Collected layer outputs from FP32 model using %d examples' % num_examples) + logger.info('Calculating optimal thresholds for quantization') th_dict = _get_optimal_thresholds(hist_dict, quantized_dtype, logger=logger) elif calib_mode == 'naive': th_dict, num_examples = _collect_layer_output_min_max( mod, calib_data, quantized_dtype, include_layer=calib_layer, max_num_examples=num_calib_examples, logger=logger) - logger.info('Collected layer output min/max values from FP32 model using %d examples' - % num_examples) + if logger: + logger.info('Collected layer output min/max values from FP32 model using %d examples' + % num_examples) else: raise ValueError('unknown calibration mode %s received,' ' expected `none`, `naive`, or `entropy`' % calib_mode) qsym = _calibrate_quantized_sym(qsym, th_dict) - logger.info('Quantizing parameters') + if logger: + logger.info('Quantizing parameters') qarg_params = _quantize_params(qsym, arg_params, th_dict) return qsym, qarg_params, aux_params @@ -584,7 +591,7 @@ def quantize_model_mkldnn(sym, arg_params, aux_params, data_names=('data',), label_names=('softmax_label',), ctx=cpu(), excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy', calib_data=None, num_calib_examples=None, - quantized_dtype='int8', quantize_mode='smart', logger=logging): + quantized_dtype='int8', quantize_mode='smart', logger=None): """User-level API for generating a fusion + quantized model from a FP32 model w/ or w/o calibration with Intel MKL-DNN. The backend quantized operators are only enabled for Linux systems. Please do not run @@ -621,7 +628,7 @@ def quantize_model_mkldnn(sym, arg_params, aux_params, def quantize_graph(sym, arg_params, aux_params, ctx=cpu(), excluded_sym_names=None, excluded_op_names=None, calib_mode='entropy', - quantized_dtype='int8', quantize_mode='full', logger=logging): + quantized_dtype='int8', quantize_mode='full', logger=None): """User-level API for generating a quantized model from a FP32 model w/o calibration and a collector for naive or entropy calibration. The backend quantized operators are only enabled for Linux systems. 
Please do not run @@ -676,7 +683,9 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(), ' while received type %s' % str(type(excluded_sym_names))) if not isinstance(ctx, Context): raise ValueError('currently only supports single ctx, while received %s' % str(ctx)) - logger.info('Quantizing graph') + if logger: + os.environ['MXNET_QUANTIZATION_VERBOSE'] = '1' + logger.info('Quantizing graph') if quantized_dtype not in ('int8', 'uint8', 'auto'): raise ValueError('unknown quantized_dtype %s received,' ' expected `int8`, `uint8` or `auto`' % quantized_dtype) @@ -693,20 +702,24 @@ def quantize_graph(sym, arg_params, aux_params, ctx=cpu(), if calib_mode == 'entropy': collector = _LayerHistogramCollector( include_layer=calib_layer, logger=logger) - logger.info( - 'Create a layer output collector for entropy calibration.') + if logger: + logger.info( + 'Create a layer output collector for entropy calibration.') elif calib_mode == 'naive': collector = _LayerOutputMinMaxCollector(quantized_dtype=quantized_dtype, include_layer=calib_layer, logger=logger) - logger.info( - 'Create a layer output minmax collector for naive calibration') + if logger: + logger.info( + 'Create a layer output minmax collector for naive calibration') else: raise ValueError('unknown calibration mode %s received,' ' expected `none`, `naive`, or `entropy`' % calib_mode) - logger.info('Collector created, please use set_monitor_callback' - ' to collect calibration information.') + if logger: + logger.info('Collector created, please use set_monitor_callback' + ' to collect calibration information.') - logger.info('Quantizing parameters') + if logger: + logger.info('Quantizing parameters') qarg_params = _quantize_params(qsym, arg_params, th_dict) return qsym, qarg_params, aux_params, collector @@ -751,7 +764,8 @@ def calib_graph(qsym, arg_params, aux_params, collector, th_dict = {} if calib_mode is not None and calib_mode != 'none': if calib_mode == 'entropy': - logger.info('Calculating optimal thresholds for quantization') + if logger: + logger.info('Calculating optimal thresholds for quantization') th_dict = _get_optimal_thresholds( collector.hist_dict, quantized_dtype, logger=logger) elif calib_mode == 'naive': @@ -763,7 +777,8 @@ def calib_graph(qsym, arg_params, aux_params, collector, else: raise ValueError('please set calibration mode to naive or entropy.') - logger.info('Quantizing parameters') + if logger: + logger.info('Quantizing parameters') qarg_params = _quantize_params(qsym, arg_params, th_dict) return qsym, qarg_params, aux_params @@ -771,7 +786,7 @@ def calib_graph(qsym, arg_params, aux_params, collector, def quantize_net(network, quantized_dtype='auto', quantize_mode='full', exclude_layers=None, exclude_layers_match=None, exclude_operators=None, calib_data=None, data_shapes=None, calib_mode='none', - num_calib_examples=None, ctx=cpu(), logger=logging): + num_calib_examples=None, ctx=cpu(), logger=None): """User-level API for Gluon users to generate a quantized SymbolBlock from a FP32 HybridBlock w/ or w/o calibration. The backend quantized operators are only enabled for Linux systems. Please do not run inference using the quantized models on Windows for now. 
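[Editor's note] When a logger is supplied, quantize_model/quantize_graph now also set MXNET_QUANTIZATION_VERBOSE=1, which is what gates the per-node "is quantized" / "is NOT quantized" output of the C++ pass shown in the quantize_graph_pass.cc hunks below. A hedged sketch of enabling that output by hand, without passing a logger:

>>> import os
>>> os.environ['MXNET_QUANTIZATION_VERBOSE'] = '1'  # must be set before the quantization pass runs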
@@ -825,7 +840,8 @@ def quantize_net(network, quantized_dtype='auto', quantize_mode='full', ------- """ - logger.info('Export HybridBlock') + if logger: + logger.info('Export HybridBlock') network.hybridize() import mxnet as mx if calib_data is not None: @@ -881,7 +897,8 @@ def __exit__(self, exc_type, exc_value, traceback): for layers in list(symnet.get_internals()): if layers.name.find(name_match) != -1: exclude_layers.append(layers.name) - logger.info('These layers have been excluded %s' % exclude_layers) + if logger: + logger.info('These layers have been excluded %s' % exclude_layers) if ctx == mx.cpu(): symnet = symnet.get_backend_symbol('MKLDNN_QUANTIZE') @@ -906,8 +923,9 @@ def __exit__(self, exc_type, exc_value, traceback): mod.set_params(args, auxs, allow_missing=False, force_init=True) num_examples = _collect_layer_statistics(mod, calib_data, collector, num_calib_examples, logger) - logger.info('Collected layer output values from FP32 model using %d examples' - % num_examples) + if logger: + logger.info('Collected layer output values from FP32 model using %d examples' + % num_examples) qsym, qarg_params, aux_params = calib_graph( qsym=qsym, arg_params=args, aux_params=auxs, collector=collector, calib_mode=calib_mode, quantized_dtype=quantized_dtype, logger=logger) diff --git a/src/operator/quantization/quantize_graph_pass.cc b/src/operator/quantization/quantize_graph_pass.cc index 182f6339308a..01365067ce93 100644 --- a/src/operator/quantization/quantize_graph_pass.cc +++ b/src/operator/quantization/quantize_graph_pass.cc @@ -275,12 +275,15 @@ Graph QuantizeGraph(Graph &&src) { std::unordered_map mirror_map; std::unordered_map reverse_mirror_map; nnvm::NodeEntryMap mirror_entry_map; + static int verbose = dmlc::GetEnv("MXNET_QUANTIZATION_VERBOSE", 0); DFSVisit(src.outputs, [&](const NodePtr& node) { NodePtr new_node = Node::Create(); // If the currently visited node needs quantization, insert a quantize op node before the // current node and replace the current node with the quantized version in the new graph. if (quantized_node_map.count(node)) { - std::cout << node->attrs.name << " is quantized." << std::endl; + if (verbose) { + LOG(INFO) << node->attrs.name << " is quantized."; + } new_node = quantized_node_map[node]; // add data into quantized op input @@ -395,7 +398,8 @@ Graph QuantizeGraph(Graph &&src) { // (e.g., a quantized_conv2d node), and insert a dequantize op node in the new graph if there // are any. Otherwise, simply add a copy of the current node's entry to the inputs of // the new_node. - if (!node->is_variable()) std::cout << node->attrs.name << " is NOT quantized." 
<< std::endl; + if (verbose && !node->is_variable()) + LOG(INFO) << node->attrs.name << " is NOT quantized."; *new_node = *node; new_node->inputs.clear(); for (const auto& e : node->inputs) { @@ -516,15 +520,20 @@ static inline void SetCalibTableForEntry( out_data_name = out_data_name.substr(prefix.size()); } const auto calib_table_iter = calib_table.find(out_data_name); + static int verbose = dmlc::GetEnv("MXNET_QUANTIZATION_VERBOSE", 0); if (calib_table_iter != calib_table.end()) { - std::cout << "Set calibration result to " << node->attrs.name - << " : min=" << calib_table_iter->second.first - << " max=" << calib_table_iter->second.second << std::endl; + if (verbose) { + LOG(INFO) << "Set calibration result to " << node->attrs.name + << " : min=" << calib_table_iter->second.first + << " max=" << calib_table_iter->second.second; + } node->attrs.dict["min_calib_range"] = std::to_string(calib_table_iter->second.first); node->attrs.dict["max_calib_range"] = std::to_string(calib_table_iter->second.second); if (node->op() && node->op()->attr_parser) node->op()->attr_parser(&(node->attrs)); } else { - std::cout << "Can't find calibration result for " << node->attrs.name << std::endl; + if (verbose) { + LOG(INFO) << "Can't find calibration result for " << node->attrs.name; + } } } @@ -535,7 +544,10 @@ Graph SetCalibTableToQuantizedGraph(Graph&& g) { Op::GetAttr("FNeedCalibrateInput"); static const auto& need_calib_output_map = Op::GetAttr("FNeedCalibrateOutput"); - std::cout << "Set calibration result to quantized symbol." << std::endl; + static int verbose = dmlc::GetEnv("MXNET_QUANTIZATION_VERBOSE", 0); + if (verbose) { + LOG(INFO) << "Set calibration result to quantized symbol."; + } DFSVisit(g.outputs, [&](const NodePtr& node) { if (need_calib_input_map.count(node->op())) { const auto calib_idx = need_calib_input_map[node->op()](node->attrs); From a7f33eb1e1a0e1b1959c5184363844c2b346536f Mon Sep 17 00:00:00 2001 From: Minghao Liu <40382964+Tommliu@users.noreply.github.com> Date: Thu, 19 Dec 2019 14:15:37 +0800 Subject: [PATCH 58/62] numpy bincount (#16965) --- python/mxnet/ndarray/numpy/_op.py | 59 ++++++- python/mxnet/numpy/multiarray.py | 53 +++++- python/mxnet/numpy_dispatch_protocol.py | 1 + python/mxnet/symbol/numpy/_symbol.py | 36 +++- src/operator/numpy/np_bincount_op-inl.h | 147 ++++++++++++++++ src/operator/numpy/np_bincount_op.cc | 133 +++++++++++++++ src/operator/numpy/np_bincount_op.cu | 160 ++++++++++++++++++ .../unittest/test_numpy_interoperability.py | 18 ++ tests/python/unittest/test_numpy_op.py | 50 ++++++ 9 files changed, 654 insertions(+), 3 deletions(-) create mode 100644 src/operator/numpy/np_bincount_op-inl.h create mode 100644 src/operator/numpy/np_bincount_op.cc create mode 100644 src/operator/numpy/np_bincount_op.cu diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index c111a95a707a..8d56c1f651a3 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -40,7 +40,7 @@ 'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'nonzero', 'shares_memory', - 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where'] + 'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount'] @set_module('mxnet.ndarray.numpy') @@ -5929,3 +5929,60 @@ def where(condition, x=None, 
y=None):
         return nonzero(condition)
     else:
         return _npi.where(condition, x, y, out=None)
+
+
+@set_module('mxnet.ndarray.numpy')
+def bincount(x, weights=None, minlength=0):
+    """
+    Count number of occurrences of each value in array of non-negative ints.
+
+    Parameters
+    ----------
+    x : ndarray
+        input array, 1 dimension, nonnegative ints.
+    weights: ndarray
+        input weights, same shape as x. (Optional)
+    minlength: int
+        A minimum number of bins for the output. (Optional)
+
+    Returns
+    --------
+    out : ndarray
+        the result of binning the input array. The length of out is equal to amax(x)+1.
+
+    Raises
+    --------
+    ValueError
+        If the input is not 1-dimensional, or contains elements with negative values,
+        or if minlength is negative.
+    TypeError
+        If the type of the input is float or complex.
+
+    Examples
+    --------
+    >>> np.bincount(np.arange(5))
+    array([1, 1, 1, 1, 1])
+    >>> np.bincount(np.array([0, 1, 1, 3, 2, 1, 7]))
+    array([1, 3, 1, 1, 0, 0, 0, 1])
+
+    >>> x = np.array([0, 1, 1, 3, 2, 1, 7, 23])
+    >>> np.bincount(x).size == np.amax(x)+1
+    True
+
+    >>> np.bincount(np.arange(5, dtype=float))
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: array cannot be safely cast to required type
+
+    >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights
+    >>> x = np.array([0, 1, 1, 2, 2, 2])
+    >>> np.bincount(x, weights=w)
+    array([ 0.3, 0.7, 1.1])
+    """
+    if not isinstance(x, NDArray):
+        raise TypeError("Input data should be an NDArray")
+    if minlength < 0:
+        raise ValueError("minlength must be non-negative")
+    if weights is None:
+        return _npi.bincount(x, minlength=minlength, has_weights=False)
+    return _npi.bincount(x, weights=weights, minlength=minlength, has_weights=True)
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 5795c62942df..c3c6f4db8ba0 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -59,7 +59,7 @@
     'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', 'take',
     'ldexp', 'vdot', 'inner', 'outer', 'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal',
     'hsplit', 'rot90', 'einsum', 'true_divide', 'nonzero', 'shares_memory',
-    'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where']
+    'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount']
 
 # Return code for dispatching indexing function call
 _NDARRAY_UNSUPPORTED_INDEXING = -1
@@ -7892,3 +7892,54 @@ def where(condition, x=None, y=None):
            [ 0., 3., -1.]])
     """
     return _mx_nd_np.where(condition, x, y)
+
+
+@set_module('mxnet.numpy')
+def bincount(x, weights=None, minlength=0):
+    """
+    Count number of occurrences of each value in array of non-negative ints.
+
+    Parameters
+    ----------
+    x : ndarray
+        input array, 1 dimension, nonnegative ints.
+    weights: ndarray
+        input weights, same shape as x. (Optional)
+    minlength: int
+        A minimum number of bins for the output. (Optional)
+
+    Returns
+    --------
+    out : ndarray
+        the result of binning the input array. The length of out is equal to amax(x)+1.
+
+    Raises
+    --------
+    ValueError
+        If the input is not 1-dimensional, or contains elements with negative values,
+        or if minlength is negative.
+    TypeError
+        If the type of the input is float or complex.
+
+    Examples
+    --------
+    >>> np.bincount(np.arange(5))
+    array([1, 1, 1, 1, 1])
+    >>> np.bincount(np.array([0, 1, 1, 3, 2, 1, 7]))
+    array([1, 3, 1, 1, 0, 0, 0, 1])
+
+    >>> x = np.array([0, 1, 1, 3, 2, 1, 7, 23])
+    >>> np.bincount(x).size == np.amax(x)+1
+    True
+
+    >>> np.bincount(np.arange(5, dtype=float))
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: array cannot be safely cast to required type
+
+    >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights
+    >>> x = np.array([0, 1, 1, 2, 2, 2])
+    >>> np.bincount(x, weights=w)
+    array([ 0.3, 0.7, 1.1])
+    """
+    return _mx_nd_np.bincount(x, weights=weights, minlength=minlength)
diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py
index e93720564774..bd5c388a5100 100644
--- a/python/mxnet/numpy_dispatch_protocol.py
+++ b/python/mxnet/numpy_dispatch_protocol.py
@@ -147,6 +147,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs):
     'resize',
     'where',
     'full_like',
+    'bincount'
 ]
 
 
diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index c61d5b2d393d..0fb0d538082d 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -48,7 +48,7 @@
     'blackman', 'flip', 'around', 'hypot', 'bitwise_xor', 'bitwise_or', 'rad2deg', 'deg2rad',
     'unique', 'lcm', 'tril', 'identity', 'take', 'ldexp', 'vdot', 'inner', 'outer', 'equal',
     'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'hsplit', 'rot90', 'einsum', 'true_divide', 'shares_memory',
-    'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where']
+    'may_share_memory', 'diff', 'resize', 'nan_to_num', 'where', 'bincount']
 
 
 @set_module('mxnet.symbol.numpy')
@@ -5468,4 +5468,38 @@ def load_json(json_str):
     return _Symbol(handle)
 
 
+@set_module('mxnet.symbol.numpy')
+def bincount(x, weights=None, minlength=0):
+    """
+    Count number of occurrences of each value in array of non-negative ints.
+
+    Parameters
+    ----------
+    x : _Symbol
+        input data
+    weights: _Symbol
+        input weights, same shape as x. (Optional)
+    minlength: int
+        A minimum number of bins for the output. (Optional)
+
+    Returns
+    --------
+    out : _Symbol
+        the result of binning the input data. The length of out is equal to amax(x)+1.
+
+    Raises
+    --------
+    ValueError
+        If the input is not 1-dimensional, or contains elements with negative values,
+        or if minlength is negative.
+    TypeError
+        If the type of the input is float or complex.
+    """
+    if minlength < 0:
+        raise ValueError("minlength must be non-negative")
+    if weights is None:
+        return _npi.bincount(x, minlength=minlength, has_weights=False)
+    return _npi.bincount(x, weights=weights, minlength=minlength, has_weights=True)
+
+
 _set_np_symbol_class(_Symbol)
diff --git a/src/operator/numpy/np_bincount_op-inl.h b/src/operator/numpy/np_bincount_op-inl.h
new file mode 100644
index 000000000000..254ea8fdec22
--- /dev/null
+++ b/src/operator/numpy/np_bincount_op-inl.h
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_bicount_op-inl.h + * \brief numpy compatible bincount operator + */ +#ifndef MXNET_OPERATOR_NUMPY_NP_BINCOUNT_OP_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_BINCOUNT_OP_INL_H_ + +#include +#include +#include +#include "../mshadow_op.h" +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" +#include "np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +struct NumpyBincountParam : public dmlc::Parameter { + int minlength; + bool has_weights; + DMLC_DECLARE_PARAMETER(NumpyBincountParam) { + DMLC_DECLARE_FIELD(minlength) + .set_default(0) + .describe("A minimum number of bins for the output array" + "If minlength is specified, there will be at least this" + "number of bins in the output array"); + DMLC_DECLARE_FIELD(has_weights) + .set_default(false) + .describe("Determine whether Bincount has weights."); + } +}; + +inline bool NumpyBincountType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const NumpyBincountParam& param = nnvm::get(attrs.parsed); + if (!param.has_weights) { + return ElemwiseType<1, 1>(attrs, in_attrs, out_attrs) && in_attrs->at(0) != -1; + } else { + CHECK_EQ(out_attrs->size(), 1U); + CHECK_EQ(in_attrs->size(), 2U); + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(1)); + TYPE_ASSIGN_CHECK(*in_attrs, 1, out_attrs->at(0)); + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; + } +} + +inline bool NumpyBincountStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + const NumpyBincountParam& param = nnvm::get(attrs.parsed); + if (param.has_weights) { + CHECK_EQ(in_attrs->size(), 2U); + } else { + CHECK_EQ(in_attrs->size(), 1U); + } + CHECK_EQ(out_attrs->size(), 1U); + for (int &attr : *in_attrs) { + CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported"; + } + for (int &attr : *out_attrs) { + attr = kDefaultStorage; + } + *dispatch_mode = DispatchMode::kFComputeEx; + return true; +} + +template +void NumpyBincountForwardImpl(const OpContext &ctx, + const NDArray &data, + const NDArray &weights, + const NDArray &out, + const size_t &data_n, + const int &minlength); + +template +void NumpyBincountForwardImpl(const OpContext &ctx, + const NDArray &data, + const NDArray &out, + const size_t &data_n, + const int &minlength); + +template +void NumpyBincountForward(const nnvm::NodeAttrs& attrs, + const OpContext &ctx, + const std::vector &inputs, + const std::vector &req, + const std::vector &outputs) { + CHECK_GE(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK(req[0] == kWriteTo); + const NumpyBincountParam& param = nnvm::get(attrs.parsed); + const bool has_weights = param.has_weights; + const int minlength = param.minlength; + const NDArray &data = inputs[0]; + const NDArray &out = outputs[0]; + CHECK_LE(data.shape().ndim(), 1U) << "Input only accept 1d array"; + CHECK(!common::is_float(data.dtype())) <<"Input data should be int type"; + size_t N = data.shape().Size(); + if (N == 0) { + 
mshadow::Stream *stream = ctx.get_stream(); + mxnet::TShape s(1, minlength); + const_cast(out).Init(s); + MSHADOW_TYPE_SWITCH(out.dtype(), OType, { + mxnet_op::Kernel::Launch( + stream, minlength, out.data().dptr()); + }); + } else { + if (has_weights) { + CHECK_EQ(inputs.size(), 2U); + const NDArray &weights = inputs[1]; + CHECK_EQ(data.shape(), weights.shape()) << "weights should has same size as input"; + NumpyBincountForwardImpl(ctx, data, weights, out, N, minlength); + } else { + NumpyBincountForwardImpl(ctx, data, out, N, minlength); + } + } +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_BINCOUNT_OP_INL_H_ diff --git a/src/operator/numpy/np_bincount_op.cc b/src/operator/numpy/np_bincount_op.cc new file mode 100644 index 000000000000..6256db176977 --- /dev/null +++ b/src/operator/numpy/np_bincount_op.cc @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_bicount_op.cc + * \brief numpy compatible bincount operator CPU registration + */ + +#include "./np_bincount_op-inl.h" + +namespace mxnet { +namespace op { + +void BinNumberCount(const NDArray& data, const int& minlength, + const NDArray& out, const size_t& N) { + int bin = minlength; + MSHADOW_TYPE_SWITCH(data.dtype(), DType, { + DType* data_ptr = data.data().dptr(); + for (size_t i = 0; i < N; i++) { + CHECK_GE(data_ptr[i], 0) << "input should be nonnegative number"; + if (data_ptr[i] + 1 > bin) { + bin = data_ptr[i] + 1; + } + } + }); // bin number = max(max(data) + 1, minlength) + mxnet::TShape s(1, bin); + const_cast(out).Init(s); // set the output shape forcefully +} + +template +void BincountCpuWeights(const DType* data, const OType* weights, + OType* out, const size_t& data_n) { + for (size_t i = 0; i < data_n; i++) { + int target = data[i]; + out[target] += weights[i]; + } +} + +template +void BincountCpu(const DType* data, OType* out, const size_t& data_n) { + for (size_t i = 0; i < data_n; i++) { + int target = data[i]; + out[target] += 1; + } +} + +template<> +void NumpyBincountForwardImpl(const OpContext &ctx, + const NDArray &data, + const NDArray &weights, + const NDArray &out, + const size_t &data_n, + const int &minlength) { + using namespace mxnet_op; + BinNumberCount(data, minlength, out, data_n); + mshadow::Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(data.dtype(), DType, { + MSHADOW_TYPE_SWITCH(weights.dtype(), OType, { + size_t out_size = out.shape()[0]; + Kernel::Launch(s, out_size, out.data().dptr()); + BincountCpuWeights(data.data().dptr(), weights.data().dptr(), + out.data().dptr(), data_n); + }); + }); +} + +template<> +void NumpyBincountForwardImpl(const OpContext &ctx, + const NDArray &data, + const NDArray &out, + const size_t 
&data_n, + const int &minlength) { + using namespace mxnet_op; + BinNumberCount(data, minlength, out, data_n); + mshadow::Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(data.dtype(), DType, { + MSHADOW_TYPE_SWITCH(out.dtype(), OType, { + size_t out_size = out.shape()[0]; + Kernel::Launch(s, out_size, out.data().dptr()); + BincountCpu(data.data().dptr(), out.data().dptr(), data_n); + }); + }); +} + +DMLC_REGISTER_PARAMETER(NumpyBincountParam); + +NNVM_REGISTER_OP(_npi_bincount) +.set_attr_parser(ParamParser) +.set_num_inputs([](const NodeAttrs& attrs) { + const NumpyBincountParam& params = + nnvm::get(attrs.parsed); + return params.has_weights? 2 : 1; + }) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + const NumpyBincountParam& params = + nnvm::get(attrs.parsed); + return params.has_weights ? + std::vector{"data", "weights"} : + std::vector{"data"}; + }) +.set_attr("FResourceRequest", +[](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; +}) +.set_attr("FInferType", NumpyBincountType) +.set_attr("FInferStorageType", NumpyBincountStorageType) +.set_attr("FComputeEx", NumpyBincountForward) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("data", "NDArray-or-Symbol", "Data") +.add_argument("weights", "NDArray-or-Symbol", "Weights") +.add_arguments(NumpyBincountParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_bincount_op.cu b/src/operator/numpy/np_bincount_op.cu new file mode 100644 index 000000000000..ed1f90f00c16 --- /dev/null +++ b/src/operator/numpy/np_bincount_op.cu @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_bicount_op.cu + * \brief numpy compatible bincount operator GPU registration + */ + +#include "./np_bincount_op-inl.h" +#include +#include +#include "../tensor/util/tensor_util-inl.cuh" +#include "../tensor/util/tensor_util-inl.h" + +namespace mxnet { +namespace op { + +struct BincountFusedKernel { + template + static MSHADOW_XINLINE void Map(int i, const DType* data, OType* out) { + int idx = data[i]; + atomicAdd(&out[idx], 1); + } + + template + static MSHADOW_XINLINE void Map(int i, const DType* data, const OType* weights, + OType* out) { + int idx = data[i]; + atomicAdd(&out[idx], weights[i]); + } +}; + +struct is_valid_check { + template + MSHADOW_XINLINE static void Map(int i, char* invalid_ptr, const DType* data) { + if (data[i] < 0) *invalid_ptr = 1; + } +}; + +template +bool CheckInvalidInput(mshadow::Stream *s, const DType *data, const size_t& data_size, + char* is_valid_ptr) { + using namespace mxnet_op; + int32_t is_valid = 0; + Kernel::Launch(s, 1, is_valid_ptr); + Kernel::Launch(s, data_size, is_valid_ptr, data); + CUDA_CALL(cudaMemcpyAsync(&is_valid, is_valid_ptr, sizeof(char), + cudaMemcpyDeviceToHost, mshadow::Stream::GetStream(s))); + CUDA_CALL(cudaStreamSynchronize(mshadow::Stream::GetStream(s))); + return is_valid == 0; +} + +template<> +void NumpyBincountForwardImpl(const OpContext &ctx, + const NDArray &data, + const NDArray &weights, + const NDArray &out, + const size_t &data_n, + const int &minlength) { + using namespace mxnet_op; + mshadow::Stream *s = ctx.get_stream(); + + MXNET_NO_FLOAT16_TYPE_SWITCH(data.dtype(), DType, { + DType* h_ptr; + DType* d_ptr; + int bin = minlength; + d_ptr = data.data().dptr(); + Tensor workspace = ctx.requested[0] + .get_space_typed(Shape1(1), s); + char* is_valid_ptr = reinterpret_cast(workspace.dptr_); + bool is_valid = CheckInvalidInput(s, d_ptr, data_n, is_valid_ptr); + CHECK(is_valid) << "Input should be nonnegative number"; // check invalid input + + h_ptr = reinterpret_cast(malloc(data_n*sizeof(DType))); + CUDA_CALL(cudaMemcpyAsync(h_ptr, d_ptr, data_n*sizeof(DType), cudaMemcpyDeviceToHost, + mshadow::Stream::GetStream(s))); + CUDA_CALL(cudaStreamSynchronize(mshadow::Stream::GetStream(s))); + for (size_t i = 0; i < data_n; i++) { + if (h_ptr[i] + 1 > bin) bin = h_ptr[i] + 1; + } + free(h_ptr); + mxnet::TShape s(1, bin); + const_cast(out).Init(s); // set the output shape forcefully + }); + + MSHADOW_TYPE_SWITCH(data.dtype(), DType, { + MSHADOW_TYPE_SWITCH(weights.dtype(), OType, { + size_t out_size = out.shape().Size(); + Kernel::Launch(s, out_size, out.data().dptr()); + Kernel::Launch( + s, data_n, data.data().dptr(), weights.data().dptr(), + out.data().dptr()); + }); + }); +} + +template<> +void NumpyBincountForwardImpl(const OpContext &ctx, + const NDArray &data, + const NDArray &out, + const size_t &data_n, + const int &minlength) { + using namespace mxnet_op; + mshadow::Stream *s = ctx.get_stream(); + + MXNET_NO_FLOAT16_TYPE_SWITCH(data.dtype(), DType, { + DType* h_ptr; + DType* d_ptr; + int bin = minlength; + d_ptr = data.data().dptr(); + Tensor workspace = ctx.requested[0] + .get_space_typed(Shape1(1), s); + char* is_valid_ptr = reinterpret_cast(workspace.dptr_); + bool is_valid = CheckInvalidInput(s, d_ptr, data_n, is_valid_ptr); + CHECK(is_valid) << "Input should be nonnegative number"; // check invalid input + + h_ptr = reinterpret_cast(malloc(data_n*sizeof(DType))); + CUDA_CALL(cudaMemcpyAsync(h_ptr, d_ptr, data_n*sizeof(DType), cudaMemcpyDeviceToHost, + 
mshadow::Stream::GetStream(s))); + CUDA_CALL(cudaStreamSynchronize(mshadow::Stream::GetStream(s))); + for (size_t i = 0; i < data_n; i++) { + if (h_ptr[i] + 1 > bin) bin = h_ptr[i] + 1; + } + free(h_ptr); + mxnet::TShape s(1, bin); + const_cast(out).Init(s); // set the output shape forcefully + }); + + MSHADOW_TYPE_SWITCH(data.dtype(), DType, { + MSHADOW_TYPE_SWITCH(out.dtype(), OType, { + size_t out_size = out.shape().Size(); + Kernel::Launch(s, out_size, out.data().dptr()); + Kernel::Launch( + s, data_n, data.data().dptr(), out.data().dptr()); + }); + }); +} + +NNVM_REGISTER_OP(_npi_bincount) +.set_attr("FComputeEx", NumpyBincountForward); + +} // namespace op +} // namespace mxnet diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index a670f794860f..5b5af8b20e36 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -67,6 +67,23 @@ def _add_workload_unravel_index(): OpArgMngr.add_workload('unravel_index', np.array([],dtype=_np.int64), (10, 3, 5)) OpArgMngr.add_workload('unravel_index', np.array([3], dtype=_np.int32), (2,2)) +def _add_workload_bincount(): + y = np.arange(4).astype(int) + y1 = np.array([1, 5, 2, 4, 1], dtype=_np.int64) + y2 = np.array((), dtype=_np.int8) + w = np.array([0.2, 0.3, 0.5, 0.1]) + w1 = np.array([0.2, 0.3, 0.5, 0.1, 0.2]) + + OpArgMngr.add_workload('bincount', y) + OpArgMngr.add_workload('bincount', y1) + OpArgMngr.add_workload('bincount', y, w) + OpArgMngr.add_workload('bincount', y1, w1) + OpArgMngr.add_workload('bincount', y1, w1, 8) + OpArgMngr.add_workload('bincount', y, minlength=3) + OpArgMngr.add_workload('bincount', y, minlength=8) + OpArgMngr.add_workload('bincount', y2, minlength=0) + OpArgMngr.add_workload('bincount', y2, minlength=5) + def _add_workload_diag(): def get_mat(n): @@ -1409,6 +1426,7 @@ def _prepare_workloads(): _add_workload_around() _add_workload_argsort() _add_workload_append() + _add_workload_bincount() _add_workload_broadcast_arrays(array_pool) _add_workload_broadcast_to() _add_workload_clip() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 545466bf0814..b39703b8ebda 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -5670,6 +5670,56 @@ def hybrid_forward(self, F, a): assert_almost_equal(elem_mx.asnumpy(), elem_np, rtol=rtol, atol=atol) +@with_seed() +@use_np +def test_np_bincount(): + class TestBincount(HybridBlock): + def __init__(self, minlength=0): + super(TestBincount, self).__init__() + self._minlength = minlength + + def hybrid_forward(self, F, a): + return F.np.bincount(a, None, self._minlength) + + class TestBincountWeights(HybridBlock): + def __init__(self, minlength=0): + super(TestBincountWeights, self).__init__() + self._minlength = minlength + + def hybrid_forward(self, F, a, weights): + return F.np.bincount(a, weights, self._minlength) + + dtypes = [np.int8, np.uint8, np.int32, np.int64] + weight_types = [np.int32, np.int64, np.float16, np.float32, np.float64] + shapes = [(), (5,), (10,), (15,), (20,), (30,), (50,)] + min_lengths = [0, 5, 20, 50] + has_weights = [True, False] + combinations = itertools.product([True, False], shapes, dtypes, weight_types, has_weights, min_lengths) + for hybridize, shape, dtype, weight_type, has_weight, minlength in combinations: + rtol = 1e-2 if weight_type == np.float16 else 1e-3 + atol = 1e-4 if weight_type == np.float16 else 1e-5 + if shape 
!= (): + data = np.random.uniform(0, 10, size=shape).astype(dtype) + weights = np.random.uniform(0, 10, size=shape).astype(weight_type) if has_weight else None + else: + data = np.array(()).astype(dtype) + weights = np.array(()).astype(weight_type) if has_weight else None + weights_np = weights.asnumpy() if has_weight else None + test_bincount = TestBincountWeights(minlength) if has_weight else TestBincount(minlength) + if hybridize: + test_bincount.hybridize() + mx_out = test_bincount(data, weights) if has_weight else test_bincount(data) + np_out = _np.bincount(data.asnumpy(), weights_np, minlength) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + # No backward operation for operator bincount at this moment + + # Test imperative once again + mx_out = np.bincount(data, weights, minlength) + np_out = _np.bincount(data.asnumpy(), weights_np, minlength) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + + if __name__ == '__main__': import nose nose.runmodule() From 521c477ad32864d887481abf6c53acae3b717cf6 Mon Sep 17 00:00:00 2001 From: Yiyan66 <57363390+Yiyan66@users.noreply.github.com> Date: Thu, 19 Dec 2019 14:42:09 +0800 Subject: [PATCH 59/62] [numpy] add op bitwise_not (#16947) * all * test op not change * all * delete describe * have output * add test * only * solve conflict --- python/mxnet/ndarray/numpy/_op.py | 110 +++++++++++++++++- python/mxnet/numpy/multiarray.py | 108 ++++++++++++++++- python/mxnet/numpy_dispatch_protocol.py | 2 + python/mxnet/symbol/numpy/_symbol.py | 94 ++++++++++++++- src/operator/mshadow_op.h | 12 ++ .../numpy/np_elemwise_unary_op_basic.cc | 14 +++ .../numpy/np_elemwise_unary_op_basic.cu | 3 + src/operator/operator_tune.cc | 1 + src/operator/tensor/elemwise_unary_op.h | 17 +++ .../unittest/test_numpy_interoperability.py | 20 ++++ tests/python/unittest/test_numpy_op.py | 66 ++++++++++- 11 files changed, 437 insertions(+), 10 deletions(-) diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 8d56c1f651a3..02e42145fb18 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -28,8 +28,8 @@ from . import _internal as _npi from ..ndarray import NDArray -__all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', - 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', +__all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', 'invert', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'bitwise_not', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'histogram', @@ -2690,6 +2690,112 @@ def floor(x, out=None, **kwargs): return _unary_func_helper(x, _npi.floor, _np.floor, out=out, **kwargs) +@set_module('mxnet.ndarray.numpy') +@wrap_np_unary_func +def bitwise_not(x, out=None, **kwargs): + r""" + Compute bit-wise inversion, or bit-wise NOT, element-wise. + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. 
If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + + Returns + ------- + out : ndarray or scalar + Result. + This is a scalar if `x` is a scalar. + + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + We've seen that 13 is represented by ``00001101``. + The invert or bit-wise NOT of 13 is then: + + >>> x = np.invert(np.array(13, dtype=np.uint8)) + >>> x + 242 + >>> np.binary_repr(x, width=8) + '11110010' + + Notes + ----- + `bitwise_not` is an alias for `invert`: + + >>> np.bitwise_not is np.invert + True + """ + return _unary_func_helper(x, _npi.bitwise_not, _np.bitwise_not, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +@wrap_np_unary_func +def invert(x, out=None, **kwargs): + r""" + Compute bit-wise inversion, or bit-wise NOT, element-wise. + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + + Returns + ------- + out : ndarray or scalar + Result. + This is a scalar if `x` is a scalar. + + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + We've seen that 13 is represented by ``00001101``. 
+ The invert or bit-wise NOT of 13 is then: + + >>> x = np.invert(np.array(13, dtype=np.uint8)) + >>> x + 242 + >>> np.binary_repr(x, width=8) + '11110010' + + Notes + ----- + `bitwise_not` is an alias for `invert`: + + >>> np.bitwise_not is np.invert + True + """ + return _unary_func_helper(x, _npi.bitwise_not, _np.bitwise_not, out=out, **kwargs) + + @set_module('mxnet.ndarray.numpy') @wrap_np_unary_func def trunc(x, out=None, **kwargs): diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index c3c6f4db8ba0..4910b4d6b925 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -47,8 +47,8 @@ from ..ndarray.ndarray import _storage_type __all__ = ['ndarray', 'empty', 'array', 'shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', - 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', - 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'bitwise_not', + 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'invert', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'histogram', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'append', 'argsort', @@ -3981,6 +3981,110 @@ def floor(x, out=None, **kwargs): """ return _mx_nd_np.floor(x, out=out, **kwargs) +@set_module('mxnet.numpy') +@wrap_np_unary_func +def invert(x, out=None, **kwargs): + r""" + Compute bit-wise inversion, or bit-wise NOT, element-wise. + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + + Returns + ------- + out : ndarray or scalar + Result. + This is a scalar if `x` is a scalar. + + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + We've seen that 13 is represented by ``00001101``. + The invert or bit-wise NOT of 13 is then: + + >>> x = np.invert(np.array(13, dtype=np.uint8)) + >>> x + 242 + >>> np.binary_repr(x, width=8) + '11110010' + + Notes + ----- + `bitwise_not` is an alias for `invert`: + + >>> np.bitwise_not is np.invert + True + """ + return _mx_nd_np.bitwise_not(x, out=out, **kwargs) + +@set_module('mxnet.numpy') +@wrap_np_unary_func +def bitwise_not(x, out=None, **kwargs): + r""" + Compute bit-wise inversion, or bit-wise NOT, element-wise. + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. 
If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + + Returns + ------- + out : ndarray or scalar + Result. + This is a scalar if `x` is a scalar. + + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + + Examples + -------- + We've seen that 13 is represented by ``00001101``. + The invert or bit-wise NOT of 13 is then: + + >>> x = np.invert(np.array(13, dtype=np.uint8)) + >>> x + 242 + >>> np.binary_repr(x, width=8) + '11110010' + + Notes + ----- + `bitwise_not` is an alias for `invert`: + + >>> np.bitwise_not is np.invert + True + """ + return _mx_nd_np.bitwise_not(x, out=out, **kwargs) + @set_module('mxnet.numpy') @wrap_np_unary_func diff --git a/python/mxnet/numpy_dispatch_protocol.py b/python/mxnet/numpy_dispatch_protocol.py index bd5c388a5100..c7e9dd1398eb 100644 --- a/python/mxnet/numpy_dispatch_protocol.py +++ b/python/mxnet/numpy_dispatch_protocol.py @@ -219,6 +219,8 @@ def _register_array_function(): 'square', 'cbrt', 'reciprocal', + 'invert', + 'bitwise_not', 'remainder', 'sin', 'cos', diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 0fb0d538082d..6efc333cc16c 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -36,7 +36,7 @@ except ImportError: from builtins import slice as py_slice -__all__ = ['zeros', 'zeros_like', 'ones', 'ones_like', 'full_like', +__all__ = ['zeros', 'zeros_like', 'ones', 'ones_like', 'full_like', 'bitwise_not', 'invert', 'add', 'subtract', 'multiply', 'divide', 'mod', 'remainder', 'power', 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', @@ -1040,6 +1040,98 @@ def ones(shape, dtype=_np.float32, order='C', ctx=None): return _npi.ones(shape=shape, ctx=ctx, dtype=dtype) +@set_module('mxnet.symbol.numpy') +@wrap_np_unary_func +def invert(x, out=None, **kwargs): + r""" + Compute bit-wise inversion, or bit-wise NOT, element-wise. + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + Returns + ------- + out : ndarray or scalar + Result. + This is a scalar if `x` is a scalar. + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + Examples + -------- + We've seen that 13 is represented by ``00001101``. 
+ The invert or bit-wise NOT of 13 is then: + >>> x = np.invert(np.array(13, dtype=np.uint8)) + >>> x + 242 + >>> np.binary_repr(x, width=8) + '11110010' + Notes + ----- + `bitwise_not` is an alias for `invert`: + >>> np.bitwise_not is np.invert + True + """ + return _unary_func_helper(x, _npi.bitwise_not, _np.bitwise_not, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +@wrap_np_unary_func +def bitwise_not(x, out=None, **kwargs): + r""" + Compute bit-wise inversion, or bit-wise NOT, element-wise. + Computes the bit-wise NOT of the underlying binary representation of + the integers in the input arrays. This ufunc implements the C/Python + operator ``~``. + Parameters + ---------- + x : array_like + Only integer and boolean types are handled. + out : ndarray, None, or tuple of ndarray and None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. A tuple (possible only as a + keyword argument) must have length equal to the number of outputs. + Returns + ------- + out : ndarray or scalar + Result. + This is a scalar if `x` is a scalar. + See Also + -------- + bitwise_and, bitwise_or, bitwise_xor + logical_not + binary_repr : + Return the binary representation of the input number as a string. + Examples + -------- + We've seen that 13 is represented by ``00001101``. + The invert or bit-wise NOT of 13 is then: + >>> x = np.invert(np.array(13, dtype=np.uint8)) + >>> x + 242 + >>> np.binary_repr(x, width=8) + '11110010' + Notes + ----- + `bitwise_not` is an alias for `invert`: + >>> np.bitwise_not is np.invert + True + """ + return _unary_func_helper(x, _npi.bitwise_not, _np.bitwise_not, out=out, **kwargs) + + @set_module('mxnet.symbol.numpy') def full(shape, fill_value, dtype=None, order='C', ctx=None, out=None): # pylint: disable=too-many-arguments """ diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index cf35e8858039..e3a3c0443428 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -359,6 +359,18 @@ MXNET_UNARY_MATH_OP(negation, -a); MXNET_UNARY_MATH_OP(reciprocal, 1.0f / math::id(a)); +struct bitwise_not : public mxnet_op::tunable { + template::value, int>::type = 0> + MSHADOW_XINLINE static DType Map(DType a) { + return ~static_cast(a); + } + + MSHADOW_XINLINE static bool Map(bool a) { + return !a; + } +}; + MXNET_UNARY_MATH_OP(reciprocal_grad, -1.0f / math::sqr(a)); MXNET_UNARY_MATH_OP(sigmoid, 1.0f / (1.0f + math::exp(-a))); diff --git a/src/operator/numpy/np_elemwise_unary_op_basic.cc b/src/operator/numpy/np_elemwise_unary_op_basic.cc index cad736aab65b..5e15d7ad4e67 100644 --- a/src/operator/numpy/np_elemwise_unary_op_basic.cc +++ b/src/operator/numpy/np_elemwise_unary_op_basic.cc @@ -169,6 +169,20 @@ Example:: )code" ADD_FILELINE) .set_attr("FGradient", MakeZeroGradNodes); +// bitwise_not +NNVM_REGISTER_OP(_npi_bitwise_not) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"x"}; +}) +.set_attr("FCompute", UnaryOp::ComputeInt) +.add_argument("x", "NDArray-or-Symbol", "The input array.") +.set_attr("FGradient", MakeZeroGradNodes); + // trunc MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_trunc, "x", mshadow_op::trunc) .describe(R"code(Return the truncated value of the input, element-wise. 
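[Editor's note] A short usage sketch covering the two numpy-compatible operators added in patches 58 and 59. The inputs use explicit integer and boolean dtypes because bincount rejects float data; the expected values follow the docstrings above (bin counts, two's-complement NOT for integers, logical NOT for booleans), and the reprs are illustrative rather than captured interpreter output:

>>> from mxnet import np, npx
>>> npx.set_np()
>>> np.bincount(np.array([0, 1, 1, 3, 2, 1, 7], dtype=np.int64))
array([1, 3, 1, 1, 0, 0, 0, 1])
>>> np.bincount(np.array([0, 1, 1, 2, 2, 2], dtype=np.int64),
...             weights=np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]))
array([ 0.3, 0.7, 1.1])
>>> np.bitwise_not(np.array([0, 1, 2], dtype=np.int32))
array([-1, -2, -3], dtype=int32)
>>> np.invert(np.array([True, False], dtype=np.bool))
array([False,  True])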
diff --git a/src/operator/numpy/np_elemwise_unary_op_basic.cu b/src/operator/numpy/np_elemwise_unary_op_basic.cu index af8834f01664..517ef9c2b52a 100644 --- a/src/operator/numpy/np_elemwise_unary_op_basic.cu +++ b/src/operator/numpy/np_elemwise_unary_op_basic.cu @@ -53,6 +53,9 @@ MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_ceil, mshadow_op::ceil); MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_floor, mshadow_op::floor); +NNVM_REGISTER_OP(_npi_bitwise_not) +.set_attr("FCompute", UnaryOp::ComputeInt); + MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_trunc, mshadow_op::trunc); MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_fix, mshadow_op::fix); diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc index db898f8840f0..7ca594d21e59 100644 --- a/src/operator/operator_tune.cc +++ b/src/operator/operator_tune.cc @@ -311,6 +311,7 @@ IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::radians); // NOLINT() IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::radians_grad); // NOLINT() IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::nt); // NOLINT() IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::np_logical_not); // NOLINT() +IMPLEMENT_UNARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::bitwise_not); // NOLINT() IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::nt); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::clip); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::clip); // NOLINT() diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 05e10ffa4e16..4486b0dcd712 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -252,6 +252,23 @@ class UnaryOp : public OpBase { }); } + template + static void ComputeInt(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + mshadow::Stream *s = ctx.get_stream(); + MXNET_INT_TYPE_SWITCH(outputs[0].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + if (inputs[0].Size() != 0) { + mxnet_op::Kernel, xpu>::Launch( + s, inputs[0].Size(), outputs[0].dptr(), inputs[0].dptr()); + } + }); + }); + } + template static void ComputeLogic(const nnvm::NodeAttrs& attrs, const OpContext& ctx, diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 5b5af8b20e36..fcdf547bfbec 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -1242,6 +1242,24 @@ def _add_workload_logical_not(array_pool): OpArgMngr.add_workload('logical_not', np.array([True, False, True, False], dtype=np.bool)) +def _add_workload_bitwise_not(): + OpArgMngr.add_workload('bitwise_not', np.array([True, False, True, False], dtype=np.bool)) + for dtype in [np.int8, np.int32, np.int64]: + zeros = np.array([0], dtype=dtype) + ones = np.array([-1], dtype=dtype) + OpArgMngr.add_workload('bitwise_not', zeros) + OpArgMngr.add_workload('bitwise_not', ones) + + +def _add_workload_invert(): + OpArgMngr.add_workload('invert', np.array([True, False, True, False], dtype=np.bool)) + for dtype in [np.int8, np.int32, np.int64]: + zeros = np.array([0], dtype=dtype) + ones = np.array([-1], dtype=dtype) + OpArgMngr.add_workload('invert', zeros) + OpArgMngr.add_workload('invert', ones) + + def _add_workload_vdot(): OpArgMngr.add_workload('vdot', np.random.normal(size=(2, 4)), np.random.normal(size=(4, 2))) 
OpArgMngr.add_workload('vdot', np.random.normal(size=(2, 4)).astype(np.float64), np.random.normal(size=(2, 4)).astype(np.float64)) @@ -1526,6 +1544,8 @@ def _prepare_workloads(): _add_workload_turnc(array_pool) _add_workload_floor(array_pool) _add_workload_logical_not(array_pool) + _add_workload_bitwise_not() + _add_workload_invert() _add_workload_vdot() _add_workload_vstack(array_pool) _add_workload_column_stack() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index b39703b8ebda..af9228d45991 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -1778,6 +1778,66 @@ def hybrid_forward(self, F, a, *args, **kwargs): check_unary_func(func, ref_grad, shape, low, high) +@with_seed() +@use_np +def test_np_bitwise_not(): + def check_unary_func(func, ref_grad, shape, low, high): + class TestUnary(HybridBlock): + def __init__(self, func): + super(TestUnary, self).__init__() + self._func = func + + def hybrid_forward(self, F, a, *args, **kwargs): + return getattr(F.np, self._func)(a) + + np_func = getattr(_np, func) + mx_func = TestUnary(func) + np_test_data = _np.random.uniform(low, high, shape).astype(_np.int32) + mx_test_data = mx.numpy.array(np_test_data) + for hybridize in [True, False]: + if hybridize: + mx_func.hybridize() + if ref_grad: + mx_test_data.attach_grad() + np_out = np_func(np_test_data) + with mx.autograd.record(): + y = mx_func(mx_test_data) + assert y.shape == np_out.shape + assert_almost_equal(y.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + if np_out.dtype == np.bool_: + assert y.dtype == np.bool_ + + if ref_grad: + y.backward() + assert_almost_equal(mx_test_data.grad.asnumpy(), ref_grad(np_test_data), rtol=1e-1, atol=1e-2, equal_nan=True) + + np_out = getattr(_np, func)(np_test_data) + mx_out = getattr(mx.np, func)(mx_test_data) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + + assertRaises(NotImplementedError, getattr(np, func), mx_test_data, where=False) + assertRaises(NotImplementedError, getattr(np, func), mx_test_data, subok=False) + assertRaises(NotImplementedError, getattr(np, func), mx_test_data, dtype=_np.int8) + assertRaises(TypeError, getattr(np, func), mx_test_data, dtype="abcdefg") + assertRaises(NotImplementedError, getattr(np, func), mx_test_data, casting='safe') + assertRaises(TypeError, getattr(np, func), mx_test_data, casting='mxnet') + assertRaises(NotImplementedError, getattr(np, func), mx_test_data, order='C') + assertRaises(NotImplementedError, getattr(np, func), mx_test_data, order='mxnet') + + funcs = { + 'bitwise_not' : (None, -5, 5), + 'invert' : (None, -5, 5), + } + ndim = random.choice([2, 3, 4]) + shape = random.choice([rand_shape_nd(ndim, dim=3), (1, 0, 2)]) + for shape in [rand_shape_nd(ndim, dim=3), (1, 0, 2)]: + for func, func_data in funcs.items(): + ref_grad, low, high = func_data + check_unary_func(func, ref_grad, shape, low, high) + + @with_seed() @use_np def test_np_binary_funcs(): @@ -3760,11 +3820,7 @@ def get_grad_b(A, X): nrhs = (-1, 0, 1, 2, 3) dtypes = ['float32', 'float64'] for hybridize, shape, dtype, nrh in itertools.product([False, True], shapes, dtypes, nrhs): - rtol = 1e-3 - atol = 1e-5 - if dtype == 'float32': - rtol = 1e-2 - atol = 1e-4 + rtol, atol =1e-2, 1e-4 test_solve = TestSolve() if hybridize: test_solve.hybridize() From 8a3519934f3ee5e9ac9406c2a4edb377af5e8cc7 Mon Sep 17 00:00:00 2001 From: Przemyslaw Tredak Date: Fri, 20 Dec 2019 08:34:36 -0800 Subject: [PATCH 60/62] 
Improve the speed of the pointwise fusion graph pass (#17114) * Debug the long startup time * Optimize backward fusion * Figure out why the fusion pass is called twice * Cleaning * Small optimization --- src/executor/simple_partition_pass.h | 98 ++++++++++++++++++---------- src/imperative/cached_op.cc | 22 ++++--- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/src/executor/simple_partition_pass.h b/src/executor/simple_partition_pass.h index 5b26a4523c13..ea1dcf39b8ba 100644 --- a/src/executor/simple_partition_pass.h +++ b/src/executor/simple_partition_pass.h @@ -102,8 +102,7 @@ class BidirectionalGraph { std::vector> get_subsets(FCompatible is_compatible) { std::vector> subgraphs; std::unordered_set incomp_set; - std::unordered_set all_set(nodes.size()); - std::vector separation_sets; + std::vector> separation_sets; // Check each node for compatibility // and, if it is incompatible, mark nodes // on each side of it as not possible to be @@ -111,48 +110,79 @@ class BidirectionalGraph { for (Node& node : nodes) { if (!is_compatible(node.nnvmptr)) { incomp_set.insert(&node); - std::unordered_set in_graph; - std::unordered_set out_graph; - std::vector dummy_head; - dummy_head.emplace_back(&node); - DFS(dummy_head, false, [&out_graph, &is_compatible](Node* node) { - if (is_compatible(node->nnvmptr)) - out_graph.insert(node); - }); - DFS(dummy_head, true, [&in_graph, is_compatible](Node* node) { - if (is_compatible(node->nnvmptr)) - in_graph.insert(node); - }); - if (!(in_graph.empty() || out_graph.empty())) - separation_sets.push_back(std::make_pair(in_graph, out_graph)); } - all_set.emplace(&node); } - IncompMap incomp_map; - std::unordered_set comp_set; - comp_set.insert(all_set.begin(), all_set.end()); - for (Node* n : incomp_set) { - comp_set.erase(n); + for (Node& node : nodes) { + if (incomp_set.count(&node) != 0) { + // Check if all your inputs are incompatible too. 
+ // If so, then your separation set does not matter, + // because it will covered by the sets of your inputs + bool inside_node = true; + for (Node* input : node.inputs) { + if (incomp_set.count(input) == 0) { + inside_node = false; + } + } + if (!inside_node) { + std::unordered_set in_graph; + std::unordered_set out_graph; + std::vector dummy_head; + dummy_head.emplace_back(&node); + DFS(dummy_head, false, [&out_graph](Node* node) { + out_graph.insert(node); + }); + DFS(dummy_head, true, [&in_graph](Node* node) { + in_graph.insert(node); + }); + separation_sets.push_back(std::make_pair(true, + std::make_pair(in_graph, out_graph))); + } else { + separation_sets.push_back(std::make_pair(false, PairSet())); + } + } else { + separation_sets.push_back(std::make_pair(false, PairSet())); + } } + IncompMap incomp_map; // For each node construct the map of nodes that cannot be in // the same subset - for (Node* n : comp_set) { - for (PairSet p : separation_sets) { - if (p.first.count(n)) { - incomp_map[n].insert(p.second.begin(), p.second.end()); - } else if (p.second.count(n)) { - incomp_map[n].insert(p.first.begin(), p.first.end()); + index_t num_nodes = nodes.size(); + for (index_t i = 0; i < num_nodes; ++i) { + const auto n = &(nodes[i]); + if (incomp_set.count(n) == 0) { + for (index_t j = i + 1; j < num_nodes; ++j) { + const auto& sep_set_pair = separation_sets[j]; + if (sep_set_pair.first && incomp_map[n].count(&nodes[j]) == 0) { + const auto& p = sep_set_pair.second; + if (p.first.count(n)) { + incomp_map[n].insert(p.second.begin(), p.second.end()); + } else if (p.second.count(n)) { + incomp_map[n].insert(p.first.begin(), p.first.end()); + } + } + } + for (index_t j = i - 1; j >= 0; --j) { + const auto& sep_set_pair = separation_sets[j]; + if (sep_set_pair.first && incomp_map[n].count(&nodes[j]) == 0) { + const auto& p = sep_set_pair.second; + if (p.first.count(n)) { + incomp_map[n].insert(p.second.begin(), p.second.end()); + } else if (p.second.count(n)) { + incomp_map[n].insert(p.first.begin(), p.first.end()); + } + } + } + for (Node* incomp_n : incomp_set) { + incomp_map[n].erase(incomp_n); } - } - for (Node* incomp_n : incomp_set) { - incomp_map[n].erase(incomp_n); } } std::unordered_set unused_set; - unused_set.reserve(comp_set.size()); - for (auto& n : comp_set) { - unused_set.insert(n); + for (auto& n : nodes) { + if (incomp_set.count(&n) == 0) { + unused_set.insert(&n); + } } std::unordered_set visited; std::deque stack(outputs.begin(), outputs.end()); diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index ec5a79a2e675..1edd9897ec82 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -1032,17 +1032,19 @@ OpStatePtr CachedOp::Forward( CHECK_EQ(inputs.size(), num_inputs()); Context default_ctx = inputs[0]->ctx(); - auto state_ptr = GetCachedOpState(default_ctx); - auto& state = state_ptr.get_state(); + { + auto state_ptr = GetCachedOpState(default_ctx); + auto& state = state_ptr.get_state(); - const auto& idx = state.info.fwd_graph.indexed_graph(); - for (size_t i = 0; i < inputs.size(); ++i) { - CHECK_EQ(inputs[i]->ctx(), default_ctx) - << "CachedOp requires all inputs to live on the same context. 
But " - << idx[idx.input_nodes()[0]].source->attrs.name - << " is on " << default_ctx << " while " - << idx[idx.input_nodes()[i]].source->attrs.name - << " is on " << inputs[i]->ctx(); + const auto& idx = state.info.fwd_graph.indexed_graph(); + for (size_t i = 0; i < inputs.size(); ++i) { + CHECK_EQ(inputs[i]->ctx(), default_ctx) + << "CachedOp requires all inputs to live on the same context. But " + << idx[idx.input_nodes()[0]].source->attrs.name + << " is on " << default_ctx << " while " + << idx[idx.input_nodes()[i]].source->attrs.name + << " is on " << inputs[i]->ctx(); + } } int prev_bulk_size = Engine::Get()->set_bulk_size(config_.forward_bulk_size); From 615f609da748a771901478cf37e37064763b0d8c Mon Sep 17 00:00:00 2001 From: liuzh91 Date: Sat, 21 Dec 2019 08:31:40 +0800 Subject: [PATCH 61/62] fix parameter names in the estimator api (#17051) --- .../contrib/estimator/batch_processor.py | 4 +-- .../gluon/contrib/estimator/estimator.py | 30 ++++++++--------- .../unittest/test_gluon_batch_processor.py | 4 +-- tests/python/unittest/test_gluon_estimator.py | 32 +++++++++---------- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/python/mxnet/gluon/contrib/estimator/batch_processor.py b/python/mxnet/gluon/contrib/estimator/batch_processor.py index 4985f8c81bf3..aa5adbfdea5f 100644 --- a/python/mxnet/gluon/contrib/estimator/batch_processor.py +++ b/python/mxnet/gluon/contrib/estimator/batch_processor.py @@ -61,8 +61,8 @@ def evaluate_batch(self, estimator, Batch axis to split the validation data into devices. """ data, label = self._get_data_and_label(val_batch, estimator.context, batch_axis) - pred = [estimator.eval_net(x) for x in data] - loss = [estimator.evaluation_loss(y_hat, y) for y_hat, y in zip(pred, label)] + pred = [estimator.val_net(x) for x in data] + loss = [estimator.val_loss(y_hat, y) for y_hat, y in zip(pred, label)] return data, label, pred, loss diff --git a/python/mxnet/gluon/contrib/estimator/estimator.py b/python/mxnet/gluon/contrib/estimator/estimator.py index 09f43151e235..ed8a53d7c3a6 100644 --- a/python/mxnet/gluon/contrib/estimator/estimator.py +++ b/python/mxnet/gluon/contrib/estimator/estimator.py @@ -61,22 +61,19 @@ class Estimator(object): Trainer to apply optimizer on network parameters. context : Context or list of Context Device(s) to run the training on. - evaluation_loss : gluon.loss.loss - Loss (objective) function to calculate during validation. If set evaluation_loss - None, it will use the same loss function as self.loss - eval_net : gluon.Block + val_net : gluon.Block The model used for validation. The validation model does not necessarily belong to the same model class as the training model. But the two models typically share the same architecture. Therefore the validation model can reuse parameters of the training model. - The code example of consruction of eval_net sharing the same network parameters as + The code example of consruction of val_net sharing the same network parameters as the training net is given below: >>> net = _get_train_network() - >>> eval_net = _get_test_network(params=net.collect_params()) + >>> val_net = _get_test_network(params=net.collect_params()) >>> net.initialize(ctx=ctx) - >>> est = Estimator(net, loss, eval_net=eval_net) + >>> est = Estimator(net, loss, val_net=val_net) Proper namespace match is required for weight sharing between two networks. Most networks inheriting :py:class:`Block` can share their parameters correctly. 
An exception is @@ -84,6 +81,9 @@ class Estimator(object): the naming in mxnet Gluon API, please refer to the site (https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/blocks/naming.html) for future information. + val_loss : gluon.loss.loss + Loss (objective) function to calculate during validation. If set val_loss + None, it will use the same loss function as self.loss batch_processor: BatchProcessor BatchProcessor provides customized fit_batch() and evaluate_batch() methods """ @@ -113,8 +113,8 @@ def __init__(self, net, initializer=None, trainer=None, context=None, - evaluation_loss=None, - eval_net=None, + val_net=None, + val_loss=None, batch_processor=None): self.net = net self.loss = self._check_loss(loss) @@ -122,12 +122,12 @@ def __init__(self, net, self._val_metrics = _check_metrics(val_metrics) self._add_default_training_metrics() self._add_validation_metrics() - self.evaluation_loss = self.loss - if evaluation_loss is not None: - self.evaluation_loss = self._check_loss(evaluation_loss) - self.eval_net = self.net - if eval_net is not None: - self.eval_net = eval_net + self.val_loss = self.loss + if val_loss is not None: + self.val_loss = self._check_loss(val_loss) + self.val_net = self.net + if val_net is not None: + self.val_net = val_net self.logger = logging.Logger(name='Estimator', level=logging.INFO) self.logger.addHandler(logging.StreamHandler(sys.stdout)) diff --git a/tests/python/unittest/test_gluon_batch_processor.py b/tests/python/unittest/test_gluon_batch_processor.py index 4bd6f769aa44..8604713fc129 100644 --- a/tests/python/unittest/test_gluon_batch_processor.py +++ b/tests/python/unittest/test_gluon_batch_processor.py @@ -84,7 +84,7 @@ def test_batch_processor_validation(): ctx = mx.cpu() loss = gluon.loss.L2Loss() acc = mx.metric.Accuracy() - evaluation_loss = gluon.loss.L1Loss() + val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) processor = BatchProcessor() trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -93,7 +93,7 @@ def test_batch_processor_validation(): train_metrics=acc, trainer=trainer, context=ctx, - evaluation_loss=evaluation_loss, + val_loss=val_loss, batch_processor=processor) # Input dataloader est.fit(train_data=dataloader, diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index 924dd083bef4..ca61e4b40caa 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -88,7 +88,7 @@ def test_validation(): ctx = mx.cpu() loss = gluon.loss.L2Loss() acc = mx.metric.Accuracy() - evaluation_loss = gluon.loss.L1Loss() + val_loss = gluon.loss.L1Loss() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -96,7 +96,7 @@ def test_validation(): train_metrics=acc, trainer=trainer, context=ctx, - evaluation_loss=evaluation_loss) + val_loss=val_loss) # Input dataloader est.fit(train_data=dataloader, val_data=dataloader, @@ -376,16 +376,16 @@ def test_default_handlers(): assert isinstance(handlers[1], MetricHandler) assert isinstance(handlers[4], LoggingHandler) -def test_eval_net(): - ''' test estimator with a different evaluation net ''' +def test_val_net(): + ''' test estimator with different training and validation networks ''' ''' test weight sharing of sequential networks without namescope ''' net = _get_test_network() - eval_net = _get_test_network(params=net.collect_params()) + val_net = 
_get_test_network(params=net.collect_params()) dataloader, dataiter = _get_test_data() num_epochs = 1 ctx = mx.cpu() loss = gluon.loss.L2Loss() - evaluation_loss = gluon.loss.L2Loss() + val_loss = gluon.loss.L2Loss() acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) @@ -394,8 +394,8 @@ def test_eval_net(): train_metrics=acc, trainer=trainer, context=ctx, - evaluation_loss=evaluation_loss, - eval_net=eval_net) + val_loss=val_loss, + val_net=val_net) with assert_raises(RuntimeError): est.fit(train_data=dataloader, @@ -404,7 +404,7 @@ def test_eval_net(): ''' test weight sharing of sequential networks with namescope ''' net = _get_test_network_with_namescope() - eval_net = _get_test_network_with_namescope(params=net.collect_params()) + val_net = _get_test_network_with_namescope(params=net.collect_params()) net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, @@ -412,8 +412,8 @@ def test_eval_net(): train_metrics=acc, trainer=trainer, context=ctx, - evaluation_loss=evaluation_loss, - eval_net=eval_net) + val_loss=val_loss, + val_net=val_net) est.fit(train_data=dataloader, val_data=dataloader, @@ -422,20 +422,20 @@ def test_eval_net(): ''' test weight sharing of two resnets ''' net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx) net.output = gluon.nn.Dense(10) - eval_net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx) - eval_net.output = gluon.nn.Dense(10, params=net.collect_params()) + val_net = gluon.model_zoo.vision.resnet18_v1(pretrained=False, ctx=ctx) + val_net.output = gluon.nn.Dense(10, params=net.collect_params()) dataset = gluon.data.ArrayDataset(mx.nd.zeros((10, 3, 224, 224)), mx.nd.zeros((10, 10))) dataloader = gluon.data.DataLoader(dataset=dataset, batch_size=5) net.initialize(ctx=ctx) - eval_net.initialize(ctx=ctx) + val_net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) est = Estimator(net=net, loss=loss, train_metrics=acc, trainer=trainer, context=ctx, - evaluation_loss=evaluation_loss, - eval_net=eval_net) + val_loss=val_loss, + val_net=val_net) est.fit(train_data=dataloader, val_data=dataloader, From d000c3baa32171964f1b8ed3780472af0e05be1a Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Sat, 21 Dec 2019 10:31:27 +0800 Subject: [PATCH 62/62] [Numpy ]Modify np.random.shuffle to enable inplace by default (#17133) * shuffle done * fix dodstring --- python/mxnet/ndarray/numpy/random.py | 38 +++++++++++++++++++++++++++- python/mxnet/numpy/random.py | 38 +++++++++++++++++++++++++++- python/mxnet/symbol/numpy/random.py | 38 +++++++++++++++++++++++++++- src/operator/random/shuffle_op.cc | 2 +- 4 files changed, 112 insertions(+), 4 deletions(-) diff --git a/python/mxnet/ndarray/numpy/random.py b/python/mxnet/ndarray/numpy/random.py index 9d1a6f9119ee..e67c766c6bdf 100644 --- a/python/mxnet/ndarray/numpy/random.py +++ b/python/mxnet/ndarray/numpy/random.py @@ -23,7 +23,7 @@ from ..ndarray import NDArray -__all__ = ['randint', 'uniform', 'normal', "choice", "rand", "multinomial"] +__all__ = ['randint', 'uniform', 'normal', "choice", "rand", "multinomial", "shuffle"] def randint(low, high=None, size=None, dtype=None, ctx=None, out=None): @@ -344,3 +344,39 @@ def rand(*size, **kwargs): for s in size: output_shape += (s,) return uniform(0, 1, size=output_shape, **kwargs) + + +def shuffle(x): + """ + Modify a sequence in-place by shuffling its contents. 
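The in-place behaviour added by this commit is not specific to shuffling; it comes from MXNet's general out= convention, under which an operator writes its result into the array passed as out, and the new front ends below simply pass the input itself as out. A standalone illustration of the same idiom through the legacy ndarray API, which fronts the same backend operator (the array contents are just an example):

    import mxnet as mx

    arr = mx.nd.arange(6)
    shuffled = mx.nd.random.shuffle(arr)   # default behaviour: a shuffled copy, arr is untouched
    mx.nd.random.shuffle(arr, out=arr)     # out=arr writes the permutation back into arr itself

The numpy-compatible shuffle defined here follows NumPy instead: it mutates its argument and returns None, as the docstring below spells out.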
+ + This function only shuffles the array along the first axis of a + multi-dimensional array. The order of sub-arrays is changed but + their contents remain the same. + + Parameters + ---------- + x: ndarray + The array or list to be shuffled. + + Returns + ------- + None + + Examples + -------- + >>> arr = np.arange(10) + >>> np.random.shuffle(arr) + >>> arr + array([5., 1., 0., 6., 7., 3., 9., 8., 4., 2.]) # random + + Multi-dimensional arrays are only shuffled along the first axis: + + >>> arr = np.arange(9).reshape((3, 3)) + >>> np.random.shuffle(arr) + >>> arr + array([[6., 7., 8.], # random + [3., 4., 5.], + [0., 1., 2.]]) + """ + _npi.shuffle(x, out=x) diff --git a/python/mxnet/numpy/random.py b/python/mxnet/numpy/random.py index 1cad4a55c466..ebc24de63282 100644 --- a/python/mxnet/numpy/random.py +++ b/python/mxnet/numpy/random.py @@ -20,7 +20,7 @@ from __future__ import absolute_import from ..ndarray import numpy as _mx_nd_np -__all__ = ["randint", "uniform", "normal", "choice", "rand", "multinomial"] +__all__ = ["randint", "uniform", "normal", "choice", "rand", "multinomial", "shuffle"] def randint(low, high=None, size=None, dtype=None, ctx=None, out=None): @@ -321,3 +321,39 @@ def rand(*size, **kwargs): for s in size: output_shape += (s,) return _mx_nd_np.random.uniform(0, 1, size=output_shape, **kwargs) + + +def shuffle(x): + """ + Modify a sequence in-place by shuffling its contents. + + This function only shuffles the array along the first axis of a + multi-dimensional array. The order of sub-arrays is changed but + their contents remain the same. + + Parameters + ---------- + x: ndarray + The array or list to be shuffled. + + Returns + ------- + None + + Examples + -------- + >>> arr = np.arange(10) + >>> np.random.shuffle(arr) + >>> arr + array([5., 1., 0., 6., 7., 3., 9., 8., 4., 2.]) # random + + Multi-dimensional arrays are only shuffled along the first axis: + + >>> arr = np.arange(9).reshape((3, 3)) + >>> np.random.shuffle(arr) + >>> arr + array([[6., 7., 8.], # random + [3., 4., 5.], + [0., 1., 2.]]) + """ + _mx_nd_np.random.shuffle(x) diff --git a/python/mxnet/symbol/numpy/random.py b/python/mxnet/symbol/numpy/random.py index 48bccb64a2b4..94c29f407acc 100644 --- a/python/mxnet/symbol/numpy/random.py +++ b/python/mxnet/symbol/numpy/random.py @@ -21,7 +21,7 @@ from ...context import current_context from . import _internal as _npi -__all__ = ['randint', 'uniform', 'normal', 'rand'] +__all__ = ['randint', 'uniform', 'normal', 'rand', 'shuffle'] def randint(low, high=None, size=None, dtype=None, ctx=None, out=None): @@ -288,3 +288,39 @@ def choice(a, size=None, replace=True, p=None, ctx=None, out=None): return _npi.choice(a=a, size=size, replace=replace, ctx=ctx, weighted=False, out=out) else: return _npi.choice(p, a=a, size=size, replace=replace, ctx=ctx, weighted=True, out=out) + + +def shuffle(x): + """ + Modify a sequence in-place by shuffling its contents. + + This function only shuffles the array along the first axis of a + multi-dimensional array. The order of sub-arrays is changed but + their contents remain the same. + + Parameters + ---------- + x: _Symbol + The array or list to be shuffled. 
+ + Returns + ------- + None + + Examples + -------- + >>> arr = np.arange(10) + >>> np.random.shuffle(arr) + >>> arr + array([5., 1., 0., 6., 7., 3., 9., 8., 4., 2.]) # random + + Multi-dimensional arrays are only shuffled along the first axis: + + >>> arr = np.arange(9).reshape((3, 3)) + >>> np.random.shuffle(arr) + >>> arr + array([[6., 7., 8.], # random + [3., 4., 5.], + [0., 1., 2.]]) + """ + _npi.shuffle(x, out=x) diff --git a/src/operator/random/shuffle_op.cc b/src/operator/random/shuffle_op.cc index 86797c136bab..0f64fbc51449 100644 --- a/src/operator/random/shuffle_op.cc +++ b/src/operator/random/shuffle_op.cc @@ -122,7 +122,7 @@ void ShuffleForwardCPU(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(_shuffle) .add_alias("shuffle") -.add_alias("_np__random_shuffle") +.add_alias("_npi_shuffle") .describe(R"code(Randomly shuffle the elements. This shuffles the array along the first axis.
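Because the numpy front ends above now mutate their argument, callers that still need the original ordering should copy before shuffling. A short usage sketch against the new API (the shape and the npx.set_np() call are just one way to set this up):

    from mxnet import np, npx
    npx.set_np()                   # switch on numpy-compatible semantics

    arr = np.arange(12).reshape(4, 3)
    keep = arr.copy()              # keep the original; shuffle now works in place
    np.random.shuffle(arr)         # returns None, permutes only the first axis of arr
    # Every row of `keep` still appears, intact, somewhere in `arr`.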