diff --git a/example/deep-embedded-clustering/autoencoder.py b/example/deep-embedded-clustering/autoencoder.py
new file mode 100644
index 000000000000..096f04529c3b
--- /dev/null
+++ b/example/deep-embedded-clustering/autoencoder.py
@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: disable=missing-docstring, arguments-differ
+from __future__ import print_function
+
+import logging
+
+import mxnet as mx
+import numpy as np
+import model
+from solver import Solver, Monitor
+
+
+class AutoEncoderModel(model.MXModel):
+    def setup(self, dims, sparseness_penalty=None, pt_dropout=None,
+              ft_dropout=None, input_act=None, internal_act='relu', output_act=None):
+        self.N = len(dims) - 1
+        self.dims = dims
+        self.stacks = []
+        self.pt_dropout = pt_dropout
+        self.ft_dropout = ft_dropout
+        self.input_act = input_act
+        self.internal_act = internal_act
+        self.output_act = output_act
+
+        self.data = mx.symbol.Variable('data')
+        for i in range(self.N):
+            if i == 0:
+                decoder_act = input_act
+                idropout = None
+            else:
+                decoder_act = internal_act
+                idropout = pt_dropout
+            if i == self.N-1:
+                encoder_act = output_act
+                odropout = None
+            else:
+                encoder_act = internal_act
+                odropout = pt_dropout
+            istack, iargs, iargs_grad, iargs_mult, iauxs = self.make_stack(
+                i, self.data, dims[i], dims[i+1], sparseness_penalty,
+                idropout, odropout, encoder_act, decoder_act
+            )
+            self.stacks.append(istack)
+            self.args.update(iargs)
+            self.args_grad.update(iargs_grad)
+            self.args_mult.update(iargs_mult)
+            self.auxs.update(iauxs)
+        self.encoder, self.internals = self.make_encoder(
+            self.data, dims, sparseness_penalty, ft_dropout, internal_act, output_act)
+        self.decoder = self.make_decoder(
+            self.encoder, dims, sparseness_penalty, ft_dropout, internal_act, input_act)
+        if input_act == 'softmax':
+            self.loss = self.decoder
+        else:
+            self.loss = mx.symbol.LinearRegressionOutput(data=self.decoder, label=self.data)
+
+    def make_stack(self, istack, data, num_input, num_hidden, sparseness_penalty=None,
+                   idropout=None, odropout=None, encoder_act='relu', decoder_act='relu'):
+        x = data
+        if idropout:
+            x = mx.symbol.Dropout(data=x, p=idropout)
+        x = mx.symbol.FullyConnected(name='encoder_%d'%istack, data=x, num_hidden=num_hidden)
+        if encoder_act:
+            x = mx.symbol.Activation(data=x, act_type=encoder_act)
+            if encoder_act == 'sigmoid' and sparseness_penalty:
+                x = mx.symbol.IdentityAttachKLSparseReg(
+                    data=x, name='sparse_encoder_%d' % istack, penalty=sparseness_penalty)
+        if odropout:
+            x = mx.symbol.Dropout(data=x, p=odropout)
+        x = mx.symbol.FullyConnected(name='decoder_%d'%istack, data=x, num_hidden=num_input)
+        if decoder_act == 'softmax':
+            x = mx.symbol.Softmax(data=x, label=data, prob_label=True, act_type=decoder_act)
+        elif decoder_act:
+            x = mx.symbol.Activation(data=x, act_type=decoder_act)
+            if decoder_act == 'sigmoid' and sparseness_penalty:
+                x = mx.symbol.IdentityAttachKLSparseReg(
+                    data=x, name='sparse_decoder_%d' % istack, penalty=sparseness_penalty)
+            x = mx.symbol.LinearRegressionOutput(data=x, label=data)
+        else:
+            x = mx.symbol.LinearRegressionOutput(data=x, label=data)
+
+        args = {'encoder_%d_weight'%istack: mx.nd.empty((num_hidden, num_input), self.xpu),
+                'encoder_%d_bias'%istack: mx.nd.empty((num_hidden,), self.xpu),
+                'decoder_%d_weight'%istack: mx.nd.empty((num_input, num_hidden), self.xpu),
+                'decoder_%d_bias'%istack: mx.nd.empty((num_input,), self.xpu),}
+        args_grad = {'encoder_%d_weight'%istack: mx.nd.empty((num_hidden, num_input), self.xpu),
+                     'encoder_%d_bias'%istack: mx.nd.empty((num_hidden,), self.xpu),
+                     'decoder_%d_weight'%istack: mx.nd.empty((num_input, num_hidden), self.xpu),
+                     'decoder_%d_bias'%istack: mx.nd.empty((num_input,), self.xpu),}
+        args_mult = {'encoder_%d_weight'%istack: 1.0,
+                     'encoder_%d_bias'%istack: 2.0,
+                     'decoder_%d_weight'%istack: 1.0,
+                     'decoder_%d_bias'%istack: 2.0,}
+        auxs = {}
+        if encoder_act == 'sigmoid' and sparseness_penalty:
+            auxs['sparse_encoder_%d_moving_avg' % istack] = mx.nd.ones(num_hidden, self.xpu) * 0.5
+        if decoder_act == 'sigmoid' and sparseness_penalty:
+            auxs['sparse_decoder_%d_moving_avg' % istack] = mx.nd.ones(num_input, self.xpu) * 0.5
+        init = mx.initializer.Uniform(0.07)
+        for k, v in args.items():
+            init(mx.initializer.InitDesc(k), v)
+
+        return x, args, args_grad, args_mult, auxs
+
+    def make_encoder(self, data, dims, sparseness_penalty=None, dropout=None, internal_act='relu',
+                     output_act=None):
+        x = data
+        internals = []
+        N = len(dims) - 1
+        for i in range(N):
+            x = mx.symbol.FullyConnected(name='encoder_%d'%i, data=x, num_hidden=dims[i+1])
+            if internal_act and i < N-1:
+                x = mx.symbol.Activation(data=x, act_type=internal_act)
+                if internal_act == 'sigmoid' and sparseness_penalty:
+                    x = mx.symbol.IdentityAttachKLSparseReg(
+                        data=x, name='sparse_encoder_%d' % i, penalty=sparseness_penalty)
+            elif output_act and i == N-1:
+                x = mx.symbol.Activation(data=x, act_type=output_act)
+                if output_act == 'sigmoid' and sparseness_penalty:
+                    x = mx.symbol.IdentityAttachKLSparseReg(
+                        data=x, name='sparse_encoder_%d' % i, penalty=sparseness_penalty)
+            if dropout:
+                x = mx.symbol.Dropout(data=x, p=dropout)
+            internals.append(x)
+        return x, internals
+
+    def make_decoder(self, feature, dims, sparseness_penalty=None, dropout=None,
+                     internal_act='relu', input_act=None):
+        x = feature
+        N = len(dims) - 1
+        for i in reversed(range(N)):
+            x = mx.symbol.FullyConnected(name='decoder_%d'%i, data=x, num_hidden=dims[i])
+            if internal_act and i > 0:
+                x = mx.symbol.Activation(data=x, act_type=internal_act)
+                if internal_act == 'sigmoid' and sparseness_penalty:
+                    x = mx.symbol.IdentityAttachKLSparseReg(
+                        data=x, name='sparse_decoder_%d' % i, penalty=sparseness_penalty)
+            elif input_act and i == 0:
+                x = mx.symbol.Activation(data=x, act_type=input_act)
+                if input_act == 'sigmoid' and sparseness_penalty:
+                    x = mx.symbol.IdentityAttachKLSparseReg(
+                        data=x, name='sparse_decoder_%d' % i, penalty=sparseness_penalty)
+            if dropout and i > 0:
+                x = mx.symbol.Dropout(data=x, p=dropout)
+        return x
+
+    def layerwise_pretrain(self, X, batch_size, n_iter, optimizer, l_rate, decay,
+                           lr_scheduler=None, print_every=1000):
+        def l2_norm(label, pred):
+            return np.mean(np.square(label-pred))/2.0
+        solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate,
+                        lr_scheduler=lr_scheduler)
+        solver.set_metric(mx.metric.CustomMetric(l2_norm))
+        solver.set_monitor(Monitor(print_every))
+        data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True,
+                                      last_batch_handle='roll_over')
+        for i in range(self.N):
+            if i == 0:
+                data_iter_i = data_iter
+            else:
+                X_i = list(model.extract_feature(
+                    self.internals[i-1], self.args, self.auxs, data_iter, X.shape[0],
+                    self.xpu).values())[0]
+                data_iter_i = mx.io.NDArrayIter({'data': X_i}, batch_size=batch_size,
+                                                last_batch_handle='roll_over')
+            logging.info('Pre-training layer %d...', i)
+            solver.solve(self.xpu, self.stacks[i], self.args, self.args_grad, self.auxs,
+                         data_iter_i, 0, n_iter, {}, False)
+
+    def finetune(self, X, batch_size, n_iter, optimizer, l_rate, decay, lr_scheduler=None,
+                 print_every=1000):
+        def l2_norm(label, pred):
+            return np.mean(np.square(label-pred))/2.0
+        solver = Solver(optimizer, momentum=0.9, wd=decay, learning_rate=l_rate,
+                        lr_scheduler=lr_scheduler)
+        solver.set_metric(mx.metric.CustomMetric(l2_norm))
+        solver.set_monitor(Monitor(print_every))
+        data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=True,
+                                      last_batch_handle='roll_over')
+        logging.info('Fine tuning...')
+        solver.solve(self.xpu, self.loss, self.args, self.args_grad, self.auxs, data_iter,
+                     0, n_iter, {}, False)
+
+    def eval(self, X):
+        batch_size = 100
+        data_iter = mx.io.NDArrayIter({'data': X}, batch_size=batch_size, shuffle=False,
+                                      last_batch_handle='pad')
+        Y = list(model.extract_feature(
+            self.loss, self.args, self.auxs, data_iter, X.shape[0], self.xpu).values())[0]
+        return np.mean(np.square(Y-X))/2.0
\ No newline at end of file
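Editor's note: `AutoEncoderModel` builds one greedy pretraining stack per layer plus an end-to-end encoder/decoder pair, so the intended flow is to pretrain each stack on the features produced by the layers below it, then fine-tune the full reconstruction path. A minimal usage sketch follows; it is not part of the patch, and the layer sizes and hyperparameters are illustrative (loosely following the DEC MNIST setup), not values this diff prescribes:

```python
# Illustrative sketch only; assumes autoencoder.py and data.py from this
# patch are importable from the current directory.
import mxnet as mx
from autoencoder import AutoEncoderModel
from data import get_mnist

X, _ = get_mnist()
ae = AutoEncoderModel(mx.cpu(), [X.shape[1], 500, 500, 2000, 10],
                      pt_dropout=0.2, internal_act='relu', output_act='relu')
ae.layerwise_pretrain(X, batch_size=256, n_iter=50000, optimizer='sgd',
                      l_rate=0.1, decay=0.0)
ae.finetune(X, batch_size=256, n_iter=100000, optimizer='sgd',
            l_rate=0.1, decay=0.0)
print('reconstruction loss:', ae.eval(X))
ae.save('mnist_autoencoder.arg')  # pickled parameter dict via MXModel.save
```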
diff --git a/example/deep-embedded-clustering/data.py b/example/deep-embedded-clustering/data.py
new file mode 100644
index 000000000000..9fd472e6a8b1
--- /dev/null
+++ b/example/deep-embedded-clustering/data.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: disable=missing-docstring
+from __future__ import print_function
+
+import os
+import numpy as np
+from sklearn.datasets import fetch_mldata
+
+
+def get_mnist():
+    """ Gets MNIST dataset """
+
+    np.random.seed(1234) # set seed for deterministic ordering
+    data_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+    data_path = os.path.join(data_path, '../../data')
+    mnist = fetch_mldata('MNIST original', data_home=data_path)
+    p = np.random.permutation(mnist.data.shape[0])
+    X = mnist.data[p].astype(np.float32)*0.02
+    Y = mnist.target[p]
+    return X, Y
+
+
+
+
diff --git a/example/deep-embedded-clustering/model.py b/example/deep-embedded-clustering/model.py
new file mode 100644
index 000000000000..777634e3cf88
--- /dev/null
+++ b/example/deep-embedded-clustering/model.py
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: disable=missing-docstring
+from __future__ import print_function
+
+import mxnet as mx
+import numpy as np
+try:
+    import cPickle as pickle
+except ModuleNotFoundError:
+    import pickle
+
+
+def extract_feature(sym, args, auxs, data_iter, N, xpu=mx.cpu()):
+    input_buffs = [mx.nd.empty(shape, ctx=xpu) for k, shape in data_iter.provide_data]
+    input_names = [k for k, shape in data_iter.provide_data]
+    args = dict(args, **dict(zip(input_names, input_buffs)))
+    exe = sym.bind(xpu, args=args, aux_states=auxs)
+    outputs = [[] for _ in exe.outputs]
+    output_buffs = None
+
+    data_iter.hard_reset()
+    for batch in data_iter:
+        for data, buff in zip(batch.data, input_buffs):
+            data.copyto(buff)
+        exe.forward(is_train=False)
+        if output_buffs is None:
+            output_buffs = [mx.nd.empty(i.shape, ctx=mx.cpu()) for i in exe.outputs]
+        else:
+            for out, buff in zip(outputs, output_buffs):
+                out.append(buff.asnumpy())
+        for out, buff in zip(exe.outputs, output_buffs):
+            out.copyto(buff)
+    for out, buff in zip(outputs, output_buffs):
+        out.append(buff.asnumpy())
+    outputs = [np.concatenate(i, axis=0)[:N] for i in outputs]
+    return dict(zip(sym.list_outputs(), outputs))
+
+
+class MXModel(object):
+    def __init__(self, xpu=mx.cpu(), *args, **kwargs):
+        self.xpu = xpu
+        self.loss = None
+        self.args = {}
+        self.args_grad = {}
+        self.args_mult = {}
+        self.auxs = {}
+        self.setup(*args, **kwargs)
+
+    def save(self, fname):
+        args_save = {key: v.asnumpy() for key, v in self.args.items()}
+        with open(fname, 'wb') as fout:
+            pickle.dump(args_save, fout)
+
+    def load(self, fname):
+        with open(fname, 'rb') as fin:
+            args_save = pickle.load(fin)
+            for key, v in args_save.items():
+                if key in self.args:
+                    self.args[key][:] = v
+
+    def setup(self, *args, **kwargs):
+        raise NotImplementedError("must override this")
\ No newline at end of file
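Editor's note: `extract_feature` is the glue used by both `layerwise_pretrain` and `eval`. It binds a symbol against the model's parameters, streams an iterator through it with `is_train=False`, double-buffers `exe.outputs` through CPU copies so the host transfer of one batch overlaps the forward pass of the next, and trims the concatenated result back to `N` rows to undo iterator padding or rollover. A hedged sketch of calling it on its own; `ae` (a trained `AutoEncoderModel`) and the matrix `X` are assumptions for illustration:

```python
# Illustrative sketch only: pull bottleneck features out of a trained model.
import mxnet as mx
import model

data_iter = mx.io.NDArrayIter({'data': X}, batch_size=100, shuffle=False,
                              last_batch_handle='pad')
# ae.encoder has a single output, so the returned dict has one entry whose
# value is an (n_samples, dims[-1]) array of embeddings.
embedding = list(model.extract_feature(ae.encoder, ae.args, ae.auxs,
                                       data_iter, X.shape[0], ae.xpu).values())[0]
```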
diff --git a/example/deep-embedded-clustering/solver.py b/example/deep-embedded-clustering/solver.py
new file mode 100644
index 000000000000..567c78eeb06c
--- /dev/null
+++ b/example/deep-embedded-clustering/solver.py
@@ -0,0 +1,151 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# pylint: disable=missing-docstring
+from __future__ import print_function
+
+import logging
+
+import mxnet as mx
+import numpy as np
+
+
+class Monitor(object):
+    def __init__(self, interval, level=logging.DEBUG, stat=None):
+        self.interval = interval
+        self.level = level
+        if stat is None:
+            def mean_abs(x):
+                return np.fabs(x).mean()
+            self.stat = mean_abs
+        else:
+            self.stat = stat
+
+    def forward_end(self, i, internals):
+        if i % self.interval == 0 and logging.getLogger().isEnabledFor(self.level):
+            for key in sorted(internals.keys()):
+                arr = internals[key]
+                logging.log(self.level, 'Iter:%d param:%s\t\tstat(%s):%s',
+                            i, key, self.stat.__name__, str(self.stat(arr.asnumpy())))
+
+    def backward_end(self, i, weights, grads, metric=None):
+        if i % self.interval == 0 and logging.getLogger().isEnabledFor(self.level):
+            for key in sorted(grads.keys()):
+                arr = grads[key]
+                logging.log(self.level, 'Iter:%d param:%s\t\tstat(%s):%s\t\tgrad_stat:%s',
+                            i, key, self.stat.__name__,
+                            str(self.stat(weights[key].asnumpy())), str(self.stat(arr.asnumpy())))
+        if i % self.interval == 0 and metric is not None:
+            logging.log(logging.INFO, 'Iter:%d metric:%f', i, metric.get()[1])
+            metric.reset()
+
+
+class Solver(object):
+    def __init__(self, optimizer, **kwargs):
+        if isinstance(optimizer, str):
+            self.optimizer = mx.optimizer.create(optimizer, **kwargs)
+        else:
+            self.optimizer = optimizer
+        self.updater = mx.optimizer.get_updater(self.optimizer)
+        self.monitor = None
+        self.metric = None
+        self.iter_end_callback = None
+        self.iter_start_callback = None
+
+    def set_metric(self, metric):
+        self.metric = metric
+
+    def set_monitor(self, monitor):
+        self.monitor = monitor
+
+    def set_iter_end_callback(self, callback):
+        self.iter_end_callback = callback
+
+    def set_iter_start_callback(self, callback):
+        self.iter_start_callback = callback
+
+    def solve(self, xpu, sym, args, args_grad, auxs,
+              data_iter, begin_iter, end_iter, args_lrmult=None, debug=False):
+        if args_lrmult is None:
+            args_lrmult = dict()
+        input_desc = data_iter.provide_data + data_iter.provide_label
+        input_names = [k for k, shape in input_desc]
+        input_buffs = [mx.nd.empty(shape, ctx=xpu) for k, shape in input_desc]
+        args = dict(args, **dict(zip(input_names, input_buffs)))
+
+        output_names = sym.list_outputs()
+        if debug:
+            sym_group = []
+            for x in sym.get_internals():
+                if x.name not in args:
+                    if x.name not in output_names:
+                        x = mx.symbol.BlockGrad(x, name=x.name)
+                    sym_group.append(x)
+            sym = mx.symbol.Group(sym_group)
+        exe = sym.bind(xpu, args=args, args_grad=args_grad, aux_states=auxs)
+
+        assert len(sym.list_arguments()) == len(exe.grad_arrays)
+        update_dict = {
+            name: nd for name, nd in zip(sym.list_arguments(), exe.grad_arrays) if nd is not None
+        }
+        batch_size = input_buffs[0].shape[0]
+        self.optimizer.rescale_grad = 1.0/batch_size
+        self.optimizer.set_lr_mult(args_lrmult)
+
+        output_dict = {}
+        output_buff = {}
+        internal_dict = dict(zip(input_names, input_buffs))
+        for key, arr in zip(sym.list_outputs(), exe.outputs):
+            if key in output_names:
+                output_dict[key] = arr
+                output_buff[key] = mx.nd.empty(arr.shape, ctx=mx.cpu())
+            else:
+                internal_dict[key] = arr
+
+        data_iter.reset()
+        for i in range(begin_iter, end_iter):
+            if self.iter_start_callback is not None:
+                if self.iter_start_callback(i):
+                    return
+            try:
+                batch = data_iter.next()
+            except StopIteration:
+                data_iter.reset()
+                batch = data_iter.next()
+            for data, buff in zip(batch.data+batch.label, input_buffs):
+                data.copyto(buff)
+            exe.forward(is_train=True)
+            if self.monitor is not None:
+                self.monitor.forward_end(i, internal_dict)
+            for key in output_dict:
+                output_dict[key].copyto(output_buff[key])
+
+            exe.backward()
+            for key, arr in update_dict.items():
+                self.updater(key, arr, args[key])
+
+            if self.metric is not None:
+                self.metric.update([input_buffs[-1]],
+                                   [output_buff[output_names[0]]])
+
+            if self.monitor is not None:
+                self.monitor.backward_end(i, args, update_dict, self.metric)
+
+            if self.iter_end_callback is not None:
+                if self.iter_end_callback(i):
+                    return
+        exe.outputs[0].wait_to_read()
\ No newline at end of file
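Editor's note: `Solver` is a hand-rolled training loop over a manually bound executor. The caller owns the parameter and gradient NDArrays, each batch is copied into pre-bound input buffers, and gradients are rescaled by batch size before the optimizer's updater is applied. A standalone sketch of driving it directly on a toy regression symbol; everything below (the network, data, and hyperparameters) is invented for illustration and is not part of the patch:

```python
# Illustrative sketch only: Solver/Monitor on a one-layer regression net.
import logging
import mxnet as mx
import numpy as np
from solver import Solver, Monitor

logging.basicConfig(level=logging.DEBUG)
xpu = mx.cpu()

data = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(name='fc', data=data, num_hidden=1)
net = mx.symbol.LinearRegressionOutput(data=net, label=mx.symbol.Variable('label'))

# Caller-owned parameters and gradients, initialized the same way
# AutoEncoderModel.make_stack does it.
args = {'fc_weight': mx.nd.empty((1, 10), xpu), 'fc_bias': mx.nd.empty((1,), xpu)}
args_grad = {k: mx.nd.empty(v.shape, xpu) for k, v in args.items()}
init = mx.initializer.Uniform(0.07)
for k, v in args.items():
    init(mx.initializer.InitDesc(k), v)

X = np.random.randn(1000, 10).astype(np.float32)
y = X.sum(axis=1, keepdims=True)
data_iter = mx.io.NDArrayIter({'data': X}, {'label': y}, batch_size=50,
                              shuffle=True, last_batch_handle='roll_over')

def l2_norm(label, pred):
    return np.mean(np.square(label - pred)) / 2.0

solver = Solver('sgd', momentum=0.9, wd=0.0, learning_rate=0.01)
solver.set_metric(mx.metric.CustomMetric(l2_norm))
solver.set_monitor(Monitor(100, level=logging.INFO))
# Positional auxs={} since this toy symbol has no auxiliary states.
solver.solve(xpu, net, args, args_grad, {}, data_iter, 0, 1000)
```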